diff --git a/README.md b/README.md index ad7acd7..7808759 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # OptiMHC -**An optimum rescoring pipeline for immunopeptidomics data that significantly enhances peptide identification performance.** +**An optimized rescoring pipeline for immunopeptidomics data that significantly enhances peptide identification performance.** OptiMHC integrates multiple rescoring features with machine learning-based rescoring to maximize the number of confidently identified peptides from mass spectrometry experiments. @@ -240,12 +240,6 @@ Here are some examples: -### GUI (Experimental) - -```bash -optimhc gui -``` - ### Full CLI Help ```bash diff --git a/docs/api/core.md b/docs/api/core.md index 744a49c..e5aa4c1 100644 --- a/docs/api/core.md +++ b/docs/api/core.md @@ -24,12 +24,6 @@ options: members: true -## Logging - -::: optimhc.core.logging_helper - options: - members: true - ## Utilities ::: optimhc.utils diff --git a/docs/api/features.md b/docs/api/features.md index 99f852a..6b92874 100644 --- a/docs/api/features.md +++ b/docs/api/features.md @@ -2,54 +2,54 @@ ## Base Class -::: optimhc.feature_generator.base_feature_generator +::: optimhc.feature.base_feature_generator options: members: true ## Basic -::: optimhc.feature_generator.basic +::: optimhc.feature.basic options: members: true ## Spectral Similarity -::: optimhc.feature_generator.spectral_similarity +::: optimhc.feature.spectral_similarity options: members: true ## DeepLC -::: optimhc.feature_generator.DeepLC +::: optimhc.feature.deeplc options: members: true ## Overlapping Peptide -::: optimhc.feature_generator.overlapping_peptide +::: optimhc.feature.overlapping_peptide options: members: true ## PWM -::: optimhc.feature_generator.PWM +::: optimhc.feature.pwm options: members: true ## MHCflurry -::: optimhc.feature_generator.mhcflurry +::: optimhc.feature.mhcflurry options: members: true ## NetMHCpan -::: optimhc.feature_generator.netMHCpan +::: 
optimhc.feature.netmhcpan options: members: true ## NetMHCIIpan -::: optimhc.feature_generator.netMHCIIpan +::: optimhc.feature.netmhciipan options: members: true diff --git a/docs/development/index.md b/docs/development/index.md index cfe6391..63e4f42 100644 --- a/docs/development/index.md +++ b/docs/development/index.md @@ -20,7 +20,7 @@ uv sync --locked --group dev Alternatively, using pip: ```bash -pip install -e ".[gui]" +pip install -e . pip install pytest ruff pre-commit ``` @@ -42,7 +42,7 @@ uv run ruff format . # Format uv run ruff format --check . # Check without modifying ``` -Configuration: `line-length = 99`, rules `["E", "F", "I"]` (pycodestyle errors, pyflakes, isort). `E501` (line too long) is ignored. Ruff excludes `docs/`, `examples/`, and `optimhc/gui/`. +Configuration: `line-length = 99`, rules `["E", "F", "I"]` (pycodestyle errors, pyflakes, isort). `E501` (line too long) is ignored. Ruff excludes `docs/` and `examples/`. ## Pre-commit Hooks diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 50611e4..926650d 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -71,7 +71,4 @@ netMHCIIpan -v # Should print the version number optimhc --help ``` -You should see the available commands: `pipeline`, `experiment`, and `gui`. - -!!! note "GUI" - The Streamlit GUI is currently under development. +You should see the available commands: `pipeline` and `experiment`. 
diff --git a/docs/index.md b/docs/index.md index 73ce52c..041865c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # OptiMHC -**An optimum rescoring pipeline for immunopeptidomics data that significantly enhances peptide identification performance.** +**An optimized rescoring pipeline for immunopeptidomics data that significantly enhances peptide identification performance.** OptiMHC integrates multiple rescoring features with machine learning-based rescoring to maximize the number of confidently identified peptides from mass spectrometry experiments. diff --git a/optimhc/cli.py b/optimhc/cli.py index 2f7c589..fae3e67 100644 --- a/optimhc/cli.py +++ b/optimhc/cli.py @@ -1,27 +1,55 @@ -import importlib.util import json import logging -import os -import sys import click +from optimhc import __version__ from optimhc.core import Pipeline from optimhc.core.config import Config -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s %(name)s: %(message)s", - handlers=[logging.StreamHandler()], -) - logger = logging.getLogger(__name__) +LOG_MAPPING = { + "DEBUG": logging.DEBUG, + "INFO": logging.INFO, + "WARNING": logging.WARNING, + "ERROR": logging.ERROR, + "CRITICAL": logging.CRITICAL, +} + + +def setup_logging(level: str = "INFO") -> None: + if level not in LOG_MAPPING: + raise ValueError(f"Invalid log level: {level}") + logging.basicConfig( + level=LOG_MAPPING[level], + format="%(asctime)s [%(levelname)s] %(name)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + force=True, + ) + + # mhctools attaches its own INFO-level handlers to its loggers + # https://github.com/openvax/mhctools/blob/master/mhctools/logging.conf + for name in [ + "mhctools", + "mhctools.base_commandline_predictor", + "mhctools.netmhc", + "mhctools.netmhciipan", + "mhctools.process_helpers", + "mhctools.cleanup_context", + ]: + lg = logging.getLogger(name) + lg.handlers.clear() + lg.disabled = True + lg.propagate = False + lg.setLevel(logging.CRITICAL) + 
@click.group() +@click.version_option(version=__version__, prog_name="optimhc") def cli(): """ - optiMHC - A high-performance rescoring pipeline for immunopeptidomics data. + OptiMHC - An optimized rescoring pipeline for immunopeptidomics data. """ pass @@ -106,13 +134,8 @@ def pipeline( model, ): """Run the optiMHC pipeline with the specified configuration.""" - # Load configuration - if config: - pipeline_config = Config(config) - else: - pipeline_config = Config() + pipeline_config = Config(config) if config else Config() - # Override with command-line parameters if inputtype: pipeline_config["inputType"] = inputtype if inputfile: @@ -143,10 +166,9 @@ def pipeline( if model: pipeline_config["rescore"]["model"] = model - # Run pipeline + setup_logging(pipeline_config["logLevel"]) pipeline_config.validate() - pipeline = Pipeline(pipeline_config) - pipeline.run() + Pipeline(pipeline_config).run() @cli.command() @@ -158,63 +180,10 @@ def pipeline( ) def experiment(config): """Run multiple experiments with different feature combinations.""" - # Load configuration pipeline_config = Config(config) + setup_logging(pipeline_config["logLevel"]) - # Run experiments - pipeline = Pipeline(pipeline_config) - pipeline.run_experiments() - - -@cli.command() -def gui(): - """Launch the optiMHC GUI.""" - if importlib.util.find_spec("streamlit") is None: - print("Error: Streamlit is not installed. 
Install GUI dependencies with:") - print("pip install optimhc[gui]") - return - - import subprocess - - # Get the path to the GUI app - gui_path = os.path.join(os.path.dirname(__file__), "gui", "app.py") - - if not os.path.exists(gui_path): - print(f"Error: GUI application not found at {gui_path}") - return - - # Create a temporary launcher script that uses the correct imports - import tempfile - - launcher_content = """ -import os -import sys -import streamlit - -# Add the root directory to the path -sys.path.insert(0, '{}') - -# Import the app module properly -from optimhc.gui.app import main - -if __name__ == "__main__": - main() - """.format(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - - fd, temp_path = tempfile.mkstemp(suffix=".py") - with os.fdopen(fd, "w") as f: - f.write(launcher_content) - - # Launch Streamlit with the temporary script - print("Starting optiMHC GUI...") - try: - subprocess.run([sys.executable, "-m", "streamlit", "run", temp_path]) - finally: - # Clean up the temporary file - try: - os.unlink(temp_path) - except OSError: - pass + Pipeline(pipeline_config).run_experiments() if __name__ == "__main__": diff --git a/optimhc/core/config.py b/optimhc/core/config.py index a88fabf..89c6d18 100644 --- a/optimhc/core/config.py +++ b/optimhc/core/config.py @@ -59,11 +59,15 @@ def load_config(config_path): Load and parse a configuration file using YAML. Merges loaded config with default configuration. - Parameters: - config_path (str): Path to the YAML configuration file. + Parameters + ---------- + config_path : str + Path to the YAML configuration file. - Returns: - dict: A dictionary containing all configurations. + Returns + ------- + dict + A dictionary containing all configurations. 
""" logger.info(f"Loading configuration from {config_path}") with open(config_path, "r") as f: diff --git a/optimhc/core/feature_generation.py b/optimhc/core/feature_generation.py index d83fff6..e9cf448 100644 --- a/optimhc/core/feature_generation.py +++ b/optimhc/core/feature_generation.py @@ -1,30 +1,12 @@ -""" -feature_generation.py - -Implements feature generation logic for optiMHC, supporting multiple feature generators -(Basic, OverlappingPeptide, PWM, MHCflurry, NetMHCpan, NetMHCIIpan, DeepLC, SpectralSimilarity, etc.). -""" - import gc import logging -import os -import re - -from optimhc.feature_generator.netMHCIIpan import NetMHCIIpanFeatureGenerator -# The reason why we need to import the feature generators here is that -# the package 'mhctools' affect the logging configuration of optiMHC. -# TODO: find a better way to handle this. -from optimhc.feature_generator.netMHCpan import NetMHCpanFeatureGenerator +import optimhc.feature # noqa: F401 -- triggers generator registration +from optimhc.feature.factory import feature_generator_factory logger = logging.getLogger(__name__) -# TODO: refactor the code to pass config as a parameter to the generators -# TODO: factory method for feature generators -# TODO: for allele-specific generators, we need to test the validation of the allele input first - - def generate_features(psms, config): """ Generate features from different generators according to the configuration. @@ -35,321 +17,21 @@ def generate_features(psms, config): A container object holding PSMs and relevant data. config : dict Configuration dictionary loaded from YAML or CLI. - - Returns - ------- - None - Features are added in-place to the PsmContainer. 
- - Examples - -------- - >>> generate_features(psms, config) """ - remove_modification = True - remove_pre_nxt_aa = config["removePreNxtAA"] - n_processes = config["numProcesses"] - show_progress = config["showProgress"] - mod_dict = config.get("modificationMap", None) - if mod_dict == {}: - mod_dict = None feature_generators = config.get("featureGenerator", None) - allele = config.get("allele", None) - unique_peptides = list(set(psms.peptides)) - - if feature_generators is not None: - for generator_config in feature_generators: - if not isinstance(generator_config, dict): - logger.warning("Feature generator config is not a dictionary, skipping...") - continue - - generator_type = generator_config.get("name") - logger.info(f"Generating features with {generator_type}...") - generator_params = generator_config.get("params", {}) - - if generator_type == "OverlappingPeptide": - from optimhc.feature_generator.overlapping_peptide import ( - OverlappingPeptideFeatureGenerator, - assign_brother_aggregated_feature, - ) - - overlapping_peptide = OverlappingPeptideFeatureGenerator( - unique_peptides, - min_overlap_length=generator_params.get("minOverlapLength", 8), - min_length=generator_params.get("minLength", 8), - max_length=generator_params.get("maxLength", 25), - remove_pre_nxt_aa=remove_pre_nxt_aa, - remove_modification=remove_modification, - ) - overlapping_features = overlapping_peptide.generate_features() - full_data = overlapping_peptide.get_full_data() - - psms.add_metadata( - full_data[["Peptide", "contig_member_count", "ContigSequence"]], - psms_key=psms.peptide_column, - metadata_key="Peptide", - source="OverlappingPeptide", - ) - psms.add_features( - overlapping_features, - psms_key=psms.peptide_column, - feature_key=overlapping_peptide.id_column, - source="OverlappingPeptide", - ) - score = generator_params.get("overlappingScore", None) - if score: - assign_brother_aggregated_feature( - psms, - feature_columns=score, - overlapping_source="OverlappingPeptide", 
- source_name="ContigFeatures", - ) - - del overlapping_peptide, overlapping_features, full_data - gc.collect() - - elif generator_type == "Basic": - from optimhc.feature_generator.basic import BasicFeatureGenerator - - basic_generator = BasicFeatureGenerator( - psms.psms[psms.peptide_column].tolist(), - remove_pre_nxt_aa=remove_pre_nxt_aa, - remove_modification=remove_modification, - ) - basic_features = basic_generator.generate_features() - psms.add_features_by_index( - basic_features[basic_generator.feature_columns], source="Basic" - ) - - del basic_generator, basic_features - gc.collect() - - elif generator_type == "PWM": - from optimhc.feature_generator.PWM import PWMFeatureGenerator - - pwm_generator = PWMFeatureGenerator( - unique_peptides, - alleles=allele, - mhc_class=generator_params.get("class", "I"), - remove_modification=remove_modification, - remove_pre_nxt_aa=remove_pre_nxt_aa, - ) - pwm_features = pwm_generator.generate_features() - psms.add_features( - pwm_features, - psms_key=psms.peptide_column, - feature_key=pwm_generator.id_column, - source="PWM", - ) - - del pwm_generator, pwm_features - gc.collect() - - elif generator_type == "MHCflurry": - from optimhc.feature_generator.mhcflurry import ( - MHCflurryFeatureGenerator, - ) - - mhcflurry_generator = MHCflurryFeatureGenerator( - unique_peptides, - alleles=allele, - remove_pre_nxt_aa=remove_pre_nxt_aa, - remove_modification=remove_modification, - ) - mhcflurry_features = mhcflurry_generator.generate_features() - psms.add_features( - mhcflurry_features, - psms_key=psms.peptide_column, - feature_key=mhcflurry_generator.id_column, - source="MHCflurry", - ) - - del mhcflurry_generator, mhcflurry_features - gc.collect() - - elif generator_type == "NetMHCpan": - # from optimhc.feature_generator.netMHCpan import NetMHCpanFeatureGenerator - netmhcpan_generator = NetMHCpanFeatureGenerator( - unique_peptides, - alleles=allele, - mode=generator_params.get("mode", "best"), - 
remove_pre_nxt_aa=remove_pre_nxt_aa, - remove_modification=remove_modification, - n_processes=n_processes, - show_progress=show_progress, - ) - netmhcpan_features = netmhcpan_generator.generate_features() - psms.add_features( - netmhcpan_features, - psms_key=psms.peptide_column, - feature_key=netmhcpan_generator.id_column, - source="NetMHCpan", - ) - - del netmhcpan_generator, netmhcpan_features - gc.collect() - - elif generator_type == "NetMHCIIpan": - # from optimhc.feature_generator.netMHCIIpan import NetMHCIIpanFeatureGenerator - netmhciipan_generator = NetMHCIIpanFeatureGenerator( - unique_peptides, - alleles=allele, - mode=generator_params.get("mode", "best"), - remove_pre_nxt_aa=remove_pre_nxt_aa, - remove_modification=remove_modification, - n_processes=n_processes, - show_progress=show_progress, - ) - netmhciipan_features = netmhciipan_generator.generate_features() - psms.add_features( - netmhciipan_features, - psms_key=psms.peptide_column, - feature_key=netmhciipan_generator.id_column, - source="NetMHCIIpan", - ) - - del netmhciipan_generator, netmhciipan_features - gc.collect() - - elif generator_type == "DeepLC": - from optimhc.feature_generator.DeepLC import DeepLCFeatureGenerator - - deeplc_generator = DeepLCFeatureGenerator( - psms, - calibration_criteria_column=generator_params.get("calibrationCriteria"), - lower_score_is_better=generator_params.get("lowerIsBetter"), - calibration_set_size=generator_params.get("calibrationSize", 0.1), - processes=n_processes, - # TODO: Check here carefully - # Since DeepLC is GPU-based, - # the processes here is not the same meaning as the n_processes in multi-threading - model_path=generator_params.get("model_path", None), - remove_pre_nxt_aa=remove_pre_nxt_aa, - mod_dict=mod_dict, - ) - deeplc_features = deeplc_generator.generate_features() - psms.add_features_by_index( - deeplc_features[deeplc_generator.feature_columns], source="DeepLC" - ) - - del deeplc_generator, deeplc_features - gc.collect() - - elif 
generator_type == "SpectralSimilarity": - from optimhc.feature_generator.spectral_similarity import ( - SpectralSimilarityFeatureGenerator, - ) - - # Match PSMs with the spectra - mzML_dir = generator_params.get("mzmlDir", None) - if mzML_dir is None: - logger.error( - "mzML_dir is not provided for SpectralSimilarity feature generator." - ) - continue - - pattern = generator_params.get("spectrumIdPattern", None) - mz_file_names = [] - spectrum_ids = psms.spectrum_ids - - if pattern: - logger.info( - f"Using pattern: {pattern} to extract mzML file names from spectrum IDs." - ) - for spectrum_id in spectrum_ids: - mz_file_names.append(re.match(pattern, spectrum_id).group(1)) - logger.info(f"mzML file names: {list(set(mz_file_names))}") - else: - logger.info("Spectrum ID pattern is not provided.") - if psms.ms_data_file_column is not None: - logger.info( - f"Trying to extract mzML file names from {psms.ms_data_file_column}" - ) - logger.info( - f"MS data file format: {set(psms.psms[psms.ms_data_file_column])}" - ) - - for ms_data_file in psms.psms[psms.ms_data_file_column]: - mz_file_basename = os.path.basename(ms_data_file).split(".")[0] - if mz_file_basename.endswith(".mzML"): - mz_file_basename = mz_file_basename[:-5] - elif mz_file_basename.endswith("mzML"): - mz_file_basename = mz_file_basename[:-4] - mz_file_names.append(mz_file_basename) - - logger.info(f"mzML file names: {list(set(mz_file_names))}") - else: - logger.info("MS data file information is not provided.") - logger.info( - r"Trying to use the default pattern: (.+?)\.\d+\.\d+\.\d+ to extract mzML file names from spectrum IDs." 
- ) - for spectrum_id in spectrum_ids: - mz_file_names.append( - re.match(r"(.+?)\.\d+\.\d+\.\d+", spectrum_id).group(1) - ) - - mz_file_paths = [ - os.path.join(mzML_dir, f"{mz_file}.mzML") for mz_file in mz_file_names - ] - mz_file_paths_set = set(mz_file_paths) - logger.info(f"mz_file_paths: {mz_file_paths_set}") - - for mz_file_path in mz_file_paths_set: - if not os.path.exists(mz_file_path): - logger.error(f"mzML file not found: {mz_file_path}") - continue - - model_type = generator_params.get("model", None) - if model_type is None: - logger.error( - "Model type is not provided for SpectralSimilarity feature generator." - ) - raise ValueError( - "Model type is required for SpectralSimilarity feature generator." - ) - - collision_energy = generator_params.get("collisionEnergy", None) - instrument = generator_params.get("instrument", None) - fragmentation_type = generator_params.get("fragmentationType", None) - spectral_similarity_generator = SpectralSimilarityFeatureGenerator( - spectrum_ids=psms.spectrum_ids, - peptides=psms.peptides, - charges=psms.charges, - scan_ids=psms.scan_ids, - mz_file_paths=mz_file_paths, - model_type=generator_params.get("model"), - collision_energies=( - [collision_energy] * len(psms.peptides) if collision_energy else None - ), - instruments=([instrument] * len(psms.peptides) if instrument else None), - fragmentation_types=( - [fragmentation_type] * len(psms.peptides) if fragmentation_type else None - ), - remove_pre_nxt_aa=remove_pre_nxt_aa, - mod_dict=mod_dict, - url=generator_params.get("url"), - ssl=generator_params.get("ssl", True), - top_n=generator_params.get("numTopPeaks", 36), - tolerance_ppm=generator_params.get("tolerance", 20), - ) - - spectral_similarity_features = spectral_similarity_generator.generate_features() - psms.add_features( - spectral_similarity_features, - psms_key=[ - psms.spectrum_column, - psms.peptide_column, - psms.charge_column, - ], - feature_key=spectral_similarity_generator.id_column, - 
source="SpectralSimilarity", - ) - del ( - spectral_similarity_generator, - spectral_similarity_features, - mz_file_paths, - mz_file_names, - ) - gc.collect() - - else: - logger.warning(f"Unknown feature generator: {generator_type}, skipping...") + if not feature_generators: + return + + for generator_config in feature_generators: + if not isinstance(generator_config, dict): + logger.warning("Feature generator config is not a dictionary, skipping...") + continue + + name = generator_config.get("name") + params = generator_config.get("params", {}) + + logger.info(f"Generating features with {name}...") + generator_cls = feature_generator_factory.get_generator(name) + generator = generator_cls.from_config(psms, config, params) + generator.apply(psms, source=name) + gc.collect() diff --git a/optimhc/core/logging_helper.py b/optimhc/core/logging_helper.py deleted file mode 100644 index 100cfc3..0000000 --- a/optimhc/core/logging_helper.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging - - -def setup_loggers(log_file=None, log_level="INFO"): - """ - Create or update all loggers so that each logger has a StreamHandler and optionally a FileHandler. - This ensures all log messages are displayed in the console and optionally saved to a file. - - Parameters - ---------- - log_file : str, optional - Path to the log file. If None, no file logging is set up. - log_level : str, optional - Logging level (DEBUG, INFO, WARNING, ERROR). Default is "INFO". 
- """ - # Disable mhctools logging, avoid the warning message when multiprocessing - for logger_name in [ - "mhctools", - "mhctools.base_commandline_predictor", - "mhctools.netmhc", - "mhctools.netmhciipan", - ]: - logger = logging.getLogger(logger_name) - logger.disabled = True - logger.propagate = False - logger.setLevel(logging.CRITICAL) - - loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] - level = getattr(logging, log_level.upper(), logging.INFO) - - # debug_logging() - - for lg in loggers: - if lg.name.startswith("mhctools"): - continue - - lg.disabled = False - has_stream_handler = any( - isinstance(handler, logging.StreamHandler) for handler in lg.handlers - ) - if not has_stream_handler: - console_handler = logging.StreamHandler() - console_handler.setLevel(level) - formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - console_handler.setFormatter(formatter) - lg.addHandler(console_handler) - - if log_file: - has_file_handler = any( - isinstance(handler, logging.FileHandler) for handler in lg.handlers - ) - if not has_file_handler: - file_handler = logging.FileHandler(log_file, mode="a") - file_handler.setLevel(level) - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - file_handler.setFormatter(formatter) - lg.addHandler(file_handler) - - lg.propagate = False - lg.setLevel(level) - - if lg.name.startswith("optimhc"): - lg.disabled = False - - root_logger = logging.getLogger() - root_logger.disabled = False - root_logger.setLevel(level) - - -def debug_logging(): - """ - Print debugging information for all loggers that start with 'optimhc' and - the root logger. This helps verify that logger configurations are set properly. 
- """ - print("\n=== Debugging Loggers ===\n") - loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict.keys()] - for lg in loggers: - if lg.name.startswith("optimhc"): - print(f"Logger Name: {lg.name}") - print(f" - Effective Level: {logging.getLevelName(lg.getEffectiveLevel())}") - print(f" - Explicit Level: {logging.getLevelName(lg.level)} (default: NOTSET)") - print(f" - Propagate: {lg.propagate}") - print(f" - Disabled: {lg.disabled}") - - if lg.handlers: - for handler in lg.handlers: - print(f" Handler: {type(handler).__name__}") - print(f" - Level: {logging.getLevelName(handler.level)}") - print(f" - Formatter: {handler.formatter}") - if isinstance(handler, logging.FileHandler): - print(f" - Log File: {handler.baseFilename}") - print(f" - Stream: {getattr(handler, 'stream', None)}") - else: - print(" No handlers attached to the logger.") - print("") - - root_logger = logging.getLogger() - print("Root Logger:") - print(f" - Level: {logging.getLevelName(root_logger.level)}") - print(f" - Handlers: {len(root_logger.handlers)}") - for handler in root_logger.handlers: - print(f" Handler: {type(handler).__name__}") - print(f" - Level: {logging.getLevelName(handler.level)}") - print(f" - Formatter: {handler.formatter}") - if isinstance(handler, logging.FileHandler): - print(f" - Log File: {handler.baseFilename}") - print(f" - Stream: {getattr(handler, 'stream', None)}") - print("\n=== End of Logger Debugging ===\n") diff --git a/optimhc/core/pipeline.py b/optimhc/core/pipeline.py index 32b5653..ab4fad8 100644 --- a/optimhc/core/pipeline.py +++ b/optimhc/core/pipeline.py @@ -15,7 +15,6 @@ from optimhc.core.config import Config from optimhc.core.feature_generation import generate_features -from optimhc.core.logging_helper import setup_loggers from optimhc.parser import read_pepxml, read_pin from optimhc.rescore import mokapot from optimhc.rescore.model import RandomForestPercolatorModel, XGBoostPercolatorModel @@ -67,7 +66,6 @@ def 
__init__(self, config): self.experiment = self.config.get("experimentName", "optimhc_experiment") self.output_dir = os.path.join(self.config["outputDir"], self.experiment) os.makedirs(self.output_dir, exist_ok=True) - setup_loggers(os.path.join(self.output_dir, "log"), self.config.get("logLevel", "INFO")) self.visualization_enabled = self.config.get("visualization", True) self.save_models = self.config.get("saveModels", True) diff --git a/optimhc/feature/__init__.py b/optimhc/feature/__init__.py new file mode 100644 index 0000000..c69db0c --- /dev/null +++ b/optimhc/feature/__init__.py @@ -0,0 +1,27 @@ +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.basic import BasicFeatureGenerator +from optimhc.feature.deeplc import DeepLCFeatureGenerator +from optimhc.feature.factory import feature_generator_factory +from optimhc.feature.mhcflurry import MHCflurryFeatureGenerator +from optimhc.feature.netmhciipan import NetMHCIIpanFeatureGenerator +from optimhc.feature.netmhcpan import NetMHCpanFeatureGenerator +from optimhc.feature.overlapping_peptide import ( + OverlappingPeptideFeatureGenerator, +) +from optimhc.feature.pwm import PWMFeatureGenerator +from optimhc.feature.spectral_similarity import ( + SpectralSimilarityFeatureGenerator, +) + +__all__ = [ + "feature_generator_factory", + "BaseFeatureGenerator", + "BasicFeatureGenerator", + "PWMFeatureGenerator", + "OverlappingPeptideFeatureGenerator", + "MHCflurryFeatureGenerator", + "NetMHCpanFeatureGenerator", + "NetMHCIIpanFeatureGenerator", + "DeepLCFeatureGenerator", + "SpectralSimilarityFeatureGenerator", +] diff --git a/optimhc/feature/base_feature_generator.py b/optimhc/feature/base_feature_generator.py new file mode 100644 index 0000000..8d9424f --- /dev/null +++ b/optimhc/feature/base_feature_generator.py @@ -0,0 +1,80 @@ +from abc import ABC, abstractmethod +from typing import List + +import pandas as pd + +from optimhc.psm_container import PsmContainer + + +class 
BaseFeatureGenerator(ABC): + """Abstract base class for all feature generators in the rescoring pipeline. + + Subclasses must implement: + - ``feature_columns`` -- names of generated feature columns + - ``id_column`` -- merge key column(s) + - ``generate_features()`` -- pure computation, returns a DataFrame + - ``from_config()`` -- construct an instance from pipeline config + + The default ``apply()`` merges features by peptide column. + Override it for index-based merges, composite keys, or post-processing. + """ + + @property + @abstractmethod + def feature_columns(self) -> List[str]: + """Return a list of feature column names produced by this generator.""" + ... + + @property + @abstractmethod + def id_column(self) -> List[str]: + """Return the column(s) used as merge key(s) with the PsmContainer.""" + ... + + @abstractmethod + def generate_features(self) -> pd.DataFrame: + """Generate features and return them as a DataFrame.""" + ... + + @classmethod + def from_config( + cls, + psms: PsmContainer, + config: dict, + params: dict, + ) -> "BaseFeatureGenerator": + """Construct a generator instance from pipeline configuration. + + Parameters + ---------- + psms : PsmContainer + The PSM container with all current data. + config : dict + The full pipeline configuration. + params : dict + Generator-specific parameters from + ``config["featureGenerator"][i]["params"]``. + """ + raise NotImplementedError(f"{cls.__name__} must implement from_config()") + + def apply(self, psms: PsmContainer, source: str) -> None: + """Generate features and merge them into the PsmContainer. + + The default implementation merges by peptide column using + ``add_features()``. Override for different merge strategies + (index-based, composite key) or additional post-processing. + + Parameters + ---------- + psms : PsmContainer + The PSM container to add features to (modified in-place). + source : str + Name of this feature source (e.g. ``"Basic"``, ``"PWM"``). 
+ """ + features = self.generate_features() + psms.add_features( + features, + psms_key=psms.peptide_column, + feature_key=self.id_column, + source=source, + ) diff --git a/optimhc/feature_generator/basic.py b/optimhc/feature/basic.py similarity index 85% rename from optimhc/feature_generator/basic.py rename to optimhc/feature/basic.py index 2d29472..2e0fde3 100644 --- a/optimhc/feature_generator/basic.py +++ b/optimhc/feature/basic.py @@ -1,5 +1,3 @@ -# feature_generator/basic.py - import logging from typing import List @@ -7,7 +5,8 @@ from scipy.stats import entropy # Import entropy from scipy from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory logger = logging.getLogger(__name__) @@ -85,11 +84,15 @@ def _preprocess_peptide(self, peptide: str) -> str: """ Preprocess peptide sequence by removing adjacent amino acids and modifications. - Parameters: - peptide (str): Original peptide sequence. + Parameters + ---------- + peptide : str + Original peptide sequence. - Returns: - str: Preprocessed peptide sequence. + Returns + ------- + str + Preprocessed peptide sequence. """ if self.remove_pre_nxt_aa: peptide = utils.strip_flanking_and_charge(peptide) @@ -101,11 +104,15 @@ def _shannon_entropy(self, sequence: str) -> float: """ Calculate the Shannon entropy of a peptide sequence. - Parameters: - sequence (str): Peptide sequence. + Parameters + ---------- + sequence : str + Peptide sequence. - Returns: - float: Shannon entropy value. + Returns + ------- + float + Shannon entropy value. 
""" if len(sequence) == 0: return 0.0 @@ -162,3 +169,18 @@ def generate_features(self) -> pd.DataFrame: logger.info(f"Generated basic features for {len(features_df)} peptides.") return features_df + + @classmethod + def from_config(cls, psms, config, params): + return cls( + peptides=psms.peptides, + remove_pre_nxt_aa=config["removePreNxtAA"], + remove_modification=True, + ) + + def apply(self, psms, source): + features = self.generate_features() + psms.add_features_by_index(features[self.feature_columns], source=source) + + +feature_generator_factory.register_generator("Basic", BasicFeatureGenerator) diff --git a/optimhc/feature_generator/DeepLC.py b/optimhc/feature/deeplc.py similarity index 92% rename from optimhc/feature_generator/DeepLC.py rename to optimhc/feature/deeplc.py index 0060e8a..348f791 100644 --- a/optimhc/feature_generator/DeepLC.py +++ b/optimhc/feature/deeplc.py @@ -1,4 +1,3 @@ -# feature_generator/DeepLC.py # TODO: Use koina for prediction import logging @@ -9,7 +8,8 @@ from deeplc import DeepLC from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory from optimhc.psm_container import PsmContainer logger = logging.getLogger(__name__) @@ -78,28 +78,30 @@ def __init__( DeepLC retraining is on by default. Add ``deeplc_retrain: False`` as a keyword argument to disable retraining. - Parameters: - psms: PsmContainer + Parameters + ---------- + psms : PsmContainer PSMs to generate features for. - calibration_criteria_column: str + calibration_criteria_column : str Column name in the PSMs DataFrame to use for DeepLC calibration. - lower_score_is_better - Whether a lower PSM score denotes a better matching PSM. Default: False - calibration_set_size: int or float + lower_score_is_better : bool + Whether a lower PSM score denotes a better matching PSM. Default: False. 
+ calibration_set_size : int or float Amount of best PSMs to use for DeepLC calibration. If this value is lower than the number of available PSMs, all PSMs will be used. (default: 0.15) - processes: {int, None} + processes : int or None Number of processes to use in DeepLC. Defaults to 1. - model_path: str + model_path : str Path to the DeepLC model. If None, the default model will be used. - remove_pre_nxt_aa: bool + remove_pre_nxt_aa : bool Whether to remove the first and last amino acids from the peptide sequence. - Default: True - mod_dict: dict - Dictionary of modifications to be used for DeepLC. If None, no modifications will be used. - *args: list + Default: True. + mod_dict : dict + Dictionary of modifications to be used for DeepLC. If None, no modifications + will be used. + *args : list Additional positional arguments are passed to DeepLC. - kwargs: dict + **kwargs : dict Additional keyword arguments are passed to DeepLC. """ self.psms = psms @@ -456,3 +458,26 @@ def save_raw_predictions(self, file_path: str, **kwargs) -> None: logger.info(f"Raw predictions saved to {file_path}") else: logger.warning("Raw predictions have not been generated yet.") + + @classmethod + def from_config(cls, psms, config, params): + mod_dict = config.get("modificationMap", None) + if mod_dict == {}: + mod_dict = None + return cls( + psms=psms, + calibration_criteria_column=params.get("calibrationCriteria"), + lower_score_is_better=params.get("lowerIsBetter"), + calibration_set_size=params.get("calibrationSize", 0.1), + processes=config.get("numProcesses", 1), + model_path=params.get("model_path", None), + remove_pre_nxt_aa=config["removePreNxtAA"], + mod_dict=mod_dict, + ) + + def apply(self, psms, source): + features = self.generate_features() + psms.add_features_by_index(features[self.feature_columns], source=source) + + +feature_generator_factory.register_generator("DeepLC", DeepLCFeatureGenerator) diff --git a/optimhc/feature/factory.py b/optimhc/feature/factory.py new 
file mode 100644 index 0000000..561c0c8 --- /dev/null +++ b/optimhc/feature/factory.py @@ -0,0 +1,37 @@ +import logging +from typing import Dict, List, Type + +from optimhc.feature.base_feature_generator import BaseFeatureGenerator + +logger = logging.getLogger(__name__) + + +class FeatureGeneratorFactory: + """Registry and factory for feature generators. + + Each generator module registers itself at import time by calling + ``feature_generator_factory.register_generator(name, cls)``. + The orchestrator retrieves generators by name via ``get_generator(name)``. + """ + + def __init__(self): + self._registry: Dict[str, Type[BaseFeatureGenerator]] = {} + + def register_generator(self, name: str, generator_class: Type[BaseFeatureGenerator]): + """Register a feature generator class under *name*.""" + self._registry[name] = generator_class + + def get_generator(self, name: str) -> Type[BaseFeatureGenerator]: + """Return the generator class registered under *name*.""" + if name not in self._registry: + raise ValueError( + f"Unknown feature generator: '{name}'. 
Available: {sorted(self._registry.keys())}" + ) + return self._registry[name] + + def list_generators(self) -> List[str]: + """Return sorted list of registered generator names.""" + return sorted(self._registry.keys()) + + +feature_generator_factory = FeatureGeneratorFactory() diff --git a/optimhc/feature_generator/mhcflurry.py b/optimhc/feature/mhcflurry.py similarity index 95% rename from optimhc/feature_generator/mhcflurry.py rename to optimhc/feature/mhcflurry.py index 1cc3b30..d151f06 100644 --- a/optimhc/feature_generator/mhcflurry.py +++ b/optimhc/feature/mhcflurry.py @@ -5,7 +5,8 @@ from mhcflurry import Class1PresentationPredictor from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory logger = logging.getLogger(__name__) @@ -338,3 +339,15 @@ def predictions_to_dataframe(self) -> pd.DataFrame: if self.predictions is None: raise ValueError("No predictions available. Please run 'generate_features' first.") return self.predictions + + @classmethod + def from_config(cls, psms, config, params): + return cls( + peptides=list(set(psms.peptides)), + alleles=config.get("allele", []), + remove_pre_nxt_aa=config["removePreNxtAA"], + remove_modification=True, + ) + + +feature_generator_factory.register_generator("MHCflurry", MHCflurryFeatureGenerator) diff --git a/optimhc/feature_generator/netMHCIIpan.py b/optimhc/feature/netmhciipan.py similarity index 97% rename from optimhc/feature_generator/netMHCIIpan.py rename to optimhc/feature/netmhciipan.py index b65db9d..336fc5e 100644 --- a/optimhc/feature_generator/netMHCIIpan.py +++ b/optimhc/feature/netmhciipan.py @@ -1,5 +1,3 @@ -# feature_generator/netMHCIIpan.py - # TODO: set 'BA' and 'EL' as optional parameters for the user to choose the prediction method. 
import logging @@ -12,7 +10,8 @@ from tqdm import tqdm from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory logger = logging.getLogger(__name__) @@ -647,3 +646,18 @@ def save_raw_predictions(self, file_path: str, **kwargs) -> None: logger.info(f"Raw prediction results saved to: {file_path}") else: logger.warning("No raw prediction results available to save.") + + @classmethod + def from_config(cls, psms, config, params): + return cls( + peptides=list(set(psms.peptides)), + alleles=config.get("allele", []), + mode=params.get("mode", "best"), + remove_pre_nxt_aa=config["removePreNxtAA"], + remove_modification=True, + n_processes=config.get("numProcesses", 1), + show_progress=config.get("showProgress", False), + ) + + +feature_generator_factory.register_generator("NetMHCIIpan", NetMHCIIpanFeatureGenerator) diff --git a/optimhc/feature_generator/netMHCpan.py b/optimhc/feature/netmhcpan.py similarity index 97% rename from optimhc/feature_generator/netMHCpan.py rename to optimhc/feature/netmhcpan.py index 089bd6d..7816da6 100644 --- a/optimhc/feature_generator/netMHCpan.py +++ b/optimhc/feature/netmhcpan.py @@ -1,5 +1,3 @@ -# feature_generators/netmhcpan_feature_generator.py - # TODO: Except 'best' mode, the other modes seems to be not working properly. We need to investigate this issue. 
import logging @@ -12,6 +10,7 @@ from tqdm import tqdm from optimhc import utils +from optimhc.feature.factory import feature_generator_factory from .base_feature_generator import BaseFeatureGenerator @@ -674,3 +673,18 @@ def predictions_to_dataframe(self) -> pd.DataFrame: # logger.info(f"Generated best allele information for {len(best_allele_df)} peptides.") # return best_allele_df + + @classmethod + def from_config(cls, psms, config, params): + return cls( + peptides=list(set(psms.peptides)), + alleles=config.get("allele", []), + mode=params.get("mode", "best"), + remove_pre_nxt_aa=config["removePreNxtAA"], + remove_modification=True, + n_processes=config.get("numProcesses", 1), + show_progress=config.get("showProgress", False), + ) + + +feature_generator_factory.register_generator("NetMHCpan", NetMHCpanFeatureGenerator) diff --git a/optimhc/feature_generator/numba_utils.py b/optimhc/feature/numba_utils.py similarity index 100% rename from optimhc/feature_generator/numba_utils.py rename to optimhc/feature/numba_utils.py diff --git a/optimhc/feature_generator/overlapping_peptide.py b/optimhc/feature/overlapping_peptide.py similarity index 89% rename from optimhc/feature_generator/overlapping_peptide.py rename to optimhc/feature/overlapping_peptide.py index 42d4f04..d50faef 100644 --- a/optimhc/feature_generator/overlapping_peptide.py +++ b/optimhc/feature/overlapping_peptide.py @@ -1,5 +1,3 @@ -# feature_generator/overlapping_peptide.py - import logging from collections import defaultdict from typing import Dict, List, Tuple, Union @@ -11,7 +9,8 @@ from tqdm import tqdm from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory from optimhc.psm_container import PsmContainer logger = logging.getLogger(__name__) @@ -688,111 +687,43 @@ def get_full_data(self) -> pd.DataFrame: 
self.full_data = self.overlap_data.merge(full_data_df, on="clean_peptide", how="left") return self.full_data + @classmethod + def from_config(cls, psms, config, params): + instance = cls( + peptides=list(set(psms.peptides)), + min_overlap_length=params.get("minOverlapLength", 8), + min_length=params.get("minLength", 8), + max_length=params.get("maxLength", 25), + remove_pre_nxt_aa=config["removePreNxtAA"], + remove_modification=True, + ) + instance._overlapping_score = params.get("overlappingScore", None) + return instance + + def apply(self, psms, source): + features = self.generate_features() + full_data = self.get_full_data() + + psms.add_metadata( + full_data[["Peptide", "contig_member_count", "ContigSequence"]], + psms_key=psms.peptide_column, + metadata_key="Peptide", + source=source, + ) + psms.add_features( + features, + psms_key=psms.peptide_column, + feature_key=self.id_column, + source=source, + ) -''' -# TODO: test - -def assign_brother_aggregated_feature( - psms: PsmContainer, - feature_columns: Union[str, List[str]], - overlapping_source: str, - source_name: str = 'OverlappingGroupFeatures' -) -> None: - """ - Assign aggregated features based on brother peptides to the PSMs. - - For PSMs with the same ContigSequence (brother peptides), compute the mean of specified features - and assign these aggregated features back to each PSM in the group. - If a PSM does not have a ContigSequence (no brothers), its new features will be set to the original values. - - Metadata in the PSM container: - { - "source_name": { - "metadata_field_1": "value1", - "metadata_field_2": "value2" - } - } - - Parameters: - psms (PsmContainer): PSM container containing the peptides and features. - feature_columns (Union[str, List[str]]): Name of the feature column(s) to aggregate. - overlapping_source (str): Source name of the overlapping peptide features. - source_name (str): Name of the new feature source. 
- - Returns: - None - """ - if isinstance(feature_columns, str): - feature_columns = [feature_columns] - psms_df = psms.psms - - if psms.metadata_column is None: - raise ValueError("The PSMs do not contain metadata.") - metadata = psms_df[psms.metadata_column] - print(metadata) - - - def get_overlapping_data(x): - try: - return x.get(overlapping_source, {}) - except AttributeError: - logger.error(f"Metadata for PSM {x} is not a dictionary.") - return {} - - def get_contig_sequence(x): - try: - return x.get('ContigSequence', None) - except AttributeError: - logger.error(f"Invalid metadata for PSM {x}.") - return None - - overlapping_data = metadata.apply(get_overlapping_data) - contig_sequences = overlapping_data.apply(get_contig_sequence) - print(overlapping_data) - print(contig_sequences) - - psms_df['ContigSequence'] = contig_sequences - - for feature in feature_columns: - if feature not in psms_df.columns: - raise ValueError(f"Feature column '{feature}' not found in PSMs.") - - grouped_mean = psms_df.groupby('ContigSequence')[feature_columns].mean().reset_index() - #grouped_sum = psms_df.groupby('ContigSequence')[feature_columns].sum().reset_index() - - """ - grouped = grouped_mean.merge(grouped_sum, - on='ContigSequence', - suffixes=('_brother_mean', '_brother_sum')) - """ - psms_with_agg = psms_df.merge(grouped_mean, - on='ContigSequence', - how='left', - suffixes=('', '_brother_mean')) - - - # use the original feature values if the aggregated values are missing - for feature in feature_columns: - mean_feature = feature + '_brother_mean' - sum_feature = feature + '_brother_sum' - psms_with_agg[mean_feature].fillna(psms_with_agg[feature], inplace=True) - psms_with_agg[sum_feature].fillna(psms_with_agg[feature], inplace=True) - - - agg_feature_columns = [] - for feature in feature_columns: - mean_feature = feature + '_brother_mean' - sum_feature = feature + '_brother_sum' - agg_feature_columns.append(mean_feature) - agg_feature_columns.append(sum_feature) - - 
new_features_df = psms_with_agg[agg_feature_columns] - new_features_df.columns = agg_feature_columns - - psms.add_features_by_index(new_features_df, source=source_name) - - -''' + if self._overlapping_score: + assign_brother_aggregated_feature( + psms, + feature_columns=self._overlapping_score, + overlapping_source=source, + source_name="ContigFeatures", + ) def assign_brother_aggregated_feature( @@ -880,3 +811,8 @@ def get_contig_sequence(x): new_features_df = psms_with_agg[agg_feature_columns] psms.add_features_by_index(features_df=new_features_df, source=source_name) + + +feature_generator_factory.register_generator( + "OverlappingPeptide", OverlappingPeptideFeatureGenerator +) diff --git a/optimhc/feature_generator/PWM.py b/optimhc/feature/pwm.py similarity index 98% rename from optimhc/feature_generator/PWM.py rename to optimhc/feature/pwm.py index 84b541d..913298c 100644 --- a/optimhc/feature_generator/PWM.py +++ b/optimhc/feature/pwm.py @@ -1,5 +1,3 @@ -# feature_generator/PWM.py - import logging import os from typing import Dict, List, Optional, Tuple, Union @@ -8,7 +6,8 @@ import pandas as pd from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory logger = logging.getLogger(__name__) @@ -700,3 +699,16 @@ def feature_columns(self) -> List[str]: feature_columns.append(f"N_Flank_PWM_Score_{allele}") feature_columns.append(f"C_Flank_PWM_Score_{allele}") return feature_columns + + @classmethod + def from_config(cls, psms, config, params): + return cls( + peptides=list(set(psms.peptides)), + alleles=config.get("allele", []), + mhc_class=params.get("class", "I"), + remove_modification=True, + remove_pre_nxt_aa=config["removePreNxtAA"], + ) + + +feature_generator_factory.register_generator("PWM", PWMFeatureGenerator) diff --git a/optimhc/feature_generator/spectral_similarity.py 
b/optimhc/feature/spectral_similarity.py similarity index 75% rename from optimhc/feature_generator/spectral_similarity.py rename to optimhc/feature/spectral_similarity.py index 698452f..9f3a273 100644 --- a/optimhc/feature_generator/spectral_similarity.py +++ b/optimhc/feature/spectral_similarity.py @@ -1,5 +1,3 @@ -# feature_generator/spectral_similarity.py - import logging from typing import Dict, List, Optional, Tuple @@ -8,8 +6,9 @@ from koinapy import Koina from optimhc import utils -from optimhc.feature_generator.base_feature_generator import BaseFeatureGenerator -from optimhc.feature_generator.numba_utils import align_peaks, compute_similarity_features +from optimhc.feature.base_feature_generator import BaseFeatureGenerator +from optimhc.feature.factory import feature_generator_factory +from optimhc.feature.numba_utils import align_peaks, compute_similarity_features from optimhc.parser import extract_mzml_data logger = logging.getLogger(__name__) @@ -25,18 +24,30 @@ class SpectralSimilarityFeatureGenerator(BaseFeatureGenerator): 3. Align experimental and predicted spectra 4. Calculate similarity metrics as features - Parameters: - peptides (List[str]): List of peptide sequences - charges (List[int]): List of charge states - scan_ids (List[int]): List of scan IDs - mz_file_paths (List[str]): List of mzML file paths - model_type (str): Prediction model type, either "HCD" or "CID" - collision_energies (List[float]): List of collision energies, required when model_type is "HCD" - remove_pre_nxt_aa (bool): Whether to remove preceding and next amino acids, default is True - remove_modification (bool): Whether to remove modifications, default is True - url (str): Koina server URL, default is "koina.wilhelmlab.org:443" - top_n (int): Number of top peaks to use for alignment, default is 12 - tolerance_ppm (float): Mass tolerance for alignment in ppm, default is 20 + Parameters + ---------- + peptides : list of str + List of peptide sequences. 
+ charges : list of int + List of charge states. + scan_ids : list of int + List of scan IDs. + mz_file_paths : list of str + List of mzML file paths. + model_type : str + Prediction model type, either "HCD" or "CID". + collision_energies : list of float + List of collision energies, required when model_type is "HCD". + remove_pre_nxt_aa : bool + Whether to remove preceding and next amino acids, default is True. + remove_modification : bool + Whether to remove modifications, default is True. + url : str + Koina server URL, default is "koina.wilhelmlab.org:443". + top_n : int + Number of top peaks to use for alignment, default is 12. + tolerance_ppm : float + Mass tolerance for alignment in ppm, default is 20. """ def __init__( @@ -89,7 +100,7 @@ def __init__( ) self.df["processed_peptide"] = self.df["peptide"].apply(self._preprocess_peptide) - logger.info(f"Recevied {len(self.df)} PSMs for spectral similarity feature generation") + logger.info(f"Received {len(self.df)} PSMs for spectral similarity feature generation") @property def id_column(self) -> List[str]: @@ -118,8 +129,10 @@ def input_df(self) -> pd.DataFrame: """ Return the generated features as a DataFrame. - Returns: - pd.DataFrame: DataFrame containing the generated features + Returns + ------- + pd.DataFrame + DataFrame containing the generated features. 
""" return self.df @@ -304,38 +317,40 @@ def _predict_theoretical_spectra( else: raise ValueError(f"Unsupported model type: {self.model_type}") - # Save the raw prediction results - self._raw_predictions = predictions.copy() - - # Convert prediction results to a suitable format - pred_df = predictions.copy() - pred_df.rename( - columns={ - "peptide_sequences": "processed_peptide", - "precursor_charges": "charge", - "intensities": "pred_intensity", - "mz": "pred_mz", - }, - inplace=True, - ) + # Save the raw prediction results + self._raw_predictions = predictions.copy() + + # Convert prediction results to a suitable format + pred_df = predictions.copy() + pred_df.rename( + columns={ + "peptide_sequences": "processed_peptide", + "precursor_charges": "charge", + "intensities": "pred_intensity", + "mz": "pred_mz", + }, + inplace=True, + ) - # Group by peptide and charge, convert predicted mz and intensity to lists - grouped_df = ( - pred_df.groupby(["processed_peptide", "charge"]) - .agg({"pred_intensity": list, "pred_mz": list, "annotation": list}) - .reset_index() - ) + # Group by peptide and charge, convert predicted mz and intensity to lists + grouped_df = ( + pred_df.groupby(["processed_peptide", "charge"]) + .agg({"pred_intensity": list, "pred_mz": list, "annotation": list}) + .reset_index() + ) - logger.info(f"Successfully predicted {len(grouped_df)} theoretical spectra") - return grouped_df + logger.info(f"Successfully predicted {len(grouped_df)} theoretical spectra") + return grouped_df @property def raw_predictions(self) -> pd.DataFrame: """ Returns the raw prediction results from Koina. - Returns: - pd.DataFrame: Raw prediction results DataFrame + Returns + ------- + pd.DataFrame + Raw prediction results DataFrame. """ if self._raw_predictions is None: if self.results is None: @@ -346,8 +361,10 @@ def get_raw_predictions(self) -> pd.DataFrame: """ Get the raw prediction results DataFrame from Koina. 
- Returns: - pd.DataFrame: Raw prediction results DataFrame + Returns + ------- + pd.DataFrame + Raw prediction results DataFrame. """ return self.raw_predictions @@ -355,9 +372,12 @@ def save_raw_predictions(self, file_path: str, **kwargs) -> None: """ Save the raw prediction results to a file. - Parameters: - file_path (str): Path to save the file - **kwargs: Other parameters passed to pandas.DataFrame.to_csv + Parameters + ---------- + file_path : str + Path to save the file. + **kwargs + Other parameters passed to ``pandas.DataFrame.to_csv``. """ if "index" not in kwargs: kwargs["index"] = False @@ -415,20 +435,27 @@ def _align_spectra_all_peaks( """ Align experimental and predicted spectra using ppm tolerance. - Parameters: - exp_mz (List[float]): Experimental m/z values - exp_intensity (List[float]): Experimental intensity values - pred_mz (List[float]): Predicted m/z values - pred_intensity (List[float]): Predicted intensity values - pred_annotation (Optional[List[str]]): Predicted fragment annotations - - Returns: - Tuple[np.ndarray, np.ndarray, np.ndarray, Dict]: - - Aligned experimental intensity vector - - Predicted intensity vector - - Matching index pairs as int array of shape (N, 2), - where column 0 is pred_idx and column 1 is exp_idx (-1 = no match) - - Additional info including original sorted arrays + Parameters + ---------- + exp_mz : list of float + Experimental m/z values. + exp_intensity : list of float + Experimental intensity values. + pred_mz : list of float + Predicted m/z values. + pred_intensity : list of float + Predicted intensity values. + pred_annotation : list of str, optional + Predicted fragment annotations. 
+ + Returns + ------- + tuple of (np.ndarray, np.ndarray, np.ndarray, dict) + - Aligned experimental intensity vector + - Predicted intensity vector + - Matching index pairs as int array of shape (N, 2), + where column 0 is pred_idx and column 1 is exp_idx (-1 = no match) + - Additional info including original sorted arrays """ # Sort both experimental and predicted spectra by m/z exp_mz_sorted, exp_intensity_sorted, _ = self._sort_spectrum_by_mz(exp_mz, exp_intensity) @@ -479,19 +506,25 @@ def _get_top_peaks_vectors( top_n: int, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ - Extract top N peaks based on predicted intensity for similarity calculation + Extract top N peaks based on predicted intensity for similarity calculation. - Parameters: - aligned_exp_intensity (np.ndarray): Aligned experimental intensity vector - aligned_pred_intensity (np.ndarray): Aligned predicted intensity vector - matched_indices (np.ndarray): Matching index pairs, shape (N, 2) - top_n (int): Number of top peaks to extract + Parameters + ---------- + aligned_exp_intensity : np.ndarray + Aligned experimental intensity vector. + aligned_pred_intensity : np.ndarray + Aligned predicted intensity vector. + matched_indices : np.ndarray + Matching index pairs, shape (N, 2). + top_n : int + Number of top peaks to extract. 
- Returns: - Tuple[np.ndarray, np.ndarray, np.ndarray]: - - Top N experimental intensity vector - - Top N predicted intensity vector - - Top N matching index pairs, shape (top_n, 2) + Returns + ------- + tuple of (np.ndarray, np.ndarray, np.ndarray) + - Top N experimental intensity vector + - Top N predicted intensity vector + - Top N matching index pairs, shape (top_n, 2) """ num_peaks = min(top_n, len(aligned_pred_intensity)) top_pred_indices = np.argsort(-aligned_pred_intensity)[:num_peaks] @@ -586,10 +619,12 @@ def _calculate_similarity_features( def _generate_features(self) -> pd.DataFrame: """ - Generate spectral similarity features + Generate spectral similarity features. - Returns: - pd.DataFrame: DataFrame containing generated features + Returns + ------- + pd.DataFrame + DataFrame containing generated features. """ psm_df = self.df.copy() pred_spectra_df = self._predict_theoretical_spectra( @@ -790,3 +825,99 @@ def get_full_data(self) -> pd.DataFrame: results and raw data used in feature generation. 
""" return self.results + + @staticmethod + def _resolve_mzml_paths(psms, params): + """Resolve per-PSM mzML file paths from config and PsmContainer.""" + import os + import re + + mzml_dir = params.get("mzmlDir", None) + if mzml_dir is None: + raise ValueError("mzmlDir is required for SpectralSimilarity feature generator.") + + pattern = params.get("spectrumIdPattern", None) + mz_file_names = [] + spectrum_ids = psms.spectrum_ids + + if pattern: + logger.info(f"Using pattern: {pattern} to extract mzML file names from spectrum IDs.") + for spectrum_id in spectrum_ids: + mz_file_names.append(re.match(pattern, spectrum_id).group(1)) + logger.info(f"mzML file names: {list(set(mz_file_names))}") + else: + logger.info("Spectrum ID pattern is not provided.") + if psms.ms_data_file_column is not None: + logger.info(f"Trying to extract mzML file names from {psms.ms_data_file_column}") + logger.info(f"MS data file format: {set(psms.psms[psms.ms_data_file_column])}") + for ms_data_file in psms.psms[psms.ms_data_file_column]: + mz_file_basename = os.path.basename(ms_data_file).split(".")[0] + if mz_file_basename.endswith(".mzML"): + mz_file_basename = mz_file_basename[:-5] + elif mz_file_basename.endswith("mzML"): + mz_file_basename = mz_file_basename[:-4] + mz_file_names.append(mz_file_basename) + logger.info(f"mzML file names: {list(set(mz_file_names))}") + else: + logger.info("MS data file information is not provided.") + logger.info( + r"Trying to use the default pattern: (.+?)\.\d+\.\d+\.\d+ " + "to extract mzML file names from spectrum IDs." 
+ ) + for spectrum_id in spectrum_ids: + mz_file_names.append(re.match(r"(.+?)\.\d+\.\d+\.\d+", spectrum_id).group(1)) + + mz_file_paths = [os.path.join(mzml_dir, f"{mz_file}.mzML") for mz_file in mz_file_names] + for mz_file_path in set(mz_file_paths): + if not os.path.exists(mz_file_path): + logger.error(f"mzML file not found: {mz_file_path}") + + return mz_file_paths + + @classmethod + def from_config(cls, psms, config, params): + mz_file_paths = cls._resolve_mzml_paths(psms, params) + mod_dict = config.get("modificationMap", None) + if mod_dict == {}: + mod_dict = None + + model_type = params.get("model", None) + if model_type is None: + raise ValueError("Model type is required for SpectralSimilarity feature generator.") + + n = len(psms.peptides) + collision_energy = params.get("collisionEnergy", None) + instrument = params.get("instrument", None) + fragmentation_type = params.get("fragmentationType", None) + + return cls( + spectrum_ids=psms.spectrum_ids, + peptides=psms.peptides, + charges=psms.charges, + scan_ids=psms.scan_ids, + mz_file_paths=mz_file_paths, + model_type=model_type, + collision_energies=[collision_energy] * n if collision_energy else None, + instruments=[instrument] * n if instrument else None, + fragmentation_types=[fragmentation_type] * n if fragmentation_type else None, + remove_pre_nxt_aa=config["removePreNxtAA"], + mod_dict=mod_dict, + url=params.get("url"), + ssl=params.get("ssl", True), + top_n=params.get("numTopPeaks", 36), + tolerance_ppm=params.get("tolerance", 20), + ) + + def apply(self, psms, source): + features = self.generate_features() + psms.add_features( + features, + psms_key=[psms.spectrum_column, psms.peptide_column, psms.charge_column], + feature_key=self.id_column, + source=source, + ) + + +feature_generator_factory.register_generator( + "SpectralSimilarity", SpectralSimilarityFeatureGenerator +) diff --git a/optimhc/feature_generator/__init__.py b/optimhc/feature_generator/__init__.py deleted file mode 100644 index 
7d30ecc..0000000 --- a/optimhc/feature_generator/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -import warnings - -warnings.filterwarnings("ignore") diff --git a/optimhc/feature_generator/base_feature_generator.py b/optimhc/feature_generator/base_feature_generator.py deleted file mode 100644 index 7cac35c..0000000 --- a/optimhc/feature_generator/base_feature_generator.py +++ /dev/null @@ -1,35 +0,0 @@ -# feature_generators/base_feature_generator.py - -from abc import ABC, abstractmethod -from typing import List - -import pandas as pd - - -class BaseFeatureGenerator(ABC): - """ - Abstract base class for all feature generators in the rescoring pipeline. - """ - - @property - @abstractmethod - def feature_columns(self) -> List[str]: - """ - Returns a list of feature names generated by the feature generator. - """ - pass - - @property - @abstractmethod - def id_column(self) -> List[str]: - """ - Returns the column or columns used as key or keys to merge features with PSMs. - """ - pass - - @abstractmethod - def generate_features(self) -> pd.DataFrame: - """ - Generates features. - """ - pass diff --git a/optimhc/gui/__init__.py b/optimhc/gui/__init__.py deleted file mode 100644 index a750dbc..0000000 --- a/optimhc/gui/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -optiMHC GUI module for Streamlit-based interface. -""" - -__all__ = ["app"] diff --git a/optimhc/gui/app.py b/optimhc/gui/app.py deleted file mode 100644 index 91db588..0000000 --- a/optimhc/gui/app.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -Main application for the optiMHC GUI. 
-""" - -import streamlit as st -import os -import sys -from pathlib import Path - -# Add optiMHC root to path if needed -optimhc_root = str(Path(__file__).parent.parent.parent) -if optimhc_root not in sys.path: - sys.path.append(optimhc_root) - -# Import style utilities -from optimhc.gui.style import set_page_config, apply_custom_css, footer - -# Import page modules -from optimhc.gui.pages import home, configure, run, results - - -def main(): - """ - Main application entry point. - """ - # Set up page config - set_page_config() - - # Apply custom CSS - apply_custom_css() - - # Initialize session state for navigation - if "page" not in st.session_state: - st.session_state.page = "home" - - # Sidebar navigation - st.sidebar.title("Navigation") - - # Navigation buttons - if st.sidebar.button("Home", use_container_width=True): - st.session_state.page = "home" - st.rerun() - - if st.sidebar.button("Configure", use_container_width=True): - st.session_state.page = "configure" - st.rerun() - - if st.sidebar.button("Run Pipeline", use_container_width=True): - st.session_state.page = "run" - st.rerun() - - if st.sidebar.button("Results", use_container_width=True): - st.session_state.page = "results" - st.rerun() - - # Version info in sidebar - st.sidebar.markdown("---") - from optimhc import __version__ - st.sidebar.info(f"optiMHC v{__version__}") - - # Render the selected page - if st.session_state.page == "home": - home.render() - elif st.session_state.page == "configure": - configure.render() - elif st.session_state.page == "run": - run.render() - elif st.session_state.page == "results": - results.render() - - # Footer - footer() - - -if __name__ == "__main__": - main() diff --git a/optimhc/gui/components/__init__.py b/optimhc/gui/components/__init__.py deleted file mode 100644 index 1338c94..0000000 --- a/optimhc/gui/components/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -GUI components for the optiMHC Streamlit interface. 
-""" - -__all__ = [ - "config_form", - "file_upload", - "log_viewer", - "pipeline_control", - "results_viewer", - "feature_generator_form", - "rescore_form", - "modification_map" -] diff --git a/optimhc/gui/components/config_form.py b/optimhc/gui/components/config_form.py deleted file mode 100644 index 1344885..0000000 --- a/optimhc/gui/components/config_form.py +++ /dev/null @@ -1,399 +0,0 @@ -""" -Configuration form component for optiMHC GUI. -""" - -import os -import streamlit as st -import yaml -from typing import Dict, Any, List, Optional -import json - -# Import optiMHC config defaults -from optimhc.core.config import DEFAULT_CONFIG - - -def feature_generator_form(existing_generators: List[Dict[str, Any]] = None) -> List[Dict[str, Any]]: - """ - Create a form for configuring feature generators. - - Args: - existing_generators: List of existing feature generator configurations - - Returns: - List of feature generator configurations - """ - if existing_generators is None: - existing_generators = [] - - feature_generators = [] - - # Known feature generators and their parameters - generator_options = { - "Basic": {}, - "PWM": {"class": ["I", "II"]}, - "MHCflurry": {}, - "NetMHCpan": {}, - "NetMHCIIpan": {}, - "DeepLC": { - "calibrationCriteria": ["expect", "xcorr", "hyperscore"], - "lowerIsBetter": [True, False], - "calibrationSize": [0.1, 0.2, 0.3] - }, - "SpectralSimilarity": { - "model": ["AlphaPeptDeep_ms2_generic", "AlphaPeptDeep_ms2_HCD", "AlphaPeptDeep_ms2_CID"], - "instrument": ["LUMOS", "QE", "VELOS", "FUSION"], - "numTopPeaks": [10, 20, 36, 50] - }, - "OverlappingPeptide": { - "minOverlapLength": [7, 8, 9], - "minLength": [7, 8, 9], - "maxLength": [15, 20, 25], - "overlappingScore": ["expect", "xcorr", "hyperscore"] - } - } - - st.subheader("Feature Generators") - - # Use session state to keep track of the number of generators - if "num_generators" not in st.session_state: - st.session_state.num_generators = max(1, len(existing_generators)) - - # 
Add/remove generator controls outside of the form - col1, col2 = st.columns([1, 5]) - with col1: - if st.button("➕ Add Generator", key="add_generator"): - st.session_state.num_generators += 1 - st.rerun() - with col2: - if st.session_state.num_generators > 1 and st.button("➖ Remove Last Generator", key="remove_generator"): - st.session_state.num_generators -= 1 - st.rerun() - - # Generate forms for each feature generator - for i in range(st.session_state.num_generators): - with st.expander(f"Feature Generator {i+1}", expanded=True): - existing_gen = {} if i >= len(existing_generators) else existing_generators[i] - - # Feature generator name - generator_name = st.selectbox( - "Generator Type", - options=list(generator_options.keys()), - key=f"gen_type_{i}", - index=list(generator_options.keys()).index(existing_gen.get("name", "Basic")) if existing_gen.get("name") in generator_options else 0 - ) - - # Feature generator parameters - params = {} - if generator_options[generator_name]: - st.markdown("**Parameters:**") - for param_name, param_options in generator_options[generator_name].items(): - existing_params = existing_gen.get("params", {}) - existing_value = existing_params.get(param_name) - - # Handle different parameter types - if isinstance(param_options, list): - if all(isinstance(x, bool) for x in param_options): - param_value = st.checkbox( - param_name, - value=existing_value if existing_value is not None else param_options[0], - key=f"gen_{i}_{param_name}" - ) - elif all(isinstance(x, (int, float)) for x in param_options): - param_value = st.number_input( - param_name, - value=existing_value if existing_value is not None else param_options[0], - key=f"gen_{i}_{param_name}" - ) - else: - param_value = st.selectbox( - param_name, - options=param_options, - index=param_options.index(existing_value) if existing_value in param_options else 0, - key=f"gen_{i}_{param_name}" - ) - else: - param_value = st.text_input( - param_name, - value=str(existing_value) if 
existing_value is not None else "", - key=f"gen_{i}_{param_name}" - ) - - params[param_name] = param_value - - # Add to list of generators - generator_config = {"name": generator_name} - if params: - generator_config["params"] = params - - feature_generators.append(generator_config) - - return feature_generators - - -def rescore_form(existing_rescore: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Create a form for rescoring settings. - - Args: - existing_rescore: Existing rescore configuration - - Returns: - Rescore configuration dictionary - """ - if existing_rescore is None: - existing_rescore = DEFAULT_CONFIG["rescore"] - - st.subheader("Rescoring Settings") - - rescore_model = st.selectbox( - "Rescoring Model", - options=["Percolator", "XGBoost", "RandomForest"], - index=["Percolator", "XGBoost", "RandomForest"].index(existing_rescore.get("model", "Percolator")), - help="Model to use for rescoring" - ) - - test_fdr = st.number_input( - "Test FDR", - min_value=0.001, - max_value=0.1, - value=float(existing_rescore.get("testFDR", 0.01)), - step=0.001, - format="%.3f", - help="FDR threshold for testing" - ) - - num_jobs = st.number_input( - "Number of Jobs", - min_value=1, - max_value=32, - value=int(existing_rescore.get("numJobs", 1)), - help="Number of parallel jobs for model training" - ) - - return { - "model": rescore_model, - "testFDR": test_fdr, - "numJobs": num_jobs - } - - -def config_form(existing_config: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Create a form for configuring the pipeline. 
- - Args: - existing_config: Existing configuration dictionary - - Returns: - Configuration dictionary - """ - if existing_config is None: - existing_config = DEFAULT_CONFIG - - st.subheader("Basic Settings") - - experiment_name = st.text_input( - "Experiment Name", - value=existing_config.get("experimentName", ""), - help="Name of the experiment" - ) - - input_type = st.selectbox( - "Input Type", - options=["pepxml", "pin"], - index=["pepxml", "pin"].index(existing_config.get("inputType", "pepxml")), - help="Type of input file" - ) - - # For GUI, we'll handle input files differently than the direct file paths - input_files = existing_config.get("inputFile", []) - if isinstance(input_files, str): - input_files = [input_files] - - input_files_str = st.text_area( - "Input Files", - value="\n".join(input_files) if input_files else "", - height=100, - help="One file path per line. Use file uploader to add files." - ) - - input_files = [f for f in input_files_str.strip().split("\n") if f] - - decoy_prefix = st.text_input( - "Decoy Prefix", - value=existing_config.get("decoyPrefix", "DECOY_"), - help="Prefix used to identify decoy sequences" - ) - - output_dir = st.text_input( - "Output Directory", - value=existing_config.get("outputDir", "./results"), - help="Directory where results will be saved" - ) - - # Allele settings - st.subheader("Allele Settings") - - alleles = existing_config.get("allele", []) - if isinstance(alleles, str): - alleles = [alleles] - - alleles_str = st.text_area( - "Alleles", - value="\n".join(alleles) if alleles else "", - height=100, - help="One allele per line, e.g., HLA-A*02:01" - ) - - alleles = [a for a in alleles_str.strip().split("\n") if a] - - # Performance settings - st.subheader("Performance Settings") - - col1, col2 = st.columns(2) - - with col1: - num_processes = st.number_input( - "Number of Processes", - min_value=1, - max_value=64, - value=int(existing_config.get("numProcesses", 4)), - help="Number of parallel processes" - ) - - 
with col2: - show_progress = st.checkbox( - "Show Progress", - value=existing_config.get("showProgress", True), - help="Show progress bars during processing" - ) - - col1, col2 = st.columns(2) - - with col1: - visualization = st.checkbox( - "Enable Visualization", - value=existing_config.get("visualization", True), - help="Generate visualizations of results" - ) - - with col2: - remove_pre_nxt_aa = st.checkbox( - "Remove Pre/Next Amino Acids", - value=existing_config.get("removePreNxtAA", False), - help="Remove pre/post neighboring amino acids in sequence processing" - ) - - log_level = st.selectbox( - "Log Level", - options=["DEBUG", "INFO", "WARNING", "ERROR"], - index=["DEBUG", "INFO", "WARNING", "ERROR"].index(existing_config.get("logLevel", "INFO")), - help="Logging verbosity level" - ) - - # Advanced sections - - # Feature generators - feature_generators = feature_generator_form(existing_config.get("featureGenerator", [])) - - # Rescoring - rescore = rescore_form(existing_config.get("rescore", {})) - - # Combine all settings - config = { - "experimentName": experiment_name, - "inputType": input_type, - "inputFile": input_files, - "decoyPrefix": decoy_prefix, - "outputDir": output_dir, - "allele": alleles, - "numProcesses": num_processes, - "showProgress": show_progress, - "visualization": visualization, - "removePreNxtAA": remove_pre_nxt_aa, - "logLevel": log_level, - "featureGenerator": feature_generators, - "rescore": rescore - } - - return config - - -def render_config_summary(config: Dict[str, Any]): - """ - Render a summary of the configuration as a YAML code block. 
- - Args: - config: Configuration dictionary - """ - st.subheader("Configuration Summary") - - # Create a simplified copy of the configuration to display - display_config = config.copy() - - # Convert to YAML string - config_yaml = yaml.dump(display_config, default_flow_style=False, sort_keys=False) - - # Display as a code block with syntax highlighting - st.code(config_yaml, language="yaml") - - -def validate_config(config: Dict[str, Any]) -> List[str]: - """ - Validate configuration for obvious errors. - - Args: - config: Configuration dictionary - - Returns: - List of error messages, empty if configuration is valid - """ - errors = [] - - # Check required fields - required_fields = ["experimentName", "inputType", "inputFile", "outputDir", "allele"] - for field in required_fields: - if field not in config or not config[field]: - errors.append(f"Missing required field: {field}") - - # Check inputType - if config.get("inputType") not in ["pepxml", "pin"]: - errors.append("Input type must be 'pepxml' or 'pin'") - - # Check feature generators - generators = config.get("featureGenerator", []) - if not generators: - errors.append("At least one feature generator is required") - - # Check if SpectralSimilarity has required parameters - for gen in generators: - if gen.get("name") == "SpectralSimilarity": - params = gen.get("params", {}) - # Check instrument - instrument = params.get("instrument") - if instrument and instrument not in ["QE", "LUMOS", "TIMSTOF", "SCIEXTOF"]: - errors.append(f"Invalid instrument '{instrument}' for SpectralSimilarity. 
Must be one of: QE, LUMOS, TIMSTOF, SCIEXTOF") - - # Check mzML directory - if "mzmlDir" not in params: - errors.append("SpectralSimilarity requires 'mzmlDir' parameter") - - # Check spectrum ID pattern - if "spectrumIdPattern" not in params: - errors.append("SpectralSimilarity requires 'spectrumIdPattern' parameter to extract mzML filenames from spectrum IDs") - - # Check rescore settings - rescore = config.get("rescore", {}) - if not rescore or "model" not in rescore: - errors.append("Rescore model is required") - - if "testFDR" in rescore and (rescore["testFDR"] <= 0 or rescore["testFDR"] > 1): - errors.append("Test FDR must be between 0 and 1") - - # Check modification map format - mod_map = config.get("modificationMap", {}) - for mass, unimod in mod_map.items(): - if not unimod.startswith("UNIMOD:"): - errors.append(f"Invalid UNIMOD format for mass {mass}: {unimod}. Must start with 'UNIMOD:'") - - return errors diff --git a/optimhc/gui/components/feature_generator_form.py b/optimhc/gui/components/feature_generator_form.py deleted file mode 100644 index 44e7e1d..0000000 --- a/optimhc/gui/components/feature_generator_form.py +++ /dev/null @@ -1,267 +0,0 @@ -""" -Feature generator form component for optiMHC GUI. -""" - -import streamlit as st -from typing import Dict, Any, List - -def feature_generator_form(existing_generators: List[Dict[str, Any]] = None) -> List[Dict[str, Any]]: - """ - Create a form for configuring feature generators. 
- - Args: - existing_generators: List of existing feature generator configurations - - Returns: - List of feature generator configurations - """ - if existing_generators is None: - existing_generators = [] - - # Convert existing generators to a dict for easier lookup - existing_gen_dict = {} - for gen in existing_generators: - existing_gen_dict[gen["name"]] = gen.get("params", {}) - - feature_generators = [] - - st.subheader("Feature Generators") - - # Determine the MHC class from existing configuration - # Look for PWM first as it has explicit class parameter - mhc_class = None - for gen in existing_generators: - if gen["name"] == "PWM" and "params" in gen and "class" in gen["params"]: - mhc_class = gen["params"]["class"] - break - - # If PWM not found, infer from presence of NetMHCIIpan - if mhc_class is None: - if any(gen["name"] == "NetMHCIIpan" for gen in existing_generators): - mhc_class = "II" - else: - mhc_class = "I" # Default to class I - - # MHC class selection - mhc_class = st.radio( - "MHC Class", - options=["I", "II"], - index=0 if mhc_class == "I" else 1, - horizontal=True, - help="Select MHC class for appropriate feature generators." 
- ) - - st.markdown("---") - st.markdown("Select which feature generators to use in the pipeline:") - - # Basic feature generator (always available) - if st.checkbox("Basic", value="Basic" in existing_gen_dict or not existing_generators, key="basic_gen"): - feature_generators.append({"name": "Basic"}) - - # PWM feature generator (class parameter set automatically based on MHC class selection) - if st.checkbox("PWM", value="PWM" in existing_gen_dict, key="pwm_gen"): - feature_generators.append({ - "name": "PWM", - "params": {"class": mhc_class} - }) - - # Class I specific generators - if mhc_class == "I": - # MHCflurry (class I only) - if st.checkbox("MHCflurry", value="MHCflurry" in existing_gen_dict, key="mhcflurry_gen"): - feature_generators.append({"name": "MHCflurry"}) - - # NetMHCpan (class I only) - if st.checkbox("NetMHCpan", value="NetMHCpan" in existing_gen_dict, key="netmhcpan_gen"): - feature_generators.append({"name": "NetMHCpan"}) - - # Class II specific generators - else: # mhc_class == "II" - # NetMHCIIpan (class II only) - if st.checkbox("NetMHCIIpan", value="NetMHCIIpan" in existing_gen_dict, key="netmhciipan_gen"): - feature_generators.append({"name": "NetMHCIIpan"}) - - # DeepLC feature generator (available for both classes) - if st.checkbox("DeepLC", value="DeepLC" in existing_gen_dict, key="deeplc_gen"): - deeplc_params = {} - - col1, col2 = st.columns(2) - with col1: - calibration_criteria = st.text_input( - "Calibration Criteria", - value=existing_gen_dict.get("DeepLC", {}).get("calibrationCriteria", "expect"), - key="deeplc_calibration_criteria", - help="Criteria for calibration (e.g., expect, xcorr, hyperscore)" - ) - deeplc_params["calibrationCriteria"] = calibration_criteria - - with col2: - lower_is_better = st.checkbox( - "Lower Is Better", - value=existing_gen_dict.get("DeepLC", {}).get("lowerIsBetter", True), - key="deeplc_lower_is_better", - help="Whether lower values of the calibration criteria are better (True for expect, False for 
xcorr/hyperscore)" - ) - deeplc_params["lowerIsBetter"] = lower_is_better - - calibration_size = st.slider( - "Calibration Size", - min_value=0.05, - max_value=0.5, - value=float(existing_gen_dict.get("DeepLC", {}).get("calibrationSize", 0.1)), - step=0.05, - key="deeplc_calibration_size", - help="Fraction of PSMs to use for calibration (0.05-0.5)" - ) - deeplc_params["calibrationSize"] = calibration_size - - feature_generators.append({"name": "DeepLC", "params": deeplc_params}) - - # SpectralSimilarity feature generator (with AlphaPeptDeep as default) - if st.checkbox("SpectralSimilarity", value="SpectralSimilarity" in existing_gen_dict or not existing_generators, key="spectra_similarity_gen"): - ss_params = {} - - st.markdown("#### SpectralSimilarity Settings") - - model = st.selectbox( - "Model", - options=["AlphaPeptDeep_ms2_generic"], - index=["AlphaPeptDeep_ms2_generic"].index( - existing_gen_dict.get("SpectralSimilarity", {}).get("model", "AlphaPeptDeep_ms2_generic") - ), - key="spectra_similarity_model", - help="Prediction model for theoretical spectra" - ) - ss_params["model"] = model - - instrument = st.selectbox( - "Instrument", - options=["QE", "LUMOS", "TIMSTOF", "SCIEXTOF"], - index=["QE", "LUMOS", "TIMSTOF", "SCIEXTOF"].index( - existing_gen_dict.get("SpectralSimilarity", {}).get("instrument", "LUMOS") - ), - key="spectra_similarity_instrument", - help="Available instruments: QE, LUMOS, TIMSTOF, SCIEXTOF" - ) - ss_params["instrument"] = instrument - - # mzML directory path - mzml_dir = st.text_input( - "mzML Directory Path", - value=existing_gen_dict.get("SpectralSimilarity", {}).get("mzmlDir", "./data"), - key="spectra_similarity_mzml_dir", - help="Path to directory containing mzML files" - ) - if mzml_dir: - ss_params["mzmlDir"] = mzml_dir - - # Spectrum ID pattern - spectrum_id_pattern = st.text_input( - "Spectrum ID Pattern", - value=existing_gen_dict.get("SpectralSimilarity", {}).get("spectrumIdPattern", "(.+?)\\.\\d+\\.\\d+\\.\\d+"), - 
key="spectra_similarity_spectrum_id_pattern", - help="Regular expression pattern to extract mzML filename from spectrum IDs. Default pattern: (.+?)\\.\\d+\\.\\d+\\.\\d+" - ) - if spectrum_id_pattern: - ss_params["spectrumIdPattern"] = spectrum_id_pattern - - collision_energy = st.number_input( - "Collision Energy", - min_value=20, - max_value=40, - value=int(existing_gen_dict.get("SpectralSimilarity", {}).get("collisionEnergy", 28)), - key="spectra_similarity_collision_energy", - help="Collision energy used during acquisition (typical range: 25-30)" - ) - ss_params["collisionEnergy"] = collision_energy - - tolerance = st.slider( - "Tolerance (ppm)", - min_value=10, - max_value=50, - value=int(existing_gen_dict.get("SpectralSimilarity", {}).get("tolerance", 20)), - step=5, - key="spectra_similarity_tolerance", - help="Mass tolerance in ppm for peak matching (10-50 ppm)" - ) - ss_params["tolerance"] = tolerance - - num_top_peaks = st.slider( - "Number of Top Peaks", - min_value=10, - max_value=100, - value=int(existing_gen_dict.get("SpectralSimilarity", {}).get("numTopPeaks", 36)), - step=2, - key="spectra_similarity_num_top_peaks", - help="Number of most intense peaks to consider for matching" - ) - ss_params["numTopPeaks"] = num_top_peaks - - url = st.text_input( - "API URL", - value=existing_gen_dict.get("SpectralSimilarity", {}).get("url", "koina.wilhelmlab.org:443"), - key="spectra_similarity_url", - help="AlphaPept API URL (default: koina.wilhelmlab.org:443)" - ) - if url: - ss_params["url"] = url - - feature_generators.append({"name": "SpectralSimilarity", "params": ss_params}) - - # OverlappingPeptide feature generator - if st.checkbox("OverlappingPeptide", value="OverlappingPeptide" in existing_gen_dict, key="overlapping_peptide_gen"): - op_params = {} - - st.markdown("#### OverlappingPeptide Settings") - - col1, col2 = st.columns(2) - with col1: - min_overlap_length = st.number_input( - "Min Overlap Length", - min_value=5, - max_value=15, - 
value=int(existing_gen_dict.get("OverlappingPeptide", {}).get("minOverlapLength", 7)), - key="op_min_overlap_length", - help="Minimum number of amino acids that must overlap" - ) - op_params["minOverlapLength"] = min_overlap_length - - with col2: - overlapping_score = st.text_input( - "Overlapping Score", - value=existing_gen_dict.get("OverlappingPeptide", {}).get("overlappingScore", "expect"), - key="op_overlapping_score", - help="Score to use for overlapping peptides (e.g., expect, xcorr, hyperscore)" - ) - op_params["overlappingScore"] = overlapping_score - - col1, col2 = st.columns(2) - with col1: - min_length = st.number_input( - "Min Length", - min_value=5, - max_value=15, - value=int(existing_gen_dict.get("OverlappingPeptide", {}).get("minLength", 7 if mhc_class == "I" else 9)), - key="op_min_length", - help="Minimum peptide length to consider" - ) - op_params["minLength"] = min_length - - with col2: - max_length = st.number_input( - "Max Length", - min_value=10, - max_value=50, - value=int(existing_gen_dict.get("OverlappingPeptide", {}).get("maxLength", 20 if mhc_class == "I" else 30)), - key="op_max_length", - help="Maximum peptide length to consider" - ) - op_params["maxLength"] = max_length - - feature_generators.append({"name": "OverlappingPeptide", "params": op_params}) - - # Warning if no generators selected - if not feature_generators: - st.warning("Please select at least one feature generator.") - - return feature_generators diff --git a/optimhc/gui/components/file_upload.py b/optimhc/gui/components/file_upload.py deleted file mode 100644 index f830666..0000000 --- a/optimhc/gui/components/file_upload.py +++ /dev/null @@ -1,159 +0,0 @@ -""" -File upload component for optiMHC GUI. 
-""" - -import os -import streamlit as st -import yaml -import tempfile -from typing import Dict, Any, Optional, Tuple -from optimhc.gui.utils import load_config_from_yaml - - -def config_file_uploader() -> Optional[Dict[str, Any]]: - """ - Display a file uploader for configuration files. - - Returns: - Configuration dictionary if a file is uploaded, None otherwise - """ - uploaded_file = st.file_uploader( - "Upload configuration file", - type=["yaml", "yml"], - help="Upload a YAML configuration file" - ) - - if uploaded_file is not None: - try: - config = yaml.safe_load(uploaded_file) - st.success(f"Configuration file '{uploaded_file.name}' loaded successfully") - return config - except Exception as e: - st.error(f"Error loading configuration file: {str(e)}") - return None - - return None - - -def input_path_field(input_type: str, value: str = "", placeholder: str = "") -> str: - """ - Display an input field for file paths. - - Args: - input_type: Type of input (pepxml, pin, mzML directory) - value: Current value - placeholder: Placeholder text - - Returns: - String containing file paths, one per line - """ - if input_type.lower() in ["pepxml", "pin"]: - help_text = f"Enter the full path to your {input_type} files, one per line" - elif input_type.lower() == "mzml": - help_text = "Enter the full path to your mzML directory" - else: - help_text = "Enter file paths, one per line" - - paths = st.text_area( - f"{input_type} File Paths", - value=value, - placeholder=placeholder, - height=100, - help=help_text, - key=f"{input_type.lower()}_paths" - ) - - return paths - - -def yaml_example(example_type: str = "class_i") -> str: - """ - Return an example YAML configuration by reading from example files. 
- - Args: - example_type: Type of example (class_i or class_ii) - - Returns: - Example YAML configuration as a string - """ - # Get the path to the examples directory - current_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - ) - examples_dir = os.path.join(current_dir, "examples") - - # Determine which example file to read - if example_type.lower() == "class_i": - example_file = os.path.join(examples_dir, "classI_example.yaml") - default_config = """ -experimentName: class_I_example -inputType: pepxml -inputFile: - - ./data/YE_20180428_SK_HLA_A0202_3Ips_a50mio_R1_01.pep.xml -decoyPrefix: DECOY_ -outputDir: ./examples/results/class_I_example -visualization: True -removePreNxtAA: False -numProcesses: 4 -showProgress: True -modificationMap: - "147.035385": "UNIMOD:35" # Oxidation (M) - "160.030649": "UNIMOD:4" # Carbamidomethyl (C) -allele: - - HLA-A*02:02 -featureGenerator: - - name: Basic - - name: PWM - params: - class: I - - name: MHCflurry - - name: NetMHCpan -rescore: - testFDR: 0.01 - model: Percolator - numJobs: 4 -""" - else: # class_ii - example_file = os.path.join(examples_dir, "classII_example.yaml") - default_config = """ -experimentName: class_II_example -inputType: pepxml -inputFile: - - ./data/AG20201214_FAIMS_DPB0101_DPA0201_93e6_1hr.pep.xml -decoyPrefix: DECOY_ -outputDir: ./examples/results/class_II_example -visualization: True -removePreNxtAA: False -numProcesses: 4 -showProgress: True -modificationMap: - "147.035385": "UNIMOD:35" # Oxidation (M) - "160.030649": "UNIMOD:4" # Carbamidomethyl (C) -allele: - - HLA-DPA1*02:01-DPB1*01:01 -featureGenerator: - - name: Basic - - name: PWM - params: - class: II - - name: NetMHCIIpan -rescore: - testFDR: 0.01 - model: Percolator - numJobs: 4 -""" - - try: - if not os.path.exists(example_file): - st.warning(f"Example file not found: {example_file}, using default configuration") - return default_config - - with open(example_file, 'r') as f: - content = 
f.read() - if not content: - st.warning(f"Example file is empty: {example_file}, using default configuration") - return default_config - return content - except Exception as e: - st.warning(f"Error reading example file: {str(e)}, using default configuration") - return default_config diff --git a/optimhc/gui/components/log_viewer.py b/optimhc/gui/components/log_viewer.py deleted file mode 100644 index c1719ce..0000000 --- a/optimhc/gui/components/log_viewer.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -Log viewer component for optiMHC GUI. -""" - -import os -from typing import List, Optional -import streamlit as st - - -def display_logs(logs: List[str]): - """ - Display logs as read-only text with auto-scrolling. - - Args: - logs: List of log messages - """ - if not logs: - st.info("No logs to display yet...") - return - - # Join logs with newlines - log_text = "\n".join(logs) - - # Use a container with custom CSS to create a taller scrollable area - log_container = st.container() - - with log_container: - # Add custom CSS for taller log area with scrollbar - st.markdown(""" - - """, unsafe_allow_html=True) - - # Use st.code for read-only display - st.code(log_text, language="plain") - - # Add JavaScript to auto-scroll to bottom - # This ensures the latest logs are always visible - auto_scroll_js = """ - - """ - st.components.v1.html(auto_scroll_js, height=0) - - -def find_pipeline_log_file() -> Optional[str]: - """ - Find the pipeline log file based on configuration. 
- - Returns: - Path to the log file or None if not found - """ - # Try to get output directory and experiment name from config - output_dir = None - experiment_name = None - - if "config" in st.session_state: - config = st.session_state.config - output_dir = config.get("outputDir") - experiment_name = config.get("experimentName") - - # First, check if we already know the log file path from pipeline execution - if "pipeline_log_path" in st.session_state and st.session_state.pipeline_log_path: - log_path = st.session_state.pipeline_log_path - if os.path.exists(log_path): - return log_path - - # Next, try to find log file in the expected pipeline output directory - if output_dir and experiment_name: - experiment_dir = os.path.join(output_dir, experiment_name) - if os.path.exists(experiment_dir): - # Pipeline's default log file - log_path = os.path.join(experiment_dir, "log") - if os.path.exists(log_path): - return log_path - - # Look for any log files in the experiment directory - for root, _, files in os.walk(experiment_dir): - for file in files: - if file.endswith(".log") or file == "log": - return os.path.join(root, file) - - # If no log in experiment directory, search the main output directory - if output_dir and os.path.exists(output_dir): - log_files = [] - for root, _, files in os.walk(output_dir): - log_files.extend([os.path.join(root, f) for f in files - if f.endswith(".log") or f == "log"]) - - if log_files: - # Return the most recently modified log file - return max(log_files, key=os.path.getmtime) - - return None - - -def read_log_file(log_path, max_lines=1000): - """ - Read log content from file. 
- - Args: - log_path: Path to the log file - max_lines: Maximum number of lines to read - - Returns: - List of log lines - """ - try: - if not os.path.exists(log_path): - return [] - - with open(log_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - # Return the last max_lines lines - return [line.rstrip() for line in lines[-max_lines:]] - except Exception as e: - print(f"Error reading log file: {str(e)}") - return [] - - -def update_logs(): - """ - Update logs from the log file. - - Returns: - True if logs were updated, False otherwise - """ - # Find the log file - log_path = find_pipeline_log_file() - if not log_path: - return False - - # Read the log file - logs = read_log_file(log_path) - if not logs: - return False - - # Update session state - st.session_state.logs = logs - return True - - -def log_viewer(process=None): - """ - Simple log viewer with manual refresh button. - - Args: - process: Optional subprocess to monitor for status - """ - st.subheader("Log Output") - - # Status indicator if process is provided - if process: - if process.poll() is None: - st.caption("📋 Process is running...") - else: - ret_code = process.poll() - if ret_code == 0: - st.caption("✅ Process completed successfully. Return code: 0") - else: - st.caption(f"⚠️ Process completed with errors. 
Return code: {ret_code}") - - # Find log file - log_path = find_pipeline_log_file() - - # Controls row - col1, col2, col3 = st.columns([2, 1, 1]) - - with col1: - if log_path: - st.caption(f"Log file: {log_path}") - else: - st.caption("No log file found") - - with col2: - # Clear logs button - if st.button("Clear Logs"): - st.session_state.logs = [] - st.rerun() - - with col3: - # Refresh button - if st.button("Refresh Logs"): - update_logs() - st.rerun() - - # Debug info (collapsed) - with st.expander("Debug Info", expanded=False): - log_path = find_pipeline_log_file() or "Not found" - log_exists = "Yes" if log_path != "Not found" and os.path.exists(log_path) else "No" - log_size = "0" if log_path == "Not found" or not os.path.exists(log_path) else str(os.path.getsize(log_path)) - - process_info = "" - if process: - process_info = f""" -Process PID: {process.pid} -Process Return Code: {process.poll()} -Has stdout: {"Yes" if hasattr(process, 'stdout') and process.stdout else "No"} -""" - - st.code(f"""{process_info} -Log Count: {len(st.session_state.logs) if "logs" in st.session_state else 0} -Log File: {log_path} -Log File Exists: {log_exists} -Log File Size: {log_size} bytes - """) - - # Initialize logs if needed - if "logs" not in st.session_state: - st.session_state.logs = [] - # Try to load logs the first time - update_logs() - - # Display the logs - display_logs(st.session_state.logs) diff --git a/optimhc/gui/components/modification_map.py b/optimhc/gui/components/modification_map.py deleted file mode 100644 index 6a61af0..0000000 --- a/optimhc/gui/components/modification_map.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -Modification map component for optiMHC GUI. -""" - -import streamlit as st -from typing import Dict, Any, Optional - -def modification_map_form(existing_map: Optional[Dict[str, str]] = None) -> Dict[str, str]: - """ - Create a form for modification map configuration. 
- - Args: - existing_map: Existing modification map configuration - - Returns: - Modification map dictionary mapping masses to UNIMOD values - """ - if existing_map is None: - existing_map = { - "147.035385": "UNIMOD:35", # Oxidation (M) - Full modified residue mass - "160.030649": "UNIMOD:4", # Carbamidomethyl (C) - Full modified residue mass - "166.998359": "UNIMOD:21" # Phospho (S) - Full modified residue mass - } - - st.subheader("Modification Map") - - st.markdown(""" - Specify the mapping from modification masses to UNIMOD identifiers. - The mass value should be the FULL modified residue mass (amino acid + modification) as found in pepXML parameters. - All modifications need to be explicitly encoded in the sequence (e.g., C[UNIMOD:4] for carbamidomethylated cysteine). - """) - - # Create a container for the dynamic map - modification_map = {} - - # Use session state to track number of modification entries - if "num_modifications" not in st.session_state: - st.session_state.num_modifications = len(existing_map) - st.session_state.modification_masses = list(existing_map.keys()) - st.session_state.modification_values = list(existing_map.values()) - - # Add/remove modification controls - col1, col2 = st.columns([1, 5]) - with col1: - if st.button("➕ Add Modification", key="add_modification"): - st.session_state.num_modifications += 1 - st.session_state.modification_masses.append("") - st.session_state.modification_values.append("UNIMOD:") - st.rerun() - with col2: - if st.session_state.num_modifications > 0 and st.button("➖ Remove Last Modification", key="remove_modification"): - st.session_state.num_modifications -= 1 - if st.session_state.modification_masses: - st.session_state.modification_masses.pop() - if st.session_state.modification_values: - st.session_state.modification_values.pop() - st.rerun() - - # Create a table-like interface for modifications - if st.session_state.num_modifications > 0: - col1, col2 = st.columns(2) - with col1: - 
st.markdown("**Mass (Residue+Modification)**") - with col2: - st.markdown("**UNIMOD Identifier**") - - for i in range(st.session_state.num_modifications): - col1, col2 = st.columns(2) - with col1: - mass = st.text_input( - "Mass", - value=st.session_state.modification_masses[i] if i < len(st.session_state.modification_masses) else "", - key=f"mod_mass_{i}", - label_visibility="collapsed" - ) - st.session_state.modification_masses[i] = mass - - with col2: - unimod = st.text_input( - "UNIMOD", - value=st.session_state.modification_values[i] if i < len(st.session_state.modification_values) else "UNIMOD:", - key=f"mod_unimod_{i}", - label_visibility="collapsed" - ) - st.session_state.modification_values[i] = unimod - - # Add to modification map - if mass and unimod: - modification_map[mass] = unimod - - # Information about common modifications - with st.expander("Common Modifications (Note: Values are examples, check your pepXML)", expanded=False): - st.markdown(""" - | Mass (Full) | UNIMOD ID | Modification | Target Residues | - |------|-----------|--------------|--------------| - | 147.035385 | UNIMOD:35 | Oxidation | M | - | 160.030649 | UNIMOD:4 | Carbamidomethyl | C | - - Note: These are full masses (amino acid + modification). You must check your pepXML file parameters to find the exact masses used in your data. - """) - - return modification_map diff --git a/optimhc/gui/components/pipeline_control.py b/optimhc/gui/components/pipeline_control.py deleted file mode 100644 index 8dc4726..0000000 --- a/optimhc/gui/components/pipeline_control.py +++ /dev/null @@ -1,177 +0,0 @@ -""" -Pipeline control component for optiMHC GUI. 
-""" - -import os -import subprocess -import sys -import tempfile -import time -from typing import Dict, Any, Optional, Tuple -import streamlit as st -import yaml - -from optimhc.gui.utils import create_temp_config_file, run_pipeline_command - - -def pipeline_status_indicator(running: bool = False, success: Optional[bool] = None): - """ - Display a status indicator for the pipeline. - - Args: - running: Whether the pipeline is currently running - success: Whether the pipeline completed successfully - """ - if running: - st.info("Pipeline is running...") - elif success is not None: - if success: - st.success("Pipeline completed successfully") - else: - st.error("Pipeline failed") - else: - st.info("Pipeline not yet started") - - -def pipeline_control_panel(config: Dict[str, Any]): - """ - Create a control panel for running the pipeline. - - Args: - config: Configuration dictionary - """ - st.subheader("Pipeline Control") - - # Initialize session state - if "pipeline_running" not in st.session_state: - st.session_state.pipeline_running = False - - if "pipeline_process" not in st.session_state: - st.session_state.pipeline_process = None - - if "pipeline_start_time" not in st.session_state: - st.session_state.pipeline_start_time = None - - if "pipeline_config_path" not in st.session_state: - st.session_state.pipeline_config_path = None - - # Display status - col1, col2 = st.columns([1, 3]) - - with col1: - if st.session_state.pipeline_running: - pipeline_status_indicator(running=True) - else: - if st.session_state.pipeline_process is not None: - return_code = st.session_state.pipeline_process.poll() - pipeline_status_indicator(success=(return_code == 0)) - else: - pipeline_status_indicator() - - with col2: - if st.session_state.pipeline_start_time: - elapsed_time = time.time() - st.session_state.pipeline_start_time - st.text(f"Running for: {int(elapsed_time // 60)}m {int(elapsed_time % 60)}s") - - # Control buttons - start_disabled = st.session_state.pipeline_running - 
stop_disabled = not st.session_state.pipeline_running - - col1, col2 = st.columns(2) - - with col1: - if st.button("Start Pipeline", disabled=start_disabled, key="start_pipeline"): - # Check if the configuration is valid - if not config.get("experimentName"): - st.error("Experiment name is required") - return - - if not config.get("inputFile"): - st.error("At least one input file is required") - return - - if not config.get("allele"): - st.error("At least one allele is required") - return - - if not config.get("featureGenerator"): - st.error("At least one feature generator is required") - return - - # Create a temporary configuration file - config_path = create_temp_config_file(config) - st.session_state.pipeline_config_path = config_path - - # Run the pipeline as a subprocess - st.session_state.pipeline_process = run_pipeline_command(config_path) - st.session_state.pipeline_running = True - st.session_state.pipeline_start_time = time.time() - - # Initialize logs - if "logs" not in st.session_state: - st.session_state.logs = [] - - # Rerun to update UI - st.rerun() - - with col2: - if st.button("Stop Pipeline", disabled=stop_disabled, key="stop_pipeline"): - if st.session_state.pipeline_process: - # Terminate the process - st.session_state.pipeline_process.terminate() - st.session_state.pipeline_running = False - - # Wait for process to terminate - try: - st.session_state.pipeline_process.wait(timeout=5) - except subprocess.TimeoutExpired: - # Force kill if it doesn't terminate gracefully - st.session_state.pipeline_process.kill() - - st.warning("Pipeline was stopped by user") - - # No longer need to cleanup config file since it's part of the output - - # Rerun to update UI - st.rerun() - - # Save configuration button - if st.button("Save Configuration to File"): - # Create a download button for the configuration - config_yaml = yaml.dump(config, default_flow_style=False) - - # Use streamlit's download button - filename = f"{config.get('experimentName', 
'optimhc_config')}.yaml" - st.download_button( - label="Download Configuration File", - data=config_yaml, - file_name=filename, - mime="text/yaml" - ) - - -def check_pipeline_status(): - """ - Check the status of a running pipeline. - - Returns: - Tuple of (running, return_code) - """ - running = st.session_state.get("pipeline_running", False) - process = st.session_state.get("pipeline_process", None) - - if process is None: - return False, None - - # Check if process is still running - return_code = process.poll() - - if return_code is not None and running: - # Process has completed - st.session_state.pipeline_running = False - - # No longer need to cleanup config file since it's part of the output - - return False, return_code - - return running, return_code diff --git a/optimhc/gui/components/rescore_form.py b/optimhc/gui/components/rescore_form.py deleted file mode 100644 index e30b540..0000000 --- a/optimhc/gui/components/rescore_form.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Rescoring form component for optiMHC GUI. -""" - -import streamlit as st -from typing import Dict, Any - -# Import optiMHC config defaults -from optimhc.core.config import DEFAULT_CONFIG - -def rescore_form(existing_rescore: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Create a form for rescoring settings. 
- - Args: - existing_rescore: Existing rescore configuration - - Returns: - Rescore configuration dictionary - """ - if existing_rescore is None: - existing_rescore = DEFAULT_CONFIG["rescore"] - - st.subheader("Rescoring Settings") - - rescore_model = st.selectbox( - "Rescoring Model", - options=["Percolator", "XGBoost", "RandomForest"], - index=["Percolator", "XGBoost", "RandomForest"].index(existing_rescore.get("model", "Percolator")), - help="Model to use for rescoring" - ) - - test_fdr = st.number_input( - "Test FDR", - min_value=0.001, - max_value=0.1, - value=float(existing_rescore.get("testFDR", 0.01)), - step=0.001, - format="%.3f", - help="FDR threshold for testing" - ) - - num_jobs = st.number_input( - "Number of Jobs", - min_value=1, - max_value=32, - value=int(existing_rescore.get("numJobs", 1)), - help="Number of parallel jobs for model training" - ) - - return { - "model": rescore_model, - "testFDR": test_fdr, - "numJobs": num_jobs - } diff --git a/optimhc/gui/components/results_viewer.py b/optimhc/gui/components/results_viewer.py deleted file mode 100644 index 05be9e7..0000000 --- a/optimhc/gui/components/results_viewer.py +++ /dev/null @@ -1,248 +0,0 @@ -""" -Results viewer component for optiMHC GUI. -""" - -import os -import glob -import base64 -from typing import List, Dict, Any, Optional -import streamlit as st -import pandas as pd -import plotly.express as px -from pathlib import Path - -from optimhc.gui.utils import scan_output_directory - - -def get_image_as_base64(file_path): - """ - Get image file as base64 string. - - Args: - file_path: Path to image file - - Returns: - Base64 encoded image - """ - with open(file_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode() - - -def display_image(file_path, caption=None): - """ - Display an image with caption. 
- - Args: - file_path: Path to image file - caption: Optional caption for the image - """ - try: - # Use HTML to have more control over image sizing - img_format = file_path.split('.')[-1].lower() - img_base64 = get_image_as_base64(file_path) - html = f'' - - if caption: - html = f'{html}
{caption}
' - - st.markdown(html, unsafe_allow_html=True) - except Exception as e: - st.error(f"Error displaying image {os.path.basename(file_path)}: {str(e)}") - - -def display_csv(file_path, caption=None): - """ - Display a CSV file as a dataframe. - - Args: - file_path: Path to CSV file - caption: Optional caption for the table - """ - try: - # Determine file type and use appropriate reader - if file_path.endswith('.csv'): - df = pd.read_csv(file_path) - elif file_path.endswith('.tsv'): - df = pd.read_csv(file_path, sep='\t') - elif file_path.endswith('.xlsx'): - df = pd.read_excel(file_path) - else: - st.warning(f"Unsupported file format: {os.path.basename(file_path)}") - return - - if caption: - st.markdown(f"**{caption}**") - - # Display dataframe with pagination if large - if len(df) > 100: - # Add pagination controls - page_size = st.slider(f"Rows per page for {os.path.basename(file_path)}", 10, 100, 50) - page = st.number_input(f"Page for {os.path.basename(file_path)}", min_value=1, max_value=max(1, len(df) // page_size + 1), value=1) - - start_idx = (page - 1) * page_size - end_idx = min(start_idx + page_size, len(df)) - - st.dataframe(df.iloc[start_idx:end_idx]) - st.text(f"Showing rows {start_idx+1}-{end_idx} of {len(df)}") - else: - st.dataframe(df) - - # Option to download the file - csv_data = df.to_csv(index=False).encode('utf-8') - st.download_button( - label=f"Download {os.path.basename(file_path)}", - data=csv_data, - file_name=os.path.basename(file_path), - mime="text/csv" - ) - except Exception as e: - st.error(f"Error displaying data from {os.path.basename(file_path)}: {str(e)}") - - -def results_viewer(output_dir: str): - """ - Display results from the output directory, focusing only on figures. 
- - Args: - output_dir: Path to the output directory - """ - st.subheader("Results Visualization") - - if not os.path.exists(output_dir): - st.warning(f"Output directory '{output_dir}' does not exist yet") - return - - # Check if there's an experiment name in the session state - experiment_name = None - if "config" in st.session_state: - experiment_name = st.session_state.config.get("experimentName") - - # Look for figures in experiment subdirectory if it exists - if experiment_name: - experiment_dir = os.path.join(output_dir, experiment_name) - if os.path.exists(experiment_dir): - # Update output_dir to use the experiment directory - output_dir = experiment_dir - - # Check for figures subfolder - figures_dir = os.path.join(output_dir, "figures") - has_figures_subdir = os.path.exists(figures_dir) and os.path.isdir(figures_dir) - - # Get all figure files - figure_files = [] - - # First check figures subdirectory if it exists - if has_figures_subdir: - figure_files = glob.glob(os.path.join(figures_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.jpg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.pdf"))) - - # If no figures found in subfolder (or none exists), check main directory - if not figure_files: - figure_files = glob.glob(os.path.join(output_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(output_dir, "*.jpg"))) - figure_files.extend(glob.glob(os.path.join(output_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(output_dir, "*.pdf"))) - - # Show figures - if figure_files: - figure_count = len(figure_files) - - # Show information about found figures - st.caption(f"Found {figure_count} figures in {figures_dir if has_figures_subdir else output_dir}") - - # Create a selector for figures if there are more than one - if figure_count > 1: - # Sort figures by name for consistency - figure_files.sort() - figure_names = 
[os.path.basename(f) for f in figure_files] - - # Figure selection dropdown - selected_figure = st.selectbox( - "Select figure to view:", - figure_names, - key="figure_selector" - ) - - selected_index = figure_names.index(selected_figure) - - # Display the selected figure - st.markdown("### Selected Figure") - display_image(figure_files[selected_index], caption=selected_figure) - - # Show thumbnail gallery with 3 columns - st.markdown("### All Figures") - - # Use columns for gallery display - cols = st.columns(min(3, figure_count)) - for i, file_path in enumerate(figure_files): - with cols[i % 3]: - try: - img_format = file_path.split('.')[-1].lower() - if img_format in ['png', 'jpg', 'jpeg', 'svg']: - img_base64 = get_image_as_base64(file_path) - html = f'' - st.markdown(html, unsafe_allow_html=True) - st.caption(os.path.basename(file_path)) - else: - # For non-image formats like PDF - st.info(f"{os.path.basename(file_path)} (PDF file)") - except Exception as e: - st.error(f"Error with image {os.path.basename(file_path)}") - else: - # Just show the single figure - display_image(figure_files[0], caption=os.path.basename(figure_files[0])) - else: - st.info(f"No figures found in the output directory. Run the pipeline to generate results.") - - # Refresh button - if st.button("Refresh Results", key="refresh_results_btn"): - st.rerun() - - -def results_summary(output_dir: str): - """ - Display a summary of results. 
- - Args: - output_dir: Path to the output directory - """ - if not os.path.exists(output_dir): - return - - # Look for summary statistics - summary_files = glob.glob(os.path.join(output_dir, "**/summary*.csv"), recursive=True) - summary_files.extend(glob.glob(os.path.join(output_dir, "**/stats*.csv"), recursive=True)) - - if not summary_files: - return - - st.subheader("Results Summary") - - try: - # Use the first summary file found - df = pd.read_csv(summary_files[0]) - - # Display key statistics - cols = st.columns(3) - - if 'total_psms' in df.columns: - with cols[0]: - st.metric("Total PSMs", df['total_psms'].iloc[0]) - - if 'target_psms' in df.columns: - with cols[1]: - st.metric("Target PSMs", df['target_psms'].iloc[0]) - - if 'decoy_psms' in df.columns: - with cols[2]: - st.metric("Decoy PSMs", df['decoy_psms'].iloc[0]) - - # Generate a simple chart if possible - if {'fdr', 'target_psms'}.issubset(df.columns): - st.subheader("FDR vs. Target PSMs") - fig = px.line(df, x='fdr', y='target_psms', title='PSMs at different FDR thresholds') - st.plotly_chart(fig, use_container_width=True) - except Exception as e: - st.error(f"Error generating results summary: {str(e)}") diff --git a/optimhc/gui/pages/__init__.py b/optimhc/gui/pages/__init__.py deleted file mode 100644 index 0be9679..0000000 --- a/optimhc/gui/pages/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Page modules for the optiMHC Streamlit interface. -""" - -__all__ = [ - "home", - "configure", - "run", - "results" -] diff --git a/optimhc/gui/pages/configure.py b/optimhc/gui/pages/configure.py deleted file mode 100644 index 1fb7b07..0000000 --- a/optimhc/gui/pages/configure.py +++ /dev/null @@ -1,308 +0,0 @@ -""" -Configuration page for the optiMHC GUI. 
-""" - -import streamlit as st -import os -import yaml -from typing import Dict, Any - -from optimhc.gui.style import main_header, sub_header, info_box, warning_box -from optimhc.gui.components.config_form import validate_config, render_config_summary -from optimhc.gui.components.file_upload import config_file_uploader, input_path_field, yaml_example -from optimhc.gui.components.feature_generator_form import feature_generator_form -from optimhc.gui.components.rescore_form import rescore_form -from optimhc.gui.components.modification_map import modification_map_form - - -def render(): - """ - Render the configuration page. - """ - main_header("Configure Pipeline") - - st.markdown(""" - Configure your optiMHC pipeline by filling out the form below, or upload an existing configuration file. - """) - - # Initialize session state for configuration - if "config" not in st.session_state: - st.session_state.config = {} - - # File upload option and example buttons - col1, col2, col3 = st.columns([2, 1, 1]) - - with col1: - uploaded_config = config_file_uploader() - - if uploaded_config: - # Validate configuration - errors = validate_config(uploaded_config) - - if errors: - for error in errors: - st.error(error) - else: - # Save configuration to session state - st.session_state.config = uploaded_config - - # Handle modification map in session state for the form - if "modificationMap" in uploaded_config and uploaded_config["modificationMap"]: - st.session_state.num_modifications = len(uploaded_config["modificationMap"]) - st.session_state.modification_masses = list(uploaded_config["modificationMap"].keys()) - st.session_state.modification_values = list(uploaded_config["modificationMap"].values()) - - st.success("Configuration loaded successfully!") - st.rerun() - - with col2: - if st.button("Load Class I Example", use_container_width=True): - example_config = yaml.safe_load(yaml_example("class_i")) - # Ensure modification map is included - if "modificationMap" not in 
example_config or not example_config["modificationMap"]: - example_config["modificationMap"] = { - "147.035385": "UNIMOD:35", # Oxidation (M) - full modified residue mass - "160.030649": "UNIMOD:4" # Carbamidomethyl (C) - full modified residue mass - } - st.session_state.config = example_config - # Also update the modification map in session state for the form - st.session_state.num_modifications = len(example_config["modificationMap"]) - st.session_state.modification_masses = list(example_config["modificationMap"].keys()) - st.session_state.modification_values = list(example_config["modificationMap"].values()) - st.success("Class I example configuration loaded!") - st.rerun() - - with col3: - if st.button("Load Class II Example", use_container_width=True): - example_config = yaml.safe_load(yaml_example("class_ii")) - # Ensure modification map is included - if "modificationMap" not in example_config or not example_config["modificationMap"]: - example_config["modificationMap"] = { - "147.035385": "UNIMOD:35", # Oxidation (M) - full modified residue mass - "160.030649": "UNIMOD:4" # Carbamidomethyl (C) - full modified residue mass - } - st.session_state.config = example_config - # Also update the modification map in session state for the form - st.session_state.num_modifications = len(example_config["modificationMap"]) - st.session_state.modification_masses = list(example_config["modificationMap"].keys()) - st.session_state.modification_values = list(example_config["modificationMap"].values()) - st.success("Class II example configuration loaded!") - st.rerun() - - # Setup configuration tabs - tabs = st.tabs(["Basic Settings", "Feature Generators", "Rescoring", "Summary"]) - - # Tab 1: Basic Settings - with tabs[0]: - st.subheader("Basic Settings") - - # Experiment name - experiment_name = st.text_input( - "Experiment Name", - value=st.session_state.config.get("experimentName", ""), - placeholder="my_experiment", - help="Name of the experiment" - ) - - # Input type 
and files - col1, col2 = st.columns(2) - with col1: - input_type = st.selectbox( - "Input Type", - options=["pepxml", "pin"], - index=["pepxml", "pin"].index(st.session_state.config.get("inputType", "pepxml")), - help="Type of input file" - ) - - with col2: - decoy_prefix = st.text_input( - "Decoy Prefix", - value=st.session_state.config.get("decoyPrefix", "DECOY_"), - help="Prefix used to identify decoy sequences" - ) - - # Input files - use direct path input - input_files = st.session_state.config.get("inputFile", []) - if isinstance(input_files, str): - input_files = [input_files] - - input_files_str = input_path_field( - input_type, - value="\n".join(input_files) if input_files else "", - placeholder="../examples/data/YE_20180428_SK_HLA_A0202_3Ips_a50mio_R1_01.pep.xml" - ) - - # Output directory - output_dir = st.text_input( - "Output Directory", - value=st.session_state.config.get("outputDir", "../examples/results"), - placeholder="../examples/results/my_experiment", - help="Directory where results will be saved" - ) - - # Allele settings - st.subheader("Allele Settings") - - alleles = st.session_state.config.get("allele", []) - if isinstance(alleles, str): - alleles = [alleles] - - alleles_str = st.text_area( - "Alleles", - value="\n".join(alleles) if alleles else "", - placeholder="HLA-A*02:01\nHLA-B*07:02", - height=100, - help="One allele per line, e.g., HLA-A*02:01 for class I or HLA-DPA1*02:01-DPB1*01:01 for class II" - ) - - # Modification map - st.subheader("Modification Map") - st.markdown("Maps full modified residue masses to UNIMOD identifiers.") - modification_map = modification_map_form(st.session_state.config.get("modificationMap", {})) - - # Performance settings - st.subheader("Performance Settings") - - col1, col2 = st.columns(2) - - with col1: - num_processes = st.number_input( - "Number of Processes", - min_value=1, - max_value=64, - value=int(st.session_state.config.get("numProcesses", 4)), - help="Number of parallel processes" - ) - - with 
col2: - show_progress = st.checkbox( - "Show Progress", - value=st.session_state.config.get("showProgress", True), - help="Show progress bars during processing" - ) - - col1, col2 = st.columns(2) - - with col1: - visualization = st.checkbox( - "Enable Visualization", - value=st.session_state.config.get("visualization", True), - help="Generate visualizations of results" - ) - - with col2: - remove_pre_nxt_aa = st.checkbox( - "Remove Pre/Next Amino Acids", - value=st.session_state.config.get("removePreNxtAA", False), - help="Remove pre/post neighboring amino acids in sequence processing" - ) - - log_level = st.selectbox( - "Log Level", - options=["DEBUG", "INFO", "WARNING", "ERROR"], - index=["DEBUG", "INFO", "WARNING", "ERROR"].index(st.session_state.config.get("logLevel", "INFO")), - help="Logging verbosity level" - ) - - # Save basic settings - if st.button("Save Basic Settings", key="save_basic"): - # Process input fields - input_files = [f for f in input_files_str.strip().split("\n") if f] - alleles = [a for a in alleles_str.strip().split("\n") if a] - - # Update configuration - st.session_state.config.update({ - "experimentName": experiment_name, - "inputType": input_type, - "inputFile": input_files, - "decoyPrefix": decoy_prefix, - "outputDir": output_dir, - "allele": alleles, - "modificationMap": modification_map, - "numProcesses": num_processes, - "showProgress": show_progress, - "visualization": visualization, - "removePreNxtAA": remove_pre_nxt_aa, - "logLevel": log_level - }) - - st.success("Basic settings saved!") - st.rerun() - - # Tab 2: Feature Generators - with tabs[1]: - # Feature generators configuration - feature_generators = feature_generator_form(st.session_state.config.get("featureGenerator", [])) - - # Save feature generators - if st.button("Save Feature Generators", key="save_features"): - st.session_state.config["featureGenerator"] = feature_generators - st.success("Feature generators saved!") - - # Tab 3: Rescoring - with tabs[2]: - # 
Rescoring configuration - rescore_config = rescore_form(st.session_state.config.get("rescore", {})) - - # Save rescoring settings - if st.button("Save Rescoring Settings", key="save_rescore"): - st.session_state.config["rescore"] = rescore_config - st.success("Rescoring settings saved!") - - # Tab 4: Summary - with tabs[3]: - # Full configuration summary - if st.session_state.config: - st.subheader("Configuration Summary") - - # Display configuration summary - render_config_summary(st.session_state.config) - - # Validate configuration - errors = validate_config(st.session_state.config) - - if errors: - st.error("Configuration has errors:") - for error in errors: - st.error(f"- {error}") - else: - st.success("Configuration is valid!") - - # Option to download configuration - if st.button("Download Configuration as YAML"): - # Create YAML content - config_yaml = yaml.dump(st.session_state.config, default_flow_style=False) - - # Use streamlit's download button - filename = f"{st.session_state.config.get('experimentName', 'optimhc_config')}.yaml" - st.download_button( - label="Download Configuration File", - data=config_yaml, - file_name=filename, - mime="text/yaml" - ) - else: - st.warning("No configuration has been saved yet. 
Please fill out and save the forms in the other tabs.") - - # Navigation buttons - st.markdown("---") - col1, col2 = st.columns(2) - - with col1: - if st.button("← Back to Home"): - st.session_state.page = "home" - st.rerun() - - with col2: - if st.button("Continue to Run →"): - # Check if we have a valid configuration - if not st.session_state.config: - warning_box("Please configure the pipeline first.") - else: - errors = validate_config(st.session_state.config) - if errors: - for error in errors: - st.error(error) - else: - st.session_state.page = "run" - st.rerun() diff --git a/optimhc/gui/pages/home.py b/optimhc/gui/pages/home.py deleted file mode 100644 index b91b833..0000000 --- a/optimhc/gui/pages/home.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Home page for the optiMHC GUI. -""" - -import streamlit as st -from optimhc.gui.style import main_header, sub_header, info_box - - -def render(): - """ - Render the home page. - """ - main_header("Welcome to optiMHC") - - st.markdown(""" - optiMHC is a high-performance rescoring pipeline for immunopeptidomics data, designed to enhance - peptide identification by integrating multiple feature generators and machine learning-based rescoring. - """) - - # Feature overview - sub_header("Features") - - col1, col2 = st.columns(2) - - with col1: - st.markdown(""" - - **Multiple Feature Generators** - - Basic sequence features - - Position Weight Matrix (PWM) - - MHC binding prediction (MHCflurry, NetMHCpan) - - Chromatographic retention time (DeepLC) - - Spectrum similarity - - Overlapping peptides - - - **Machine Learning Rescoring** - - Percolator - - XGBoost - - Random Forest - """) - - with col2: - st.markdown(""" - - **Flexible Configuration** - - YAML-based configuration - - Command-line interface - - Python API - - - **Comprehensive Visualization** - - ROC curves - - Score distributions - - FDR vs. 
identifications - - Feature importance - """) - - # Quick start guide - sub_header("Quick Start") - - st.markdown(""" - To get started with optiMHC: - - 1. Go to the **Configure** page to set up your pipeline parameters - 2. Upload your input files or specify their paths - 3. Run the pipeline from the **Run** page - 4. View your results in the **Results** page - """) - - # Warning about GUI being in development - info_box(""" - Note: This GUI is currently in development. For advanced use cases or troubleshooting, - please refer to the command-line interface or the documentation. - """) - - # Footer - st.markdown("---") - st.markdown("optiMHC is developed and maintained by Zixiang Shang. For more information, please visit the [GitHub repository](https://github.com/5h4ng/optiMHC).") diff --git a/optimhc/gui/pages/results.py b/optimhc/gui/pages/results.py deleted file mode 100644 index 7e7163c..0000000 --- a/optimhc/gui/pages/results.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Results page for the optiMHC GUI. -""" - -import streamlit as st -import os -import glob -from typing import Dict, Any, List - -from optimhc.gui.style import main_header, sub_header, info_box, warning_box -from optimhc.gui.components.results_viewer import results_viewer - - -def find_figure_files(output_dir: str, experiment_name: str = None) -> List[str]: - """ - Find all figure files in the output directory or its figures subdirectory. 
- - Args: - output_dir: Path to the output directory - experiment_name: Optional experiment name subdirectory - - Returns: - List of paths to figure files - """ - figure_files = [] - - # If experiment name is provided, look in that subdirectory - if experiment_name: - experiment_dir = os.path.join(output_dir, experiment_name) - if os.path.exists(experiment_dir): - # Look for figures subfolder inside experiment directory - figures_dir = os.path.join(experiment_dir, "figures") - if os.path.exists(figures_dir) and os.path.isdir(figures_dir): - figure_files = glob.glob(os.path.join(figures_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.jpg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.pdf"))) - - # If no figures found in subfolder, look in experiment directory - if not figure_files: - figure_files = glob.glob(os.path.join(experiment_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(experiment_dir, "*.jpg"))) - figure_files.extend(glob.glob(os.path.join(experiment_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(experiment_dir, "*.pdf"))) - - # If no figures found in experiment directory or no experiment name, - # check main output directory - if not figure_files: - # Check for figures subfolder in main directory - figures_dir = os.path.join(output_dir, "figures") - if os.path.exists(figures_dir) and os.path.isdir(figures_dir): - figure_files = glob.glob(os.path.join(figures_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.jpg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(figures_dir, "*.pdf"))) - - # If still no figures, check main directory - if not figure_files: - figure_files = glob.glob(os.path.join(output_dir, "*.png")) - figure_files.extend(glob.glob(os.path.join(output_dir, "*.jpg"))) - 
figure_files.extend(glob.glob(os.path.join(output_dir, "*.svg"))) - figure_files.extend(glob.glob(os.path.join(output_dir, "*.pdf"))) - - return figure_files - - -def render(): - """ - Render the results page focusing on figure visualization. - """ - main_header("Results Visualization") - - # Check if we have a configuration - if "config" not in st.session_state or not st.session_state.config: - warning_box("No configuration available. Please configure and run the pipeline first.") - - if st.button("Go to Configuration"): - st.session_state.page = "configure" - st.rerun() - return - - # Get output directory and experiment name from configuration - output_dir = st.session_state.config.get("outputDir", "./results") - experiment_name = st.session_state.config.get("experimentName", "") - - # Allow user to change the output directory - output_dir = st.text_input("Output Directory", value=output_dir) - - # Check if directory exists - if not os.path.exists(output_dir): - warning_box(f"Output directory '{output_dir}' does not exist. 
Please run the pipeline first or check the directory path.") - - # Show navigation buttons - col1, col2 = st.columns(2) - with col1: - if st.button("← Back to Run Pipeline"): - st.session_state.page = "run" - st.rerun() - with col2: - if st.button("Go to Home"): - st.session_state.page = "home" - st.rerun() - return - - # Show experiment details if available - if experiment_name: - sub_header(f"Experiment: {experiment_name}") - - # Display results using our results_viewer component - # The component will handle finding the appropriate figure files - results_viewer(output_dir) - - # Navigation buttons - st.markdown("---") - col1, col2 = st.columns(2) - - with col1: - if st.button("← Back to Run"): - st.session_state.page = "run" - st.rerun() - - with col2: - if st.button("Back to Home"): - st.session_state.page = "home" - st.rerun() diff --git a/optimhc/gui/pages/run.py b/optimhc/gui/pages/run.py deleted file mode 100644 index ee4f4f2..0000000 --- a/optimhc/gui/pages/run.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Run page for the optiMHC GUI. -""" - -import streamlit as st -import os -import time -from typing import Dict, Any - -from optimhc.gui.style import main_header, sub_header, info_box, warning_box -from optimhc.gui.components.pipeline_control import pipeline_control_panel, check_pipeline_status -from optimhc.gui.components.log_viewer import log_viewer -from optimhc.gui.components.config_form import render_config_summary - - -def render(): - """ - Render the run page. - """ - main_header("Run Pipeline") - - # Check if we have a configuration - if "config" not in st.session_state or not st.session_state.config: - warning_box("No configuration available. 
Please configure the pipeline first.") - - if st.button("Go to Configuration"): - st.session_state.page = "configure" - st.rerun() - return - - # Display configuration summary - with st.expander("Configuration Summary", expanded=False): - render_config_summary(st.session_state.config) - - # Pipeline control panel - pipeline_control_panel(st.session_state.config) - - # Separator - st.markdown("---") - - # Check pipeline status - running, return_code = check_pipeline_status() - - # Show log viewer if process exists - if "pipeline_process" in st.session_state and st.session_state.pipeline_process: - log_viewer(st.session_state.pipeline_process) - - # Navigation buttons - col1, col2 = st.columns(2) - - with col1: - if st.button("← Back to Configure"): - st.session_state.page = "configure" - st.rerun() - - with col2: - # Only enable results button if pipeline has completed - results_disabled = running or ("pipeline_process" not in st.session_state) - - if st.button("View Results →", disabled=results_disabled): - st.session_state.page = "results" - st.rerun() diff --git a/optimhc/gui/style.py b/optimhc/gui/style.py deleted file mode 100644 index d83b7aa..0000000 --- a/optimhc/gui/style.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Styling utilities for the optiMHC Streamlit interface. -""" - -import streamlit as st - - -def apply_custom_css(): - """ - Apply custom CSS styling to the Streamlit app. - """ - st.markdown(""" - - """, unsafe_allow_html=True) - - -def set_page_config(): - """ - Set up the Streamlit page configuration with theme and layout settings. - """ - st.set_page_config( - page_title="optiMHC", - page_icon="🧬", - layout="wide", - initial_sidebar_state="expanded", - ) - - -def main_header(text): - """ - Display a main header with custom styling. - - Args: - text: Header text to display - """ - st.markdown(f'

{text}

', unsafe_allow_html=True) - - -def sub_header(text): - """ - Display a sub-header with custom styling. - - Args: - text: Sub-header text to display - """ - st.markdown(f'

{text}

', unsafe_allow_html=True) - - -def info_box(text): - """ - Display an information box with custom styling. - - Args: - text: Information text to display - """ - st.markdown(f'
{text}
', unsafe_allow_html=True) - - -def success_box(text): - """ - Display a success box with custom styling. - - Args: - text: Success message to display - """ - st.markdown(f'
{text}
', unsafe_allow_html=True) - - -def warning_box(text): - """ - Display a warning box with custom styling. - - Args: - text: Warning message to display - """ - st.markdown(f'
{text}
', unsafe_allow_html=True) - - -def error_box(text): - """ - Display an error box with custom styling. - - Args: - text: Error message to display - """ - st.markdown(f'
{text}
', unsafe_allow_html=True) - - -def footer(): - """ - Display a footer with copyright and version information. - """ - from optimhc import __version__ as version - - st.markdown( - f'', - unsafe_allow_html=True - ) diff --git a/optimhc/gui/utils.py b/optimhc/gui/utils.py deleted file mode 100644 index 53831fc..0000000 --- a/optimhc/gui/utils.py +++ /dev/null @@ -1,243 +0,0 @@ -""" -Utility functions for the optiMHC GUI. -""" - -import os -import subprocess -import sys -import tempfile -from pathlib import Path -import yaml -import json -import streamlit as st -import pandas as pd -from typing import Dict, List, Any, Optional, Union - - -def load_config_from_yaml(file_path: str) -> Dict[str, Any]: - """ - Load configuration from a YAML file. - - Args: - file_path: Path to the YAML configuration file - - Returns: - Dictionary containing the configuration - """ - try: - with open(file_path, 'r') as f: - config = yaml.safe_load(f) - return config - except Exception as e: - st.error(f"Error loading configuration file: {str(e)}") - return {} - - -def save_config_to_yaml(config: Dict[str, Any], file_path: str) -> bool: - """ - Save configuration to a YAML file. - - Args: - config: Configuration dictionary - file_path: Path to save the YAML file - - Returns: - True if successful, False otherwise - """ - try: - with open(file_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False) - return True - except Exception as e: - st.error(f"Error saving configuration file: {str(e)}") - return False - - -def create_temp_config_file(config: Dict[str, Any]) -> str: - """ - Create a temporary configuration file. - - Args: - config: Configuration dictionary - - Returns: - Path to the temporary file - """ - fd, temp_path = tempfile.mkstemp(suffix='.yaml') - with os.fdopen(fd, 'w') as f: - yaml.dump(config, f, default_flow_style=False) - return temp_path - - -def run_pipeline_command(config_path: str) -> subprocess.Popen: - """ - Run the optiMHC pipeline as a subprocess. 
- - Args: - config_path: Path to the configuration file - - Returns: - Subprocess object - """ - # Load configuration to access output directory and experiment name - config = {} - try: - with open(config_path, 'r') as f: - config = yaml.safe_load(f) - - # Store config in session state for log finding - st.session_state.config = config - except Exception as e: - print(f"Error loading configuration: {str(e)}") - - # Identify where logs will be stored based on pipeline conventions - output_dir = config.get("outputDir", "") - experiment_name = config.get("experimentName", "") - - # The actual log file that the pipeline will create - if output_dir and experiment_name: - experiment_dir = os.path.join(output_dir, experiment_name) - os.makedirs(experiment_dir, exist_ok=True) - pipeline_log_path = os.path.join(experiment_dir, "log") - - # Store the expected log file path in session state - st.session_state.pipeline_log_path = pipeline_log_path - else: - st.session_state.pipeline_log_path = None - - # Reset log position counter - st.session_state.log_position = 0 - - # Set environment variables to ensure output is not buffered - my_env = os.environ.copy() - my_env["PYTHONUNBUFFERED"] = "1" # Prevent Python from buffering output - my_env["PYTHONIOENCODING"] = "utf-8" # Ensure UTF-8 encoding - - # Run the pipeline command - cmd = [sys.executable, "-u", "-m", "optimhc", "pipeline", "--config", config_path] - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, # Line buffered - universal_newlines=True, - env=my_env - ) - - # Start a thread to read and print output (for debugging) - def monitor_output(): - try: - for line in iter(process.stdout.readline, ''): - print(line.strip()) # Print to console for debugging - except Exception as e: - print(f"Error in monitor thread: {str(e)}") - - # Start thread in daemon mode so it won't block program exit - import threading - monitor_thread = 
threading.Thread(target=monitor_output, daemon=True) - monitor_thread.start() - - return process - - -def scan_output_directory(output_dir: str) -> Dict[str, List[str]]: - """ - Scan the output directory for results. - - Args: - output_dir: Path to the output directory - - Returns: - Dictionary with lists of files grouped by type - """ - if not os.path.exists(output_dir): - return { - 'figures': [], - 'tables': [], - 'logs': [], - 'other': [] - } - - result_files = { - 'figures': [], - 'tables': [], - 'logs': [], - 'other': [] - } - - for root, _, files in os.walk(output_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(('.png', '.jpg', '.jpeg', '.svg', '.pdf')): - result_files['figures'].append(file_path) - elif file.endswith(('.csv', '.tsv', '.xlsx')): - result_files['tables'].append(file_path) - elif file.endswith('.log'): - result_files['logs'].append(file_path) - else: - result_files['other'].append(file_path) - - return result_files - - -def get_config_summary(config: Dict[str, Any]) -> Dict[str, Any]: - """ - Create a summary of the configuration for display. 
- - Args: - config: Configuration dictionary - - Returns: - Dictionary with summarized configuration - """ - summary = { - 'Experiment Name': config.get('experimentName', 'N/A'), - 'Input Type': config.get('inputType', 'N/A'), - 'Output Directory': config.get('outputDir', 'N/A'), - 'Alleles': ', '.join(config.get('allele', [])) if config.get('allele') else 'N/A', - 'Feature Generators': [gen.get('name') for gen in config.get('featureGenerator', [])] - if config.get('featureGenerator') else 'N/A', - 'Rescoring Model': config.get('rescore', {}).get('model', 'N/A') if config.get('rescore') else 'N/A', - 'Modification Map': f"{len(config.get('modificationMap', {}))} modifications" - if config.get('modificationMap') else 'Default' - } - return summary - - -def parse_feature_generator_json(json_str: str) -> Optional[Dict[str, Any]]: - """ - Parse JSON string for feature generator configuration. - - Args: - json_str: JSON string to parse - - Returns: - Dictionary containing the feature generator configuration or None if invalid - """ - try: - return json.loads(json_str) - except json.JSONDecodeError: - return None - - -def format_log_message(message: str, level: str = "INFO") -> str: - """ - Format a log message for display. 
- - Args: - message: Log message to format - level: Log level - - Returns: - Formatted log message - """ - level_colors = { - "DEBUG": "gray", - "INFO": "black", - "WARNING": "orange", - "ERROR": "red", - "CRITICAL": "crimson" - } - color = level_colors.get(level, "black") - return f'[{level}] {message}' diff --git a/optimhc/psm_container.py b/optimhc/psm_container.py index f8e0b27..6aa1e5a 100644 --- a/optimhc/psm_container.py +++ b/optimhc/psm_container.py @@ -69,7 +69,7 @@ def __init__( protein_column: str, rescoring_features: Dict[str, List[str]], hit_rank_column: Optional[str] = None, - charge_column: Optional[int] = None, + charge_column: Optional[str] = None, retention_time_column: Optional[str] = None, calculated_mass_column: Optional[str] = None, metadata_column: Optional[str] = None, @@ -144,13 +144,13 @@ def check_rescoring_features(features: Dict[str, List[str]]): logger.info("PsmContainer initialized with %d PSM entries.", len(self._psms)) if self.ms_data_file_column: logger.info( - "PSMs originated from %d MS data file(S).", + "PSMs originated from %d MS data file(s).", len(self._psms[ms_data_file_column].unique()), ) logger.info("target psms: %d", len(self.target_psms)) logger.info("decoy psms: %d", len(self.decoy_psms)) logger.info("unique peptides: %d", len(np.unique(self.peptides))) - logger.info("rescoing features: %s", rescoring_features) + logger.info("rescoring features: %s", rescoring_features) @property def psms(self) -> pd.DataFrame: diff --git a/pyproject.toml b/pyproject.toml index 2faa19b..234f742 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,13 +34,6 @@ dependencies = [ "numba>=0.64.0", ] -[project.optional-dependencies] -gui = [ - "streamlit>=1.24.0", - "plotly>=5.13.0", - "watchdog>=3.0.0", -] - [dependency-groups] dev = [ "pytest>=8.4.0", @@ -62,7 +55,7 @@ include = ["optimhc*"] optimhc = ["PWMs*"] [tool.ruff] -exclude = ["docs", "examples", "optimhc/gui/"] +exclude = ["docs", "examples"] line-length = 99 target-version = 
"py311" diff --git a/tests/feature_generator/__init__.py b/tests/feature/__init__.py similarity index 100% rename from tests/feature_generator/__init__.py rename to tests/feature/__init__.py diff --git a/tests/feature_generator/test_spectral_similarity.py b/tests/feature/test_spectral_similarity.py similarity index 96% rename from tests/feature_generator/test_spectral_similarity.py rename to tests/feature/test_spectral_similarity.py index 60fe8b1..b9829a3 100644 --- a/tests/feature_generator/test_spectral_similarity.py +++ b/tests/feature/test_spectral_similarity.py @@ -2,7 +2,7 @@ import numpy as np -from optimhc.feature_generator.spectral_similarity import SpectralSimilarityFeatureGenerator +from optimhc.feature.spectral_similarity import SpectralSimilarityFeatureGenerator def _make_generator(tolerance_ppm=20.0, top_n=36): diff --git a/uv.lock b/uv.lock index 11824bf..e7479df 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10, <3.13" resolution-markers = [ "python_full_version >= '3.12'", @@ -17,22 +17,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, upload-time = "2026-01-28T10:17:04.19Z" }, ] -[[package]] -name = "altair" -version = "6.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2" }, - { name = "jsonschema" }, - { name = "narwhals" }, - { name = "packaging" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f7/c0/184a89bd5feba14ff3c41cfaf1dd8a82c05f5ceedbc92145e17042eb08a4/altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4", size = 763834, upload-time = "2025-11-12T08:59:11.519Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/db/33/ef2f2409450ef6daa61459d5de5c08128e7d3edb773fefd0a324d1310238/altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8", size = 795410, upload-time = "2025-11-12T08:59:09.804Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -78,15 +62,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8", size = 12732, upload-time = "2019-12-22T18:12:11.297Z" }, ] -[[package]] -name = "attrs" -version = "25.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, -] - [[package]] name = "biopython" version = "1.86" @@ -123,24 +98,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/13/00db03b01e54070d5b0ec9c71eef86e61afa733d9af76e5b9b09f5dc9165/biopython-1.86-cp312-cp312-win_amd64.whl", hash = "sha256:35a6b9c5dcdfb5c2631a313a007f3f41a7d72573ba2b68c962e10ea92096ff3b", size = 2733610, upload-time = "2025-10-28T21:32:34.99Z" }, ] -[[package]] -name = "blinker" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = 
"sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, -] - -[[package]] -name = "cachetools" -version = "6.2.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/39/91/d9ae9a66b01102a18cd16db0cf4cd54187ffe10f0865cc80071a4104fbb3/cachetools-6.2.6.tar.gz", hash = "sha256:16c33e1f276b9a9c0b49ab5782d901e3ad3de0dd6da9bf9bcd29ac5672f2f9e6", size = 32363, upload-time = "2026-01-27T20:32:59.956Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/45/f458fa2c388e79dd9d8b9b0c99f1d31b568f27388f2fdba7bb66bbc0c6ed/cachetools-6.2.6-py3-none-any.whl", hash = "sha256:8c9717235b3c651603fff0076db52d6acbfd1b338b8ed50256092f7ce9c85bda", size = 11668, upload-time = "2026-01-27T20:32:58.527Z" }, -] - [[package]] name = "certifi" version = "2026.2.25" @@ -480,30 +437,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/33/f1c6a276de27b7d7339a34749cc33fa87f077f921969c47185d34a887ae2/gast-0.7.0-py3-none-any.whl", hash = "sha256:99cbf1365633a74099f69c59bd650476b96baa5ef196fec88032b00b31ba36f7", size = 22966, upload-time = "2025-11-29T15:30:03.983Z" }, ] -[[package]] -name = "gitdb" -version = "4.0.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "smmap" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, -] - -[[package]] -name = "gitpython" -version = "3.1.46" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gitdb" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, -] - [[package]] name = "google-pasta" version = "0.2.0" @@ -693,18 +626,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310, upload-time = "2023-12-13T20:37:23.244Z" }, ] -[[package]] -name = "jinja2" -version = "3.1.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, -] - [[package]] name = "job-pool" version = "0.2.6" @@ -726,33 +647,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] -[[package]] -name = "jsonschema" -version = "4.26.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "jsonschema-specifications" }, - { name = "referencing" }, - { name = "rpds-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, -] - -[[package]] -name = "jsonschema-specifications" -version = "2025.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "referencing" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } -wheels = [ - { 
url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, -] - [[package]] name = "keras" version = "3.12.1" @@ -1244,15 +1138,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/bc/465daf1de06409cdd4532082806770ee0d8d7df434da79c76564d0f69741/namex-0.1.0-py3-none-any.whl", hash = "sha256:e2012a474502f1e2251267062aae3114611f07df4224b6e06334c57b0f2ce87c", size = 5905, upload-time = "2025-05-26T23:17:37.695Z" }, ] -[[package]] -name = "narwhals" -version = "2.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/75/59/81d0f4cad21484083466f278e6b392addd9f4205b48d45b5c8771670ebf8/narwhals-2.17.0.tar.gz", hash = "sha256:ebd5bc95bcfa2f8e89a8ac09e2765a63055162837208e67b42d6eeb6651d5e67", size = 620306, upload-time = "2026-02-23T09:44:34.142Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/27/20770bd6bf8fbe1e16f848ba21da9df061f38d2e6483952c29d2bb5d1d8b/narwhals-2.17.0-py3-none-any.whl", hash = "sha256:2ac5307b7c2b275a7d66eeda906b8605e3d7a760951e188dcfff86e8ebe083dd", size = 444897, upload-time = "2026-02-23T09:44:32.006Z" }, -] - [[package]] name = "networkx" version = "3.2.1" @@ -1362,13 +1247,6 @@ dependencies = [ { name = "xgboost" }, ] -[package.optional-dependencies] -gui = [ - { name = "plotly" }, - { name = "streamlit" }, - { name = "watchdog" }, -] - [package.dev-dependencies] dev = [ { name = "pre-commit" }, @@ -1389,19 +1267,15 @@ requires-dist = [ { name = "numba", specifier = ">=0.64.0" }, { name = "numpy", specifier = "==1.26.4" }, { name = "pandas", specifier = "==2.2.3" }, - { name = "plotly", marker = "extra == 'gui'", specifier = ">=5.13.0" }, { name = "pyteomics", specifier = "==4.7.5" }, { name = "pyyaml", 
specifier = "==6.0.2" }, { name = "scikit-learn", specifier = "==1.5.2" }, { name = "scipy", specifier = "==1.13.1" }, { name = "seaborn", specifier = "==0.13.2" }, { name = "setuptools", specifier = ">=61,<82" }, - { name = "streamlit", marker = "extra == 'gui'", specifier = ">=1.24.0" }, { name = "tqdm", specifier = "==4.67.0" }, - { name = "watchdog", marker = "extra == 'gui'", specifier = ">=3.0.0" }, { name = "xgboost", specifier = "==1.7.6" }, ] -provides-extras = ["gui"] [package.metadata.requires-dev] dev = [ @@ -1559,19 +1433,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" }, ] -[[package]] -name = "plotly" -version = "6.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "narwhals" }, - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e3/4f/8a10a9b9f5192cb6fdef62f1d77fa7d834190b2c50c0cd256bd62879212b/plotly-6.5.2.tar.gz", hash = "sha256:7478555be0198562d1435dee4c308268187553cc15516a2f4dd034453699e393", size = 7015695, upload-time = "2026-01-14T21:26:51.222Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl", hash = "sha256:91757653bd9c550eeea2fa2404dba6b85d1e366d54804c340b2c874e5a7eb4a4", size = 9895973, upload-time = "2026-01-14T21:26:47.135Z" }, -] - [[package]] name = "pluggy" version = "1.6.0" @@ -1789,19 +1650,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = 
"2025-11-04T13:43:46.64Z" }, ] -[[package]] -name = "pydeck" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2" }, - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240, upload-time = "2024-05-10T15:36:21.153Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403, upload-time = "2024-05-10T15:36:17.36Z" }, -] - [[package]] name = "pyensembl" version = "2.3.13" @@ -2004,20 +1852,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, ] -[[package]] -name = "referencing" -version = "0.37.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "rpds-py" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, -] - 
[[package]] name = "requests" version = "2.32.5" @@ -2046,70 +1880,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] -[[package]] -name = "rpds-py" -version = "0.30.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/0c/0c411a0ec64ccb6d104dcabe0e713e05e153a9a2c3c2bd2b32ce412166fe/rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288", size = 370490, upload-time = "2025-11-30T20:21:33.256Z" }, - { url = "https://files.pythonhosted.org/packages/19/6a/4ba3d0fb7297ebae71171822554abe48d7cab29c28b8f9f2c04b79988c05/rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00", size = 359751, upload-time = "2025-11-30T20:21:34.591Z" }, - { url = "https://files.pythonhosted.org/packages/cd/7c/e4933565ef7f7a0818985d87c15d9d273f1a649afa6a52ea35ad011195ea/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389a2d49eded1896c3d48b0136ead37c48e221b391c052fba3f4055c367f60a6", size = 389696, upload-time = "2025-11-30T20:21:36.122Z" }, - { url = "https://files.pythonhosted.org/packages/5e/01/6271a2511ad0815f00f7ed4390cf2567bec1d4b1da39e2c27a41e6e3b4de/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:32c8528634e1bf7121f3de08fa85b138f4e0dc47657866630611b03967f041d7", size = 403136, upload-time = "2025-11-30T20:21:37.728Z" }, - { url = "https://files.pythonhosted.org/packages/55/64/c857eb7cd7541e9b4eee9d49c196e833128a55b89a9850a9c9ac33ccf897/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f207f69853edd6f6700b86efb84999651baf3789e78a466431df1331608e5324", size = 524699, upload-time = "2025-11-30T20:21:38.92Z" }, - { url = "https://files.pythonhosted.org/packages/9c/ed/94816543404078af9ab26159c44f9e98e20fe47e2126d5d32c9d9948d10a/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67b02ec25ba7a9e8fa74c63b6ca44cf5707f2fbfadae3ee8e7494297d56aa9df", size = 412022, upload-time = "2025-11-30T20:21:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/61/b5/707f6cf0066a6412aacc11d17920ea2e19e5b2f04081c64526eb35b5c6e7/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0e95f6819a19965ff420f65578bacb0b00f251fefe2c8b23347c37174271f3", size = 390522, upload-time = "2025-11-30T20:21:42.17Z" }, - { url = "https://files.pythonhosted.org/packages/13/4e/57a85fda37a229ff4226f8cbcf09f2a455d1ed20e802ce5b2b4a7f5ed053/rpds_py-0.30.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a452763cc5198f2f98898eb98f7569649fe5da666c2dc6b5ddb10fde5a574221", size = 404579, upload-time = "2025-11-30T20:21:43.769Z" }, - { url = "https://files.pythonhosted.org/packages/f9/da/c9339293513ec680a721e0e16bf2bac3db6e5d7e922488de471308349bba/rpds_py-0.30.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0b65193a413ccc930671c55153a03ee57cecb49e6227204b04fae512eb657a7", size = 421305, upload-time = "2025-11-30T20:21:44.994Z" }, - { url = "https://files.pythonhosted.org/packages/f9/be/522cb84751114f4ad9d822ff5a1aa3c98006341895d5f084779b99596e5c/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:858738e9c32147f78b3ac24dc0edb6610000e56dc0f700fd5f651d0a0f0eb9ff", size = 572503, upload-time = "2025-11-30T20:21:46.91Z" }, - { url = "https://files.pythonhosted.org/packages/a2/9b/de879f7e7ceddc973ea6e4629e9b380213a6938a249e94b0cdbcc325bb66/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:da279aa314f00acbb803da1e76fa18666778e8a8f83484fba94526da5de2cba7", size = 598322, upload-time = "2025-11-30T20:21:48.709Z" }, - { url = "https://files.pythonhosted.org/packages/48/ac/f01fc22efec3f37d8a914fc1b2fb9bcafd56a299edbe96406f3053edea5a/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7c64d38fb49b6cdeda16ab49e35fe0da2e1e9b34bc38bd78386530f218b37139", size = 560792, upload-time = "2025-11-30T20:21:50.024Z" }, - { url = "https://files.pythonhosted.org/packages/e2/da/4e2b19d0f131f35b6146425f846563d0ce036763e38913d917187307a671/rpds_py-0.30.0-cp310-cp310-win32.whl", hash = "sha256:6de2a32a1665b93233cde140ff8b3467bdb9e2af2b91079f0333a0974d12d464", size = 221901, upload-time = "2025-11-30T20:21:51.32Z" }, - { url = "https://files.pythonhosted.org/packages/96/cb/156d7a5cf4f78a7cc571465d8aec7a3c447c94f6749c5123f08438bcf7bc/rpds_py-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:1726859cd0de969f88dc8673bdd954185b9104e05806be64bcd87badbe313169", size = 235823, upload-time = "2025-11-30T20:21:52.505Z" }, - { url = "https://files.pythonhosted.org/packages/4d/6e/f964e88b3d2abee2a82c1ac8366da848fce1c6d834dc2132c3fda3970290/rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425", size = 370157, upload-time = "2025-11-30T20:21:53.789Z" }, - { url = "https://files.pythonhosted.org/packages/94/ba/24e5ebb7c1c82e74c4e4f33b2112a5573ddc703915b13a073737b59b86e0/rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d", size = 359676, upload-time = "2025-11-30T20:21:55.475Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/86/04dbba1b087227747d64d80c3b74df946b986c57af0a9f0c98726d4d7a3b/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4", size = 389938, upload-time = "2025-11-30T20:21:57.079Z" }, - { url = "https://files.pythonhosted.org/packages/42/bb/1463f0b1722b7f45431bdd468301991d1328b16cffe0b1c2918eba2c4eee/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f", size = 402932, upload-time = "2025-11-30T20:21:58.47Z" }, - { url = "https://files.pythonhosted.org/packages/99/ee/2520700a5c1f2d76631f948b0736cdf9b0acb25abd0ca8e889b5c62ac2e3/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4", size = 525830, upload-time = "2025-11-30T20:21:59.699Z" }, - { url = "https://files.pythonhosted.org/packages/e0/ad/bd0331f740f5705cc555a5e17fdf334671262160270962e69a2bdef3bf76/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97", size = 412033, upload-time = "2025-11-30T20:22:00.991Z" }, - { url = "https://files.pythonhosted.org/packages/f8/1e/372195d326549bb51f0ba0f2ecb9874579906b97e08880e7a65c3bef1a99/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89", size = 390828, upload-time = "2025-11-30T20:22:02.723Z" }, - { url = "https://files.pythonhosted.org/packages/ab/2b/d88bb33294e3e0c76bc8f351a3721212713629ffca1700fa94979cb3eae8/rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d", size = 404683, upload-time = "2025-11-30T20:22:04.367Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/32/c759a8d42bcb5289c1fac697cd92f6fe01a018dd937e62ae77e0e7f15702/rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038", size = 421583, upload-time = "2025-11-30T20:22:05.814Z" }, - { url = "https://files.pythonhosted.org/packages/2b/81/e729761dbd55ddf5d84ec4ff1f47857f4374b0f19bdabfcf929164da3e24/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7", size = 572496, upload-time = "2025-11-30T20:22:07.713Z" }, - { url = "https://files.pythonhosted.org/packages/14/f6/69066a924c3557c9c30baa6ec3a0aa07526305684c6f86c696b08860726c/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed", size = 598669, upload-time = "2025-11-30T20:22:09.312Z" }, - { url = "https://files.pythonhosted.org/packages/5f/48/905896b1eb8a05630d20333d1d8ffd162394127b74ce0b0784ae04498d32/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85", size = 561011, upload-time = "2025-11-30T20:22:11.309Z" }, - { url = "https://files.pythonhosted.org/packages/22/16/cd3027c7e279d22e5eb431dd3c0fbc677bed58797fe7581e148f3f68818b/rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c", size = 221406, upload-time = "2025-11-30T20:22:13.101Z" }, - { url = "https://files.pythonhosted.org/packages/fa/5b/e7b7aa136f28462b344e652ee010d4de26ee9fd16f1bfd5811f5153ccf89/rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825", size = 236024, upload-time = "2025-11-30T20:22:14.853Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/a6/364bba985e4c13658edb156640608f2c9e1d3ea3c81b27aa9d889fff0e31/rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229", size = 229069, upload-time = "2025-11-30T20:22:16.577Z" }, - { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, - { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, - { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, - { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, - { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, - { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, - { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, - { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, - { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, - { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, - { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, - { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, - { url = "https://files.pythonhosted.org/packages/69/71/3f34339ee70521864411f8b6992e7ab13ac30d8e4e3309e07c7361767d91/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58", size = 372292, upload-time = "2025-11-30T20:24:16.537Z" }, - { url = "https://files.pythonhosted.org/packages/57/09/f183df9b8f2d66720d2ef71075c59f7e1b336bec7ee4c48f0a2b06857653/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a", size = 362128, upload-time = "2025-11-30T20:24:18.086Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/68/5c2594e937253457342e078f0cc1ded3dd7b2ad59afdbf2d354869110a02/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb", size = 391542, upload-time = "2025-11-30T20:24:20.092Z" }, - { url = "https://files.pythonhosted.org/packages/49/5c/31ef1afd70b4b4fbdb2800249f34c57c64beb687495b10aec0365f53dfc4/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c", size = 404004, upload-time = "2025-11-30T20:24:22.231Z" }, - { url = "https://files.pythonhosted.org/packages/e3/63/0cfbea38d05756f3440ce6534d51a491d26176ac045e2707adc99bb6e60a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3", size = 527063, upload-time = "2025-11-30T20:24:24.302Z" }, - { url = "https://files.pythonhosted.org/packages/42/e6/01e1f72a2456678b0f618fc9a1a13f882061690893c192fcad9f2926553a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5", size = 413099, upload-time = "2025-11-30T20:24:25.916Z" }, - { url = "https://files.pythonhosted.org/packages/b8/25/8df56677f209003dcbb180765520c544525e3ef21ea72279c98b9aa7c7fb/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738", size = 392177, upload-time = "2025-11-30T20:24:27.834Z" }, - { url = "https://files.pythonhosted.org/packages/4a/b4/0a771378c5f16f8115f796d1f437950158679bcd2a7c68cf251cfb00ed5b/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f", size = 406015, upload-time = 
"2025-11-30T20:24:29.457Z" }, - { url = "https://files.pythonhosted.org/packages/36/d8/456dbba0af75049dc6f63ff295a2f92766b9d521fa00de67a2bd6427d57a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877", size = 423736, upload-time = "2025-11-30T20:24:31.22Z" }, - { url = "https://files.pythonhosted.org/packages/13/64/b4d76f227d5c45a7e0b796c674fd81b0a6c4fbd48dc29271857d8219571c/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a", size = 573981, upload-time = "2025-11-30T20:24:32.934Z" }, - { url = "https://files.pythonhosted.org/packages/20/91/092bacadeda3edf92bf743cc96a7be133e13a39cdbfd7b5082e7ab638406/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4", size = 599782, upload-time = "2025-11-30T20:24:35.169Z" }, - { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, -] - [[package]] name = "ruff" version = "0.15.4" @@ -2300,15 +2070,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "smmap" -version = "5.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = 
"sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, -] - [[package]] name = "sqlalchemy" version = "2.0.47" @@ -2343,44 +2104,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/9f/7c378406b592fcf1fc157248607b495a40e3202ba4a6f1372a2ba6447717/sqlalchemy-2.0.47-py3-none-any.whl", hash = "sha256:e2647043599297a1ef10e720cf310846b7f31b6c841fee093d2b09d81215eb93", size = 1940159, upload-time = "2026-02-24T17:15:07.158Z" }, ] -[[package]] -name = "streamlit" -version = "1.54.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "altair" }, - { name = "blinker" }, - { name = "cachetools" }, - { name = "click" }, - { name = "gitpython" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "pyarrow" }, - { name = "pydeck" }, - { name = "requests" }, - { name = "tenacity" }, - { name = "toml" }, - { name = "tornado" }, - { name = "typing-extensions" }, - { name = "watchdog", marker = "sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/be/66/d887ee80ea85f035baee607c60af024994e17ae9b921277fca9675e76ecf/streamlit-1.54.0.tar.gz", hash = "sha256:09965e6ae7eb0357091725de1ce2a3f7e4be155c2464c505c40a3da77ab69dd8", size = 8662292, upload-time = "2026-02-04T16:37:54.734Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/1d/40de1819374b4f0507411a60f4d2de0d620a9b10c817de5925799132b6c9/streamlit-1.54.0-py3-none-any.whl", hash = "sha256:a7b67d6293a9f5f6b4d4c7acdbc4980d7d9f049e78e404125022ecb1712f79fc", size = 9119730, upload-time = 
"2026-02-04T16:37:52.199Z" }, -] - -[[package]] -name = "tenacity" -version = "9.1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, -] - [[package]] name = "tensorboard" version = "2.20.0" @@ -2490,15 +2213,6 @@ version = "0.0.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5c/87/75d3fbe15aabd45d9b70702241787cf1f7f30dd9fabcd9bc89d828c7661d/tinytimer-0.0.0.tar.gz", hash = "sha256:6ad13c8f01ab6094e58081a5367ffc4c5831f2d6b29034d2434d8ae106308fa5", size = 2069, upload-time = "2015-03-20T20:00:55.612Z" } -[[package]] -name = "toml" -version = "0.10.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, -] - [[package]] name = "tomli" version = "2.4.0" @@ -2535,23 +2249,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" }, ] -[[package]] -name = "tornado" -version = "6.5.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" }, - { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" }, - { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" }, - { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = 
"2026-03-10T21:30:51.22Z" }, - { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" }, - { url = "https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" }, - { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" }, - { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" }, - { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" }, -] - [[package]] name = "tqdm" version = "4.67.0" @@ -2683,35 +2380,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/55/896b06bf93a49bec0f4ae2a6f1ed12bd05c8860744ac3a70eda041064e4d/virtualenv-21.1.0-py3-none-any.whl", hash = "sha256:164f5e14c5587d170cf98e60378eb91ea35bf037be313811905d3a24ea33cc07", size = 5825072, upload-time = "2026-02-27T08:49:27.516Z" }, ] -[[package]] -name = "watchdog" -version = "6.0.0" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/56/90994d789c61df619bfc5ce2ecdabd5eeff564e1eb47512bd01b5e019569/watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26", size = 96390, upload-time = "2024-11-01T14:06:24.793Z" }, - { url = "https://files.pythonhosted.org/packages/55/46/9a67ee697342ddf3c6daa97e3a587a56d6c4052f881ed926a849fcf7371c/watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112", size = 88389, upload-time = "2024-11-01T14:06:27.112Z" }, - { url = "https://files.pythonhosted.org/packages/44/65/91b0985747c52064d8701e1075eb96f8c40a79df889e59a399453adfb882/watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3", size = 89020, upload-time = "2024-11-01T14:06:29.876Z" }, - { url = "https://files.pythonhosted.org/packages/e0/24/d9be5cd6642a6aa68352ded4b4b10fb0d7889cb7f45814fb92cecd35f101/watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c", size = 96393, upload-time = "2024-11-01T14:06:31.756Z" }, - { url = "https://files.pythonhosted.org/packages/63/7a/6013b0d8dbc56adca7fdd4f0beed381c59f6752341b12fa0886fa7afc78b/watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2", size = 88392, upload-time = "2024-11-01T14:06:32.99Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/40/b75381494851556de56281e053700e46bff5b37bf4c7267e858640af5a7f/watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c", size = 89019, upload-time = "2024-11-01T14:06:34.963Z" }, - { url = "https://files.pythonhosted.org/packages/39/ea/3930d07dafc9e286ed356a679aa02d777c06e9bfd1164fa7c19c288a5483/watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948", size = 96471, upload-time = "2024-11-01T14:06:37.745Z" }, - { url = "https://files.pythonhosted.org/packages/12/87/48361531f70b1f87928b045df868a9fd4e253d9ae087fa4cf3f7113be363/watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860", size = 88449, upload-time = "2024-11-01T14:06:39.748Z" }, - { url = "https://files.pythonhosted.org/packages/5b/7e/8f322f5e600812e6f9a31b75d242631068ca8f4ef0582dd3ae6e72daecc8/watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0", size = 89054, upload-time = "2024-11-01T14:06:41.009Z" }, - { url = "https://files.pythonhosted.org/packages/30/ad/d17b5d42e28a8b91f8ed01cb949da092827afb9995d4559fd448d0472763/watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881", size = 87902, upload-time = "2024-11-01T14:06:53.119Z" }, - { url = "https://files.pythonhosted.org/packages/5c/ca/c3649991d140ff6ab67bfc85ab42b165ead119c9e12211e08089d763ece5/watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11", size = 88380, upload-time = "2024-11-01T14:06:55.19Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, - { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, - { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" }, - { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" }, - { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" }, - { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" }, - { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, -] - [[package]] name = "werkzeug" version = "3.1.6"