From 1f7eb033801e80896edb7a7618ea2dbe85c59597 Mon Sep 17 00:00:00 2001 From: zshang Date: Fri, 6 Mar 2026 10:58:19 +0800 Subject: [PATCH 1/3] Update rescore configuration to include trainFDR parameter --- optimhc/core/config.py | 2 +- optimhc/core/pipeline.py | 11 ++++++----- optimhc/rescore/mokapot.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/optimhc/core/config.py b/optimhc/core/config.py index eeb3071..e3dfa4b 100644 --- a/optimhc/core/config.py +++ b/optimhc/core/config.py @@ -20,7 +20,7 @@ "removePreNxtAA": False, "showProgress": True, "logLevel": "INFO", - "rescore": {"testFDR": 0.01, "model": "Percolator", "numJobs": 1}, + "rescore": {"testFDR": 0.01, "trainFDR": 0.01, "model": "Percolator", "numJobs": 1}, } diff --git a/optimhc/core/pipeline.py b/optimhc/core/pipeline.py index 054a410..6574ec4 100644 --- a/optimhc/core/pipeline.py +++ b/optimhc/core/pipeline.py @@ -73,6 +73,7 @@ def __init__(self, config): self.save_models = self.config.get("saveModels", True) self.to_flashlfq = self.config.get("toFlashLFQ", True) self.test_fdr = self.config.get("rescore", {}).get("testFDR", 0.01) + self.train_fdr = self.config.get("rescore", {}).get("trainFDR", 0.01) self.model_type = self.config.get("rescore", {}).get("model", "Percolator") self.n_jobs = self.config.get("rescore", {}).get("numJobs", 1) @@ -143,7 +144,6 @@ def rescore(self, psms, model_type=None, n_jobs=None, test_fdr=None, rescoring_f Number of parallel jobs. test_fdr : float, optional FDR threshold. - rescoring_features : list, optional List of features to use for rescoring. Returns @@ -161,14 +161,15 @@ def rescore(self, psms, model_type=None, n_jobs=None, test_fdr=None, rescoring_f model_type = model_type if model_type is not None else self.model_type n_jobs = n_jobs if n_jobs is not None else self.n_jobs + train_fdr = getattr(self, "train_fdr", 0.01) if model_type == "XGBoost": - model = XGBoostPercolatorModel(n_jobs=n_jobs) + model = XGBoostPercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs) elif model_type == "RandomForest": - model = RandomForestPercolatorModel(n_jobs=n_jobs) + model = RandomForestPercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs) elif model_type == "Percolator": - model = PercolatorModel(n_jobs=n_jobs) + model = PercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs) else: - model = PercolatorModel(n_jobs=n_jobs) + model = PercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs) kwargs = {} if rescoring_features is not None: diff --git a/optimhc/rescore/mokapot.py b/optimhc/rescore/mokapot.py index 341662e..0fc3f58 100644 --- a/optimhc/rescore/mokapot.py +++ b/optimhc/rescore/mokapot.py @@ -58,7 +58,7 @@ def rescore( """ psms = convert_to_mokapot_dataset(psms, rescoring_features=rescoring_features) logger.info("Rescoring PSMs with mokapot.") - results, models = mokapot.brew(psms, model=model, test_fdr=test_fdr, **kwargs) + results, models = mokapot.brew(psms, model=[model], test_fdr=test_fdr, **kwargs) return results, models From 63b3a13663af8d78fec788cba40d7416c8a7927a Mon Sep 17 00:00:00 2001 From: zshang Date: Fri, 6 Mar 2026 11:02:18 +0800 Subject: [PATCH 2/3] Update README --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bc4cb04..a79b20e 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,10 @@ Rescore parameters control how the rescoring step is executed and include: | Parameter | Type | Example | Description | | --------- | ------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `testFDR` | Float | `0.01` | The false-discovery rate threshold at which to evaluate the learned models. | -| `model` | String | `Percolator` | Model to use for rescoring (valid options include `Percolator`, `XGBoost`, or `RandomForest`). | -| `numJobs` | Integer | `4` | The number of parallel jobs to run. This value is passed to Scikit-learn's n_jobs parameter to control parallelism for model training or scoring. Set to -1 to use all available CPU cores. | +| `testFDR` | Float | `0.01` | The false-discovery rate threshold at which to evaluate the learned models and report final results. | +| `trainFDR` | Float | `0.01` | The FDR threshold used during model training to select positive PSMs in each iteration. Increase this value (e.g. `0.05`) if training fails with "No PSMs found below the eval_fdr" on challenging datasets. | +| `model` | String | `Percolator` | Model to use for rescoring (valid options include `Percolator`, `XGBoost`, or `RandomForest`). | +| `numJobs` | Integer | `4` | The number of parallel jobs to run. This value is passed to Scikit-learn's n_jobs parameter to control parallelism for model training or scoring. Set to -1 to use all available CPU cores. | #### Example YAML Configuration From b38994cb4ad77ae38542d7b877aef8cc4abc98ec Mon Sep 17 00:00:00 2001 From: zshang Date: Fri, 6 Mar 2026 11:31:23 +0800 Subject: [PATCH 3/3] Enhance argument handling --- optimhc/rescore/mokapot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/optimhc/rescore/mokapot.py b/optimhc/rescore/mokapot.py index 0fc3f58..53cfc06 100644 --- a/optimhc/rescore/mokapot.py +++ b/optimhc/rescore/mokapot.py @@ -58,7 +58,8 @@ def rescore( """ psms = convert_to_mokapot_dataset(psms, rescoring_features=rescoring_features) logger.info("Rescoring PSMs with mokapot.") - results, models = mokapot.brew(psms, model=[model], test_fdr=test_fdr, **kwargs) + model_arg = [model] if model is not None else None + results, models = mokapot.brew(psms, model=model_arg, test_fdr=test_fdr, **kwargs) return results, models