From d3c075614d4de07c21c5c386d9f0642adf1ed642 Mon Sep 17 00:00:00 2001 From: nroope Date: Fri, 19 Dec 2025 16:42:06 +0100 Subject: [PATCH 1/4] Update README.md --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c3c6eaf..d5d245b 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,11 @@ ## Prune and Quantize ML models PQuant is a library for training compressed machine learning models, developed at CERN as part of the [Next Generation Triggers](https://nextgentriggers.web.cern.ch/t13/) project. -Installation via pip: ```pip install pquant-ml```. -To run the code, [HGQ2](https://github.com/calad0i/HGQ2) is also needed. +Installation via pip: ```pip install pquant-ml```. + +With TensorFlow ```pip install pquant-ml[tensorflow]```. + +With PyTorch ```pip install pquant-ml[torch]```. PQuant replaces the layers and activations it finds with a Compressed (in the case of layers) or Quantized (in the case of activations) variant. These automatically handle the quantization of the weights, biases and activations, and the pruning of the weights. Both PyTorch and TensorFlow models are supported. @@ -47,6 +50,12 @@ For detailed documentation check this page: [PQuantML documentation](https://pqu ### Authors - Roope Niemi (CERN) - Anastasiia Petrovych (CERN) + - Arghya Das (Purdue University) + - Enrico Lupi (CERN) - Chang Sun (Caltech) + - Dimitrios Danopoulos (CERN) + - Marlon Joshua Helbing + - Mia Liu (Purdue University) - Michael Kagan (SLAC National Accelerator Laboratory) - Vladimir Loncar (CERN) + - Maurizio Pierini (CERN) From b31bf2c99e023c759b47f5712fed57b49bbbfc58 Mon Sep 17 00:00:00 2001 From: Anastasiia Date: Mon, 12 Jan 2026 11:47:48 +0100 Subject: [PATCH 2/4] Add removed property at training model (#22) --- src/pquant/data_models/training_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pquant/data_models/training_model.py b/src/pquant/data_models/training_model.py index 78d0c37..228c1f6 100644 --- a/src/pquant/data_models/training_model.py +++ b/src/pquant/data_models/training_model.py @@ -11,3 +11,5 @@ class BaseTrainingModel(BaseModel): rewind: str = Field(default="never") rounds: int = Field(default=1) save_weights_epoch: int = Field(default=-1) + pruning_first: bool = Field(default=False) + \ No newline at end of file From b37020941b96f58be5089531a5e17cb731242fbe Mon Sep 17 00:00:00 2001 From: Anastasiia Date: Mon, 12 Jan 2026 15:44:08 +0100 Subject: [PATCH 3/4] Modified 'post_round' function condition (#23) --- src/pquant/core/keras/layers.py | 5 +++-- src/pquant/core/torch/layers.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pquant/core/keras/layers.py b/src/pquant/core/keras/layers.py index b615cbe..823ff49 100644 --- a/src/pquant/core/keras/layers.py +++ b/src/pquant/core/keras/layers.py @@ -1347,11 +1347,12 @@ def call(self, x, training=None): def call_post_round_functions(model, rewind, rounds, r): + last_round = (r == rounds - 1) if rewind == "round": rewind_weights_functions(model) - elif rewind == "post-ticket-search" and r == rounds - 1: + elif rewind == "post-ticket-search" and last_round: rewind_weights_functions(model) - else: + elif not last_round: post_round_functions(model) diff --git a/src/pquant/core/torch/layers.py b/src/pquant/core/torch/layers.py index 6e67655..c13228f 100644 --- a/src/pquant/core/torch/layers.py +++ b/src/pquant/core/torch/layers.py @@ -1404,11 +1404,12 @@ def apply_final_compression(module): def call_post_round_functions(model, rewind, rounds, r): + last_round = (r == rounds - 1) if rewind == "round": rewind_weights_functions(model) - elif rewind == "post-ticket-search" and r == rounds - 1: + elif rewind == "post-ticket-search" and last_round: rewind_weights_functions(model) - elif r != rounds - 1: + elif not last_round: post_round_functions(model) From 755aab23abf608870743a03a183956babc2a1faa Mon Sep 17 00:00:00 2001 From: Anastasiia Petrovych Date: Tue, 3 Mar 2026 15:30:55 +0100 Subject: [PATCH 4/4] Add backend addapter for hpo platform and fixed serialization issue in pdp method --- .readthedocs.yaml | 1 - README.md | 8 +- docs/Makefile | 2 +- docs/requirements.txt | 4 +- docs/source/_static/custom.css | 48 ++++----- docs/source/conf.py | 2 +- docs/source/faq.md | 4 +- docs/source/getting_started.md | 30 +++--- docs/source/index.rst | 10 +- docs/source/install.md | 2 +- docs/source/status.md | 4 +- .../core/hyperparameter_optimization.py | 102 ++++++++++++++---- src/pquant/data_models/quantization_model.py | 2 +- src/pquant/data_models/training_model.py | 2 - src/pquant/pruning_methods/pdp.py | 2 +- 15 files changed, 137 insertions(+), 86 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 575f578..f72c324 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -20,4 +20,3 @@ sphinx: # python: # install: # - requirements: docs/requirements.txt - diff --git a/README.md b/README.md index d5d245b..9eb54dd 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ ## Prune and Quantize ML models PQuant is a library for training compressed machine learning models, developed at CERN as part of the [Next Generation Triggers](https://nextgentriggers.web.cern.ch/t13/) project. -Installation via pip: ```pip install pquant-ml```. +Installation via pip: ```pip install pquant-ml```. -With TensorFlow ```pip install pquant-ml[tensorflow]```. +With TensorFlow ```pip install pquant-ml[tensorflow]```. With PyTorch ```pip install pquant-ml[torch]```. -PQuant replaces the layers and activations it finds with a Compressed (in the case of layers) or Quantized (in the case of activations) variant. These automatically handle the quantization of the weights, biases and activations, and the pruning of the weights. -Both PyTorch and TensorFlow models are supported. +PQuant replaces the layers and activations it finds with a Compressed (in the case of layers) or Quantized (in the case of activations) variant. These automatically handle the quantization of the weights, biases and activations, and the pruning of the weights. +Both PyTorch and TensorFlow models are supported. ### Layers that can be compressed diff --git a/docs/Makefile b/docs/Makefile index 5647f38..e88d665 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -18,4 +18,4 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @sphinx-apidoc -f -o autodoc/ ../src/HGQ - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/requirements.txt b/docs/requirements.txt index 726ada1..950c059 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ -sphinx furo myst-parser -sphinx_rtd_theme +sphinx sphinx-autodoc-typehints +sphinx_rtd_theme diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 3300cdf..e9e710a 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -51,12 +51,12 @@ html body nav.wy-nav-top, } .wy-body-for-nav { - background-color: #ffffff !important; + background-color: #ffffff !important; } .wy-nav-content { background-color: #ffffff !important; - max-width: 1200px !important; + max-width: 1200px !important; } .wy-side-nav-search { @@ -71,19 +71,19 @@ html body nav.wy-nav-top, } .wy-nav-side { - background-color: #b30000 !important; + background-color: #b30000 !important; } .wy-menu-vertical a { - color: #ffffff !important; + color: #ffffff !important; } .wy-menu-vertical a:hover { - background-color: #990000 !important; + background-color: #990000 !important; } .wy-menu-vertical li.current > a, .wy-menu-vertical li.toctree-l1.current > a { - background-color: #660000 !important; + background-color: #660000 !important; color: #ffffff !important; } @@ -110,7 +110,7 @@ h1, h2, h3, h4, h5, h6 { background-color: #ffffff !important; } -.rst-content > .document > .toctree-wrapper, +.rst-content > .document > .toctree-wrapper, .rst-content > .document > .section { background-color: #ffffff !important; } @@ -121,7 +121,7 @@ h1, h2, h3, h4, h5, h6 { } .rst-content table th { - background-color: #ffe6e6 !important; + background-color: #ffe6e6 !important; color: #b30000 !important; } @@ -158,7 +158,7 @@ h1, h2, h3, h4, h5, h6 { color: #000000 !important; padding: 8px !important; margin: 12px !important; - width: calc(100% - 24px) !important; + width: calc(100% - 24px) !important; box-sizing: border-box !important; } @@ -170,7 +170,7 @@ h1, h2, h3, h4, h5, h6 { .wy-side-nav-search { background-color: #ffffff !important; - padding: 20px 15px !important; + padding: 20px 15px !important; border-bottom: 2px solid #b30000 !important; } @@ -184,19 +184,19 @@ h1, h2, h3, h4, h5, h6 { } .wy-side-nav-search .version-switch { - color: #b30000 !important; + color: #b30000 !important; font-weight: 600 !important; } .wy-side-nav-search .version-switch :hover { - color: #990000 !important; + color: #990000 !important; } .wy-side-nav-search .fa-caret-down { color: #b30000 !important; } .wy-side-nav-search select.version-switch { - color: #b30000 !important; + color: #b30000 !important; background-color: #ffffff !important; border: 2px solid #b30000 !important; font-weight: 600 !important; @@ -205,8 +205,8 @@ h1, h2, h3, h4, h5, h6 { } .wy-side-nav-search select.rtd-version-select { - color: #b30000 !important; - background-color: #ffffff !important; + color: #b30000 !important; + background-color: #ffffff !important; border: 2px solid #b30000 !important; font-weight: 600 !important; padding: 6px !important; @@ -223,16 +223,16 @@ h1, h2, h3, h4, h5, h6 { } .wy-side-nav-search input[type="search"] { - height: 100% !important; - width: 100% !important; - font-size: 18px !important; - padding: 10px 14px !important; + height: 100% !important; + width: 100% !important; + font-size: 18px !important; + padding: 10px 14px !important; margin: 0 !important; - border-radius: 6px !important; - border: none !important; - box-shadow: none !important; - background: #ffffff !important; - color: #000000 !important; + border-radius: 6px !important; + border: none !important; + box-shadow: none !important; + background: #ffffff !important; + color: #000000 !important; box-sizing: border-box !important; } diff --git a/docs/source/conf.py b/docs/source/conf.py index 64ac78d..fe398a4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,4 +67,4 @@ html_css_files = [ 'custom.css', -] \ No newline at end of file +] diff --git a/docs/source/faq.md b/docs/source/faq.md index 5a3f9f1..c8637a9 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -12,12 +12,12 @@ An example to install PyTorch with CUDA 13.0: pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu130 ``` ## Can I use MLflow locally? -Yes. +Yes. PQuantML integrates with MLflow for experiment tracking and model logging and local usage is fully supported. -### Start local MLFlow UI: +### Start local MLFlow UI: ```python mlflow ui --host 0.0.0.0 --port 5000 ``` diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md index 63b5f0b..c5c99d6 100644 --- a/docs/source/getting_started.md +++ b/docs/source/getting_started.md @@ -39,13 +39,13 @@ def build_model(config): class Model(torch.nn.Module): def __init__(self): super().__init__() - self.dense1 = PQDense(config, 16, 64, + self.dense1 = PQDense(config, 16, 64, in_quant_bits = (1, 3, 3)) self.relu = PQActivation(config, "relu") self.dense2 = PQDense(config, 64, 32) self.dense3 = PQDense(config, 32, 32) - self.dense4 = PQDense(config, 32, 5, - quantize_output=True, + self.dense4 = PQDense(config, 32, 5, + quantize_output=True, out_quant_bits=(1, 3, 3)) def forward(self, x): @@ -78,7 +78,7 @@ def build_model(): x = self.relu(self.dense3(x)) x = self.dense4(x) return x - + return Model() @@ -86,10 +86,10 @@ def build_model(): model = add_compression_layers(model, config) ``` -### Fine-Tuning with PQuantML +### Fine-Tuning with PQuantML PQuantML provides an automated fine-tuning and hyperparameter-optimization workflow through the `TuningTask API`. This allows you to search for optimal pruning, quantization, and training parameters using your own training, validation, and objective functions. -```python +```python from pquant.core.finetuning import TuningTask, TuningConfig # Convert defined yaml file into the object @@ -142,13 +142,13 @@ Training is handled through the `train_model(...)` wrapper: ```python from pquant import train_model -trained_model = train_model(model = model, - config = config, - train_func = ..., - valid_func = ..., - trainloader = ..., +trained_model = train_model(model = model, + config = config, + train_func = ..., + valid_func = ..., + trainloader = ..., device="cuda", - testloader = ..., + testloader = ..., loss_func = loss_func, optimizer = optimizer, scheduler=scheduler @@ -164,15 +164,15 @@ def build_model(config): class Model(torch.nn.Module): def __init__(self): super().__init__() - self.dense1 = PQDense(config, 16, 64, + self.dense1 = PQDense(config, 16, 64, in_quant_bits = (1, 3, 3)) self.relu1 = PQActivation(config, "relu") self.relu2 = PQActivation(config, "relu") self.relu3 = PQActivation(config, "relu") self.dense2 = PQDense(config, 64, 32) self.dense3 = PQDense(config, 32, 32) - self.dense4 = PQDense(config, 32, 5, - quantize_output=True, + self.dense4 = PQDense(config, 32, 5, + quantize_output=True, out_quant_bits=(1, 3, 3)) def forward(self, x): diff --git a/docs/source/index.rst b/docs/source/index.rst index 344ba5d..e697789 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -16,10 +16,10 @@ PQuantML Welcome to the official documentation for **PQuantML**, a hardware-aware model compression framework supporting: -- Joint pruning + quantization -- Layer-wise precision configuration -- Flexible training pipelines -- PyTorch and TensorFlow backends +- Joint pruning + quantization +- Layer-wise precision configuration +- Flexible training pipelines +- PyTorch and TensorFlow backends - Integration with hardware-friendly toolchains (e.g., hls4ml) PQuantML enables efficient deployment of compact neural networks on resource-constrained hardware such as FPGAs and embedded accelerators. @@ -49,7 +49,7 @@ Contents .. toctree:: :maxdepth: 2 - + status install getting_started diff --git a/docs/source/install.md b/docs/source/install.md index ef039ab..e9ba183 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -1,6 +1,6 @@ # Installation -Use `pip install pquant-ml` to install the latest version from PyPI. You will need an environment with `python>=3.10,<=3.12` installed. +Use `pip install pquant-ml` to install the latest version from PyPI. You will need an environment with `python>=3.10,<=3.12` installed. ```{warning} diff --git a/docs/source/status.md b/docs/source/status.md index 15f1aa9..8dd291f 100644 --- a/docs/source/status.md +++ b/docs/source/status.md @@ -1,6 +1,6 @@ # PQuantML Status -This page tracks the development status of PQuantML features +This page tracks the development status of PQuantML features ## Release: v1.0.0 @@ -13,5 +13,3 @@ This page tracks the development status of PQuantML features | hls4ml integration | ✅ Complete | Works in v1.0.0 | | FITCompress | 🚧 Partially implemented | Works through PyTorch only | | Documentation | 🚧 Improving | Expanded daily | - - diff --git a/src/pquant/core/hyperparameter_optimization.py b/src/pquant/core/hyperparameter_optimization.py index 55a6077..85b0560 100644 --- a/src/pquant/core/hyperparameter_optimization.py +++ b/src/pquant/core/hyperparameter_optimization.py @@ -6,7 +6,6 @@ import keras import optuna -import torch import yaml from pydantic import BaseModel, Field, field_validator @@ -37,10 +36,9 @@ def get_sampler(sampler_type, **kwargs): raise ValueError(f"Unknown sampler type: {sampler_type}") -def log_model_by_backend(model, name, signature=None, registered_model_name=None): +def log_model_by_backend(model, name, backend, signature=None, registered_model_name=None): import mlflow - backend = keras.backend.backend() kwargs = { "artifact_path": name, "signature": signature, @@ -61,7 +59,7 @@ class MetricFunction(BaseModel): @field_validator('direction') def validate_direction(cls, direction): if direction not in constants.FINETUNING_DIRECTION: - raise ValueError("direction must be 'maximize' or 'minimize'") + raise ValueError("Direction must be 'maximize' or 'minimize'") return direction @@ -115,6 +113,58 @@ def get_dict(self): return self.model_dump(mode="json") +class BackendAdapter: + def __init__(self, model): + self.backend = self._detect_backend(model) + self.device = None + + def clone_model(self, model): + if self.backend == constants.TORCH_BACKEND: + return copy.deepcopy(model) + elif self.backend == constants.TF_BACKEND: + new_model = keras.models.clone_model(model) + new_model.set_weights(model.get_weights()) + return new_model + + def get_backend(self): + return self.backend + + def get_device(self): + return self.device + + def _detect_backend(self, model): + if hasattr(model, "parameters"): + return constants.TORCH_BACKEND + elif isinstance(model, keras.Model): + return constants.TF_BACKEND + else: + raise ValueError("Unsupported model type") + + def move_to_device(self, model): + if self.backend == constants.TORCH_BACKEND: + self.device = next(model.parameters()).device + return model.to(self.device) + return model + + def eval(self, model): + if self.backend == constants.TORCH_BACKEND: + model.eval() + return model + + def tensor_to_numpy(self, tensor): + if self.backend == constants.TORCH_BACKEND: + return tensor.detach().cpu().numpy() + elif self.backend == constants.TF_BACKEND: + return tensor.numpy() + + def forward(self, model, x): + if self.backend == constants.TORCH_BACKEND: + x = x.to(self.device) + return model(x) + elif self.backend == constants.TF_BACKEND: + return model(x, training=False) + + class TuningTask: def __init__(self, config: PQConfig): self.config = config @@ -124,7 +174,6 @@ def __init__(self, config: PQConfig): self._validation_function: Optional[Callable] = None self._optimizer_function: Optional[Callable] = None self._scheduler_function: Optional[Callable] = None - self.device = "cuda" if torch.cuda.is_available() else "cpu" self.enable_mlflow = False self.tracking_uri = None self.storage_db = None @@ -245,16 +294,17 @@ def register_hyperparameter(self, name, optuna_func, *args, **kwargs): def objective(self, trial, model, train_func, valid_func, **kwargs): from pquant import add_compression_layers, train_model + config_copy = copy.deepcopy(self.config) for param_name, (optuna_func, func_args, func_kwargs) in self.hyperparameters.items(): new_value = optuna_func(trial, *func_args, **func_kwargs) logging.info(f"Suggested {param_name} = {new_value}") applied = False for sub_config in [ - self.config.training_parameters, - self.config.pruning_parameters, - self.config.quantization_parameters, - self.config.fitcompress_parameters, + config_copy.training_parameters, + config_copy.pruning_parameters, + config_copy.quantization_parameters, + config_copy.fitcompress_parameters, ]: if hasattr(sub_config, param_name): setattr(sub_config, param_name, new_value) @@ -266,29 +316,32 @@ def objective(self, trial, model, train_func, valid_func, **kwargs): trainloader = kwargs['trainloader'] raw_input_batch = next(iter(trainloader)) sample_input = raw_input_batch[0] - sample_output = model(sample_input.to(next(model.parameters()).device)) + model_copy = self.adapter.clone_model(model) + model_copy = self.adapter.move_to_device(model_copy) + sample_output = self.adapter.forward(model_copy, sample_input) input_shape = sample_input.shape - compressed_model = add_compression_layers(model, self.config, input_shape) + compressed_model = add_compression_layers(model_copy, config_copy, input_shape) optimizer_func = self.get_optimizer_function() - optimizer = optimizer_func(self.config, compressed_model) + optimizer = optimizer_func(config_copy, compressed_model) scheduler_func = self.get_scheduler_function() - scheduler = scheduler_func(optimizer, self.config) + scheduler = scheduler_func(optimizer, config_copy) + device = self.adapter.get_device() trained_model = train_model( compressed_model, - self.config, + config_copy, train_func, valid_func, optimizer=optimizer, scheduler=scheduler, - device=self.device, + device=device, writer=None, **kwargs, ) - trained_model.eval() + self.adapter.eval(trained_model) objectives = [ - metric_object.function_name(trained_model, device=self.device, **kwargs) + metric_object.function_name(trained_model, device=device, **kwargs) for _, metric_object in self.objectives.items() ] @@ -297,23 +350,27 @@ def objective(self, trial, model, train_func, valid_func, **kwargs): from mlflow.models import infer_signature with mlflow.start_run(nested=True): - mlflow.log_params({param_name: getattr(self.config, param_name) for param_name in self.config.model_fields}) + mlflow.log_params({param_name: getattr(config_copy, param_name) for param_name in config_copy.model_fields}) mlflow.log_metrics({key: val for key, val in zip(self.objectives.keys(), objectives)}) - signature = infer_signature(sample_input.cpu().numpy(), sample_output.detach().cpu().numpy()) + signature = infer_signature( + self.adapter.tensor_to_numpy(sample_input), self.adapter.tensor_to_numpy(sample_output) + ) mlflow.log_text(yaml.safe_dump(self.get_dict()), "config.yaml") - model_name = self.config.hpo_parameters.model_name + model_name = config_copy.hpo_parameters.model_name log_model_by_backend( model=trained_model, name=model_name, signature=signature, registered_model_name=model_name, + backend=self.adapter.get_backend(), ) return objectives if len(objectives) > 1 else objectives[0] def run_optimization(self, model, **kwargs): hpo_parameters = self.config.hpo_parameters + num_trials = hpo_parameters.num_trials if self.enable_mlflow: import mlflow @@ -330,12 +387,11 @@ def run_optimization(self, model, **kwargs): load_if_exists=True, directions=[metric_object.direction for _, metric_object in self.objectives.items()], ) - - num_trials = hpo_parameters.num_trials + self.adapter = BackendAdapter(model) study.optimize( lambda trial: self.objective( trial, - copy.deepcopy(model.cpu()).to(self.device), + model, self.get_training_function(), self.get_validation_function(), **kwargs, diff --git a/src/pquant/data_models/quantization_model.py b/src/pquant/data_models/quantization_model.py index 31cefd8..a3c71b7 100644 --- a/src/pquant/data_models/quantization_model.py +++ b/src/pquant/data_models/quantization_model.py @@ -1,5 +1,5 @@ -from typing import List from enum import Enum + from pydantic import BaseModel, Field diff --git a/src/pquant/data_models/training_model.py b/src/pquant/data_models/training_model.py index f841d70..1619b59 100644 --- a/src/pquant/data_models/training_model.py +++ b/src/pquant/data_models/training_model.py @@ -1,5 +1,3 @@ -from typing import Literal - from pydantic import BaseModel, ConfigDict, Field diff --git a/src/pquant/pruning_methods/pdp.py b/src/pquant/pruning_methods/pdp.py index 48799f8..5a07d59 100644 --- a/src/pquant/pruning_methods/pdp.py +++ b/src/pquant/pruning_methods/pdp.py @@ -170,5 +170,5 @@ def post_epoch_function(self, epoch, total_epochs): def get_config(self): config = super().get_config() - config.update({"config": self.config.get_dict(), "layer_type": self.layer_type, "mask": self.mask}) + config.update({"config": self.config.get_dict(), "layer_type": self.layer_type}) return config