diff --git a/.gitignore b/.gitignore index 9c3b44d..3ceb093 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ old/ build/ *.mp4 + +checkpoints/ diff --git a/docker/Dockerfile b/docker/Dockerfile index 4876623..d3313ff 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,7 +5,8 @@ RUN pip3 install \ matplotlib \ scikit-learn \ tensorflow-model-optimization \ - kagglehub + kagglehub \ + pandas # Set backward compatibility for tfmot RUN pip3 install tf_keras --no-deps diff --git a/functions.diff b/functions.diff new file mode 100644 index 0000000..1d2b0d0 --- /dev/null +++ b/functions.diff @@ -0,0 +1,99 @@ +diff --git a/src/stage/functions.py b/src/stage/functions.py +index 3e06394..919615c 100644 +--- a/src/stage/functions.py ++++ b/src/stage/functions.py +@@ -277,62 +277,49 @@ def model_quantize(model: tf.keras.Model, **params) -> tf.keras.Model: + + # --- Alpha Initialization for QAT --- + +- +-def compute_alpha_dict(model, x_train, batch_size=128): +- """Computes alpha values for weights and activations.""" +- alpha_dict = {} +- # Compute weight alphas +- for layer in tqdm( +- model.layers, +- desc="Computing weight alphas", +- file=sys.stdout, +- leave=False, +- ): +- if layer.get_weights(): +- alpha_dict[layer.name] = {} +- # Simplified alpha calculation for weights +- weights = layer.get_weights()[0] +- alpha_dict[layer.name]["kernel"] = np.max(np.abs(weights)) +- +- # Compute activation alphas ++def get_activations_output(model, x_train, batch_size=128): ++ """Gets the activations of the model for the training data.""" + intermediate_model = models.Model( + inputs=model.input, outputs=[layer.output for layer in model.layers] + ) + activations = intermediate_model.predict( + x_train, batch_size=batch_size, verbose=0 + ) ++ return activations ++ ++def compute_alpha_dict(model, x_train, batch_size=128): ++ """Computes alpha values for weights and activations in a single comprehension.""" ++ activations = get_activations_output(model, x_train, batch_size) + +- for layer, activation_data in tqdm( +- zip(model.layers, activations), +- total=len(model.layers), +- desc="Computing activation alphas", +- file=sys.stdout, +- leave=False, +- ): +- if layer.name not in alpha_dict: +- alpha_dict[layer.name] = {} +- alpha_dict[layer.name]["activation"] = np.max(np.abs(activation_data)) ++ alpha_dict = { ++ layer.name: { ++ **{weight.name: np.max(np.abs(weight.numpy())) for weight in layer.weights}, ++ 'activation': np.max(np.abs(activation_data)) ++ } ++ for layer, activation_data in zip(model.layers, activations) ++ } + + return alpha_dict + +- +-def apply_alpha_dict(q_model, alpha_dict): ++def apply_alpha_dict(model, alpha_dict): + """Applies pre-computed alpha values to a quantized model.""" +- for layer in q_model.layers: +- original_name = layer.name.replace("quant_", "") +- if original_name in alpha_dict: +- for alpha_type in ["kernel", "activation"]: +- if new_alpha := alpha_dict[original_name].get(alpha_type): +- for weight_var in layer.weights: +- if ( +- alpha_type in weight_var.name +- and "alpha" in weight_var.name +- ): +- weight_var.assign(new_alpha) +- print( +- f"Updated {weight_var.name} with alpha: {new_alpha:.4f}" +- ) +- return q_model ++ for layer in model.layers: ++ original_layer_name = layer.name.replace("quant_", "") ++ ++ if original_layer_name not in alpha_dict: ++ continue ++ ++ for weight in layer.weights: ++ if weight.name not in alpha_dict[original_layer_name]: ++ continue ++ ++ # See the quantizers weight naming convention ++ # No name_suffix for now ++ 
weight.assign(
++                alpha_dict[original_layer_name][weight.name]
++            )
++            print(f"Updated {weight.name} with alpha: {alpha_dict[original_layer_name][weight.name]:.4f}")
++    return model
+
+
+ def model_initialize_parameters(model, ref_model, **params) -> tf.keras.Model:
diff --git a/org.MD b/org.MD
new file mode 100644
index 0000000..e42f5d9
--- /dev/null
+++ b/org.MD
@@ -0,0 +1,21 @@
+# Dataset
+ - cifar10 has more characteristics
+# Model
+ - Lenet5_custom_v2
+
+# QConfig
+All Uniform arithmetic
+2->8 uniform
+
+Flex -- Uniform
+4, 6 and 8 bits.
+levels in [2-20] that are valid.
+
+
+Set the seed and try a couple of seeds.
+3 runs with different seeds.
+
+Plot the mean and variance of all the runs.
+
+## Second step
+ - dsp: we'll see.
diff --git a/src/quantizers/flex_quantizer.py b/src/quantizers/flex_quantizer.py
index bec60cc..bed5988 100644
--- a/src/quantizers/flex_quantizer.py
+++ b/src/quantizers/flex_quantizer.py
@@ -157,10 +157,8 @@ def grad(upstream):
             ##### dq_dx uses STE #####
             dq_dx = tf.where(
                 tf.logical_and(
-                    tf.greater_equal(x, self.thresholds[0]),
-                    tf.less_equal(
-                        x, self.thresholds[-1]
-                    ),  # should it be alpha?
+                    tf.greater_equal(x, thresholds[0]),
+                    tf.less_equal(x, thresholds[-1]),  # should it be alpha?
                 ),
                 upstream,
                 tf.zeros_like(x),
@@ -207,7 +205,7 @@ def grad(upstream):
             ##### dq_dthresholds using piecewise-STE #####
             dq_dthresholds = tf.zeros_like(thresholds)

-            for i in range(1, self.thresholds.shape[0] - 1):
+            for i in range(1, thresholds.shape[0] - 1):
                 delta_y = qlevels[i - 1] - qlevels[i]
                 delta_x = thresholds[i + 1] - thresholds[i - 1]

@@ -215,8 +213,8 @@ def grad(upstream):
                 # Fall within the range of the two borderline levels
                 masked_upstream = tf.where(
                     tf.logical_and(
-                        tf.greater_equal(x, self.thresholds[i - 1]),
-                        tf.less_equal(x, self.thresholds[i + 1]),
+                        tf.greater_equal(x, thresholds[i - 1]),
+                        tf.less_equal(x, thresholds[i + 1]),
                     ),
                     upstream,
                     tf.zeros_like(x),
diff --git a/src/quantizers/integration_test.py b/src/quantizers/integration_test.py
new file mode 100755
index 0000000..913b794
--- /dev/null
+++ b/src/quantizers/integration_test.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+import unittest
+
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.models import Sequential
+
+from configs.qmodel import apply_quantization
+from quantizers.uniform_quantizer import UniformQuantizer
+
+
+class TestQuantizers(unittest.TestCase):
+    def test_quantizers(self):
+        model = Sequential(
+            [
+                Dense(10, activation="relu", input_shape=(20,), name="dense1"),
+                Dense(5, activation="softmax"),
+            ]
+        )
+
+        qconfig = {
+            "dense1": {
+                "weights": {
+                    "kernel": UniformQuantizer(
+                        bits=4,
+                        signed=True,
+                    ),
+                    "bias": UniformQuantizer(
+                        bits=4,
+                        signed=True,
+                    ),
+                },
+                "activations": {
+                    "activation": UniformQuantizer(
+                        bits=4,
+                        signed=True,
+                    )
+                },
+            }
+        }
+
+        apply_quantization(model, qconfig)
+        # print(quantized_model.weights)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/quantizers/uniform_quantizer.py b/src/quantizers/uniform_quantizer.py
index 384b848..2a3f7cd 100755
--- a/src/quantizers/uniform_quantizer.py
+++ b/src/quantizers/uniform_quantizer.py
@@ -66,10 +66,10 @@ def __call__(self, w):
             # Use epsilon to avoid dividing by zero during backpropagation.
return tf.clip_by_value(w, tf.keras.backend.epsilon(), np.inf) + print(f"name is {name}") alpha = layer.add_weight( name=f"{name}{self.name_suffix}_alpha", initializer=self.initializer, - # shape=(1,), trainable=True, dtype=tf.float32, regularizer=self.regularizer, diff --git a/src/stage/__init__.py b/src/stage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_16bits.png b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_16bits.png new file mode 100644 index 0000000..12aee99 Binary files /dev/null and b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_16bits.png differ diff --git a/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_8bits.png b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_8bits.png new file mode 100644 index 0000000..1241ab9 Binary files /dev/null and b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_activations_in_8bits.png differ diff --git a/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_10bits.png b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_10bits.png new file mode 100644 index 0000000..baa3b5d Binary files /dev/null and b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_10bits.png differ diff --git a/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_8bits.png b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_8bits.png new file mode 100644 index 0000000..401427a Binary files /dev/null and b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_all_but_last_activations_in_8bits.png differ diff --git a/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_no_activation_quantized.png b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_no_activation_quantized.png new file mode 100644 index 0000000..ecec973 Binary files /dev/null and b/src/stage/cifar_10_colo_custom_cnn1_for_cifar10_model_accuracy_vs_size_no_activation_quantized.png differ diff --git a/src/stage/cifar_10_lenet5_model_accuracy_vs_size.png b/src/stage/cifar_10_lenet5_model_accuracy_vs_size.png new file mode 100644 index 0000000..8ecfb7e Binary files /dev/null and b/src/stage/cifar_10_lenet5_model_accuracy_vs_size.png differ diff --git a/src/stage/fashion_mnist_lenet5_model_accuracy_vs_size.png b/src/stage/fashion_mnist_lenet5_model_accuracy_vs_size.png new file mode 100644 index 0000000..bdb970c Binary files /dev/null and b/src/stage/fashion_mnist_lenet5_model_accuracy_vs_size.png differ diff --git a/src/stage/flex_main.py b/src/stage/flex_main.py new file mode 100755 index 0000000..7a11a8d --- /dev/null +++ b/src/stage/flex_main.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 + +from functools import partial +from pathlib import Path + +import tensorflow as tf +from functions import FUNCTION_MAP + +from stage import Stage + +# --- Configuration for All 7 Pipeline Stages --- +# This list defines the blueprint for our pipeline. Each dictionary +# will be used to initialize a Stage object. 
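+# Each entry needs a "name" (used to name the stage's metadata JSON file), a
+# "seed" (recorded in the stage's config hash), a "function" key that is looked
+# up in FUNCTION_MAP from functions.py, and "kwargs" that Stage.run forwards to
+# that function.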
+ + +stages_hyperparams = [ + # Stage 0: Model creation + { + "name": "model_creation", + "seed": 12345, + "function": "model_create", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "model_name": "lenet5_custom", + }, + }, + # Stage 1: Initial training + { + "name": "initial_training", + "seed": 12345, + "function": "model_train", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "epochs": 50, + "batch_size": 1024, + "learning_rate": 0.001, + "validation_split": 0.1, + }, + }, + # Stage 2: BN folding + { + "name": "bnf", + "seed": 12345, + "function": "model_transform_bnf", + "kwargs": { + "merge_activation": True, + }, + }, + # Stage 3: Post BN folding training + { + "name": "pbnf_training", + "seed": 12345, + "function": "model_train", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "epochs": 1, + "batch_size": 32, + "learning_rate": 0.0005, + "validation_split": 0.1, + }, + }, + # Stage 4: Model quantization + { + "name": "quantization", + "seed": 12345, + "function": "model_quantize", + "kwargs": { + "input_shape": [None, 32, 32, 3], + # 'kernel' is set to None because it will be dynamically + # updated inside the experimental loop below. + "kernel": None, + "bias": [ + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + ], + "activations": [ + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + ], + }, + }, + # Stage 5: Alpha initialization + { + "name": "alpha_initialization", # Fixed typo from original "initialiation" + "seed": 12345, + "function": "model_initialize_parameters", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "type": "alpha", + }, + }, + # Stage 6: QAT + { + "name": "qat", + "seed": 12345, + "function": "model_train", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "epochs": 10, + "batch_size": 32, + "learning_rate": 0.0001, + "validation_split": 0.1, + }, + }, + { + "name": "final_evaluation", + "seed": 12345, + "function": "model_evaluate", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + }, + }, +] + +if __name__ == "__main__": + + # This is the main experimental loop from your coworker's script. + # It runs the entire 7-stage pipeline multiple times. + # bits = range(8, 11) + bits = range(1, 11) + n_levels = [2, 3, 4, 5, 6, 7, 8, 12, 16, 20] + combinations = [(b, n) for b in bits for n in n_levels if n <= 2**b] + + for bits, n_levels in combinations: + print( + f"\n{'='*20} STARTING EXPERIMENT: FLEX BITS = {bits}, N_LEVELS = {n_levels} {'='*20}\n" + ) + + # --- Configure the Experiment --- + # Dynamically set the 'kernel' quantization parameter for this specific run. 
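+        # One entry per quantizable layer: lenet5_custom has two Conv2D and
+        # three Dense layers, so these five kernel configs are paired with the
+        # five bias/activation configs by model_quantize (positionally via zip,
+        # in layer order).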
+ kernel_config = [ + {"type": "flexible", "bits": bits, "n_levels": n_levels} + for _ in range(5) + ] + stages_hyperparams[4]["kwargs"]["kernel"] = kernel_config + + # Create the list of Stage objects from the (now updated) configurations + dataset = stages_hyperparams[0]["kwargs"].get("dataset") + model_name = stages_hyperparams[0]["kwargs"].get("model_name") + pipeline = [ + Stage( + function=FUNCTION_MAP[config["function"]], + initial_config=config, + checkpoint_path=Path("checkpoints") + / f"flex_{model_name}-{dataset}", + metadata_path=Path(f"{bits}_bit-{n_levels}_levels"), + ) + for config in stages_hyperparams + ] + + # --- The Orchestrator --- + # It tracks both the model object and the hash of the last operation + model: tf.keras.Model | None = None + previous_hash: str | None = None + + # The loop's responsibility is to pass the state (model & hash) between stages + for stage in pipeline: + # We need to set the ref model + if stage.initial_config["name"] == "alpha_initialization": + assert ( + ref_model is not None + ), "Reference model for alpha initialization is not set." + stage.function = partial(stage.function, ref_model=ref_model) + model, previous_hash = stage.run( + input_model=model, previous_hash=previous_hash + ) + + # Save the ref model after the last stage we dont quantize + if stage.initial_config["name"] == "pbnf_training": + ref_model = tf.keras.models.clone_model(model) + + print( + f"\n{'='*20} FINISHED EXPERIMENT: FLEX BITS = {bits}, N_LEVELS = {n_levels} {'='*20}\n" + ) + print( + f"Final model for {bits}-bit experiment corresponds to hash: {previous_hash}" + ) diff --git a/src/stage/functions.py b/src/stage/functions.py new file mode 100644 index 0000000..19f55fe --- /dev/null +++ b/src/stage/functions.py @@ -0,0 +1,526 @@ +# functions.py + + +import numpy as np +import tensorflow as tf +from tensorflow.keras import layers, models +from tensorflow.keras.datasets import mnist +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.utils import to_categorical + +from configs.qmodel import apply_quantization +from quantizers.flex_quantizer import FlexQuantizer +from quantizers.uniform_quantizer import UniformQuantizer + +# --- Data Loading --- + + +def load_data(dataset_name: str) -> dict: + """Loads and preprocesses the specified dataset.""" + if dataset_name == "mnist": + (x_train, y_train), (x_test, y_test) = mnist.load_data() + + # Reshape and normalize images + x_train = x_train.reshape(-1, 28, 28, 1).astype("float32") / 255.0 + x_test = x_test.reshape(-1, 28, 28, 1).astype("float32") / 255.0 + + # One-hot encode labels + y_train = to_categorical(y_train, 10) + y_test = to_categorical(y_test, 10) + + return { + "x_train": x_train, + "y_train": y_train, + "x_test": x_test, + "y_test": y_test, + } + if dataset_name == "fashion_mnist": + (x_train, y_train), (x_test, y_test) = ( + tf.keras.datasets.fashion_mnist.load_data() + ) + + # Reshape and normalize images + x_train = x_train.reshape(-1, 28, 28, 1).astype("float32") / 255.0 + x_test = x_test.reshape(-1, 28, 28, 1).astype("float32") / 255.0 + + # One-hot encode labels + y_train = to_categorical(y_train, 10) + y_test = to_categorical(y_test, 10) + + return { + "x_train": x_train, + "y_train": y_train, + "x_test": x_test, + "y_test": y_test, + } + if dataset_name == "cifar10": + (x_train, y_train), (x_test, y_test) = ( + tf.keras.datasets.cifar10.load_data() + ) + + # Normalize images + x_train = x_train.astype("float32") / 255.0 + x_test = x_test.astype("float32") / 255.0 + + # One-hot encode 
labels + y_train = to_categorical(y_train, 10) + y_test = to_categorical(y_test, 10) + + return { + "x_train": x_train, + "y_train": y_train, + "x_test": x_test, + "y_test": y_test, + } + if dataset_name == "cifar100": + (x_train, y_train), (x_test, y_test) = ( + tf.keras.datasets.cifar100.load_data() + ) + + # Normalize images + x_train = x_train.astype("float32") / 255.0 + x_test = x_test.astype("float32") / 255.0 + + # One-hot encode labels + y_train = to_categorical(y_train, 100) + y_test = to_categorical(y_test, 100) + + return { + "x_train": x_train, + "y_train": y_train, + "x_test": x_test, + "y_test": y_test, + } + else: + raise ValueError(f"Unknown dataset: {dataset_name!r}") + + +# --- Core Model Operations --- + + +def model_create(model, **params: dict) -> tf.keras.Model: + """Creates a new Keras model based on the specified architecture.""" + model_name = params["model_name"] + input_shape = params["input_shape"] + categories = params["categories"] + if model_name == "lenet5_custom": + new_model = models.Sequential( + [ + layers.Conv2D( + 6, + kernel_size=5, + activation="relu", + padding="same", + input_shape=input_shape[1:], + ), + layers.AveragePooling2D(), + layers.Conv2D(16, kernel_size=5, activation="relu"), + layers.AveragePooling2D(), + layers.Flatten(), + layers.Dense(120, activation="relu"), + layers.Dense(84, activation="relu"), + layers.Dense(categories, activation="softmax"), + ], + name=model_name, + ) + new_model.compile( + optimizer=Adam(), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + return new_model + + if model_name == "lenet5_custom_v2": + new_model = models.Sequential( + [ + layers.Conv2D( + 32, + kernel_size=5, + activation="relu", + padding="same", + input_shape=input_shape[1:], + ), + layers.AveragePooling2D(), + layers.Conv2D(64, kernel_size=5, activation="relu"), + layers.AveragePooling2D(), + layers.Conv2D(64, kernel_size=5, activation="relu"), + layers.AveragePooling2D(), + layers.Flatten(), + layers.Dense(128, activation="relu"), + layers.Dense(256, activation="relu"), + layers.Dense(categories, activation="softmax"), + ], + name=model_name, + ) + new_model.compile( + optimizer=Adam(), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + return new_model + + if model_name == "colo_custom_cnn1_for_cifar10": + new_model = models.Sequential( + [ + layers.Conv2D( + 64, + (3, 3), + padding="same", + activation="relu", + input_shape=input_shape[1:], + ), + layers.MaxPooling2D((2, 2)), + layers.Conv2D(128, (3, 3), padding="same", activation="relu"), + layers.MaxPooling2D((2, 2)), + layers.Flatten(), + layers.Dense(256, activation="relu"), + layers.Dropout(0.5), + layers.Dense(categories, activation="softmax"), + ] + ) + new_model.compile( + optimizer=Adam(), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + return new_model + + if model_name == "vgg16": + new_model = tf.keras.applications.VGG16( + include_top=True, + weights=None, + input_shape=input_shape[1:], + classes=categories, + ) + new_model.compile( + optimizer=Adam(), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + return new_model + else: + raise ValueError(f"Unknown model_name: {model_name!r}") + + +def model_train(model: tf.keras.Model, **params: dict) -> tf.keras.Model: + """Trains the model with the given parameters.""" + if model is None: + raise ValueError("model_train received an empty model.") + + data = load_data(params["dataset"]) + + model.compile( + optimizer=Adam(learning_rate=params["learning_rate"]), + 
loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + if params.get("epochs", 0) > 0: + model.fit( + data["x_train"], + data["y_train"], + batch_size=params["batch_size"], + epochs=params["epochs"], + validation_split=params["validation_split"], + verbose=1, # Set to 1 to see progress + ) + return model + + +# --- Model Transformation and Quantization --- + + +def apply_bn_folding( + model: tf.keras.Model, merge_activation: bool = False +) -> tf.keras.Model: + """Fuses Conv/Dense layers with subsequent BatchNormalization layers.""" + if not isinstance(model, tf.keras.Sequential): + raise TypeError( + "BN folding currently only supports Sequential models." + ) + + def is_relu(layer): + return isinstance(layer, layers.ReLU) or ( + isinstance(layer, layers.Activation) + and layer.get_config().get("activation") == "relu" + ) + + new_layers = [] + i = 0 + while i < len(model.layers): + layer = model.layers[i] + + # Check if we can fold this layer with the next one + if ( + isinstance(layer, (layers.Conv2D, layers.Dense)) + and i + 1 < len(model.layers) + and isinstance(model.layers[i + 1], layers.BatchNormalization) + ): + + conv_layer = layer + bn_layer = model.layers[i + 1] + + # Get weights + conv_weights = conv_layer.get_weights() + bn_weights = bn_layer.get_weights() + + kernel = conv_weights[0] + bias = ( + conv_weights[1] + if conv_layer.use_bias + else np.zeros(kernel.shape[-1]) + ) + + gamma, beta, moving_mean, moving_variance = bn_weights + epsilon = bn_layer.epsilon + + # Calculate new weights and biases + scale = gamma / np.sqrt(moving_variance + epsilon) + new_bias = beta + (bias - moving_mean) * scale + + if isinstance(conv_layer, layers.Dense): + new_kernel = kernel * scale + else: # Conv2D + new_kernel = kernel * scale.reshape((1, 1, 1, -1)) + + # Create new layer configuration + new_config = conv_layer.get_config() + new_config["use_bias"] = True + new_config["activation"] = conv_layer.activation + + i += 2 # Skip original conv and BN + + # Check for merging activation + if ( + merge_activation + and i < len(model.layers) + and is_relu(model.layers[i]) + ): + new_config["activation"] = "relu" + i += 1 + + # Create the new fused layer + fused_layer = type(conv_layer).from_config(new_config) + new_layers.append(fused_layer) + + # Build and set weights for the new layer + fused_layer.build(conv_layer.input_shape) + fused_layer.set_weights([new_kernel, new_bias]) + + else: + new_layers.append(layer) + i += 1 + + return models.Sequential(new_layers) + + +def model_transform_bnf( + model: tf.keras.Model, **params: dict +) -> tf.keras.Model: + """Applies Batch Normalization Folding to the model.""" + print("Function: model_transform_bnf called") + if model is None: + raise ValueError("model_transform_bnf received an empty model.") + new_model = apply_bn_folding( + model, merge_activation=params.get("merge_activation", False) + ) + new_model.compile( + optimizer=Adam(), loss="categorical_crossentropy", metrics=["accuracy"] + ) + return new_model + + +def model_quantize(model: tf.keras.Model, **params) -> tf.keras.Model: + """Applies quantization to the model.""" + print("Function: model_quantize called") + if model is None: + raise ValueError("model_quantize received an empty model.") + # (Your quantization logic here) + kernel = params["kernel"] + bias = params["bias"] + activations = params["activations"] + + # Layers initialization + layers = list() + supported = ("conv2d", "dense") + for layer in model.layers: + if any(kw in layer.name for kw in supported): + 
layers.append(layer.name) + + # QConfig initialization + qconfig = dict() + for layer in layers: + qconfig[layer] = dict() + for layer in layers: + for k in ("weights", "activations"): + qconfig[layer][k] = dict() + + for layer, k, b, a in zip(layers, kernel, bias, activations): + # Kernel + if k["type"] == "uniform": + qconfig[layer]["weights"]["kernel"] = UniformQuantizer( + bits=k["bits"], signed=True + ) + elif k["type"] == "flexible": + qconfig[layer]["weights"]["kernel"] = FlexQuantizer( + bits=k["bits"], n_levels=k["n_levels"], signed=True + ) + else: + pass + # Bias + if b["type"] == "uniform": + qconfig[layer]["weights"]["bias"] = UniformQuantizer( + bits=b["bits"], signed=True + ) + elif b["type"] == "flexible": + qconfig[layer]["weights"]["bias"] = FlexQuantizer( + bits=b["bits"], n_levels=b["n_levels"], signed=True + ) + else: + pass + # Arctivations + if a["type"] == "uniform": + qconfig[layer]["activations"]["activation"] = UniformQuantizer( + bits=a["bits"], signed=False + ) + elif a["type"] == "flexible": + qconfig[layer]["activations"]["activation"] = FlexQuantizer( + bits=a["bits"], n_levels=a["n_levels"], signed=False + ) + else: + pass + # End logic + quantized_model = apply_quantization(model, qconfig) + quantized_model.compile( + optimizer=Adam(), loss="categorical_crossentropy", metrics=["accuracy"] + ) + return quantized_model + + +# --- Alpha Initialization for QAT --- + + +def compute_alpha_dict( + model, x_train, batch_size=1, sample_size=512, random_state=None +): + """Compute the maximum absolute values of weights and activations using + only `sample_size` samples from x_train (if specified), processed in + batches of `batch_size`. + + Args: + model: A tf.keras.Model instance. + x_train: Training data array of shape (N, ...). + batch_size: Size of each batch for predict_on_batch. + sample_size: Optional number of samples to draw (approximate). + random_state: Seed for reproducible sampling. + + Returns: + alpha_dict: A dict mapping each layer name to a sub‐dict containing: + 'activation': maximum |activation| over the sampled data, + weight_name : maximum |weight| for each weight in the layer. 
+ """ + # 1) Prepare the sample subset + n_total = x_train.shape[0] + if sample_size is not None and sample_size < n_total: + # Create a RNG for reproducible sampling + rng = np.random.RandomState(random_state) + # Randomly choose `sample_size` distinct indices + idx = rng.choice(n_total, size=sample_size, replace=False) + x_sample = x_train[idx] + else: + # Use the entire dataset if no sampling or sample_size >= total + x_sample = x_train + + # 2) Initialize dictionary of maximums for weights and activations + alpha_dict = {} + for layer in model.layers: + # Compute max absolute value for each weight tensor in this layer + weights_max = { + w.name: float(np.max(np.abs(w.numpy()))) for w in layer.weights + } + # Start activation max at zero + alpha_dict[layer.name] = {"activation": 0.0, **weights_max} + + # 3) Build an intermediate model that outputs every layer's activations + intermediate = tf.keras.Model( + inputs=model.input, outputs=[lay.output for lay in model.layers] + ) + + # 4) Iterate over the sampled data in small batches + n_samples = x_sample.shape[0] + for start in range(0, n_samples, batch_size): + x_batch = x_sample[start : start + batch_size] + # Predict activations for this batch (low memory overhead) + acts = intermediate.predict_on_batch(x_batch) + # Update the activation max per layer if this batch has a larger value + for lay, act in zip(model.layers, acts): + batch_max = float(np.max(np.abs(act))) + if batch_max > alpha_dict[lay.name]["activation"]: + alpha_dict[lay.name]["activation"] = batch_max + + return alpha_dict + + +# def compute_flex_dict(model, x_train, batch_size=128): + + +def apply_alpha_dict(model, alpha_dict): + """Applies pre-computed alpha values to a quantized model.""" + for layer in model.layers: + original_layer_name = layer.name.replace("quant_", "") + + if original_layer_name not in alpha_dict: + continue + + for weight in layer.weights: + if ( + not weight.name.endswith("_alpha") + or weight.name not in alpha_dict[original_layer_name] + ): + continue + + # See the quantizers weight naming convention + # No name_suffix for now + weight.assign(alpha_dict[original_layer_name][weight.name]) + print( + f"Updated {weight.name} with alpha: {alpha_dict[original_layer_name][weight.name]:.4f}" + ) + return model + + +def model_initialize_parameters(model, ref_model, **params) -> tf.keras.Model: + """Initializes quantization parameters (alphas) using a reference model.""" + print("Function: model_initialize_parameters called") + if ref_model is None: + raise ValueError( + "model_initialize_parameters requires a ref_model, but none was provided." 
+ ) + if params["type"] == "alpha": + data = load_data(params["dataset"]) + alpha_dict = compute_alpha_dict(ref_model, data["x_train"]) + model = apply_alpha_dict(model, alpha_dict) + return model + raise ValueError( + f"Unknown parameter initialization type: {params['type']!r}" + ) + + +def model_evaluate(model, **params): + """Evaluates the model on the test dataset.""" + if model is None: + raise ValueError("model_evaluate received an empty model.") + data = load_data(params["dataset"]) + loss, accuracy = model.evaluate(data["x_test"], data["y_test"], verbose=0) + print(f"Evaluation results - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}") + return model + + +# --- Function Map --- +FUNCTION_MAP = { + "model_create": model_create, + "model_evaluate": model_evaluate, + "model_train": model_train, + "model_transform_bnf": model_transform_bnf, # Assuming you will add this + "model_quantize": model_quantize, + "model_initialize_parameters": model_initialize_parameters, +} diff --git a/src/stage/main.py b/src/stage/main.py new file mode 100755 index 0000000..fecd3ec --- /dev/null +++ b/src/stage/main.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 + +from functools import partial +from pathlib import Path + +import tensorflow as tf +from functions import FUNCTION_MAP + +from stage import Stage + +# --- Configuration for All 7 Pipeline Stages --- +# This list defines the blueprint for our pipeline. Each dictionary +# will be used to initialize a Stage object. + +stages_hyperparams = [ + # Stage 0: Model creation + { + "name": "model_creation", + "seed": 12345, + "function": "model_create", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "model_name": "colo_custom_cnn1_for_cifar10", + }, + }, + # Stage 1: Initial training + { + "name": "initial_training", + "seed": 12345, + "function": "model_train", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "epochs": 100, + "batch_size": 512, + "learning_rate": 0.0005, + "validation_split": 0.1, + }, + }, + # Stage 2: Model quantization + { + "name": "quantization", + "seed": 12345, + "function": "model_quantize", + "kwargs": { + "input_shape": [None, 32, 32, 3], + # 'kernel' is set to None because it will be dynamically + # updated inside the experimental loop below. 
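+            # 'bias' and 'activations' below hold one entry per quantizable
+            # layer of colo_custom_cnn1_for_cifar10 (its two Conv2D and two
+            # Dense layers); pooling, flatten and dropout layers are skipped by
+            # model_quantize, which only configures layers whose names contain
+            # "conv2d" or "dense".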
+ "kernel": None, + "bias": [ + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + {"type": "uniform", "bits": 8}, + ], + "activations": [ + {"type": "uniform", "bits": 16}, + {"type": "uniform", "bits": 16}, + {"type": "uniform", "bits": 16}, + {"type": "uniform", "bits": 16}, + ], + }, + }, + # Stage 3: Alpha initialization + { + "name": "alpha_initialization", # Fixed typo from original "initialiation" + "seed": 12345, + "function": "model_initialize_parameters", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "type": "alpha", + }, + }, + # Stage 4: QAT + { + "name": "qat", + "seed": 12345, + "function": "model_train", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + "epochs": 25, + "batch_size": 512, + "learning_rate": 0.0001, + "validation_split": 0.1, + }, + }, + # Stage 5: Final Evaluation + { + "name": "final_evaluation", + "seed": 12345, + "function": "model_evaluate", + "kwargs": { + "dataset": "cifar10", + "input_shape": [None, 32, 32, 3], + "categories": 10, + }, + }, +] + +if __name__ == "__main__": + + # This is the main experimental loop from your coworker's script. + # It runs the entire 7-stage pipeline multiple times. + # for bits in range(1, 25): + for bits in [1, 2, 3, 4, 5, 6, 8, 10, 16, 24]: + print( + f"\n{'='*20} STARTING EXPERIMENT: UNIFORM BITS = {bits} {'='*20}\n" + ) + + # --- Configure the Experiment --- + # Dynamically set the 'kernel' quantization parameter for this specific run. + kernel_config = [{"type": "uniform", "bits": bits} for _ in range(5)] + stages_hyperparams[2]["kwargs"]["kernel"] = kernel_config + + # Create the list of Stage objects from the (now updated) configurations + dataset = stages_hyperparams[0]["kwargs"].get("dataset") + model_name = stages_hyperparams[0]["kwargs"].get("model_name") + pipeline = [ + Stage( + function=FUNCTION_MAP[config["function"]], + initial_config=config, + checkpoint_path=Path("checkpoints") + / f"{model_name}-{dataset}", + metadata_path=Path(f"{bits}_bit"), + ) + for config in stages_hyperparams + ] + + # --- The Orchestrator --- + # It tracks both the model object and the hash of the last operation + model: tf.keras.Model | None = None + previous_hash: str | None = None + + # The loop's responsibility is to pass the state (model & hash) between stages + for stage in pipeline: + # We need to set the ref model + if stage.initial_config["name"] == "alpha_initialization": + assert ( + ref_model is not None + ), "Reference model for alpha initialization is not set." 
+ stage.function = partial(stage.function, ref_model=ref_model) + model, previous_hash = stage.run( + input_model=model, previous_hash=previous_hash + ) + + # Save the ref model after the last stage we dont quantize + if stage.initial_config["name"] == "initial_training": + ref_model = tf.keras.models.clone_model(model) + + print( + f"\n{'='*20} FINISHED EXPERIMENT: UNIFORM BITS = {bits} {'='*20}\n" + ) + print( + f"Final model for {bits}-bit experiment corresponds to hash: {previous_hash}" + ) diff --git a/src/stage/model_accuracy_vs_size.png b/src/stage/model_accuracy_vs_size.png new file mode 100644 index 0000000..4439c29 Binary files /dev/null and b/src/stage/model_accuracy_vs_size.png differ diff --git a/src/stage/plot.py b/src/stage/plot.py new file mode 100644 index 0000000..3f57fbe --- /dev/null +++ b/src/stage/plot.py @@ -0,0 +1,141 @@ +from pathlib import Path + +import matplotlib.pyplot as plt +import pandas as pd + +# path = Path("checkpoints/lenet5_custom-fashion_mnist") +# path = Path("checkpoints/flex_lenet5_custom-cifar10") +# path = Path("checkpoints/flex_lenet5_custom-cifar10") +# path = Path("checkpoints/lenet5_custom-mnist") +path = Path("checkpoints/colo_custom_cnn1_for_cifar10-cifar10") + +experiment_paths = [ + experiment_path + for experiment_path in path.iterdir() + if experiment_path.is_dir() and experiment_path.name != "artifacts" +] + +df = pd.DataFrame() +for experiment_path in experiment_paths: + if not experiment_path.exists(): + print(f"Metadata path '{experiment_path}' does not exist. Skipping.") + continue + for metadata_file in experiment_path.glob("*.json"): + with metadata_file.open("r") as f: + metadata = pd.read_json(f, orient="index").T + metadata["experiment"] = experiment_path.name + # Select only the desired columns + selected_columns = [ + "experiment", + "name", + "loss", + "accuracy", + "complexity", + ] + metadata = metadata[ + [col for col in selected_columns if col in metadata.columns] + ] + df = pd.concat([df, metadata], ignore_index=True) + +simple_df = df[ + df["name"].isin(["initial_training", "final_evaluation"]) +].copy() + +simple_df["name"] = simple_df["name"].replace( + { + "initial_training": "Original", + "final_evaluation": "Quantized", + } +) + +# Convert complexity from bits to Kbits +simple_df["complexity"] = simple_df["complexity"] / 1024 + +pivoted_df = simple_df.pivot_table( + index="experiment", columns="name", values=["accuracy", "complexity"] +) + + +# The accuracy of the common points doesn't match, fix that. + +df = pivoted_df.copy() +# Sort the DataFrame by this new column +df = df.sort_values(by=("complexity", "Quantized"), ascending=True) +pd.set_option("display.max_rows", None) +print(df) + +# df.sort_index(inplace=True, sort_by=['complexity']) +# --- 2. Create the Plot (using tuple access) --- + +# Get the data for the original model from the first row +# Note the use of tuples to access the columns +original_accuracy = df[("accuracy", "Original")].iloc[0] +original_size = df[("complexity", "Original")].iloc[0] + +# Set up the plot size and style +plt.style.use("seaborn-v0_8-whitegrid") +fig, ax = plt.subplots(figsize=(10, 7)) + +# --- 3. 
Plot Each Point --- + +# Plot the single point for the Original Model +ax.scatter( + x=original_size, + y=original_accuracy, + marker="*", + s=250, + color="red", + label="Original Model", + zorder=5, +) + +# Plot the points for ALL of your Quantized Models +# We use tuples to get the correct columns for the X and Y axes +ax.scatter( + x=df[("complexity", "Quantized")], + y=df[("accuracy", "Quantized")], + s=60, + color="royalblue", + label="Quantized Models", +) + +# Plot the line connecting quantized models and the original model as the final item +quantized_sizes = df[("complexity", "Quantized")].tolist() +quantized_accuracies = df[("accuracy", "Quantized")].tolist() + +# Append the original model as the final item +quantized_sizes.append(original_size) +quantized_accuracies.append(original_accuracy) + +ax.plot( + quantized_sizes, + quantized_accuracies, + color="royalblue", + linestyle="--", + linewidth=1, + zorder=1, +) + +# --- 4. Add Labels to make the plot readable --- + +# Loop through the DataFrame index (e.g., '1_bit', '2_bit') +for experiment_name in df.index: + ax.annotate( + experiment_name.rstrip("_bit"), + ( + df.loc[experiment_name, ("complexity", "Quantized")], + df.loc[experiment_name, ("accuracy", "Quantized")], + ), + textcoords="offset points", + xytext=(-5, 15), # Shift right and down + ha="left", + va="top", + ) + +# Add titles and labels for the axes +ax.set_title("Model Accuracy vs. Size Trade-off", fontsize=16) +ax.set_xlabel("Model Size (Complexity in KB)", fontsize=12) +ax.set_ylabel("Model Accuracy", fontsize=12) +ax.legend(fontsize=11) + +plt.savefig("model_accuracy_vs_size.png", dpi=300, bbox_inches="tight") diff --git a/src/stage/script.sh b/src/stage/script.sh new file mode 100755 index 0000000..7ef396e --- /dev/null +++ b/src/stage/script.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python main.py 2> ./errors.log | tee ./output.log diff --git a/src/stage/stage.py b/src/stage/stage.py new file mode 100644 index 0000000..f307838 --- /dev/null +++ b/src/stage/stage.py @@ -0,0 +1,217 @@ +# stage + +from __future__ import annotations + +import functools +import hashlib +import json +import time +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any, Callable, Dict, Optional, Tuple + +import tensorflow as tf +from functions import load_data + +from configs.serialization.serialization import load_qmodel, save_qmodel +from utils.metrics import compute_space_complexity_model + +# This shouldnt be here. + + +@dataclass(frozen=True) +class StageConfig: + """Holds all parameters that uniquely define a stage's output. + + This entire object is hashed to create a unique ID for the stage's result. + """ + + name: str + function: str + seed: int + parameters: Dict[str, Any] + previous_hash: Optional[str] = None + + def to_hash(self) -> str: + """Generates a unique hash for this configuration.""" + # asdict converts the dataclass to a dictionary. + # sort_keys ensures the hash is consistent. 
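+        # Because previous_hash is part of the dataclass, the hashes chain:
+        # changing any upstream stage's configuration changes every downstream
+        # hash as well, so a cached checkpoint is only reused when the entire
+        # lineage up to this stage is identical.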
+ config_str = json.dumps(asdict(self), sort_keys=True) + return hashlib.md5(config_str.encode()).hexdigest() + + +class Stage: + def __init__( + self, + function: Callable, + initial_config: Dict[str, Any], # We'll start with a dict + checkpoint_path: Optional[Path] = None, + metadata_path: str = "metadata", + ): + self.function = function + self.initial_config = initial_config + self.config: StageConfig = None # Will be set at runtime + self.hash: str = None # Will be set at runtime + self.loss = None # The loss after running the stage + self.accuracy = None # The accuracy after running the stage + self.complexity = None # The complexity after running the stage + checkpoint_path = checkpoint_path or Path("checkpoints") + checkpoint_path.mkdir(parents=True, exist_ok=True) + self.artifacts_path = checkpoint_path / "artifacts" + self.artifacts_path.mkdir(parents=True, exist_ok=True) + self.config_path = checkpoint_path / metadata_path + self.config_path.mkdir(parents=True, exist_ok=True) + self.model = None # The model after running the stage + + def _save_metadata(self): + """Saves the current stage configuration to a JSON file. + + This is useful for debugging and traceability. + """ + if self.config is None: + raise ValueError("StageConfig is not set. Run the stage first.") + + metadata_path = self.config_path / f"{self.config.name}.json" + config_dict = asdict(self.config) + config_dict["accuracy"] = self.accuracy + config_dict["loss"] = self.loss + config_dict["complexity"] = self.complexity + config_dict["hash"] = self.hash + with metadata_path.open("w") as f: + json.dump(config_dict, f, indent=2) + print(f"Configuration saved to '{metadata_path}'") + + def _save_model(self): + """Saves the model to a file using the unique hash as the filename. + + This is useful for traceability and caching. + """ + if self.hash is None: + raise ValueError("Hash is not set. Run the stage first.") + + model_path = self.artifacts_path / f"{self.hash}" + save_qmodel(self.model, model_path) + print(f"Model saved to '{model_path}'") + + def save(self): + """Saves the model and its configuration to disk. + + This is useful for traceability and caching. + """ + if self.config is None: + raise ValueError("StageConfig is not set. Run the stage first.") + + self._save_metadata() + self._save_model() + + def load(self, hash: str): + """Loads the model and its configuration from disk.""" + model_path = self.artifacts_path / hash + if not model_path.exists(): + raise FileNotFoundError(f"Model file not found: {model_path}") + + self.model = load_qmodel(model_path) + + def run( + self, + input_model: Optional[tf.keras.Model], + previous_hash: Optional[str] = None, + ) -> Tuple[tf.keras.Model, str]: + """Runs the stage with full traceability and caching. + + Returns the resulting model AND its unique hash. + """ + start_time = time.time() + + # 1. Create the final, traceable config for this run + self.config = StageConfig( + name=self.initial_config["name"], + seed=self.initial_config.get("seed", int(time.time())), + function=( + self.function.__name__ + if not isinstance(self.function, functools.partial) + else self.function.func.__name__ + ), + parameters=self.initial_config["kwargs"], + previous_hash=previous_hash, + ) + + # 2. 
Generate the unique hash for this specific configuration + self.hash = self.config.to_hash() + + print(f"--- Running Stage({self.config.name}) ---") + print(f" Hash: {self.hash}") + print(f" Depends on: {self.config.previous_hash}") + + try: + self.load(self.hash) + except FileNotFoundError as e: + print(f" Checkpoint NOT FOUND. {e} Executing function...") + self.model = self.function( + model=input_model, **self.config.parameters + ) + self._save_model() + # Evaluate the model if a dataset is provided in the parameters + dataset = self.config.parameters.get("dataset", None) + if dataset is not None: + self.loss, self.accuracy = self.evaluate(load_data(dataset)) + # Compute the complexity of the model + self.complexity = self.compute_complexity() + + self._save_metadata() + + print(f"--- Stage finished in {time.time() - start_time:.2f}s ---\n") + + # 5. Return both the model and its hash to the orchestrator + return self.model, self.hash + + def evaluate(self, data): + # After loading it is not compiled I think... + self.model.compile( + optimizer="adam", + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + loss, accuracy = self.model.evaluate( + data["x_test"], data["y_test"], verbose=0 + ) + print("Evaluation results:") + print(f"Loss: {loss:.4f}, Accuracy: {accuracy:.4f}") + return loss, accuracy + + def compute_complexity(self): + complexity = compute_space_complexity_model(self.model) + print("Space complexity of the model:") + print(complexity) + return complexity + + +class Pipeline: + def __init__(self, stages: list[Stage]): + self.stages = stages + + def add(self, stage: Stage): + """Adds a new stage to the pipeline.""" + self.stages.append(stage) + + def remove(self, stages_names: list[str] | str): + """Removes stages by their names.""" + if isinstance(stages_names, str): + stages_names = [stages_names] + self.stages = [ + stage + for stage in self.stages + if stage.config.name not in stages_names + ] + + def run(self, input_model: Optional[tf.keras.Model] = None): + """Runs the entire pipeline, passing the model from one stage to the + next.""" + previous_hash = None + + for stage in self.stages: + current_model, previous_hash = stage.run( + input_model=input_model, previous_hash=previous_hash + ) + + return current_model diff --git a/src/utils/metrics.py b/src/utils/metrics.py index 99816af..a5ac6d6 100644 --- a/src/utils/metrics.py +++ b/src/utils/metrics.py @@ -24,17 +24,13 @@ def compute_space_complexity_quantize(qlayer: QuantizeWrapperV2) -> float: total_layer_size = 0.0 qconfig = qlayer.quantize_config - # Assumption: order is the same for layer.weights and get_weights_and_quantizers weights_and_quantizers = qconfig.get_weights_and_quantizers(qlayer.layer) - weights = qlayer.weights[: len(weights_and_quantizers)] - for weight, weight_and_quantizer in zip(weights, weights_and_quantizers): - quantizer = weight_and_quantizer[1] + for weight, quantizer in weights_and_quantizers: if isinstance(quantizer, UniformQuantizer): weight_size = weight.shape.num_elements() * quantizer.bits elif isinstance(quantizer, FlexQuantizer): - qweight = quantizer.quantize_op(weight) - weight_size = compute_huffman_nominal_complexity(qweight) + weight_size = compute_huffman_nominal_complexity(weight) weight_size += quantizer.n_levels * quantizer.bits else: raise ValueError(f"Unknown quantizer type: {type(quantizer)}") @@ -45,7 +41,7 @@ def compute_space_complexity_quantize(qlayer: QuantizeWrapperV2) -> float: def compute_space_complexity(layer): """Compute the space complexity for a 
normal layer.""" - total_layer_size = 0 + total_layer_size = 0.0 for weight in layer.weights: weight_size = ( 8 * weight.dtype.size * weight.shape.num_elements() @@ -58,7 +54,10 @@ def compute_space_complexity(layer): def compute_space_complexity_model(model: tf.keras.Model) -> float: """Compute the uniform space complexity of a model based on its quantization configuration.""" - total_space_complexity = 0 + total_space_complexity = 0.0 + + # Make an inference to ensure the model is built + model(tf.random.normal((1,) + model.input_shape[1:])) for layer in model.layers: if isinstance(layer, QuantizeWrapperV2): diff --git a/src/utils/metrics_integration_test.py b/src/utils/metrics_integration_test.py new file mode 100755 index 0000000..53719d8 --- /dev/null +++ b/src/utils/metrics_integration_test.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# Integration of saving models and metrics + +import tempfile +import unittest + +import numpy as np +import tensorflow as tf + +from configs.qmodel import apply_quantization +from configs.serialization.serialization import load_qmodel, save_qmodel +from quantizers.flex_quantizer import FlexQuantizer +from quantizers.uniform_quantizer import UniformQuantizer +from utils.metrics import compute_space_complexity_model + + +class TestIntegrationMetricsSerialization(unittest.TestCase): + def test_save_and_load_model(self): + """Test saving and loading a model with metrics.""" + model = tf.keras.Sequential( + [ + tf.keras.layers.Dense(10, input_shape=(5,), name="dense_1"), + tf.keras.layers.Dense(5, name="dense_2"), + ] + ) + + qconfig = { + "dense_1": { + "weights": { + "kernel": UniformQuantizer(bits=4, signed=True), + "bias": UniformQuantizer(bits=4, signed=True), + }, + }, + "dense_2": { + "weights": { + "kernel": UniformQuantizer(bits=4, signed=True), + "bias": UniformQuantizer(bits=4, signed=True), + }, + }, + } + qmodel = apply_quantization(model, qconfig) + qmodel.build((None, 5)) + + tmpdir = tempfile.mkdtemp() + save_qmodel(qmodel, tmpdir) + loaded_model = load_qmodel(tmpdir) + # make an inference to ensure the model is loaded correctly + loaded_model(tf.random.normal((1, 5))) + + original_weights = {w.name: w.numpy() for w in qmodel.weights} + loaded_weights = {w.name: w.numpy() for w in loaded_model.weights} + + # First, check that the set of weight names is identical + self.assertEqual( + set(original_weights.keys()), + set(loaded_weights.keys()), + "Models have different sets of weight names.", + ) + + # Now, compare each weight tensor by name + for name, orig_w in original_weights.items(): + loaded_w = loaded_weights[name] + # print(f"Comparing weight tensor: {name}") + # print(f"Weights: {orig_w}") + # print(f"Loaded: {loaded_w}") + np.testing.assert_allclose( + orig_w, + loaded_w, + rtol=1e-6, + atol=1e-6, + err_msg=f"Weight tensor '{name}' differs.", + ) + + self.assertEqual( + compute_space_complexity_model(qmodel), + compute_space_complexity_model(loaded_model), + ) + + def test_save_and_load_model_flex(self): + """Test saving and loading a model with metrics.""" + model = tf.keras.Sequential( + [ + tf.keras.layers.Dense(10, input_shape=(5,), name="dense_1"), + tf.keras.layers.Dense(5, name="dense_2"), + ] + ) + + qconfig = { + "dense_1": { + "weights": { + "kernel": FlexQuantizer(bits=4, n_levels=5, signed=True), + "bias": FlexQuantizer(bits=4, n_levels=4, signed=True), + }, + }, + "dense_2": { + "weights": { + "kernel": FlexQuantizer(bits=4, n_levels=5, signed=True), + "bias": FlexQuantizer(bits=4, n_levels=4, signed=True), + }, + }, + 
} + qmodel = apply_quantization(model, qconfig) + qmodel.build((None, 5)) + + tmpdir = tempfile.mkdtemp() + save_qmodel(qmodel, tmpdir) + loaded_model = load_qmodel(tmpdir) + # make an inference to ensure the model is loaded correctly + loaded_model(tf.random.normal((1, 5))) + + original_weights = {w.name: w.numpy() for w in qmodel.weights} + loaded_weights = {w.name: w.numpy() for w in loaded_model.weights} + + # First, check that the set of weight names is identical + self.assertEqual( + set(original_weights.keys()), + set(loaded_weights.keys()), + "Models have different sets of weight names.", + ) + + # Now, compare each weight tensor by name + for name, orig_w in original_weights.items(): + loaded_w = loaded_weights[name] + # print(f"Comparing weight tensor: {name}") + # print(f"Weights: {orig_w}") + # print(f"Loaded: {loaded_w}") + np.testing.assert_allclose( + orig_w, + loaded_w, + rtol=1e-6, + atol=1e-6, + err_msg=f"Weight tensor '{name}' differs.", + ) + + self.assertEqual( + compute_space_complexity_model(qmodel), + compute_space_complexity_model(loaded_model), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/utils/metrics_lenet_test.py b/src/utils/metrics_lenet_test.py index 37cb1cf..c72aec0 100755 --- a/src/utils/metrics_lenet_test.py +++ b/src/utils/metrics_lenet_test.py @@ -6,6 +6,9 @@ import numpy as np import tensorflow as tf from tensorflow.keras import layers, models +from tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper import ( + QuantizeWrapperV2, +) from configs.qmodel import apply_quantization from quantizers.flex_quantizer import FlexQuantizer @@ -14,43 +17,57 @@ def apply_flex_dict(qmodel, alpha_dict, levels_dict, thresholds_dict): - """TODO(Colo): This function will is implemented in branch - colo/model_evalution in QTensor/src/examples/functions.py. - - When merged, import that functions insted of redefining it here. 
- """ + """Sets the internal state (alpha, levels, thresholds) of FlexQuantizers + within a quantized model by directly finding and assigning to the live + tf.Variable objects.""" for layer in qmodel.layers: - orig_layer_name = layer.name - if orig_layer_name.startswith("quant_"): - orig_layer_name = orig_layer_name[len("quant_") :] + if not isinstance(layer, QuantizeWrapperV2): + continue + orig_layer_name = layer.layer.name if orig_layer_name in alpha_dict: - for alpha_type in ["kernel", "bias", "activation"]: - new_alpha = alpha_dict[orig_layer_name].get(alpha_type, None) - new_levels = levels_dict[orig_layer_name].get(alpha_type, None) - new_thresholds = thresholds_dict[orig_layer_name].get( - alpha_type, None + # Find all variables in the wrapper layer and create a map by name + var_map = {v.name: v for v in layer.weights} + + # Iterate through the types ('kernel', 'bias') we might want to change + for attr_type in ["kernel", "bias"]: + new_alpha = alpha_dict.get(orig_layer_name, {}).get(attr_type) + new_levels = levels_dict.get(orig_layer_name, {}).get( + attr_type + ) + new_thresholds = thresholds_dict.get(orig_layer_name, {}).get( + attr_type ) - if new_alpha is not None: - for v in layer.weights: - if "alpha" in v.name and alpha_type in v.name: - v.assign(new_alpha) - # print(f"Updated {v.name} ({alpha_type}) with new alpha value {new_alpha}") - elif ( - alpha_type == "activation" - and "post_activation" in v.name - and "alpha" in v.name - ): - v.assign(new_alpha) - # print(f"Updated {v.name} (activation) with new alpha value {new_alpha}") - if "levels" in v.name and alpha_type in v.name: - v.assign(new_levels) - # print(f"Updated {v.name} ({alpha_type}) with new levels value {new_levels}") - if "thresholds" in v.name and alpha_type in v.name: - v.assign(new_thresholds) - # print(f"Updated {v.name} ({alpha_type}) with new thresholds value {new_thresholds}") - return qmodel + # Construct the expected variable names and assign if they exist + if new_alpha is not None: + # Note: TFMOT might name variables slightly differently. + # This searches for common patterns. 
+ for name_pattern in [ + f"/{attr_type}_alpha:0", + f"_{attr_type}_alpha:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_alpha) + + if new_levels is not None: + for name_pattern in [ + f"/{attr_type}_levels:0", + f"_{attr_type}_levels:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_levels) + + if new_thresholds is not None: + for name_pattern in [ + f"/{attr_type}_thresholds:0", + f"_{attr_type}_thresholds:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_thresholds) def apply_alpha_dict(qmodel, alpha_dict): @@ -418,6 +435,7 @@ def base_flex_quantizer_space_complexity( "bias": bthresholds, } apply_flex_dict(qmodel, alpha_dict, levels_dict, thresholds_dict) + qmodel(tf.random.normal(input_shape)) # 6) compare to your implementation computed_bits = compute_space_complexity_model(qmodel) diff --git a/src/utils/metrics_test.py b/src/utils/metrics_test.py index 760be67..5a3368b 100755 --- a/src/utils/metrics_test.py +++ b/src/utils/metrics_test.py @@ -1,10 +1,16 @@ #!/usr/bin/env python3 import unittest +from collections import Counter +import numpy as np import tensorflow as tf +from tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper import ( + QuantizeWrapperV2, +) from configs.qmodel import apply_quantization +from quantizers.flex_quantizer import FlexQuantizer from quantizers.uniform_quantizer import UniformQuantizer from utils.metrics import ( compute_space_complexity_model, @@ -12,6 +18,103 @@ ) +def apply_flex_dict(qmodel, alpha_dict, levels_dict, thresholds_dict): + """Sets the internal state (alpha, levels, thresholds) of FlexQuantizers + within a quantized model by directly finding and assigning to the live + tf.Variable objects.""" + for layer in qmodel.layers: + if not isinstance(layer, QuantizeWrapperV2): + continue + + orig_layer_name = layer.layer.name + if orig_layer_name in alpha_dict: + # Find all variables in the wrapper layer and create a map by name + var_map = {v.name: v for v in layer.weights} + + # Iterate through the types ('kernel', 'bias') we might want to change + for attr_type in ["kernel", "bias"]: + new_alpha = alpha_dict.get(orig_layer_name, {}).get(attr_type) + new_levels = levels_dict.get(orig_layer_name, {}).get( + attr_type + ) + new_thresholds = thresholds_dict.get(orig_layer_name, {}).get( + attr_type + ) + + # Construct the expected variable names and assign if they exist + if new_alpha is not None: + # Note: TFMOT might name variables slightly differently. + # This searches for common patterns. 
+ for name_pattern in [ + f"/{attr_type}_alpha:0", + f"_{attr_type}_alpha:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_alpha) + + if new_levels is not None: + for name_pattern in [ + f"/{attr_type}_levels:0", + f"_{attr_type}_levels:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_levels) + + if new_thresholds is not None: + for name_pattern in [ + f"/{attr_type}_thresholds:0", + f"_{attr_type}_thresholds:0", + ]: + var_name = layer.name + name_pattern + if var_name in var_map: + var_map[var_name].assign(new_thresholds) + + +def check_weights(qlayer): + qlayer_weights = qlayer.get_weights() + qconfig = qlayer.quantize_config + weights_and_quantizers = qconfig.get_weights_and_quantizers(qlayer.layer) + weights_from_config = [ + weight_and_quantizer[0] + for weight_and_quantizer in weights_and_quantizers + ] + quantizers = [ + weight_and_quantizer[1] + for weight_and_quantizer in weights_and_quantizers + ] + + qlayer_weights = qlayer.get_weights() + + # Original weights from the quantized layer + weights_in_layer = qlayer_weights[0] + print("Non-quantized weights in layer:") + print(weights_in_layer) + print() + + weights_quantized_from_config = weights_from_config[0].numpy() + print("Weights from config:") + print(weights_quantized_from_config) + print() + + quantizer = quantizers[0] if quantizers else None + print("Quantizer levels:") + print(quantizer.levels.numpy()) + print() + + weights_manually_quantized = quantizer.quantize_op( + qlayer_weights[0] + ).numpy() + print("Manually quantized weights:") + print(weights_manually_quantized) + print() + + assert np.array_equal( + weights_quantized_from_config, weights_manually_quantized + ) + + # From tensorflow internal code def _compute_memory_size(weight): weight_counts = weight.shape.num_elements() @@ -66,6 +169,128 @@ def test_compute_space_complexity_uniform_only(self): self.assertEqual(quantized_size, expected_size) + def test_understanding_weights(self): + """Verify that we can access the weights of a quantized layer.""" + layer = tf.keras.layers.Dense(10, input_shape=(5,), name="dense_1") + layer.build((None, 5)) + qconfig = { + "dense_1": { + "weights": { + "kernel": FlexQuantizer(bits=2, n_levels=4, signed=True), + "bias": FlexQuantizer(bits=2, n_levels=4, signed=True), + }, + }, + } + model = tf.keras.Sequential([layer]) + qmodel = apply_quantization(model, qconfig) + qmodel.build((None, 5)) + # Run an inference to have access to the variables. + qmodel(tf.random.normal((1, 5))) + # Access the quantized layer + qlayer = qmodel.get_layer("quant_dense_1") + + check_weights(qlayer) + # Define the quantizer parameters based on our ideal state + alpha = 1.0 + # 2. Calculate the expected complexity based on a known data distribution + ideal_levels = np.array([-0.8, -0.2, 0.3, 0.9], dtype=np.float32) + midpoints = (ideal_levels[:-1] + ideal_levels[1:]) / 2.0 + thresholds = np.concatenate(([-alpha], midpoints, [alpha])).astype( + np.float32 + ) + + apply_flex_dict( + qmodel, + alpha_dict={"dense_1": {"kernel": alpha}}, + levels_dict={"dense_1": {"kernel": ideal_levels}}, + thresholds_dict={"dense_1": {"kernel": thresholds}}, + ) + qmodel(tf.random.normal((1, 5))) # Force weight creation + check_weights(qlayer) + + def test_compute_space_complexity_flex_only(self): + """Verify that for a flex configuration a layer size is as expected.""" + # 1. 
Setup the initial layer and model + layer = tf.keras.layers.Dense( + 10, input_shape=(5,), name="dense_1", use_bias=False + ) + model = tf.keras.Sequential([layer]) + model.build((None, 5)) + + # Define the FlexQuantizer configuration + qconfig = { + "dense_1": { + "weights": { + "kernel": FlexQuantizer(bits=4, n_levels=4, signed=True), + }, + }, + } + + # 2. Calculate the expected complexity based on a known data distribution + ideal_levels = np.array([-0.8, -0.2, 0.3, 0.9], dtype=np.float32) + ideal_weight_data = np.random.choice( + ideal_levels, size=(5, 10), replace=True + ) + + counter = Counter(ideal_weight_data.flatten()) + total_elements = sum(counter.values()) + emp_probs = np.array(list(counter.values())) / total_elements + entropy = -np.sum(emp_probs * np.log2(emp_probs)) + + huffman_size = ideal_weight_data.size * entropy + levels_size = len(ideal_levels) * 4 # n_levels * bits + expected_size = huffman_size + levels_size + + # 3. Apply quantization to get the qmodel structure + qmodel = apply_quantization(model, qconfig) + + # 4. Force weight creation by calling the model with a dummy input. + dummy_input_shape = (1,) + model.input_shape[1:] + qmodel(tf.random.normal(dummy_input_shape)) + + # 5. Inject the known state into the qmodel + q_layer = qmodel.get_layer("quant_dense_1") + + # Find the actual tf.Variable for the kernel. + kernel_var = None + for v in q_layer.trainable_weights: + if v.name.endswith("kernel:0"): + kernel_var = v + break + + self.assertIsNotNone( + kernel_var, "Could not find the kernel variable to assign." + ) + kernel_var.assign(ideal_weight_data) + + kernel_weights = qmodel.get_layer("quant_dense_1").get_weights() + print("Kernel Weights before assignment:") + print(kernel_weights) + + # Define the quantizer parameters based on our ideal state + alpha = 1.0 + # KEY CHANGE: Calculate thresholds correctly to match the quantizer's expected variable shape. + # The shape should be (n_levels + 1) to include outer bounds. + midpoints = (ideal_levels[:-1] + ideal_levels[1:]) / 2.0 + thresholds = np.concatenate(([-alpha], midpoints, [alpha])).astype( + np.float32 + ) + + # Use our helper to set the FlexQuantizer's internal state + apply_flex_dict( + qmodel, + alpha_dict={"dense_1": {"kernel": alpha}}, + levels_dict={"dense_1": {"kernel": ideal_levels}}, + thresholds_dict={"dense_1": {"kernel": thresholds}}, + ) + qmodel(tf.random.normal(dummy_input_shape)) # Force assigment + + # 6. Compute the quantized size using the metric function + quantized_size = compute_space_complexity_quantize(q_layer) + + # 7. Assert that the computed size matches the theoretical expected size + self.assertAlmostEqual(quantized_size, expected_size, places=6) + def test_compute_non_quantized_model(self): """Verify that computing the size of the model.""" layer = tf.keras.layers.Dense(30, input_shape=(5,), name="dense_1")