Merged (26 commits)
2 changes: 2 additions & 0 deletions .gitignore
@@ -6,3 +6,5 @@ old/
 build/
 
 *.mp4
+
+checkpoints/
3 changes: 2 additions & 1 deletion docker/Dockerfile
@@ -5,7 +5,8 @@ RUN pip3 install \
     matplotlib \
     scikit-learn \
     tensorflow-model-optimization \
-    kagglehub
+    kagglehub \
+    pandas
 
 # Set backward compatibility for tfmot
 RUN pip3 install tf_keras --no-deps
99 changes: 99 additions & 0 deletions functions.diff
@@ -0,0 +1,99 @@
diff --git a/src/stage/functions.py b/src/stage/functions.py
index 3e06394..919615c 100644
--- a/src/stage/functions.py
+++ b/src/stage/functions.py
@@ -277,62 +277,49 @@ def model_quantize(model: tf.keras.Model, **params) -> tf.keras.Model:
 
 # --- Alpha Initialization for QAT ---
 
-
-def compute_alpha_dict(model, x_train, batch_size=128):
-    """Computes alpha values for weights and activations."""
-    alpha_dict = {}
-    # Compute weight alphas
-    for layer in tqdm(
-        model.layers,
-        desc="Computing weight alphas",
-        file=sys.stdout,
-        leave=False,
-    ):
-        if layer.get_weights():
-            alpha_dict[layer.name] = {}
-            # Simplified alpha calculation for weights
-            weights = layer.get_weights()[0]
-            alpha_dict[layer.name]["kernel"] = np.max(np.abs(weights))
-
-    # Compute activation alphas
+def get_activations_output(model, x_train, batch_size=128):
+    """Gets the activations of the model for the training data."""
     intermediate_model = models.Model(
         inputs=model.input, outputs=[layer.output for layer in model.layers]
     )
     activations = intermediate_model.predict(
         x_train, batch_size=batch_size, verbose=0
     )
+    return activations
+
+def compute_alpha_dict(model, x_train, batch_size=128):
+    """Computes alpha values for weights and activations in a single comprehension."""
+    activations = get_activations_output(model, x_train, batch_size)
 
-    for layer, activation_data in tqdm(
-        zip(model.layers, activations),
-        total=len(model.layers),
-        desc="Computing activation alphas",
-        file=sys.stdout,
-        leave=False,
-    ):
-        if layer.name not in alpha_dict:
-            alpha_dict[layer.name] = {}
-        alpha_dict[layer.name]["activation"] = np.max(np.abs(activation_data))
+    alpha_dict = {
+        layer.name: {
+            **{weight.name: np.max(np.abs(weight.numpy())) for weight in layer.weights},
+            'activation': np.max(np.abs(activation_data))
+        }
+        for layer, activation_data in zip(model.layers, activations)
+    }
 
     return alpha_dict
 
-
-def apply_alpha_dict(q_model, alpha_dict):
+def apply_alpha_dict(model, alpha_dict):
     """Applies pre-computed alpha values to a quantized model."""
-    for layer in q_model.layers:
-        original_name = layer.name.replace("quant_", "")
-        if original_name in alpha_dict:
-            for alpha_type in ["kernel", "activation"]:
-                if new_alpha := alpha_dict[original_name].get(alpha_type):
-                    for weight_var in layer.weights:
-                        if (
-                            alpha_type in weight_var.name
-                            and "alpha" in weight_var.name
-                        ):
-                            weight_var.assign(new_alpha)
-                            print(
-                                f"Updated {weight_var.name} with alpha: {new_alpha:.4f}"
-                            )
-    return q_model
+    for layer in model.layers:
+        original_layer_name = layer.name.replace("quant_", "")
+
+        if original_layer_name not in alpha_dict:
+            continue
+
+        for weight in layer.weights:
+            if weight.name not in alpha_dict[original_layer_name]:
+                continue
+
+            # See the quantizers weight naming convention
+            # No name_suffix for now
+            weight.assign(
+                alpha_dict[original_layer_name][weight.name]
+            )
+            print(f"Updated {weight.name} with alpha: {alpha_dict[original_layer_name][weight.name]:.4f}")
+    return model
 
 
 def model_initialize_parameters(model, ref_model, **params) -> tf.keras.Model:
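
A minimal usage sketch of the helpers refactored above, assuming the intended flow is to compute alphas on the float model and then copy them onto the quantized (quant_-prefixed) model; the variable names below are illustrative, not taken from the repo:

alpha_dict = compute_alpha_dict(float_model, x_train, batch_size=128)
q_model = apply_alpha_dict(q_model, alpha_dict)  # entries are matched by layer name and weight name
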
21 changes: 21 additions & 0 deletions org.MD
@@ -0,0 +1,21 @@
# Dataset
- CIFAR-10 has richer characteristics
# Model
- Lenet5_custom_v2

# QConfig
All Uniform arithmetic
2->8 uniform

Flex -- Uniform
4, 6, and 8 bits.
Levels in [2-20] that are valid.


Set the seed and try a couple of seeds.
3 runs with different seeds.

Plot the mean and variance across all runs.

## Second step
- dsp: we'll see later.
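
A rough sketch of the seeding-and-averaging plan above; build_and_train() is a placeholder for whichever training entry point ends up being used, not a function from this repo:

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

def run_once(seed):
    tf.keras.utils.set_random_seed(seed)  # seeds Python, NumPy and TensorFlow at once
    history = build_and_train()  # placeholder for the real training call
    return np.array(history.history["val_accuracy"])

curves = np.stack([run_once(s) for s in (0, 1, 2)])  # 3 runs with different seeds
mean, var = curves.mean(axis=0), curves.var(axis=0)

plt.plot(mean, label="mean val_accuracy")
plt.fill_between(range(len(mean)), mean - np.sqrt(var), mean + np.sqrt(var), alpha=0.3, label="+/- 1 std")
plt.legend()
plt.savefig("runs_mean_var.png")
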
7 changes: 4 additions & 3 deletions src/examples/data_analysis/plot.py
@@ -16,9 +16,10 @@ def plot_flex_snapshot(
     # NOTE(Fran): Big assumption here that the keys are always the same
     # TODO(Fran): Also it seems activations aren't being stored as model weights
     # ...
-    alpha_history = layer_history[f"{layer_name}/alpha:0"]
-    level_history = layer_history[f"{layer_name}/levels:0"]
-    threshold_history = layer_history[f"{layer_name}/thresholds:0"]
+    print(layer_history.keys())
+    alpha_history = layer_history[f"{layer_name}/kernel_alpha:0"]
+    level_history = layer_history[f"{layer_name}/kernel_levels:0"]
+    threshold_history = layer_history[f"{layer_name}/kernel_thresholds:0"]
     bits = quantizer.bits
     signed = quantizer.signed
 
33 changes: 33 additions & 0 deletions src/examples/datasets/cifar-10.py
@@ -0,0 +1,33 @@
import tensorflow as tf
from tensorflow.keras.utils import to_categorical


def to_tf_dataset(x, y, batch_size, shuffle=True):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(x))
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


def generate_dataset(batch_size):
    """Generate the CIFAR-10 dataset with a validation split."""
    (x_train, y_train), (x_test, y_test) = (
        tf.keras.datasets.cifar10.load_data()
    )

    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    # Split train data into train and validation sets
    val_size = len(y_test)
    x_val, y_val = x_train[:val_size], y_train[:val_size]
    x_train, y_train = x_train[val_size:], y_train[val_size:]

    train_dataset = to_tf_dataset(x_train, y_train, batch_size)
    val_dataset = to_tf_dataset(x_val, y_val, batch_size)
    test_dataset = to_tf_dataset(x_test, y_test, batch_size)

    return train_dataset, val_dataset, test_dataset
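
A small usage sketch of generate_dataset (the batch size is illustrative):

train_ds, val_ds, test_ds = generate_dataset(batch_size=128)
for images, labels in train_ds.take(1):
    print(images.shape, labels.shape)  # expected: (128, 32, 32, 3) (128, 10)
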
71 changes: 71 additions & 0 deletions src/examples/models/lenet_custom.py
@@ -0,0 +1,71 @@
from tensorflow.keras.layers import (
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential

categories = 10  # Number of classes (CIFAR-10)
input_shape = [None, 32, 32, 3]  # Input shape for CIFAR-10 dataset
model = Sequential(
    [
        Conv2D(
            64,
            (3, 3),
            padding="same",
            activation="relu",
            input_shape=input_shape[1:],
            name="conv2d",
        ),
        MaxPooling2D((2, 2)),
        Conv2D(
            128, (3, 3), padding="same", activation="relu", name="conv2d_1"
        ),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation="relu", name="dense"),
        Dropout(0.5),
        Dense(categories, activation="softmax", name="dense_1"),
    ],
)

from quantizers.flex_quantizer import FlexQuantizer
from quantizers.uniform_quantizer import UniformQuantizer

n_levels = 10  # Number of quantization levels
bits = 8  # Number of bits for quantization

qconfig = {
    "conv2d": {
        "weights": {
            "kernel": FlexQuantizer(bits=bits, n_levels=n_levels, signed=True),
            "bias": UniformQuantizer(bits=8, signed=True),
        },
        "activations": {"activation": UniformQuantizer(bits=16, signed=False)},
    },
    "conv2d_1": {
        "weights": {
            "kernel": FlexQuantizer(bits=bits, n_levels=n_levels, signed=True),
            "bias": UniformQuantizer(bits=8, signed=True),
        },
        "activations": {"activation": UniformQuantizer(bits=16, signed=False)},
    },
    "dense": {
        "weights": {
            "kernel": FlexQuantizer(bits=bits, n_levels=n_levels, signed=True),
            "bias": UniformQuantizer(bits=8, signed=True),
        },
        "activations": {"activation": UniformQuantizer(bits=16, signed=False)},
    },
    "dense_1": {
        "weights": {
            "kernel": FlexQuantizer(bits=bits, n_levels=n_levels, signed=True),
            "bias": UniformQuantizer(bits=8, signed=True),
        },
        "activations": {"activation": UniformQuantizer(bits=16, signed=False)},
    },
}

qconfigs = {"qconfig": qconfig}
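
A sketch of how this qconfig might be wired up, assuming the apply_quantization entry point exercised in src/quantizers/integration_test.py returns the quantized model; the optimizer choice is illustrative, while the loss and metrics mirror src/examples/run.py:

from configs.qmodel import apply_quantization  # same entry point as the integration test

qmodel = apply_quantization(model, qconfig)  # model and qconfig as defined above
qmodel.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
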
1 change: 1 addition & 0 deletions src/examples/run.py
@@ -50,6 +50,7 @@ def main(args):
loss="categorical_crossentropy",
metrics=["accuracy"],
)
# initialize_quantizer_weights(qmodel, qconfig)

callback_tuples = [
(CaptureWeightCallback(qlayer), qconfig[layer.name])
12 changes: 5 additions & 7 deletions src/quantizers/flex_quantizer.py
@@ -157,10 +157,8 @@ def grad(upstream):
             ##### dq_dx uses STE #####
             dq_dx = tf.where(
                 tf.logical_and(
-                    tf.greater_equal(x, self.thresholds[0]),
-                    tf.less_equal(
-                        x, self.thresholds[-1]
-                    ), # should it be alpha?
+                    tf.greater_equal(x, thresholds[0]),
+                    tf.less_equal(x, thresholds[-1]), # should it be alpha?
                 ),
                 upstream,
                 tf.zeros_like(x),
@@ -207,16 +205,16 @@ def grad(upstream):
             ##### dq_dthresholds using piecewise-STE #####
             dq_dthresholds = tf.zeros_like(thresholds)
 
-            for i in range(1, self.thresholds.shape[0] - 1):
+            for i in range(1, thresholds.shape[0] - 1):
                 delta_y = qlevels[i - 1] - qlevels[i]
                 delta_x = thresholds[i + 1] - thresholds[i - 1]
 
                 # Only those associated with the 'x' values that
                 # Fall within the range of the two borderline levels
                 masked_upstream = tf.where(
                     tf.logical_and(
-                        tf.greater_equal(x, self.thresholds[i - 1]),
-                        tf.less_equal(x, self.thresholds[i + 1]),
+                        tf.greater_equal(x, thresholds[i - 1]),
+                        tf.less_equal(x, thresholds[i + 1]),
                     ),
                     upstream,
                     tf.zeros_like(x),
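
For context on the masking these hunks touch, a minimal, self-contained straight-through-estimator sketch; this is not the repo's FlexQuantizer, and the clipping range is illustrative:

import tensorflow as tf

@tf.custom_gradient
def ste_quantize(x):
    lo, hi = -1.0, 1.0  # illustrative clipping range
    y = tf.round(tf.clip_by_value(x, lo, hi))  # forward: clip, then round

    def grad(upstream):
        # STE: let the upstream gradient pass only where x falls inside [lo, hi]
        inside = tf.logical_and(x >= lo, x <= hi)
        return tf.where(inside, upstream, tf.zeros_like(x))

    return y, grad

x = tf.constant([-2.0, -0.4, 0.3, 1.5])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = ste_quantize(x)
print(tape.gradient(y, x))  # [0., 1., 1., 0.]
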
46 changes: 46 additions & 0 deletions src/quantizers/integration_test.py
@@ -0,0 +1,46 @@
#!/usr/bin/env python3
import unittest

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from configs.qmodel import apply_quantization
from quantizers.uniform_quantizer import UniformQuantizer


class TestQuantizers(unittest.TestCase):
    def test_quantizers(self):
        model = Sequential(
            [
                Dense(10, activation="relu", input_shape=(20,), name="dense1"),
                Dense(5, activation="softmax"),
            ]
        )

        qconfig = {
            "dense1": {
                "weights": {
                    "kernel": UniformQuantizer(
                        bits=4,
                        signed=True,
                    ),
                    "bias": UniformQuantizer(
                        bits=4,
                        signed=True,
                    ),
                },
                "activations": {
                    "activation": UniformQuantizer(
                        bits=4,
                        signed=True,
                    )
                },
            }
        }

        apply_quantization(model, qconfig)
        # print(quantized_model.weights)


if __name__ == "__main__":
    unittest.main()
1 change: 0 additions & 1 deletion src/quantizers/uniform_quantizer.py
@@ -69,7 +69,6 @@ def __call__(self, w):
         alpha = layer.add_weight(
             name=f"{name}{self.name_suffix}_alpha",
             initializer=self.initializer,
-            # shape=(1,),
             trainable=True,
             dtype=tf.float32,
             regularizer=self.regularizer,
Empty file added src/stage/__init__.py