Skip to content

NXP backend: Use zero point for quantized padding. #13576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

from executorch.backends.nxp.backend.ir.converter.conversion import (
aten_translator,
common,
)
from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
tf_lite_type_to_numpy,
)
from executorch.backends.nxp.backend.ir.converter.node_converter import NodeConverter
from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
Expand Down Expand Up @@ -57,9 +62,20 @@ def _convert_2d_avg_pool(
)

if explicit_padding is not None:
# Need to prepend a 'Pad' operator, which adds 0s. But these will be included in the computation!
# Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case). But these will
# be included in the computation!
input_quantization = t_op.tmp_inputs[0].quantization
pad_value = (
None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why None instead of 0 in the non-quantized case?

Copy link
Contributor Author

@MartinPavella MartinPavella Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None is the default value of the builder.create_pad_operator_before() method's constant_value parameter. This way, the actual default padding value (0) is only defined in 1 place.

But it's hard to imagine that the default padding value would ever be changed, and using 0 here would make the code more understandable. I have no problem using 0 instead of None if you prefer.

if input_quantization is None
else np.array(input_quantization.zero_point[0]).astype(
tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
)
)
ops.add_pre(
self.builder.create_pad_operator_before(t_op, 0, explicit_padding)
self.builder.create_pad_operator_before(
t_op, 0, explicit_padding, pad_value
)
)

return ops.flatten()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
common,
)
from executorch.backends.nxp.backend.ir.converter.conversion.common import try_get_input
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
tf_lite_type_to_numpy,
)
from executorch.backends.nxp.backend.ir.converter.node_converter import (
NodeConverter,
Target,
Expand Down Expand Up @@ -182,9 +185,19 @@ def _convert_2d_conv(
aten_translator.convert_padding(conv_params.padding)
)
if explicit_padding is not None:
# Need to prepend a 'Pad' operator, which adds 0s.
# Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
input_quantization = t_op.tmp_inputs[0].quantization
pad_value = (
None
if input_quantization is None
else np.array(input_quantization.zero_point[0]).astype(
tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
)
)
conversion_result.ops_list.add_pre(
self.builder.create_pad_operator_before(t_op, 0, explicit_padding)
self.builder.create_pad_operator_before(
t_op, 0, explicit_padding, constant_value=pad_value
)
)

# DepthwiseConv2D expects weights in format [kernel_channels, kernel_height, kernel_width, output_channels]
Expand Down Expand Up @@ -221,9 +234,19 @@ def _convert_2d_conv(
aten_translator.convert_padding(conv_params.padding)
)
if explicit_padding is not None:
# Need to prepend a 'Pad' operator, which adds 0s.
# Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
input_quantization = t_op.tmp_inputs[0].quantization
pad_value = (
None
if input_quantization is None
else np.array(input_quantization.zero_point[0]).astype(
tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
)
)
conversion_result.ops_list.add_pre(
self.builder.create_pad_operator_before(t_op, 0, explicit_padding)
self.builder.create_pad_operator_before(
t_op, 0, explicit_padding, constant_value=pad_value
)
)

return conversion_result.ops_list.flatten()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
)
from executorch.backends.nxp.backend.ir.converter.conversion import aten_translator
from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
tf_lite_type_to_numpy,
)
from executorch.backends.nxp.backend.ir.converter.tensor_utils import tensor_has_data
from executorch.backends.nxp.backend.ir.lib.tflite.Padding import Padding
from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
Expand Down Expand Up @@ -289,9 +292,17 @@ def build_input_tensor_padding(

tfl_padding, explicit_padding = aten_translator.convert_padding(conv_params.padding)
if explicit_padding is not None:
# Must add extra 'Pad' operator
# Must add extra 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
input_quantization = t_op.tmp_inputs[0].quantization
pad_value = (
None
if input_quantization is None
else np.array(input_quantization.zero_point[0]).astype(
tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
)
)
return tfl_padding, builder.create_pad_operator_before(
t_op, input_idx, explicit_padding
t_op, input_idx, explicit_padding, pad_value
)

return tfl_padding, None
Expand Down
2 changes: 1 addition & 1 deletion backends/nxp/tests/executorch_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_random_float_data(input_shapes: tuple[int] | list[tuple[int]]):

def to_quantized_edge_program(
model: torch.nn.Module,
input_shapes: tuple[int] | list[tuple[int]],
input_shapes: tuple[int, ...] | list[tuple[int, ...]],
operators_not_to_delegate: list[str] = None,
target="imxrt700",
neutron_converter_flavor="SDK_25_03",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from executorch.backends.nxp.backend.edge_program_converter import (
EdgeProgramToIRConverter,
)
from executorch.backends.nxp.backend.ir.converter.builder.model_builder import (
ModelBuilder,
)
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
BuiltinOperator,
)
from executorch.backends.nxp.tests.executorch_pipeline import (
to_edge_program,
to_quantized_edge_program,
Expand Down Expand Up @@ -156,3 +162,49 @@ def test_avg_pool_2d_quant_conversion(mocker, input_shape, padding, count_includ
tflite_output_preprocess=ToNCHWPreprocess(),
input_data=input_data,
)


def test_avg_pool_2d_quant_conversion__padded(mocker):
    """Check conversion of a quantized, explicitly padded AvgPool2d.

    Asserts that the converter emits a `PadV2` operator followed by
    `AveragePool2D`, and that the constant used by the `PadV2` equals the
    quantization zero-point of the pooling input (rather than a raw 0,
    which would skew the averages for quantized tensors).
    """
    input_shape = (1, 8, 8, 8)
    # AvgPool2dModule(count_include_pad=True, padding=1) — presumably; TODO confirm
    # against the module's signature, which is defined elsewhere in the test file.
    model = AvgPool2dModule(True, 1)

    # Spy on the conversion entry points to capture intermediate results.
    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
    ops_spy = mocker.spy(ModelBuilder, "finish")

    # Run conversion
    _ = to_quantized_edge_program(model, input_shape)

    # Capture the converter operators.
    ops = ops_spy.spy_return.sub_graphs[0].operators.vector

    # Capture generated model
    tflite_flatbuffers_model, io_formats = converter_spy.spy_return

    # Capture converted program
    exported_program: ExportedProgram = converter_spy.call_args.args[1]

    # Random int8 input in roughly [0, 50); the model is quantized, so the
    # comparison below runs on integer data.
    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)

    convert_run_compare(
        exported_program,
        tflite_input_preprocess=ToNHWCPreprocess(),
        tfl_model=tflite_flatbuffers_model,
        tflite_output_preprocess=ToNCHWPreprocess(),
        input_data=input_data,
    )

    # Exactly two operators: the inserted padding op, then the pool itself.
    assert len(ops) == 2
    assert ops[0].builtin_options.operator_type == BuiltinOperator.PADV2
    assert ops[1].builtin_options.operator_type == BuiltinOperator.AVERAGE_POOL_2D

    # Make sure the padding used the `zero-point`.
    # tmp_inputs[2] of PadV2 is its `constant_values` tensor; the scalar it
    # holds must match the zero-point of every tensor it touches.
    pad_value = ops[0].tmp_inputs[2].tmp_buffer.data.item()
    assert (
        pad_value == ops[0].tmp_inputs[0].quantization.zero_point[0]
    )  # `Pad` input zp.
    assert (
        pad_value == ops[0].tmp_outputs[0].quantization.zero_point[0]
    )  # `Pad` output zp.
    assert (
        pad_value == ops[1].tmp_inputs[0].quantization.zero_point[0]
    )  # `AvgPool` input zp.
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker):

ops = spy.spy_return.sub_graphs[0].operators.vector
assert len(ops) == 2
assert ops[0].builtin_options.operator_type == BuiltinOperator.PAD
assert ops[0].builtin_options.operator_type == BuiltinOperator.PADV2
assert ops[1].builtin_options.operator_type == BuiltinOperator.DEPTHWISE_CONV_2D

nodes = list(edge_program.graph.nodes)
Expand All @@ -335,6 +335,12 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker):
) # input, Quant, lowered_module, delegate_call, getitem, Deq, output
assert nodes[2].target == "lowered_module_0"

# Make sure the padding used the `zero-point`.
assert (
ops[0].tmp_inputs[2].tmp_buffer.data.item()
== ops[0].tmp_outputs[0].quantization.zero_point[0]
)


@pytest.mark.parametrize("stride", [1, 2])
@pytest.mark.parametrize("dilation", [1, 2])
Expand Down
Loading