Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions backends/nxp/backend/edge_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,14 +415,13 @@ def input_quantization_type(
return dequantize_input_val.dtype


def output_quantization_type(
node: Node, output_index: int | None = None
) -> torch.dtype | None:
def output_quantization_type(node: Node, output_index: int) -> torch.dtype | None:
"""Return the quantization output datatype of the QDQ quantized `node`.

:param node: The compute node.
:param output_index: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
index selects the output.
index selects the output. If no `getitem` nodes follow it, the operator
produces only 1 output (most common case), and the value `0` must be used.
:return: The output quantization datatype of the QDQ quantized `node`, or `None` if the graph does not follow the
QDQ pattern or some metadata is incomplete or an invalid input index is given.

Expand All @@ -441,11 +440,13 @@ def output_quantization_type(
│ <returned type>
"""
users = list(node.users)
if len(users) == 1:
if not _is_quantize(quantize_node := users[0]):
if len(users) == 1 and _is_quantize(quantize_node := users[0]):
# Basic QDQ case.
if output_index != 0:
# There is only 1 output. Cannot access non-zero index.
return None

else: # Multiple users
else: # Only `getitem` nodes should follow.
if not isinstance(output_index, int):
return None # Invalid index.
if not all(user.target == operator.getitem for user in users):
Expand Down
25 changes: 14 additions & 11 deletions backends/nxp/backend/ir/converter/node_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def uses_quantization_type_for_inputs(
:param node: The compute node.
:param supported_types: List of supported quantization types.
:param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
If empty, no type checking is performed and `True` is returned.
:return: True, if the `node` is QDQ quantized and has quantization input types in `supported_types`.
"""
return all(
Expand All @@ -336,40 +337,42 @@ def uses_quantization_type_for_inputs(
def uses_quantization_type_for_outputs(
node: Node,
supported_types: list[torch.dtype],
output_indices: list[int] | None = None,
output_indices: list[int],
):
"""Check if `node` uses the QDQ quantization schema and outputs on the provided indices use a quantization type
that is in `supported_types`.

:param node: The compute node.
:param supported_types: List of supported quantization types.
:param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
indices select the outputs to be checked.
indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
produces only 1 output (most common case), and the value `[0]` must be used.
If empty, no type checking is performed and `True` is returned.
:return: True, if the `node` is QDQ quantized and has quantization output types in `supported_types`.
"""
if output_indices is None:
return output_quantization_type(node) in supported_types
else:
return all(
output_quantization_type(node, output_index) in supported_types
for output_index in output_indices
)
return all(
output_quantization_type(node, output_index) in supported_types
for output_index in output_indices
)

@staticmethod
def uses_quantization_type_for_io(
node: Node,
supported_types: list[torch.dtype],
input_indices: list[int | tuple[int, int]],
output_indices: list[int] | None = None,
output_indices: list[int],
):
"""Check if `node` uses the QDQ quantization schema and inputs and outputs on the provided indices use a
quantization type that is in `supported_types`.

:param node: The compute node.
:param supported_types: List of supported quantization types.
:param input_indices: List of indices into the `node.args`, or tuples of 2 indices into `node.args[idx1][idx2]`.
If empty, no input type checking is performed.
:param output_indices: If the `node` has multiple outputs and therefore multiple `getitem` nodes follow it, the
indices select the outputs to be checked.
indices select the outputs to be checked. If no `getitem` nodes follow it, the operator
produces only 1 output (most common case), and the value `[0]` must be used.
If empty, no output type checking is performed.
:return: True, if the `node` is QDQ quantized and has quantization input types in `supported_types`.
"""
return NodeConverter.uses_quantization_type_for_inputs(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _is_supported_on_target(

supported_types = [torch.int8, torch.uint8]
if not NodeConverter.uses_quantization_type_for_io(
node, supported_types, [0]
node, supported_types, [0], [0]
):
return False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import operator

import numpy as np
import torch

from executorch.backends.nxp.backend.edge_helper import try_get_arg
from executorch.backends.nxp.backend.ir.converter.conversion import (
Expand Down Expand Up @@ -73,32 +74,54 @@ def _is_supported_on_target(
MaxPool2DWithIndicesConverter._get_node_args(node)
)

output_shape = node.meta["val"][0].shape # Shape of the main output (index 0)
if output_shape[0] != 1:
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
return False

# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
stride_h = stride[0]
if stride_h not in (1, 2):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
return False

channels = output_shape[1]
if channels % neutron_target_spec.get_num_macs() != 0:
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
return False

if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929

# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
return False
if custom_delegation_options.use_new_flow_neutron_c:
# Requirements specified by the new Neutron flow documentation.

supported_types = [torch.int8, torch.uint8]
if not NodeConverter.uses_quantization_type_for_io(
node, supported_types, [0], [0]
):
return False

maximum_supported_kernel_size = 4096
# If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter
# always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so
# 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either.
maximum_supported_stride = 4096

if any(k > maximum_supported_kernel_size for k in kernel_size):
return False
if any(s > maximum_supported_stride for s in stride):
return False

else:
# Shape of the main output (index 0)
output_shape = node.meta["val"][0].shape
if output_shape[0] != 1:
# /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106
return False

# Neutron only has a restriction on `stride_h`. `stride_w` is not restricted.
stride_h = stride[0]
if stride_h not in (1, 2):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923
return False

channels = output_shape[1]
if channels % neutron_target_spec.get_num_macs() != 0:
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925
return False

if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)):
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907
# /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929

# Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the
# effective kernel size, which is an even stricter requirement than what Neutron imposes.
# https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489
return False

return True

Expand Down
Loading
Loading