Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,23 @@ def _remove_annotations(self, model: GraphModule) -> GraphModule:

return model

def _log_nonquantized_nodes(self, model: GraphModule) -> None:
    """Log (at DEBUG level) all nodes that are not marked for quantization.

    Nodes whose ``DISALLOW_TFA_META_KEY`` meta entry is True (or missing) are
    excluded from the transform-for-annotation decomposition pipeline; listing
    them here makes it easy to verify mixed-quantization setups.

    Args:
        model: Graph module to inspect. Node meta is only read, never modified.
    """
    # ``get_attr`` nodes carry parameters/buffers and are never quantization
    # targets, so they would only add noise to the report.
    non_quantized_nodes = [
        n
        for n in model.graph.nodes
        if n.meta.get(DISALLOW_TFA_META_KEY, True) and n.op != "get_attr"
    ]
    if non_quantized_nodes:
        msg = """
----------------------------------------------------------------------------------------------------
PRE-TRANSFORM FOR ANNOTATION QUANTIZATION REPORT
----------------------------------------------------------------------------------------------------
The following nodes are not marked for quantization and will not be decomposed in the transform for annotation pipeline:\n"""
        for node in non_quantized_nodes:
            msg += f"    {node.name}\n"

        # The debug call must stay inside the guard: ``msg`` is only bound
        # when there is something to report.
        logger.debug(msg)
Comment on lines +1119 to +1127

def transform_for_annotation(self, model: GraphModule) -> GraphModule:
# Transform_for_annotation should only decompose ops if quantized, which is
# indicated either by node.meta['DISALLOW_TFA_META_KEY']==False or no such key
Expand All @@ -1121,15 +1138,13 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule:
# run to set DISALLOW_TFA_META_KEY for quantized nodes and all nodes missing
# this key afterwards are set to DISALLOW_TFA_META_KEY=True.

reporter = QuantizerReporter(
self.quantizers, "PRE-TRANSFORM_FOR_ANNOTATION QUANTIZATION REPORT" # type: ignore[arg-type]
)
model = super().annotate(model)
reporter.log_quantizer_report(model)
for node in model.graph.nodes:
if DISALLOW_TFA_META_KEY not in node.meta:
node.meta[DISALLOW_TFA_META_KEY] = True

self._log_nonquantized_nodes(model)

pass_manager = ArmPassManager(self.compile_spec)
transformed_model = pass_manager.transform_for_annotation_pipeline(model)

Expand Down
65 changes: 30 additions & 35 deletions backends/arm/quantizer/arm_quantizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,46 +592,41 @@ def _annotate_shared_cluster(self, root_node: Node) -> None:
node_order = {node: index for index, node in enumerate(root_node.graph.nodes)}
ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))

if len(adjacent_qspecs) > 0:
if len(adjacent_qspecs) > 1:
logger.warning(
f"Multiple adjacent quantization specs found for {', '.join([n.name for n in ordered_nodes])}, all nodes will share the input quantization spec of {root_node.name}."
)
# Ensure the root node is the first one in the graph.
root_node = ordered_nodes[0]
Comment on lines +595 to +596

if len(adjacent_qspecs) > 0:
root_node_float_inputs = self._get_input_nodes_with_float_output(root_node)
if len(root_node_float_inputs) == 0:
self.report_reject(
ordered_nodes,
"Couldn't find any floating point input to base shared quantization spec on.",
)
return
root_node_first_input = root_node_float_inputs[0]

shared_qspec = SharedQuantizationSpec((root_node_first_input, root_node))
for node in shared_nodes:
input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
n: shared_qspec # type: ignore[misc]
for n in self._get_input_nodes_with_float_output(node)
}
if len(self._get_user_nodes_with_float_input(node)) == 0:
output_qspec = None
else:
output_qspec = shared_qspec
_mark_node_as_quantized(
node, input_qspec_map, output_qspec, is_quantized=True
if len(root_node_float_inputs) > 0:

root_node_first_input = root_node_float_inputs[0]
shared_qspec = SharedQuantizationSpec(
(root_node_first_input, root_node)
)
for node in shared_nodes:
input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
n: shared_qspec # type: ignore[misc]
for n in self._get_input_nodes_with_float_output(node)
}
if len(self._get_user_nodes_with_float_input(node)) == 0:
output_qspec = None
else:
output_qspec = shared_qspec
_mark_node_as_quantized(
node, input_qspec_map, output_qspec, is_quantized=True
)

root_node.meta[Q_ANNOTATION_KEY].input_qspec_map[root_node_first_input] = (
adjacent_qspecs[0]
)
self.report_accept(ordered_nodes)
root_node.meta[Q_ANNOTATION_KEY].input_qspec_map[
root_node_first_input
] = adjacent_qspecs[0]
self.report_accept(ordered_nodes)
return

else:
self.report_reject(
ordered_nodes,
"Couldn't find any adjacent quantization spec to base shared quantization spec on. You may however quantize these nodes manually if required.",
)
return
self.report_reject(
ordered_nodes,
"All inputs and outputs to these nodes are non-quantized.",
)
Comment on lines +625 to +628
return

def annotate(self, model: torch.fx.GraphModule) -> None: # type: ignore[override]
for node in model.graph.nodes:
Expand Down
4 changes: 3 additions & 1 deletion backends/cortex_m/quantizer_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,9 @@ def unannotated_nodes_report(
non_quantized_nodes: list[Node] = []
else:
non_quantized_nodes = [
node for node in model.graph.nodes if Q_ANNOTATION_KEY not in node.meta
node
for node in model.graph.nodes
if Q_ANNOTATION_KEY not in node.meta and node.op != "get_attr"
]

rows = []
Expand Down
32 changes: 19 additions & 13 deletions examples/arm/quantizer_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### The quantization report\n",
"### Pre-transform for annotation quantization report\n",
"Note that there are two quantization reports printed in this case, this is because the quantization\n",
"annotator is run twice during the quantization for two different purposes.\n",
"\n",
"In the logged quantization report each quantizer has added one header describing targeted nodes, the used quantization config, and the supported operators / operator patterns. \n",
"1. Mark nodes to be decomposed by the `transform_for_annotation` pipeline (PRE-TRANSFORM FOR ANNOTATION QUANTIZATION REPORT)\n",
"2. Perform actual quantization after the `transform_for_annotation` pipeline (FINAL QUANTIZATION REPORT)\n",
"\n",
"Consider for example the `torch.div` operator, which is decomposed into one multiplication and one\n",
"reciprocal operator when quantized. Since both ops in the decomposition require different quantization parameters, this decomposition needs to happen before quantization, in the `transform_for_annotation` pipeline, but on the other hand this\n",
"decomposition must not happen if it should be kept in float.\n",
"\n",
"**This is important to be aware of when doing mixed quantization since this means that for an operator to be fully quantized,\n",
"both the original operator and the decomposition needs to be targeted.**\n",
"\n",
"The pre-transform for annotation report prints all nodes which are not marked for decomposition (if any) to make this\n",
"easy to get right. If regular full-graph quantization is done, this is simply skipped.\n",
"\n",
"### The final quantization report\n",
"In the second quantization report, each quantizer has added one header describing targeted nodes, the used quantization config, and the supported operators / operator patterns. \n",
"```\n",
"PatternQuantizer using NodeNameNodeFinder targeting names: conv2d, relu\n",
"Annotating with executorch.backends.arm.quantizer.arm_quantizer.get_symmetric_quantization_config(is_per_channel=True)\n",
Expand All @@ -187,7 +203,7 @@
"```\n",
" NODE NAME INPUT QSPEC MAP OUTPUT QSPEC MAP\n",
" -- ----------- ---------------------------------------- ---------------------\n",
" ╒ conv2d x: INT8_PER_TENSOR_QSPEC NO_QSPEC\n",
" ╒ conv2d x: INT8_PER_TENSOR_QSPEC None\n",
" | _param_constant0: INT8_PER_CHANNEL_QSPEC\n",
" | _param_constant1: DERIVED_QSPEC\n",
" ╘ relu \n",
Expand All @@ -198,16 +214,6 @@
"many different quantization annotations for different types of tensors; per tensor for\n",
"activations, per channel for weights, and a special quantization spec for the int32 bias. \n",
"\n",
"### Pre-transform for annotation vs. final quantization report\n",
"One important detail is that there are two reports printed, one named PRE-TRANSFORM_FOR_ANNOTATION QUANTIZATION REPORT,\n",
"and one named FINAL QUANTIZATION REPORT. This is related to the fact that some operators has to be decomposed before quantization to ensure\n",
"that all \"sub operators\" gets quantized properly. As an example, the division operator in the first report\n",
"has decomposed into a reciprocal and multiplication operator in the second. Had it not been marked for quantization\n",
"in the first step, it would have remained a single division operator.\n",
"\n",
"**This is important to be aware of when doing mixed quantization since this means that for an operator to be fully quantized,\n",
"both the original operator and the decomposition needs to be targeted.**\n",
"\n",
"### SharedQspecQuantizer\n",
"Last in the report there is always an additional quantizer applied which is not specified by the user, the SharedQspecQuantizer.\n",
"It handles data shuffling operators without numerical behaviour such as copies and reshapes to ensure that they are quantized with the same qspec as\n",
Expand Down
Loading