Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/nncf/openvino/graph/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ def get_linear_weights_layout_from_node(node: NNCFNode) -> tuple[OVLayoutElem]:
layer_attributes = node.layer_attributes
port_id = _get_constant_port_id_from_layer_attributes(layer_attributes)
constant_layer_attrs = layer_attributes.constant_attributes[port_id]

transpose = constant_layer_attrs.get("transpose", False)
input_shape = constant_layer_attrs["shape"]

return get_linear_input_layout(
input_shape=constant_layer_attrs["shape"],
transpose=constant_layer_attrs["transpose"],
input_shape=input_shape,
transpose=transpose,
port_id=port_id,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1173,11 +1173,6 @@ def apply_with_parameters(
)

if self._lora_correction:
for wc_params in all_weight_params:
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph):
msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
raise nncf.UnsupportedModelError(msg)

lora_correction_params = self._advanced_parameters.lora_correction_params
lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
description += " with correction of low-rank adapters"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,11 @@ def apply(
continue
_, weight_port_id = weight_data[0]

if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, graph):
msg = "Transposed activations are not supported yet for the Scale Estimation algorithm"
raise nncf.UnsupportedModelError(msg)
activation_port = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph)
activation_edge = graph.get_input_edge_by_port_id(wp.node_with_weight, activation_port)
act_ch_axis = self._backend_entity.get_activation_channel_axis(
wp.node_with_weight, activation_edge.input_port_id, activation_edge.tensor_shape
)

weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)

Expand All @@ -154,6 +156,7 @@ def apply(
self._initial_steps,
self._scale_steps,
self._weight_penalty,
act_ch_axis,
)
res[weight_name] = CompressedWeight(None, scale, zero_point, None)

Expand All @@ -169,6 +172,7 @@ def calculate_quantization_params(
initial_steps: int = 5,
scale_steps: int = 10,
weight_penalty: float = -1.0,
act_ch_axis:int =-1,
) -> Tensor:
"""
Calculates the quantization parameters for a given set of weights and activations.
Expand All @@ -195,7 +199,7 @@ def calculate_quantization_params(
"""
reduction_axis = reduction_axes[0]

s, X = process_stats(statistics, subset_size)
s, X = process_stats(statistics, subset_size,act_ch_axis)

X = X.astype(TensorDataType.float32)
weight = weight.astype(TensorDataType.float32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -809,30 +809,31 @@ def get_transposable_awq_model(
),
],
)
def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
def test_compression_works_with_transposed_activations(self, transpose_a_supported, kwargs):
if not transpose_a_supported:
pytest.skip("transpose_a is not supported for the current backend")
if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
pytest.skip("Scale estimation is not supported")
if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
pytest.skip("GPTQ is not supported")
if kwargs.get("gptq", False):
pytest.skip("GPTQ with transposed activations requires hessian axis refactoring - out of scope")
if kwargs.get("lora_correction", False) and "lora_correction" in self.get_not_supported_algorithms():
pytest.skip("lora_correction is not supported")

INPUT_SHAPE = (2, 4)
if kwargs.get("lora_correction", False):
pytest.skip("LoRA correction with transposed activations requires adapter shape refactoring - out of scope")
INPUT_SHAPE = (2, 24, 16)
model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
input = self.to_tensor(input)
dataset = Dataset([input] * 2, self.get_transform_func())

with pytest.raises(nncf.UnsupportedModelError):
compress_weights(
model,
mode=CompressWeightsMode.INT4_SYM,
ratio=1.0,
group_size=1,
subset_size=2,
dataset=dataset,
all_layers=True,
**kwargs,
)
compress_weights(
model,
mode=CompressWeightsMode.INT4_SYM,
ratio=1.0,
group_size=1,
subset_size=2,
dataset=dataset,
all_layers=True,
**kwargs,
)