Skip to content

Commit

Permalink
Align TPC.v4 with Converter spec.
Browse files Browse the repository at this point in the history
  • Loading branch information
elad-c committed Nov 19, 2024
1 parent 104445e commit 630d1c7
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@
OPSET_MERGE_OPS = "MergeOps"
OPSET_CONV = "Conv"  # created with the mixed-precision configuration options
OPSET_FULLY_CONNECTED = "FullyConnected"  # Dense (Keras) / Linear (PyTorch)
OPSET_BATCH_NORM = "BatchNorm"  # BatchNormalization (Keras) / BatchNorm2d (PyTorch)
OPSET_ANY_RELU = "AnyReLU"
OPSET_ADD = "Add"
OPSET_SUB = "Sub"
OPSET_MUL = "Mul"
OPSET_DIV = "Div"
OPSET_MIN_MAX = "MinMax"  # element-wise minimum and maximum share one operator set
OPSET_PRELU = "PReLU"
OPSET_SWISH = "Swish"  # the PyTorch TPC also maps SiLU and Hardswish to this set
OPSET_SIGMOID = "Sigmoid"
OPSET_TANH = "Tanh"
OPSET_GELU = "Gelu"  # included in the activation groups fused after Conv and FullyConnected


def get_tp_model() -> TargetPlatformModel:
Expand Down Expand Up @@ -172,6 +175,11 @@ def generate_tp_model(default_config: OpQuantizationConfig,
# If the QuantizationConfigOptions contains only one configuration,
# this configuration will be used for the operation quantization:
default_configuration_options = tp.QuantizationConfigOptions([default_config])
default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16))
default_config_options_16bit = tp.QuantizationConfigOptions([default_config_input16,
default_config_input16.clone_and_edit(activation_n_bits=16,
signedness=Signedness.SIGNED)],
base_config=default_config_input16)

# Create a QuantizationConfigOptions for quantizing constants in functional ops.
# Constant configuration is similar to the default eight bit configuration except for PoT
Expand Down Expand Up @@ -212,6 +220,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
weights_per_channel_threshold=False))
qpreserving_const_config_options = tp.QuantizationConfigOptions([qpreserving_const_config])

mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
for mp_cfg in mixed_precision_cfg_list]

# Create a TargetPlatformModel and set its default quantization config.
# This default configuration will be used for all operations
# unless specified otherwise (see OperatorsSet, for example):
Expand Down Expand Up @@ -246,30 +257,33 @@ def generate_tp_model(default_config: OpQuantizationConfig,
tp.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)

# Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list + mp_cfg_list_16bit,
base_config=base_config)

# Define operator sets that use mixed_precision_configuration_options:
conv = tp.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
fc = tp.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)

# Define operations sets without quantization configuration
# options (useful for creating fusing patterns, for example):
any_relu = tp.OperatorsSet(OPSET_ANY_RELU)
tp.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)

# Note: Operator sets without a quantization configuration are useful for creating fusing patterns.
any_relu = tp.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
add = tp.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
sub = tp.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
mul = tp.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
div = tp.OperatorsSet(OPSET_DIV, const_configuration_options)
prelu = tp.OperatorsSet(OPSET_PRELU)
swish = tp.OperatorsSet(OPSET_SWISH)
sigmoid = tp.OperatorsSet(OPSET_SIGMOID)
tanh = tp.OperatorsSet(OPSET_TANH)
tp.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
prelu = tp.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
swish = tp.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
sigmoid = tp.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
tanh = tp.OperatorsSet(OPSET_TANH, default_config_options_16bit)
gelu = tp.OperatorsSet(OPSET_GELU, default_config_options_16bit)

# Combine multiple operators into a single operator to avoid quantization between
# them. To do this we define fusing patterns using the OperatorsSets that were created.
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created
activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid)
activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh, gelu)
activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu)
any_binary = tp.OperatorSetConcat(add, sub, mul, div)

# ------------------- #
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,19 @@
if version.parse(tf.__version__) >= version.parse("2.13"):
from keras.src.layers import Conv2D, DepthwiseConv2D, Dense, Reshape, ZeroPadding2D, Dropout, \
MaxPooling2D, Activation, ReLU, Add, Subtract, Multiply, PReLU, Flatten, Cropping2D, LeakyReLU, Permute, \
Conv2DTranspose, Identity, Concatenate
Conv2DTranspose, Identity, Concatenate, BatchNormalization, Minimum, Maximum
else:
from keras.layers import Conv2D, DepthwiseConv2D, Dense, Reshape, ZeroPadding2D, Dropout, \
MaxPooling2D, Activation, ReLU, Add, Subtract, Multiply, PReLU, Flatten, Cropping2D, LeakyReLU, Permute, \
Conv2DTranspose, Identity, Concatenate
Conv2DTranspose, Identity, Concatenate, BatchNormalization, Minimum, Maximum

from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import get_tp_model
import model_compression_toolkit as mct
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4 import __version__ as TPC_VERSION
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import OPSET_NO_QUANTIZATION, \
OPSET_QUANTIZATION_PRESERVING, OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, OPSET_DIMENSION_MANIPULATION_OPS, \
OPSET_MERGE_OPS, OPSET_CONV, OPSET_FULLY_CONNECTED, OPSET_ANY_RELU, OPSET_ADD, OPSET_SUB, OPSET_MUL, OPSET_DIV, \
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH, OPSET_GELU, OPSET_BATCH_NORM, OPSET_MIN_MAX

tp = mct.target_platform

Expand Down Expand Up @@ -117,6 +117,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
tp.OperationsSetToLayers(OPSET_FULLY_CONNECTED, [Dense],
attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
BIAS_ATTR: DefaultDict(default_value=BIAS)})
tp.OperationsSetToLayers(OPSET_BATCH_NORM, [BatchNormalization])
tp.OperationsSetToLayers(OPSET_ANY_RELU, [tf.nn.relu,
tf.nn.relu6,
tf.nn.leaky_relu,
Expand All @@ -128,9 +129,11 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
tp.OperationsSetToLayers(OPSET_SUB, [tf.subtract, Subtract])
tp.OperationsSetToLayers(OPSET_MUL, [tf.math.multiply, Multiply])
tp.OperationsSetToLayers(OPSET_DIV, [tf.math.divide, tf.math.truediv])
tp.OperationsSetToLayers(OPSET_MIN_MAX, [tf.math.minimum, tf.math.maximum, Minimum, Maximum])
tp.OperationsSetToLayers(OPSET_PRELU, [PReLU])
tp.OperationsSetToLayers(OPSET_SWISH, [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
tp.OperationsSetToLayers(OPSET_SIGMOID, [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
tp.OperationsSetToLayers(OPSET_TANH, [tf.nn.tanh, tp.LayerFilterParams(Activation, activation="tanh")])
tp.OperationsSetToLayers(OPSET_GELU, [tf.nn.gelu, tp.LayerFilterParams(Activation, activation="gelu")])

return keras_tpc
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@

import torch
from torch import add, sub, mul, div, divide, flatten, reshape, split, unsqueeze, dropout, sigmoid, tanh, \
chunk, unbind, topk, gather, equal, transpose, permute, argmax, squeeze, multiply, subtract
from torch.nn import Conv2d, Linear, ConvTranspose2d, MaxPool2d
chunk, unbind, topk, gather, equal, transpose, permute, argmax, squeeze, multiply, subtract, minimum, \
maximum
from torch.nn import Conv2d, Linear, ConvTranspose2d, MaxPool2d, BatchNorm2d
from torch.nn import Dropout, Flatten, Hardtanh
from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU, GELU
import torch.nn.functional as F
from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu, gelu

from model_compression_toolkit.defaultdict import DefaultDict
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
Expand All @@ -32,7 +34,7 @@
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import OPSET_NO_QUANTIZATION, \
OPSET_QUANTIZATION_PRESERVING, OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, OPSET_DIMENSION_MANIPULATION_OPS, \
OPSET_MERGE_OPS, OPSET_CONV, OPSET_FULLY_CONNECTED, OPSET_ANY_RELU, OPSET_ADD, OPSET_SUB, OPSET_MUL, OPSET_DIV, \
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH, OPSET_GELU, OPSET_BATCH_NORM, OPSET_MIN_MAX

tp = mct.target_platform

Expand Down Expand Up @@ -95,6 +97,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
attr_mapping=pytorch_linear_attr_mapping)
tp.OperationsSetToLayers(OPSET_FULLY_CONNECTED, [Linear],
attr_mapping=pytorch_linear_attr_mapping)
tp.OperationsSetToLayers(OPSET_BATCH_NORM, [BatchNorm2d])
tp.OperationsSetToLayers(OPSET_ANY_RELU, [torch.relu,
ReLU,
ReLU6,
Expand All @@ -109,9 +112,11 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
tp.OperationsSetToLayers(OPSET_SUB, [operator.sub, sub, subtract])
tp.OperationsSetToLayers(OPSET_MUL, [operator.mul, mul, multiply])
tp.OperationsSetToLayers(OPSET_DIV, [operator.truediv, div, divide])
tp.OperationsSetToLayers(OPSET_MIN_MAX, [minimum, maximum])
tp.OperationsSetToLayers(OPSET_PRELU, [PReLU, prelu])
tp.OperationsSetToLayers(OPSET_SWISH, [SiLU, silu, Hardswish, hardswish])
tp.OperationsSetToLayers(OPSET_SIGMOID, [Sigmoid, sigmoid])
tp.OperationsSetToLayers(OPSET_TANH, [Tanh, tanh])
tp.OperationsSetToLayers(OPSET_SIGMOID, [Sigmoid, sigmoid, F.sigmoid])
tp.OperationsSetToLayers(OPSET_TANH, [Tanh, tanh, F.tanh])
tp.OperationsSetToLayers(OPSET_GELU, [GELU, gelu])

return pytorch_tpc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from model_compression_toolkit.constants import PYTORCH
from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
from model_compression_toolkit.target_platform_capabilities.constants import IMX500_TP_MODEL
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import \
OPSET_MUL, OPSET_GELU, OPSET_TANH
from model_compression_toolkit.core.pytorch.utils import get_working_device
from tests.pytorch_tests.model_tests.base_pytorch_feature_test import BasePytorchFeatureNetworkTest

Expand All @@ -28,10 +30,14 @@

class Activation16BitNet(torch.nn.Module):

def __init__(self, use_concat=True):
def __init__(self, use_concat=True, enable_head=True):
super().__init__()
self.use_concat = use_concat
self.enable_head = enable_head
self.conv = torch.nn.Conv2d(3, 3, 1)
if enable_head:
self.conv_a = torch.nn.Conv2d(3, 3, 1)
self.conv_b = torch.nn.Conv2d(3, 3, 1)
self.register_buffer('add_const', torch.rand((3, 1, 1)))
self.register_buffer('sub_const', torch.rand((3, 1, 1)))
self.register_buffer('div_const', 2*torch.ones((3, 1, 1)))
Expand All @@ -47,20 +53,32 @@ def forward(self, x):
x = torch.reshape(x, (-1, 3, 8*(1+int(self.use_concat)), 8))
x = self.conv(x)
x = torch.divide(x, self.div_const)

if self.enable_head:
x = torch.cat([torch.nn.functional.gelu(self.conv_a(x)),
torch.nn.functional.tanh(self.conv_b(x))], dim=1)

return x


def set_16bit_as_default(tpc, required_op_set, required_ops_list):
    """Make the 16-bit activation config the base config of an operator set.

    The base config is replaced both on the operator set looked up in
    ``tpc.tp_model.operator_set`` and on the ``tpc.layer2qco`` entry of every
    layer/function in ``required_ops_list``.

    Args:
        tpc: Target platform capabilities object; it is modified in place.
        required_op_set: Name of the operator set to update.
        required_ops_list: Keys of ``tpc.layer2qco`` whose options are updated.

    Raises:
        IndexError: If a candidate list holds no config with
            ``activation_n_bits == 16``.
    """
    def _first_16bit(qc_options):
        # Indexing (rather than next()) keeps the IndexError on an empty match.
        candidates = [cfg for cfg in qc_options.quantization_config_list
                      if cfg.activation_n_bits == 16]
        return candidates[0]

    target_set = get_op_set(required_op_set, tpc.tp_model.operator_set)
    target_set.qc_options.base_config = _first_16bit(target_set.qc_options)

    for layer in required_ops_list:
        layer_options = tpc.layer2qco[layer]
        layer_options.base_config = _first_16bit(layer_options)


class Activation16BitTest(BasePytorchFeatureNetworkTest):

def get_tpc(self):
    """Return the IMX500 v4 PyTorch TPC with 16-bit activations as base config.

    The base config is switched to the 16-bit candidate for the Mul, GELU and
    Tanh operator sets and for the PyTorch ops listed next to each of them.
    """
    tpc = mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, 'v4')
    # set_16bit_as_default updates the operator set and every listed layer
    # entry, so no separate manual override of the Mul set is required.
    set_16bit_as_default(tpc, OPSET_MUL, [torch.mul, mul])
    set_16bit_as_default(tpc, OPSET_GELU, [torch.nn.GELU, torch.nn.functional.gelu])
    set_16bit_as_default(tpc, OPSET_TANH, [torch.nn.Tanh, torch.nn.functional.tanh, torch.tanh])
    return tpc

def create_networks(self):
    """Return the float model under test, built with its default arguments."""
    float_model = Activation16BitNet()
    return float_model

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
Expand All @@ -77,6 +95,10 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info=
"1st mul activation should be forced by TPC to be signed, even though activations as all positive.")
self.unit_test.assertTrue(mul2_act_quant.activation_holder_quantizer.num_bits == 8,
"2nd mul activation bits should be 8 bits because of following div node.")
self.unit_test.assertTrue(quantized_model.gelu_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16,
"gelu activation bits should be 16 bits because of following concat node.")
self.unit_test.assertTrue(quantized_model.tanh_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16,
"tanh activation bits should be 16 bits because of following concat node.")


class Activation16BitMixedPrecisionTest(Activation16BitTest):
Expand All @@ -103,7 +125,7 @@ def get_resource_utilization(self):
return mct.core.ResourceUtilization(activation_memory=200)

def create_networks(self):
    """Return the float model for the mixed-precision test.

    The network is built without the concat branch and without the
    GELU/Tanh head (``enable_head=False``).
    """
    return Activation16BitNet(use_concat=False, enable_head=False)

def get_mixed_precision_config(self):
    """Return a mixed-precision quantization config built with default arguments."""
    mp_config = MixedPrecisionQuantizationConfig()
    return mp_config
Expand Down

0 comments on commit 630d1c7

Please sign in to comment.