Hessian scores variables and method names modification
Ofir Gordon authored and Ofir Gordon committed Nov 5, 2023
1 parent fce9b52 commit fc4838d
Showing 28 changed files with 104 additions and 104 deletions.
model_compression_toolkit/constants.py (6 changes: 3 additions & 3 deletions)
@@ -118,9 +118,9 @@
DUMMY_NODE = 'dummy_node'
DUMMY_TENSOR = 'dummy_tensor'

-# Jacobian-weights constants
-MIN_JACOBIANS_ITER = 10
-JACOBIANS_COMP_TOLERANCE = 1e-3
+# Hessian scores constants
+MIN_HESSIAN_ITER = 10
+HESSIAN_COMP_TOLERANCE = 1e-3


# Hessian configuration default constants
@@ -17,7 +17,7 @@
from model_compression_toolkit.constants import EPS


-def normalize_scores(hessian_approximations: List) -> np.ndarray:
+def normalize_scores(hessian_approximations: List) -> np.ndarray:
"""
Normalize Hessian information approximations by dividing each trace Hessian approximation value by the sum of all
other values.
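The body of the renamed normalize_scores is collapsed in this diff. A minimal sketch of what the docstring and the EPS import suggest it computes; the EPS value below is an assumed stand-in, not the actual constant:

```python
import numpy as np
from typing import List

EPS = 1e-8  # assumed stand-in for model_compression_toolkit.constants.EPS


def normalize_scores(hessian_approximations: List) -> np.ndarray:
    """Sketch: divide each trace Hessian approximation by the sum of all values."""
    scores = np.asarray(hessian_approximations, dtype=np.float64)
    return scores / (np.sum(scores) + EPS)  # EPS guards against a zero-sum division
```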
@@ -30,8 +30,8 @@ def __init__(self,
num_of_images: int = 32,
configuration_overwrite: List[int] = None,
num_interest_points_factor: float = 1.0,
-use_grad_based_weights: bool = True,
-norm_weights: bool = True,
+use_hessian_based_scores: bool = True,
+norm_scores: bool = True,
refine_mp_solution: bool = True,
metric_normalization_threshold: float = 1e10):
"""
@@ -45,8 +45,8 @@ def __init__(self,
num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
-use_grad_based_weights (bool): Whether to use Hessian-based scores for weighted average distance metric computation.
-norm_weights (bool): Whether to normalize the returned weights (to get values between 0 and 1).
+use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric computation.
+norm_scores (bool): Whether to normalize the returned scores for the weighted distance metric (to get values between 0 and 1).
refine_mp_solution (bool): Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.
metric_normalization_threshold (float): A threshold for checking the mixed precision distance metric values. For values larger than this threshold, the metric will be scaled to prevent numerical issues.
@@ -64,8 +64,8 @@
"thus, it should be between 0 to 1"
self.num_interest_points_factor = num_interest_points_factor

-self.use_grad_based_weights = use_grad_based_weights
-self.norm_weights = norm_weights
+self.use_hessian_based_scores = use_hessian_based_scores
+self.norm_scores = norm_scores

self.metric_normalization_threshold = metric_normalization_threshold

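After this rename, callers pass the new keyword names. Below is a hypothetical construction of the mixed-precision config using only the parameters visible in this hunk; the class name MixedPrecisionQuantizationConfigV2 and the import path are assumptions based on the surrounding MCT API:

```python
import model_compression_toolkit as mct  # import path may differ across MCT versions

# Hypothetical usage of the renamed parameters (class name assumed).
mp_config = mct.MixedPrecisionQuantizationConfigV2(
    num_of_images=32,                     # images used for sensitivity evaluation
    num_interest_points_factor=1.0,       # keep all interest points
    use_hessian_based_scores=True,        # was: use_grad_based_weights
    norm_scores=True,                     # was: norm_weights
    refine_mp_solution=True,
    metric_normalization_threshold=1e10)
```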
@@ -76,7 +76,7 @@ def __init__(self,
self.fw_impl = fw_impl
self.set_layer_to_bitwidth = set_layer_to_bitwidth
self.disable_activation_for_metric = disable_activation_for_metric
-if self.quant_config.use_grad_based_weights:
+if self.quant_config.use_hessian_based_scores:
if not isinstance(hessian_info_service, HessianInfoService):
Logger.error(f"When using hessian based approximations for sensitivity evaluation, "
f" an HessianInfoService object must be provided but is {hessian_info_service}")
@@ -125,7 +125,7 @@ def __init__(self,
# Computing Hessian-based scores for weighted average distance metric computation (only if requested),
# and assigning distance_weighting method accordingly.
self.interest_points_hessians = None
-if self.quant_config.use_grad_based_weights is True:
+if self.quant_config.use_hessian_based_scores is True:
self.interest_points_hessians = self._compute_hessian_based_scores()
self.quant_config.distance_weighting_method = lambda d: self.interest_points_hessians

@@ -263,9 +263,9 @@ def _compute_hessian_based_scores(self) -> np.ndarray:
# Append the single approximation value to the list for the current image
approx_by_image_per_interest_point.append(compare_point_to_trace_hessian_approximations[target_node][image_idx][0])

-if self.quant_config.norm_weights:
+if self.quant_config.norm_scores:
approx_by_image_per_interest_point = \
-    hessian_utils.normalize_weights(hessian_approximations=approx_by_image_per_interest_point)
+    hessian_utils.normalize_scores(hessian_approximations=approx_by_image_per_interest_point)

# Append the approximations for the current image to the main list
approx_by_image.append(approx_by_image_per_interest_point)
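The tail of _compute_hessian_based_scores is collapsed here; presumably the per-image lists collected above are reduced to one score per interest point. A toy sketch of that reduction, under that assumption:

```python
import numpy as np

# Two images, three interest points; rows are the normalized per-image scores
# accumulated in approx_by_image above (values are illustrative).
approx_by_image = np.array([[0.7, 0.2, 0.1],
                            [0.5, 0.3, 0.2]])

# Assumed final step: average over images to get one score per interest point.
scores_per_interest_point = np.mean(approx_by_image, axis=0)
print(scores_per_interest_point)  # [0.6   0.25  0.15]
```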
@@ -20,7 +20,7 @@
from tqdm import tqdm
import numpy as np

-from model_compression_toolkit.constants import MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, EPS, \
+from model_compression_toolkit.constants import MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, EPS, \
HESSIAN_NUM_ITERATIONS
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
from model_compression_toolkit.core.common import Graph, BaseNode
@@ -79,7 +79,7 @@ def compute(self) -> List[float]:
trace_approx_by_node = []
# Loop through each interest point activation tensor
for ipt in tqdm(interest_points_tensors): # Per Interest point activation tensor
-interest_point_scores = [] # List to store scores for each interest point
+interest_point_scores = []  # List to store scores for each interest point
for j in range(self.num_iterations_for_approximation): # Approximation iterations
# Getting a random vector with normal distribution
v = tf.random.normal(shape=output.shape)
@@ -106,7 +106,7 @@ def compute(self) -> List[float]:

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
new_mean_per_output = []
delta_per_output = []
# Compute new means and deltas for each output index
Expand All @@ -118,7 +118,7 @@ def compute(self) -> List[float]:
delta_per_output.append(delta)

# Check if all outputs have converged
-is_converged = all([np.abs(delta) / (np.abs(new_mean) + 1e-6) < JACOBIANS_COMP_TOLERANCE for delta, new_mean in zip(delta_per_output, new_mean_per_output)])
+is_converged = all([np.abs(delta) / (np.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE for delta, new_mean in zip(delta_per_output, new_mean_per_output)])
if is_converged:
interest_point_scores.append(score_approx_per_output)
break
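MIN_HESSIAN_ITER and HESSIAN_COMP_TOLERANCE implement an early-stopping rule for a Hutchinson-style stochastic trace estimate: run at least MIN_HESSIAN_ITER iterations, then stop once the running mean changes by less than the relative tolerance. A self-contained sketch of the same stopping pattern on a toy matrix (the estimator here uses v^T A v directly, rather than the gradient-based approximation in the calculator above):

```python
import numpy as np

MIN_HESSIAN_ITER = 10          # mirrors the constant in this diff
HESSIAN_COMP_TOLERANCE = 1e-3  # relative-change tolerance, as in this diff


def hutchinson_trace(A: np.ndarray, max_iter: int = 1000, seed: int = 0) -> float:
    """Estimate trace(A) as the running mean of v^T A v over Gaussian vectors v."""
    rng = np.random.default_rng(seed)
    samples = []
    for j in range(max_iter):
        v = rng.standard_normal(A.shape[0])
        samples.append(v @ A @ v)  # E[v^T A v] = trace(A) for standard normal v
        if j > MIN_HESSIAN_ITER:
            new_mean = np.mean(samples)
            delta = new_mean - np.mean(samples[:-1])
            # Same convergence test as the calculator: relative change below tolerance.
            if np.abs(delta) / (np.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE:
                break
    return float(np.mean(samples))


A = np.diag([1.0, 2.0, 3.0])
print(hutchinson_trace(A))  # close to trace(A) = 6.0, up to sampling noise
```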
@@ -17,7 +17,7 @@
import tensorflow as tf
from typing import List

-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_EPS
+from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_EPS
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianInfoGranularity
from model_compression_toolkit.core.keras.back2framework.float_model_builder import FloatKerasModelBuilder
@@ -120,11 +120,11 @@ def compute(self) -> np.ndarray:

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
# Compute new means and deltas
new_mean = tf.reduce_mean(tf.stack(approximation_per_iteration + approx), axis=0)
delta = new_mean - tf.reduce_mean(tf.stack(approximation_per_iteration), axis=0)
-is_converged = np.all(np.abs(delta) / (np.abs(new_mean) + HESSIAN_EPS) < JACOBIANS_COMP_TOLERANCE)
+is_converged = np.all(np.abs(delta) / (np.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE)
if is_converged:
approximation_per_iteration.append(approx)
break
@@ -18,7 +18,7 @@
from torch import autograd
from tqdm import tqdm

-from model_compression_toolkit.constants import MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_NUM_ITERATIONS
+from model_compression_toolkit.constants import MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_NUM_ITERATIONS
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianInfoGranularity
from model_compression_toolkit.core.pytorch.hessian.pytorch_model_gradients import PytorchModelGradients
@@ -77,52 +77,52 @@ def compute(self) -> List[float]:
# First, we need to unfold all outputs that are given as list, to extract the actual output tensors
output = self.concat_tensors(output_tensors)

-ipts_jac_trace_approx = []
+ipts_hessian_trace_approx = []
for ipt in tqdm(model_grads_net.interest_points_tensors): # Per Interest point activation tensor
-trace_jv = []
+trace_hv = []
for j in range(self.num_iterations_for_approximation): # Approximation iterations
# Getting a random vector with normal distribution
v = torch.randn(output.shape, device=device)
f_v = torch.sum(v * output)

# Computing the hessian trace approximation by getting the gradient of (output * v)
-jac_v = autograd.grad(outputs=f_v,
+hess_v = autograd.grad(outputs=f_v,
inputs=ipt,
retain_graph=True,
allow_unused=True)[0]
-if jac_v is None:
+if hess_v is None:
# In case we have an output node, which is an interest point, but it is not differentiable,
# we still want to set some weight for it. For this, we need to add this dummy tensor to the ipt
-# jacobian traces list.
-trace_jv.append(torch.tensor([0.0],
+# Hessian traces list.
+trace_hv.append(torch.tensor([0.0],
requires_grad=True,
device=device))
break
-jac_v = torch.reshape(jac_v, [jac_v.shape[0], -1])
-jac_trace_approx = torch.mean(torch.sum(torch.pow(jac_v, 2.0)))
-
-# If the change to the mean Jacobian approximation is insignificant we stop the calculation
-if j > MIN_JACOBIANS_ITER:
-    new_mean = torch.mean(torch.stack([jac_trace_approx, *trace_jv]))
-    delta = new_mean - torch.mean(torch.stack(trace_jv))
-    if torch.abs(delta) / (torch.abs(new_mean) + 1e-6) < JACOBIANS_COMP_TOLERANCE:
-        trace_jv.append(jac_trace_approx)
+hess_v = torch.reshape(hess_v, [hess_v.shape[0], -1])
+hessian_trace_approx = torch.mean(torch.sum(torch.pow(hess_v, 2.0)))
+
+# If the change to the mean Hessian approximation is insignificant we stop the calculation
+if j > MIN_HESSIAN_ITER:
+    new_mean = torch.mean(torch.stack([hessian_trace_approx, *trace_hv]))
+    delta = new_mean - torch.mean(torch.stack(trace_hv))
+    if torch.abs(delta) / (torch.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE:
+        trace_hv.append(hessian_trace_approx)
break

-trace_jv.append(jac_trace_approx)
-ipts_jac_trace_approx.append(2 * torch.mean(torch.stack(trace_jv)) / output.shape[
-    -1]) # Get averaged jacobian trace approximation
+trace_hv.append(hessian_trace_approx)
+ipts_hessian_trace_approx.append(2 * torch.mean(torch.stack(trace_hv)) / output.shape[
+    -1]) # Get averaged Hessian trace approximation

# If a node has multiple outputs, it means that multiple approximations were computed
# (one per output since granularity is per-tensor). In this case we average the approximations.
-if len(ipts_jac_trace_approx)>1:
+if len(ipts_hessian_trace_approx) > 1:
# Stack tensors and compute the average
-ipts_jac_trace_approx = [torch.stack(ipts_jac_trace_approx).mean()]
+ipts_hessian_trace_approx = [torch.stack(ipts_hessian_trace_approx).mean()]

-ipts_jac_trace_approx = torch_tensor_to_numpy(torch.Tensor(
-    ipts_jac_trace_approx)) # Just to get one tensor instead of list of tensors with single element
+ipts_hessian_trace_approx = torch_tensor_to_numpy(torch.Tensor(
+    ipts_hessian_trace_approx)) # Just to get one tensor instead of list of tensors with single element

-return ipts_jac_trace_approx.tolist()
+return ipts_hessian_trace_approx.tolist()

else:
Logger.error(f"{self.hessian_request.granularity} is not supported for Pytorch activation hessian's trace approx calculator")
@@ -24,7 +24,7 @@
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_EPS
+from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_EPS


class WeightsTraceHessianCalculatorPytorch(TraceHessianCalculatorPytorch):
@@ -112,10 +112,10 @@ def compute(self) -> np.ndarray:
if len(shape_channel_axis) > 0:
approx = torch.sum(approx, dim=shape_channel_axis)

-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
new_mean = (torch.sum(torch.stack(approximation_per_iteration), dim=0) + approx)/(j+1)
delta = new_mean - torch.mean(torch.stack(approximation_per_iteration), dim=0)
-converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < JACOBIANS_COMP_TOLERANCE
+converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE
if torch.all(converged_tensor):
break

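Unlike the scalar activation case, the weights calculator keeps a per-element convergence mask and stops only when every entry passes the test. A small torch sketch of that masked check (the values are illustrative and HESSIAN_EPS is an assumed stand-in):

```python
import torch

HESSIAN_COMP_TOLERANCE = 1e-3  # as in this diff
HESSIAN_EPS = 1e-6             # assumed stand-in for the MCT constant

new_mean = torch.tensor([0.500, 0.301, 0.250])  # running per-channel means
delta = torch.tensor([1e-5, 2e-4, 5e-2])        # change since the previous iteration

converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE
print(converged_tensor)             # tensor([ True,  True, False])
print(torch.all(converged_tensor))  # tensor(False) -> keep iterating
```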
model_compression_toolkit/gptq/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================

-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GradientPTQConfigV2, GPTQHessianWeightsConfig
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GradientPTQConfigV2, GPTQHessianScoresConfig
from model_compression_toolkit.gptq.keras.quantization_facade import keras_gradient_post_training_quantization_experimental
from model_compression_toolkit.gptq.keras.quantization_facade import get_keras_gptq_config
from model_compression_toolkit.gptq.pytorch.quantization_facade import pytorch_gradient_post_training_quantization_experimental
Expand Down