Hessian scores variables and method names modification
Ofir Gordon authored and Ofir Gordon committed Nov 5, 2023
1 parent fce9b52 commit fc4838d
Showing 28 changed files with 104 additions and 104 deletions.
model_compression_toolkit/constants.py (6 changes: 3 additions & 3 deletions)
@@ -118,9 +118,9 @@
DUMMY_NODE = 'dummy_node'
DUMMY_TENSOR = 'dummy_tensor'

-# Jacobian-weights constants
-MIN_JACOBIANS_ITER = 10
-JACOBIANS_COMP_TOLERANCE = 1e-3
+# Hessian scores constants
+MIN_HESSIAN_ITER = 10
+HESSIAN_COMP_TOLERANCE = 1e-3


# Hessian configuration default constants
@@ -17,7 +17,7 @@
from model_compression_toolkit.constants import EPS


-def normalize_scores(hessian_approximations: List) -> np.ndarray:
+def normalize_scores(hessian_approximations: List) -> np.ndarray:
"""
Normalize Hessian information approximations by dividing each trace Hessian approximation value by the sum of all
other values.
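The body of the renamed normalize_scores is collapsed in this diff. A minimal sketch of what the docstring and the EPS import suggest it computes; the EPS value below is an assumed stand-in, not the actual constant:

```python
import numpy as np
from typing import List

EPS = 1e-8  # assumed stand-in for model_compression_toolkit.constants.EPS


def normalize_scores(hessian_approximations: List) -> np.ndarray:
    """Sketch: divide each trace Hessian approximation by the sum of all values."""
    scores = np.asarray(hessian_approximations, dtype=np.float64)
    return scores / (np.sum(scores) + EPS)  # EPS guards against a zero-sum division
```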
@@ -30,8 +30,8 @@ def __init__(self,
num_of_images: int = 32,
configuration_overwrite: List[int] = None,
num_interest_points_factor: float = 1.0,
-use_grad_based_weights: bool = True,
-norm_weights: bool = True,
+use_hessian_based_scores: bool = True,
+norm_scores: bool = True,
refine_mp_solution: bool = True,
metric_normalization_threshold: float = 1e10):
"""
@@ -45,8 +45,8 @@ def __init__(self,
num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
-use_grad_based_weights (bool): Whether to use Hessian-based scores for weighted average distance metric computation.
-norm_weights (bool): Whether to normalize the returned weights (to get values between 0 and 1).
+use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric computation.
+norm_scores (bool): Whether to normalize the returned scores for the weighted distance metric (to get values between 0 and 1).
refine_mp_solution (bool): Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.
metric_normalization_threshold (float): A threshold for checking the mixed precision distance metric values. For values larger than this threshold, the metric will be scaled to prevent numerical issues.
@@ -64,8 +64,8 @@
"thus, it should be between 0 to 1"
self.num_interest_points_factor = num_interest_points_factor

-self.use_grad_based_weights = use_grad_based_weights
-self.norm_weights = norm_weights
+self.use_hessian_based_scores = use_hessian_based_scores
+self.norm_scores = norm_scores

self.metric_normalization_threshold = metric_normalization_threshold

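After this rename, callers pass the new keyword names. Below is a hypothetical construction of the mixed-precision config using only the parameters visible in this hunk; the class name MixedPrecisionQuantizationConfigV2 and the import path are assumptions based on the surrounding MCT API:

```python
import model_compression_toolkit as mct  # import path may differ across MCT versions

# Hypothetical usage of the renamed parameters (class name assumed).
mp_config = mct.MixedPrecisionQuantizationConfigV2(
    num_of_images=32,                     # images used for sensitivity evaluation
    num_interest_points_factor=1.0,       # keep all interest points
    use_hessian_based_scores=True,        # was: use_grad_based_weights
    norm_scores=True,                     # was: norm_weights
    refine_mp_solution=True,
    metric_normalization_threshold=1e10)
```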
@@ -76,7 +76,7 @@ def __init__(self,
self.fw_impl = fw_impl
self.set_layer_to_bitwidth = set_layer_to_bitwidth
self.disable_activation_for_metric = disable_activation_for_metric
-if self.quant_config.use_grad_based_weights:
+if self.quant_config.use_hessian_based_scores:
if not isinstance(hessian_info_service, HessianInfoService):
Logger.error(f"When using hessian based approximations for sensitivity evaluation, "
f" an HessianInfoService object must be provided but is {hessian_info_service}")
@@ -125,7 +125,7 @@ def __init__(self,
# Computing Hessian-based scores for weighted average distance metric computation (only if requested),
# and assigning distance_weighting method accordingly.
self.interest_points_hessians = None
-if self.quant_config.use_grad_based_weights is True:
+if self.quant_config.use_hessian_based_scores is True:
self.interest_points_hessians = self._compute_hessian_based_scores()
self.quant_config.distance_weighting_method = lambda d: self.interest_points_hessians

@@ -263,9 +263,9 @@ def _compute_hessian_based_scores(self) -> np.ndarray:
# Append the single approximation value to the list for the current image
approx_by_image_per_interest_point.append(compare_point_to_trace_hessian_approximations[target_node][image_idx][0])

-if self.quant_config.norm_weights:
+if self.quant_config.norm_scores:
approx_by_image_per_interest_point = \
-    hessian_utils.normalize_weights(hessian_approximations=approx_by_image_per_interest_point)
+    hessian_utils.normalize_scores(hessian_approximations=approx_by_image_per_interest_point)

# Append the approximations for the current image to the main list
approx_by_image.append(approx_by_image_per_interest_point)
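The tail of _compute_hessian_based_scores is collapsed here; presumably the per-image lists collected above are reduced to one score per interest point. A toy sketch of that reduction, under that assumption:

```python
import numpy as np

# Two images, three interest points; rows are the normalized per-image scores
# accumulated in approx_by_image above (values are illustrative).
approx_by_image = np.array([[0.7, 0.2, 0.1],
                            [0.5, 0.3, 0.2]])

# Assumed final step: average over images to get one score per interest point.
scores_per_interest_point = np.mean(approx_by_image, axis=0)
print(scores_per_interest_point)  # [0.6   0.25  0.15]
```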
@@ -20,7 +20,7 @@
from tqdm import tqdm
import numpy as np

-from model_compression_toolkit.constants import MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, EPS, \
+from model_compression_toolkit.constants import MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, EPS, \
HESSIAN_NUM_ITERATIONS
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
from model_compression_toolkit.core.common import Graph, BaseNode
@@ -79,7 +79,7 @@ def compute(self) -> List[float]:
trace_approx_by_node = []
# Loop through each interest point activation tensor
for ipt in tqdm(interest_points_tensors): # Per Interest point activation tensor
-interest_point_scores = [] # List to store scores for each interest point
+interest_point_scores = []  # List to store scores for each interest point
for j in range(self.num_iterations_for_approximation): # Approximation iterations
# Getting a random vector with normal distribution
v = tf.random.normal(shape=output.shape)
@@ -106,7 +106,7 @@ def compute(self) -> List[float]:

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
new_mean_per_output = []
delta_per_output = []
# Compute new means and deltas for each output index
Expand All @@ -118,7 +118,7 @@ def compute(self) -> List[float]:
delta_per_output.append(delta)

# Check if all outputs have converged
-is_converged = all([np.abs(delta) / (np.abs(new_mean) + 1e-6) < JACOBIANS_COMP_TOLERANCE for delta, new_mean in zip(delta_per_output, new_mean_per_output)])
+is_converged = all([np.abs(delta) / (np.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE for delta, new_mean in zip(delta_per_output, new_mean_per_output)])
if is_converged:
interest_point_scores.append(score_approx_per_output)
break
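MIN_HESSIAN_ITER and HESSIAN_COMP_TOLERANCE implement an early-stopping rule for a Hutchinson-style stochastic trace estimate: run at least MIN_HESSIAN_ITER iterations, then stop once the running mean changes by less than the relative tolerance. A self-contained sketch of the same stopping pattern on a toy matrix (the estimator here uses v^T A v directly, rather than the gradient-based approximation in the calculator above):

```python
import numpy as np

MIN_HESSIAN_ITER = 10          # mirrors the constant in this diff
HESSIAN_COMP_TOLERANCE = 1e-3  # relative-change tolerance, as in this diff


def hutchinson_trace(A: np.ndarray, max_iter: int = 1000, seed: int = 0) -> float:
    """Estimate trace(A) as the running mean of v^T A v over Gaussian vectors v."""
    rng = np.random.default_rng(seed)
    samples = []
    for j in range(max_iter):
        v = rng.standard_normal(A.shape[0])
        samples.append(v @ A @ v)  # E[v^T A v] = trace(A) for standard normal v
        if j > MIN_HESSIAN_ITER:
            new_mean = np.mean(samples)
            delta = new_mean - np.mean(samples[:-1])
            # Same convergence test as the calculator: relative change below tolerance.
            if np.abs(delta) / (np.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE:
                break
    return float(np.mean(samples))


A = np.diag([1.0, 2.0, 3.0])
print(hutchinson_trace(A))  # close to trace(A) = 6.0, up to sampling noise
```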
@@ -17,7 +17,7 @@
import tensorflow as tf
from typing import List

-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_EPS
+from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_EPS
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianInfoGranularity
from model_compression_toolkit.core.keras.back2framework.float_model_builder import FloatKerasModelBuilder
@@ -120,11 +120,11 @@ def compute(self) -> np.ndarray:

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
# Compute new means and deltas
new_mean = tf.reduce_mean(tf.stack(approximation_per_iteration + approx), axis=0)
delta = new_mean - tf.reduce_mean(tf.stack(approximation_per_iteration), axis=0)
-is_converged = np.all(np.abs(delta) / (np.abs(new_mean) + HESSIAN_EPS) < JACOBIANS_COMP_TOLERANCE)
+is_converged = np.all(np.abs(delta) / (np.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE)
if is_converged:
approximation_per_iteration.append(approx)
break
@@ -18,7 +18,7 @@
from torch import autograd
from tqdm import tqdm

-from model_compression_toolkit.constants import MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_NUM_ITERATIONS
+from model_compression_toolkit.constants import MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_NUM_ITERATIONS
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianInfoGranularity
from model_compression_toolkit.core.pytorch.hessian.pytorch_model_gradients import PytorchModelGradients
@@ -77,52 +77,52 @@ def compute(self) -> List[float]:
# First, we need to unfold all outputs that are given as list, to extract the actual output tensors
output = self.concat_tensors(output_tensors)

-ipts_jac_trace_approx = []
+ipts_hessian_trace_approx = []
for ipt in tqdm(model_grads_net.interest_points_tensors): # Per Interest point activation tensor
-trace_jv = []
+trace_hv = []
for j in range(self.num_iterations_for_approximation): # Approximation iterations
# Getting a random vector with normal distribution
v = torch.randn(output.shape, device=device)
f_v = torch.sum(v * output)

# Computing the hessian trace approximation by getting the gradient of (output * v)
-jac_v = autograd.grad(outputs=f_v,
+hess_v = autograd.grad(outputs=f_v,
inputs=ipt,
retain_graph=True,
allow_unused=True)[0]
-if jac_v is None:
+if hess_v is None:
# In case we have an output node, which is an interest point, but it is not differentiable,
# we still want to set some weight for it. For this, we need to add this dummy tensor to the ipt
-# jacobian traces list.
-trace_jv.append(torch.tensor([0.0],
+# Hessian traces list.
+trace_hv.append(torch.tensor([0.0],
requires_grad=True,
device=device))
break
-jac_v = torch.reshape(jac_v, [jac_v.shape[0], -1])
-jac_trace_approx = torch.mean(torch.sum(torch.pow(jac_v, 2.0)))
-
-# If the change to the mean Jacobian approximation is insignificant we stop the calculation
-if j > MIN_JACOBIANS_ITER:
-    new_mean = torch.mean(torch.stack([jac_trace_approx, *trace_jv]))
-    delta = new_mean - torch.mean(torch.stack(trace_jv))
-    if torch.abs(delta) / (torch.abs(new_mean) + 1e-6) < JACOBIANS_COMP_TOLERANCE:
-        trace_jv.append(jac_trace_approx)
+hess_v = torch.reshape(hess_v, [hess_v.shape[0], -1])
+hessian_trace_approx = torch.mean(torch.sum(torch.pow(hess_v, 2.0)))
+
+# If the change to the mean Hessian approximation is insignificant we stop the calculation
+if j > MIN_HESSIAN_ITER:
+    new_mean = torch.mean(torch.stack([hessian_trace_approx, *trace_hv]))
+    delta = new_mean - torch.mean(torch.stack(trace_hv))
+    if torch.abs(delta) / (torch.abs(new_mean) + 1e-6) < HESSIAN_COMP_TOLERANCE:
+        trace_hv.append(hessian_trace_approx)
break

-trace_jv.append(jac_trace_approx)
-ipts_jac_trace_approx.append(2 * torch.mean(torch.stack(trace_jv)) / output.shape[
-    -1]) # Get averaged jacobian trace approximation
+trace_hv.append(hessian_trace_approx)
+ipts_hessian_trace_approx.append(2 * torch.mean(torch.stack(trace_hv)) / output.shape[
+    -1]) # Get averaged Hessian trace approximation

# If a node has multiple outputs, it means that multiple approximations were computed
# (one per output since granularity is per-tensor). In this case we average the approximations.
-if len(ipts_jac_trace_approx)>1:
+if len(ipts_hessian_trace_approx) > 1:
# Stack tensors and compute the average
-ipts_jac_trace_approx = [torch.stack(ipts_jac_trace_approx).mean()]
+ipts_hessian_trace_approx = [torch.stack(ipts_hessian_trace_approx).mean()]

-ipts_jac_trace_approx = torch_tensor_to_numpy(torch.Tensor(
-    ipts_jac_trace_approx)) # Just to get one tensor instead of list of tensors with single element
+ipts_hessian_trace_approx = torch_tensor_to_numpy(torch.Tensor(
+    ipts_hessian_trace_approx)) # Just to get one tensor instead of list of tensors with single element

-return ipts_jac_trace_approx.tolist()
+return ipts_hessian_trace_approx.tolist()

else:
Logger.error(f"{self.hessian_request.granularity} is not supported for Pytorch activation hessian's trace approx calculator")
@@ -24,7 +24,7 @@
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE, HESSIAN_EPS
+from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_EPS


class WeightsTraceHessianCalculatorPytorch(TraceHessianCalculatorPytorch):
@@ -112,10 +112,10 @@ def compute(self) -> np.ndarray:
if len(shape_channel_axis) > 0:
approx = torch.sum(approx, dim=shape_channel_axis)

-if j > MIN_JACOBIANS_ITER:
+if j > MIN_HESSIAN_ITER:
new_mean = (torch.sum(torch.stack(approximation_per_iteration), dim=0) + approx)/(j+1)
delta = new_mean - torch.mean(torch.stack(approximation_per_iteration), dim=0)
-converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < JACOBIANS_COMP_TOLERANCE
+converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE
if torch.all(converged_tensor):
break

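Unlike the scalar activation case, the weights calculator keeps a per-element convergence mask and stops only when every entry passes the test. A small torch sketch of that masked check (the values are illustrative and HESSIAN_EPS is an assumed stand-in):

```python
import torch

HESSIAN_COMP_TOLERANCE = 1e-3  # as in this diff
HESSIAN_EPS = 1e-6             # assumed stand-in for the MCT constant

new_mean = torch.tensor([0.500, 0.301, 0.250])  # running per-channel means
delta = torch.tensor([1e-5, 2e-4, 5e-2])        # change since the previous iteration

converged_tensor = torch.abs(delta) / (torch.abs(new_mean) + HESSIAN_EPS) < HESSIAN_COMP_TOLERANCE
print(converged_tensor)             # tensor([ True,  True, False])
print(torch.all(converged_tensor))  # tensor(False) -> keep iterating
```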
model_compression_toolkit/gptq/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================

-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GradientPTQConfigV2, GPTQHessianWeightsConfig
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, RoundingType, GradientPTQConfigV2, GPTQHessianScoresConfig
from model_compression_toolkit.gptq.keras.quantization_facade import keras_gradient_post_training_quantization_experimental
from model_compression_toolkit.gptq.keras.quantization_facade import get_keras_gptq_config
from model_compression_toolkit.gptq.pytorch.quantization_facade import pytorch_gradient_post_training_quantization_experimental
Expand Down