examples/lsq/resnet18_a2w3_imagenet.yaml

# Experiment name
name: ResNet18_ImageNet_a2w3

# Dataset loader
dataloader:
  # Dataset to train/validate (choices: imagenet, cifar10)
  dataset: imagenet
  # Number of categories in the specified dataset (choices: 1000, 10)
  num_classes: 1000
  # Path to dataset directory
  path: /localhome/fair/Dataset/imagenet
  # Size of mini-batch
  batch_size: 256
  # Portion of training dataset to set aside for validation (range: [0, 1))
  val_split: 0.

resume:
  # Path to a checkpoint to be loaded. Leave blank to skip
  path:
  # Resume model parameters only
  lean: false

#============================ Model ============================================

# Supported model architecture
# choices:
#   ImageNet:
#     resnet18, resnet34, resnet50, resnet101, resnet152
#   CIFAR10:
#     resnet20, resnet32, resnet44, resnet56, resnet110, resnet1202
arch: resnet18

# Use pre-trained model
pre_trained: true

#============================ Quantization =====================================

quan:
  act: # (default for all layers)
    # Quantizer type (choices: lsq)
    mode: lsq
    # Bit width of quantized activation
    bit: 2
    # Each output channel uses its own scaling factor
    per_channel: false
    # Whether to use symmetric quantization
    symmetric: false
    # Quantize all the numbers to non-negative
    all_positive: true
  weight: # (default for all layers)
    # Quantizer type (choices: lsq)
    mode: lsq
    # Bit width of quantized weight
    bit: 3
    # Each output channel uses its own scaling factor
    per_channel: false
    # Whether to use symmetric quantization
    symmetric: true
    # Whether to quantize all the numbers to non-negative
    all_positive: false
  excepts:
    # Specify quantized bit width for some layers, like this:
    conv1:
      act:
        bit:
        all_positive: false
      weight:
        bit:
    fc:
      act:
        bit:
      weight:
        bit:

#============================ Training / Evaluation ============================

# Evaluate the model without training
# If this field is true, all the bellowing options will be ignored
eval: false

epochs: 120

optimizer:
  learning_rate: 0.01
  momentum: 0.9
  weight_decay: 0.0001

# Learning rate scheduler
lr_scheduler:
  # Update learning rate per batch or epoch
  update_per_batch: true

  # Uncomment one of bellowing options to activate a learning rate scheduling

  # Fixed learning rate
  # mode: fixed

  # Step decay
  # mode: step
  # step_size: 30
  # gamma: 0.1

  # Multi-step decay
  mode: multi_step
  milestones: [30, 60, 90]
  gamma: 0.1

  # Exponential decay
  # mode: exp
  # gamma: 0.95

  # Cosine annealing
  # mode: cos
  # lr_min: 0
  # cycle: 0.95

  # Cosine annealing with warm restarts
  # mode: cos_warm_restarts
  # lr_min: 0
  # cycle: 5
  # cycle_scale: 2
  # amp_scale: 0.5