advance: null
approach: post_training_dynamic_quant
bf16_ops_list: []
calib_iteration: 1
calib_sampling_size: 100
framework: pytorch
op:
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - pre_classifier
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - classifier
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax