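# Quantization configuration for a DistilBERT sequence-classification model,
# produced by post-training dynamic quantization with the PyTorch backend.
# The layout matches an Intel Neural Compressor tuning dump (an inference
# from the field names and !!python/tuple keys, not stated in the file itself).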
advance: null
approach: post_training_dynamic_quant
bf16_ops_list: []
calib_iteration: 1
calib_sampling_size: 100
framework: pytorch
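# Global settings end here. Dynamic quantization computes activation scales at
# runtime, so the calibration fields above (calib_iteration,
# calib_sampling_size) are presumably unused for this approach and kept only
# for format consistency (an assumption). Note that the op keys below use the
# Python-specific !!python/tuple tag, so the file must be parsed with a loader
# that supports it (e.g. yaml.UnsafeLoader in PyYAML); yaml.safe_load will
# reject it.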
op:
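# Each key is a (module name, module type) pair. Every Linear module below
# shares the same recipe: int8 weights (symmetric, per-channel, minmax
# observer, bit: 7.0 as recorded) and uint8 activations (asymmetric,
# per-tensor, minmax).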
? !!python/tuple
- distilbert.transformer.layer.0.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.0.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.0.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.0.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.0.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.0.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.1.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.2.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.3.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.4.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.attention.q_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.attention.k_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.attention.v_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.attention.out_lin
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.ffn.lin1
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- distilbert.transformer.layer.5.ffn.lin2
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
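# Classification head: the two remaining Linear layers (pre_classifier and
# classifier) are quantized with the same recipe as the transformer blocks.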
? !!python/tuple
- pre_classifier
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax
? !!python/tuple
- classifier
- Linear
: weight:
dtype: int8
scheme: sym
granularity: per_channel
algorithm: minmax
bit: 7.0
activation:
dtype: uint8
scheme: asym
granularity: per_tensor
algorithm: minmax