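# Post-training dynamic quantization configuration for a DistilBERT sequence
# classification model. The layout appears to follow the per-operator
# "best configuration" format written by Intel Neural Compressor: each
# !!python/tuple key under `op` names a (module path, module type) pair, and
# every entry pins int8 symmetric per-channel weights (bit: 7.0, i.e. a
# reduced 7-bit range) and uint8 asymmetric per-tensor activations, both
# observed with the minmax algorithm.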
advance: null
approach: post_training_dynamic_quant
bf16_ops_list: []
calib_iteration: 1
calib_sampling_size: 100
framework: pytorch
op:
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.0.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.1.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.2.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.3.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.4.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.q_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.k_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.v_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.attention.out_lin
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.ffn.lin1
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - distilbert.transformer.layer.5.ffn.lin2
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - pre_classifier
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
  ? !!python/tuple
  - classifier
  - Linear
  : weight:
      dtype: int8
      scheme: sym
      granularity: per_channel
      algorithm: minmax
      bit: 7.0
    activation:
      dtype: uint8
      scheme: asym
      granularity: per_tensor
      algorithm: minmax
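# The entries above amount to standard PyTorch dynamic quantization of every
# Linear module in the model. A minimal sketch of how a comparable int8
# dynamic quantization could be reproduced directly with PyTorch's built-in
# API is given below; "distilbert-base-uncased" is only a stand-in for
# whichever fine-tuned DistilBERT checkpoint this configuration was derived
# from, and is not taken from this file:
#
#   import torch
#   from transformers import AutoModelForSequenceClassification
#
#   model = AutoModelForSequenceClassification.from_pretrained(
#       "distilbert-base-uncased")
#   quantized = torch.quantization.quantize_dynamic(
#       model, {torch.nn.Linear}, dtype=torch.qint8)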