{
  "compression": {
    "algorithm": "quantization",
    "export_to_onnx_standard_ops": false,
    "ignored_scopes": [
      "{re}.*Embedding*",
      "{re}.*__add___*",
      "{re}.*layer_norm_*"
    ],
    "initializer": {
      "batchnorm_adaptation": {
        "num_bn_adaptation_samples": 0
      },
      "range": {
        "num_init_samples": 8,
        "type": "mean_min_max"
      }
    },
    "overflow_fix": "disable",
    "scope_overrides": {
      "activations": {
        "activations": {
          "mode": "symmetric"
        },
        "weights": {
          "mode": "symmetric"
        }
      }
    }
  },
  "input_info": [
    {
      "keyword": "input_ids",
      "sample_size": [
        8,
        1024
      ],
      "type": "long"
    },
    {
      "keyword": "attention_mask",
      "sample_size": [
        8,
        1024
      ],
      "type": "long"
    }
  ],
  "log_dir": "/data1/vchua/temp/ov-opt-350m-8bit-kv-cache",
  "optimum_version": "1.8.8",
  "save_onnx_model": false,
  "transformers_version": "4.30.2"
}
|