{
  "compression": {
    "algorithm": "quantization",
    "export_to_onnx_standard_ops": false,
    "ignored_scopes": [
      "{re}.*Embedding.*",
      "{re}.*add___.*",
      "{re}.*norm.*"
    ],
    "initializer": {
      "batchnorm_adaptation": {
        "num_bn_adaptation_samples": 0
      },
      "range": {
        "num_init_samples": 16,
        "type": "min_max"
      }
    },
    "overflow_fix": "disable",
    "preset": "performance"
  },
  "input_info": [
    {
      "keyword": "input_ids",
      "sample_size": [
        1,
        32
      ],
      "type": "long"
    },
    {
      "keyword": "attention_mask",
      "sample_size": [
        1,
        32
      ],
      "type": "long"
    }
  ],
  "log_dir": "./logs/llama-2-13b-w8a8-unstructured50/",
  "optimum_version": "1.13.2",
  "save_onnx_model": false,
  "transformers_version": "4.34.0"
}