{
  "compression": [
    {
      "algorithm": "magnitude_sparsity",
      "ignored_scopes": [
        "{re}.*Embedding",
        "{re}.*lm_head*"
      ],
      "params": {
        "sparsity_level_setting_mode": "layer-wise"
      },
      "sparsity_init": 0.85
    },
    {
      "algorithm": "quantization",
      "export_to_onnx_standard_ops": false,
      "ignored_scopes": [
        "{re}.*Embedding*",
        "{re}.*__add___*",
        "{re}.*layer_norm_*"
      ],
      "initializer": {
        "batchnorm_adaptation": {
          "num_bn_adaptation_samples": 0
        },
        "range": {
          "num_init_samples": 8,
          "type": "mean_min_max"
        }
      },
      "overflow_fix": "disable",
      "scope_overrides": {
        "activations": {
          "activations": {
            "mode": "symmetric"
          },
          "weights": {
            "mode": "symmetric"
          }
        }
      }
    }
  ],
  "input_info": [
    {
      "keyword": "input_ids",
      "sample_size": [
        8,
        1024
      ],
      "type": "long"
    },
    {
      "keyword": "attention_mask",
      "sample_size": [
        8,
        1024
      ],
      "type": "long"
    }
  ],
  "log_dir": "/data1/vchua/temp/ov-opt-350m-8bit-85pc-sparse-kv-cache",
  "optimum_version": "1.8.8",
  "save_onnx_model": false,
  "transformers_version": "4.30.2"
}