{
    "input_info": [
        {
            "sample_size": [1, 384],
            "type": "long"
        },
        {
            "sample_size": [1, 384],
            "type": "long"
        },
        {
            "sample_size": [1, 384],
            "type": "long"
        }
    ],
    "compression":
    [
        {
            "algorithm": "movement_sparsity",
            "params": {
                "schedule": "threshold_polynomial_decay",
                "power": 3,
                "init_importance_threshold": 0.0,
                "final_importance_threshold": 0.1,
                "warmup_start_epoch": 1,
                "warmup_end_epoch": 10,
                "steps_per_epoch": 5533,
                "importance_regularization_factor": 0.03,
                "update_per_optimizer_step": true
            },
            "sparse_structure_by_scopes": [
                ["block", [32, 32], "{re}.*BertAttention*"],
                ["per_dim", [0], "{re}.*BertIntermediate*"],
                ["per_dim", [1], "{re}.*BertOutput*"]
            ],
            "ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*qa_outputs*"]
        },
        {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 32,
                    "type": "percentile",
                    "params":
                    {
                        "min_percentile": 0.01,
                        "max_percentile": 99.99
                    }
                },
                "batchnorm_adaptation": {
                    "num_bn_adaptation_samples": 200
                }
            },
            "activations":
            {
                "mode": "symmetric"
            },
            "weights":
            {
                "mode": "symmetric",
                "signed": true,
                "per_channel": false
            }
        }
    ]
}