// bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt/nncf_bert_squad_sparsity.json
{
    "input_info": [
        {
            "sample_size": [1, 384],
            "type": "long"
        },
        {
            "sample_size": [1, 384],
            "type": "long"
        },
        {
            "sample_size": [1, 384],
            "type": "long"
        }
    ],
    "compression":
    [
        // {
        //     "algorithm": "knowledge_distillation",
        //     "type": "softmax"
        // },
        {
            "algorithm": "magnitude_sparsity",
            "sparsity_init": 0.5,
            "params": {
                "schedule": "multistep",
                "multistep_steps": [
                    2,
                    4,
                    6,
                    8
                ],
                "multistep_sparsity_levels": [
                    0.5,
                    0.5,
                    0.5,
                    0.5,
                    0.5
                ]
            },
            "ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*qa_outputs*"]
        },
        {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 32,
                    "type": "percentile",
                    "params":
                    {
                        "min_percentile": 0.01,
                        "max_percentile": 99.99
                    }
                },
                "batchnorm_adaptation": {
                    "num_bn_adaptation_samples": 200
                }
            },
            "activations":
            {
                "mode": "symmetric"
            },
            "weights":
            {
                "mode": "symmetric",
                "signed": true,
                "per_channel": false
            }
        }
    ]
}