vchua
commited on
Commit
•
4088e0c
1
Parent(s):
5ceb4bf
Add collaterals
Browse files- all_results.json +5 -0
- compressed_graph.dot +0 -0
- eval_predictions.json +0 -0
- eval_results.json +5 -0
- ir/log.bapp.tput.10k-iter +63 -0
- ir/sparsity_structures.csv +73 -0
- ir/sparsity_structures.md +74 -0
- ir/sparsity_structures.pkl +0 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.bin +3 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.mapping +0 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.onnx +3 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.xml +0 -0
- nncf-mvmt-p3.json +67 -0
- original_graph.dot +0 -0
all_results.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_exact_match": 83.53831598864711,
|
3 |
+
"eval_f1": 89.99254134200535,
|
4 |
+
"eval_samples": 10784
|
5 |
+
}
|
compressed_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_predictions.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_results.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_exact_match": 83.53831598864711,
|
3 |
+
"eval_f1": 89.99254134200535,
|
4 |
+
"eval_samples": 10784
|
5 |
+
}
|
ir/log.bapp.tput.10k-iter
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[Step 1/11] Parsing and validating input arguments
|
2 |
+
[ WARNING ] -nstreams default value is determined automatically for a device. Although the automatic selection usually provides a reasonable performance, but it still may be non-optimal for some cases, for more information look at README.
|
3 |
+
[Step 2/11] Loading OpenVINO
|
4 |
+
[ WARNING ] PerformanceMode was not explicitly specified in command line. Device CPU performance hint will be set to THROUGHPUT.
|
5 |
+
[ INFO ] OpenVINO:
|
6 |
+
API version............. 2022.1.0-6935-7cd3c8e86e9
|
7 |
+
[ INFO ] Device info
|
8 |
+
CPU
|
9 |
+
openvino_intel_cpu_plugin version 2022.1
|
10 |
+
Build................... 2022.1.0-6935-7cd3c8e86e9
|
11 |
+
|
12 |
+
[Step 3/11] Setting device configuration
|
13 |
+
[ WARNING ] -nstreams default value is determined automatically for CPU device. Although the automatic selection usually provides a reasonable performance, but it still may be non-optimal for some cases, for more information look at README.
|
14 |
+
[Step 4/11] Reading network files
|
15 |
+
[ INFO ] Read model took 64.90 ms
|
16 |
+
[Step 5/11] Resizing network to match image sizes and given batch
|
17 |
+
[ WARNING ] Batch dimension is not specified for input 'input.0'. The first dimension will be interpreted as batch size.
|
18 |
+
[ WARNING ] Batch dimension is not specified for input 'input.1'. The first dimension will be interpreted as batch size.
|
19 |
+
[ WARNING ] Batch dimension is not specified for input 'input.2'. The first dimension will be interpreted as batch size.
|
20 |
+
[ INFO ] Reshaping model: 'input.0': {1,384}, 'input.1': {1,384}, 'input.2': {1,384}
|
21 |
+
[ INFO ] Reshape model took 0.05 ms
|
22 |
+
[ INFO ] Network batch size: 1
|
23 |
+
[Step 6/11] Configuring input of the model
|
24 |
+
[ INFO ] Model input 'input.0' precision i64, dimensions ([N,...]): 1 384
|
25 |
+
[ INFO ] Model input 'input.1' precision i64, dimensions ([N,...]): 1 384
|
26 |
+
[ INFO ] Model input 'input.2' precision i64, dimensions ([N,...]): 1 384
|
27 |
+
[ INFO ] Model output 'output.0' precision f32, dimensions ([...]): 1 384
|
28 |
+
[ INFO ] Model output 'output.1' precision f32, dimensions ([...]): 1 384
|
29 |
+
[Step 7/11] Loading the model to the device
|
30 |
+
[ INFO ] Compile model took 788.57 ms
|
31 |
+
[Step 8/11] Querying optimal runtime parameters
|
32 |
+
[ INFO ] DEVICE: CPU
|
33 |
+
[ INFO ] AVAILABLE_DEVICES , ['']
|
34 |
+
[ INFO ] RANGE_FOR_ASYNC_INFER_REQUESTS , (1, 1, 1)
|
35 |
+
[ INFO ] RANGE_FOR_STREAMS , (1, 152)
|
36 |
+
[ INFO ] FULL_DEVICE_NAME , Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz
|
37 |
+
[ INFO ] OPTIMIZATION_CAPABILITIES , ['WINOGRAD', 'FP32', 'FP16', 'INT8', 'BIN', 'EXPORT_IMPORT']
|
38 |
+
[ INFO ] CACHE_DIR ,
|
39 |
+
[ INFO ] NUM_STREAMS , 19
|
40 |
+
[ INFO ] INFERENCE_NUM_THREADS , 0
|
41 |
+
[ INFO ] PERF_COUNT , False
|
42 |
+
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS , 0
|
43 |
+
[Step 9/11] Creating infer requests and preparing input data
|
44 |
+
[ INFO ] Create 19 infer requests took 1.50 ms
|
45 |
+
[ WARNING ] No input files were given for input 'input.0'!. This input will be filled with random values!
|
46 |
+
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
|
47 |
+
[ WARNING ] No input files were given for input 'input.2'!. This input will be filled with random values!
|
48 |
+
[ INFO ] Fill input 'input.0' with random values
|
49 |
+
[ INFO ] Fill input 'input.1' with random values
|
50 |
+
[ INFO ] Fill input 'input.2' with random values
|
51 |
+
[ WARNING ] Number of iterations was aligned by request number from 10000 to 10013 using number of requests 19
|
52 |
+
[Step 10/11] Measuring performance (Start inference asynchronously, 19 inference requests using 19 streams for CPU, inference only: True, limits: 10013 iterations)
|
53 |
+
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
|
54 |
+
[ INFO ] First inference took 37.43 ms
|
55 |
+
[Step 11/11] Dumping statistics report
|
56 |
+
Count: 10013 iterations
|
57 |
+
Duration: 24302.14 ms
|
58 |
+
Latency:
|
59 |
+
Median: 43.95 ms
|
60 |
+
AVG: 45.97 ms
|
61 |
+
MIN: 39.02 ms
|
62 |
+
MAX: 126.30 ms
|
63 |
+
Throughput: 412.02 FPS
|
ir/sparsity_structures.csv
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pt_module_name,block_id,orig_w_shape,final_w_shape,orig_b_shape,final_b_shape,prune_by,id_to_keep,head_id_to_keep,nncf_graph_node
|
2 |
+
nncf_module.bert.encoder.layer.0.attention.output.dense,0,"(768, 768)","(768, 192)","(768,)","(768,)",group of 64 cols,See pkl,"[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
3 |
+
nncf_module.bert.encoder.layer.0.attention.self.query,0,"(768, 768)","(192, 768)","(768,)","(192,)",group of 64 rows,See pkl,"[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
4 |
+
nncf_module.bert.encoder.layer.0.attention.self.value,0,"(768, 768)","(192, 768)","(768,)","(192,)",group of 64 rows,See pkl,"[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
5 |
+
nncf_module.bert.encoder.layer.0.attention.self.key,0,"(768, 768)","(192, 768)","(768,)","(192,)",group of 64 rows,See pkl,"[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
6 |
+
nncf_module.bert.encoder.layer.0.intermediate.dense,1,"(3072, 768)","(2094, 768)","(3072,)","(2094,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
7 |
+
nncf_module.bert.encoder.layer.0.output.dense,1,"(768, 3072)","(768, 2094)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
8 |
+
nncf_module.bert.encoder.layer.1.attention.self.key,2,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[1, 4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
9 |
+
nncf_module.bert.encoder.layer.1.attention.self.value,2,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[1, 4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
10 |
+
nncf_module.bert.encoder.layer.1.attention.output.dense,2,"(768, 768)","(768, 320)","(768,)","(768,)",group of 64 cols,See pkl,"[1, 4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
11 |
+
nncf_module.bert.encoder.layer.1.attention.self.query,2,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[1, 4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
12 |
+
nncf_module.bert.encoder.layer.1.output.dense,3,"(768, 3072)","(768, 2062)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
13 |
+
nncf_module.bert.encoder.layer.1.intermediate.dense,3,"(3072, 768)","(2062, 768)","(3072,)","(2062,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
14 |
+
nncf_module.bert.encoder.layer.2.attention.self.key,4,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
15 |
+
nncf_module.bert.encoder.layer.2.attention.output.dense,4,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
16 |
+
nncf_module.bert.encoder.layer.2.attention.self.value,4,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
17 |
+
nncf_module.bert.encoder.layer.2.attention.self.query,4,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
18 |
+
nncf_module.bert.encoder.layer.2.output.dense,5,"(768, 3072)","(768, 2229)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
19 |
+
nncf_module.bert.encoder.layer.2.intermediate.dense,5,"(3072, 768)","(2229, 768)","(3072,)","(2229,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
20 |
+
nncf_module.bert.encoder.layer.3.attention.self.query,6,"(768, 768)","(448, 768)","(768,)","(448,)",group of 64 rows,See pkl,"[0, 1, 5, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
21 |
+
nncf_module.bert.encoder.layer.3.attention.output.dense,6,"(768, 768)","(768, 448)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 5, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
22 |
+
nncf_module.bert.encoder.layer.3.attention.self.value,6,"(768, 768)","(448, 768)","(768,)","(448,)",group of 64 rows,See pkl,"[0, 1, 5, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
23 |
+
nncf_module.bert.encoder.layer.3.attention.self.key,6,"(768, 768)","(448, 768)","(768,)","(448,)",group of 64 rows,See pkl,"[0, 1, 5, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
24 |
+
nncf_module.bert.encoder.layer.3.intermediate.dense,7,"(3072, 768)","(2155, 768)","(3072,)","(2155,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
25 |
+
nncf_module.bert.encoder.layer.3.output.dense,7,"(768, 3072)","(768, 2155)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
26 |
+
nncf_module.bert.encoder.layer.4.attention.output.dense,8,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
27 |
+
nncf_module.bert.encoder.layer.4.attention.self.query,8,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
28 |
+
nncf_module.bert.encoder.layer.4.attention.self.key,8,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
29 |
+
nncf_module.bert.encoder.layer.4.attention.self.value,8,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
30 |
+
nncf_module.bert.encoder.layer.4.output.dense,9,"(768, 3072)","(768, 1973)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
31 |
+
nncf_module.bert.encoder.layer.4.intermediate.dense,9,"(3072, 768)","(1973, 768)","(3072,)","(1973,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
32 |
+
nncf_module.bert.encoder.layer.5.attention.self.key,10,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
33 |
+
nncf_module.bert.encoder.layer.5.attention.output.dense,10,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
34 |
+
nncf_module.bert.encoder.layer.5.attention.self.value,10,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
35 |
+
nncf_module.bert.encoder.layer.5.attention.self.query,10,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
36 |
+
nncf_module.bert.encoder.layer.5.intermediate.dense,11,"(3072, 768)","(1947, 768)","(3072,)","(1947,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
37 |
+
nncf_module.bert.encoder.layer.5.output.dense,11,"(768, 3072)","(768, 1947)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
38 |
+
nncf_module.bert.encoder.layer.6.attention.self.value,12,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
39 |
+
nncf_module.bert.encoder.layer.6.attention.output.dense,12,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
40 |
+
nncf_module.bert.encoder.layer.6.attention.self.query,12,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
41 |
+
nncf_module.bert.encoder.layer.6.attention.self.key,12,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
42 |
+
nncf_module.bert.encoder.layer.6.output.dense,13,"(768, 3072)","(768, 1539)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
43 |
+
nncf_module.bert.encoder.layer.6.intermediate.dense,13,"(3072, 768)","(1539, 768)","(3072,)","(1539,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
44 |
+
nncf_module.bert.encoder.layer.7.attention.self.value,14,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
45 |
+
nncf_module.bert.encoder.layer.7.attention.self.key,14,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
46 |
+
nncf_module.bert.encoder.layer.7.attention.self.query,14,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
47 |
+
nncf_module.bert.encoder.layer.7.attention.output.dense,14,"(768, 768)","(768, 768)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
48 |
+
nncf_module.bert.encoder.layer.7.output.dense,15,"(768, 3072)","(768, 1151)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
49 |
+
nncf_module.bert.encoder.layer.7.intermediate.dense,15,"(3072, 768)","(1151, 768)","(3072,)","(1151,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
50 |
+
nncf_module.bert.encoder.layer.8.attention.output.dense,16,"(768, 768)","(768, 384)","(768,)","(768,)",group of 64 cols,See pkl,"[1, 2, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
51 |
+
nncf_module.bert.encoder.layer.8.attention.self.value,16,"(768, 768)","(384, 768)","(768,)","(384,)",group of 64 rows,See pkl,"[1, 2, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
52 |
+
nncf_module.bert.encoder.layer.8.attention.self.query,16,"(768, 768)","(384, 768)","(768,)","(384,)",group of 64 rows,See pkl,"[1, 2, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
53 |
+
nncf_module.bert.encoder.layer.8.attention.self.key,16,"(768, 768)","(384, 768)","(768,)","(384,)",group of 64 rows,See pkl,"[1, 2, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
54 |
+
nncf_module.bert.encoder.layer.8.output.dense,17,"(768, 3072)","(768, 714)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
55 |
+
nncf_module.bert.encoder.layer.8.intermediate.dense,17,"(3072, 768)","(714, 768)","(3072,)","(714,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
56 |
+
nncf_module.bert.encoder.layer.9.attention.self.value,18,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[0, 2, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
57 |
+
nncf_module.bert.encoder.layer.9.attention.self.query,18,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[0, 2, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
58 |
+
nncf_module.bert.encoder.layer.9.attention.self.key,18,"(768, 768)","(320, 768)","(768,)","(320,)",group of 64 rows,See pkl,"[0, 2, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
59 |
+
nncf_module.bert.encoder.layer.9.attention.output.dense,18,"(768, 768)","(768, 320)","(768,)","(768,)",group of 64 cols,See pkl,"[0, 2, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
60 |
+
nncf_module.bert.encoder.layer.9.output.dense,19,"(768, 3072)","(768, 266)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
61 |
+
nncf_module.bert.encoder.layer.9.intermediate.dense,19,"(3072, 768)","(266, 768)","(3072,)","(266,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
62 |
+
nncf_module.bert.encoder.layer.10.attention.self.query,20,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[3, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
63 |
+
nncf_module.bert.encoder.layer.10.attention.self.value,20,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[3, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
64 |
+
nncf_module.bert.encoder.layer.10.attention.output.dense,20,"(768, 768)","(768, 256)","(768,)","(768,)",group of 64 cols,See pkl,"[3, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
65 |
+
nncf_module.bert.encoder.layer.10.attention.self.key,20,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[3, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
66 |
+
nncf_module.bert.encoder.layer.10.output.dense,21,"(768, 3072)","(768, 297)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
67 |
+
nncf_module.bert.encoder.layer.10.intermediate.dense,21,"(3072, 768)","(297, 768)","(3072,)","(297,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
68 |
+
nncf_module.bert.encoder.layer.11.attention.output.dense,22,"(768, 768)","(768, 256)","(768,)","(768,)",group of 64 cols,See pkl,"[1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
69 |
+
nncf_module.bert.encoder.layer.11.attention.self.value,22,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
70 |
+
nncf_module.bert.encoder.layer.11.attention.self.key,22,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
71 |
+
nncf_module.bert.encoder.layer.11.attention.self.query,22,"(768, 768)","(256, 768)","(768,)","(256,)",group of 64 rows,See pkl,"[1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
72 |
+
nncf_module.bert.encoder.layer.11.intermediate.dense,23,"(3072, 768)","(322, 768)","(3072,)","(322,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
73 |
+
nncf_module.bert.encoder.layer.11.output.dense,23,"(768, 3072)","(768, 322)","(768,)","(768,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
ir/sparsity_structures.md
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
| | pt_module_name | block_id | orig_w_shape | final_w_shape | orig_b_shape | final_b_shape | prune_by | id_to_keep | head_id_to_keep | nncf_graph_node |
|
2 |
+
|---:|:---------------------------------------------------------|-----------:|:---------------|:----------------|:---------------|:----------------|:-----------------|:-------------|:---------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
3 |
+
| 0 | nncf_module.bert.encoder.layer.0.attention.output.dense | 0 | (768, 768) | (768, 192) | (768,) | (768,) | group of 64 cols | See pkl | [3, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
4 |
+
| 1 | nncf_module.bert.encoder.layer.0.attention.self.query | 0 | (768, 768) | (192, 768) | (768,) | (192,) | group of 64 rows | See pkl | [3, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
5 |
+
| 2 | nncf_module.bert.encoder.layer.0.attention.self.value | 0 | (768, 768) | (192, 768) | (768,) | (192,) | group of 64 rows | See pkl | [3, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
6 |
+
| 3 | nncf_module.bert.encoder.layer.0.attention.self.key | 0 | (768, 768) | (192, 768) | (768,) | (192,) | group of 64 rows | See pkl | [3, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
7 |
+
| 4 | nncf_module.bert.encoder.layer.0.intermediate.dense | 1 | (3072, 768) | (2094, 768) | (3072,) | (2094,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
8 |
+
| 5 | nncf_module.bert.encoder.layer.0.output.dense | 1 | (768, 3072) | (768, 2094) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
9 |
+
| 6 | nncf_module.bert.encoder.layer.1.attention.self.key | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
10 |
+
| 7 | nncf_module.bert.encoder.layer.1.attention.self.value | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
11 |
+
| 8 | nncf_module.bert.encoder.layer.1.attention.output.dense | 2 | (768, 768) | (768, 320) | (768,) | (768,) | group of 64 cols | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
12 |
+
| 9 | nncf_module.bert.encoder.layer.1.attention.self.query | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
13 |
+
| 10 | nncf_module.bert.encoder.layer.1.output.dense | 3 | (768, 3072) | (768, 2062) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
14 |
+
| 11 | nncf_module.bert.encoder.layer.1.intermediate.dense | 3 | (3072, 768) | (2062, 768) | (3072,) | (2062,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
15 |
+
| 12 | nncf_module.bert.encoder.layer.2.attention.self.key | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
16 |
+
| 13 | nncf_module.bert.encoder.layer.2.attention.output.dense | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
17 |
+
| 14 | nncf_module.bert.encoder.layer.2.attention.self.value | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
18 |
+
| 15 | nncf_module.bert.encoder.layer.2.attention.self.query | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
19 |
+
| 16 | nncf_module.bert.encoder.layer.2.output.dense | 5 | (768, 3072) | (768, 2229) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
20 |
+
| 17 | nncf_module.bert.encoder.layer.2.intermediate.dense | 5 | (3072, 768) | (2229, 768) | (3072,) | (2229,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
21 |
+
| 18 | nncf_module.bert.encoder.layer.3.attention.self.query | 6 | (768, 768) | (448, 768) | (768,) | (448,) | group of 64 rows | See pkl | [0, 1, 5, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
22 |
+
| 19 | nncf_module.bert.encoder.layer.3.attention.output.dense | 6 | (768, 768) | (768, 448) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 5, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
23 |
+
| 20 | nncf_module.bert.encoder.layer.3.attention.self.value | 6 | (768, 768) | (448, 768) | (768,) | (448,) | group of 64 rows | See pkl | [0, 1, 5, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
24 |
+
| 21 | nncf_module.bert.encoder.layer.3.attention.self.key | 6 | (768, 768) | (448, 768) | (768,) | (448,) | group of 64 rows | See pkl | [0, 1, 5, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
25 |
+
| 22 | nncf_module.bert.encoder.layer.3.intermediate.dense | 7 | (3072, 768) | (2155, 768) | (3072,) | (2155,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
26 |
+
| 23 | nncf_module.bert.encoder.layer.3.output.dense | 7 | (768, 3072) | (768, 2155) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
27 |
+
| 24 | nncf_module.bert.encoder.layer.4.attention.output.dense | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
28 |
+
| 25 | nncf_module.bert.encoder.layer.4.attention.self.query | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
29 |
+
| 26 | nncf_module.bert.encoder.layer.4.attention.self.key | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
30 |
+
| 27 | nncf_module.bert.encoder.layer.4.attention.self.value | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
31 |
+
| 28 | nncf_module.bert.encoder.layer.4.output.dense | 9 | (768, 3072) | (768, 1973) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
32 |
+
| 29 | nncf_module.bert.encoder.layer.4.intermediate.dense | 9 | (3072, 768) | (1973, 768) | (3072,) | (1973,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
33 |
+
| 30 | nncf_module.bert.encoder.layer.5.attention.self.key | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
34 |
+
| 31 | nncf_module.bert.encoder.layer.5.attention.output.dense | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
35 |
+
| 32 | nncf_module.bert.encoder.layer.5.attention.self.value | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
36 |
+
| 33 | nncf_module.bert.encoder.layer.5.attention.self.query | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
37 |
+
| 34 | nncf_module.bert.encoder.layer.5.intermediate.dense | 11 | (3072, 768) | (1947, 768) | (3072,) | (1947,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
38 |
+
| 35 | nncf_module.bert.encoder.layer.5.output.dense | 11 | (768, 3072) | (768, 1947) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
39 |
+
| 36 | nncf_module.bert.encoder.layer.6.attention.self.value | 12 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
40 |
+
| 37 | nncf_module.bert.encoder.layer.6.attention.output.dense | 12 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
41 |
+
| 38 | nncf_module.bert.encoder.layer.6.attention.self.query | 12 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
42 |
+
| 39 | nncf_module.bert.encoder.layer.6.attention.self.key | 12 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
43 |
+
| 40 | nncf_module.bert.encoder.layer.6.output.dense | 13 | (768, 3072) | (768, 1539) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
44 |
+
| 41 | nncf_module.bert.encoder.layer.6.intermediate.dense | 13 | (3072, 768) | (1539, 768) | (3072,) | (1539,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
45 |
+
| 42 | nncf_module.bert.encoder.layer.7.attention.self.value | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
46 |
+
| 43 | nncf_module.bert.encoder.layer.7.attention.self.key | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
47 |
+
| 44 | nncf_module.bert.encoder.layer.7.attention.self.query | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
48 |
+
| 45 | nncf_module.bert.encoder.layer.7.attention.output.dense | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
49 |
+
| 46 | nncf_module.bert.encoder.layer.7.output.dense | 15 | (768, 3072) | (768, 1151) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
50 |
+
| 47 | nncf_module.bert.encoder.layer.7.intermediate.dense | 15 | (3072, 768) | (1151, 768) | (3072,) | (1151,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
51 |
+
| 48 | nncf_module.bert.encoder.layer.8.attention.output.dense | 16 | (768, 768) | (768, 384) | (768,) | (768,) | group of 64 cols | See pkl | [1, 2, 6, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
52 |
+
| 49 | nncf_module.bert.encoder.layer.8.attention.self.value | 16 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [1, 2, 6, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
53 |
+
| 50 | nncf_module.bert.encoder.layer.8.attention.self.query | 16 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [1, 2, 6, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
54 |
+
| 51 | nncf_module.bert.encoder.layer.8.attention.self.key | 16 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [1, 2, 6, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
55 |
+
| 52 | nncf_module.bert.encoder.layer.8.output.dense | 17 | (768, 3072) | (768, 714) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
56 |
+
| 53 | nncf_module.bert.encoder.layer.8.intermediate.dense | 17 | (3072, 768) | (714, 768) | (3072,) | (714,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
57 |
+
| 54 | nncf_module.bert.encoder.layer.9.attention.self.value | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
58 |
+
| 55 | nncf_module.bert.encoder.layer.9.attention.self.query | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
59 |
+
| 56 | nncf_module.bert.encoder.layer.9.attention.self.key | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
60 |
+
| 57 | nncf_module.bert.encoder.layer.9.attention.output.dense | 18 | (768, 768) | (768, 320) | (768,) | (768,) | group of 64 cols | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
61 |
+
| 58 | nncf_module.bert.encoder.layer.9.output.dense | 19 | (768, 3072) | (768, 266) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
62 |
+
| 59 | nncf_module.bert.encoder.layer.9.intermediate.dense | 19 | (3072, 768) | (266, 768) | (3072,) | (266,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
63 |
+
| 60 | nncf_module.bert.encoder.layer.10.attention.self.query | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
64 |
+
| 61 | nncf_module.bert.encoder.layer.10.attention.self.value | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
65 |
+
| 62 | nncf_module.bert.encoder.layer.10.attention.output.dense | 20 | (768, 768) | (768, 256) | (768,) | (768,) | group of 64 cols | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
66 |
+
| 63 | nncf_module.bert.encoder.layer.10.attention.self.key | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
67 |
+
| 64 | nncf_module.bert.encoder.layer.10.output.dense | 21 | (768, 3072) | (768, 297) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
68 |
+
| 65 | nncf_module.bert.encoder.layer.10.intermediate.dense | 21 | (3072, 768) | (297, 768) | (3072,) | (297,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
69 |
+
| 66 | nncf_module.bert.encoder.layer.11.attention.output.dense | 22 | (768, 768) | (768, 256) | (768,) | (768,) | group of 64 cols | See pkl | [1, 2, 3, 4] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
70 |
+
| 67 | nncf_module.bert.encoder.layer.11.attention.self.value | 22 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [1, 2, 3, 4] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
71 |
+
| 68 | nncf_module.bert.encoder.layer.11.attention.self.key | 22 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [1, 2, 3, 4] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
72 |
+
| 69 | nncf_module.bert.encoder.layer.11.attention.self.query | 22 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [1, 2, 3, 4] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
73 |
+
| 70 | nncf_module.bert.encoder.layer.11.intermediate.dense | 23 | (3072, 768) | (322, 768) | (3072,) | (322,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
74 |
+
| 71 | nncf_module.bert.encoder.layer.11.output.dense | 23 | (768, 3072) | (768, 322) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
ir/sparsity_structures.pkl
ADDED
Binary file (185 kB). View file
|
|
ir/squad-BertForQuestionAnswering.cropped.8bit.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6865b9646ebc6413b581926390f088cf95b4fe03584cf834fdd452f6676e7255
|
3 |
+
size 68417348
|
ir/squad-BertForQuestionAnswering.cropped.8bit.mapping
ADDED
The diff for this file is too large to render.
See raw diff
|
|
ir/squad-BertForQuestionAnswering.cropped.8bit.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dae06d09997062857b9d9e381d0e8304cc8ecb2f1263cec5b2b53a1181d54d4d
|
3 |
+
size 272689860
|
ir/squad-BertForQuestionAnswering.cropped.8bit.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
nncf-mvmt-p3.json
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"input_info": [
|
3 |
+
{
|
4 |
+
"sample_size": [1, 384],
|
5 |
+
"type": "long"
|
6 |
+
},
|
7 |
+
{
|
8 |
+
"sample_size": [1, 384],
|
9 |
+
"type": "long"
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"sample_size": [1, 384],
|
13 |
+
"type": "long"
|
14 |
+
}
|
15 |
+
],
|
16 |
+
"compression":
|
17 |
+
[
|
18 |
+
{
|
19 |
+
"algorithm": "movement_sparsity",
|
20 |
+
"params": {
|
21 |
+
"schedule": "threshold_polynomial_decay",
|
22 |
+
"power": 3,
|
23 |
+
"init_importance_threshold": 0.0,
|
24 |
+
"final_importance_threshold": 0.1,
|
25 |
+
"warmup_start_epoch": 1,
|
26 |
+
"warmup_end_epoch": 10,
|
27 |
+
"steps_per_epoch": 5533,
|
28 |
+
"importance_regularization_factor": 0.010,
|
29 |
+
"update_per_optimizer_step": true
|
30 |
+
},
|
31 |
+
"sparse_structure_by_scopes": [
|
32 |
+
["block", [32, 32], "{re}.*BertAttention.*"],
|
33 |
+
["per_dim", [0], "{re}.*BertIntermediate.*"],
|
34 |
+
["per_dim", [1], "{re}.*BertOutput.*"]
|
35 |
+
],
|
36 |
+
"ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*qa_outputs.*"]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"algorithm": "quantization",
|
40 |
+
"initializer": {
|
41 |
+
"range": {
|
42 |
+
"num_init_samples": 32,
|
43 |
+
"type": "percentile",
|
44 |
+
"params":
|
45 |
+
{
|
46 |
+
"min_percentile": 0.01,
|
47 |
+
"max_percentile": 99.99
|
48 |
+
}
|
49 |
+
},
|
50 |
+
|
51 |
+
"batchnorm_adaptation": {
|
52 |
+
"num_bn_adaptation_samples": 200
|
53 |
+
}
|
54 |
+
},
|
55 |
+
"activations":
|
56 |
+
{
|
57 |
+
"mode": "symmetric"
|
58 |
+
},
|
59 |
+
"weights":
|
60 |
+
{
|
61 |
+
"mode": "symmetric",
|
62 |
+
"signed": true,
|
63 |
+
"per_channel": false
|
64 |
+
}
|
65 |
+
}
|
66 |
+
]
|
67 |
+
}
|
original_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
|
|