vchua
committed on
Commit
•
b708194
1
Parent(s):
74066ee
Add collaterals
Browse files
- all_results.json +5 -0
- compressed_graph.dot +0 -0
- eval_predictions.json +0 -0
- eval_results.json +5 -0
- ir/sparsity_structures.csv +145 -0
- ir/sparsity_structures.md +146 -0
- ir/sparsity_structures.pkl +0 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.bin +3 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.mapping +0 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.onnx +3 -0
- ir/squad-BertForQuestionAnswering.cropped.8bit.xml +0 -0
- nncf-mvmt-p3.json +68 -0
- original_graph.dot +0 -0
all_results.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_exact_match": 87.13339640491958,
|
3 |
+
"eval_f1": 93.04746686621255,
|
4 |
+
"eval_samples": 10784
|
5 |
+
}
|
compressed_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_predictions.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_results.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_exact_match": 87.13339640491958,
|
3 |
+
"eval_f1": 93.04746686621255,
|
4 |
+
"eval_samples": 10784
|
5 |
+
}
|
ir/sparsity_structures.csv
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pt_module_name,block_id,orig_w_shape,final_w_shape,orig_b_shape,final_b_shape,prune_by,id_to_keep,head_id_to_keep,nncf_graph_node
|
2 |
+
nncf_module.bert.encoder.layer.0.attention.output.dense,0,"(1024, 1024)","(1024, 320)","(1024,)","(1024,)",group of 64 cols,See pkl,"[2, 4, 6, 10, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
3 |
+
nncf_module.bert.encoder.layer.0.attention.self.value,0,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 4, 6, 10, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
4 |
+
nncf_module.bert.encoder.layer.0.attention.self.key,0,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 4, 6, 10, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
5 |
+
nncf_module.bert.encoder.layer.0.attention.self.query,0,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 4, 6, 10, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
6 |
+
nncf_module.bert.encoder.layer.0.output.dense,1,"(1024, 4096)","(1024, 3843)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
7 |
+
nncf_module.bert.encoder.layer.0.intermediate.dense,1,"(4096, 1024)","(3843, 1024)","(4096,)","(3843,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
8 |
+
nncf_module.bert.encoder.layer.1.attention.self.key,2,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[8],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
9 |
+
nncf_module.bert.encoder.layer.1.attention.self.value,2,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[8],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
10 |
+
nncf_module.bert.encoder.layer.1.attention.self.query,2,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[8],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
11 |
+
nncf_module.bert.encoder.layer.1.attention.output.dense,2,"(1024, 1024)","(1024, 64)","(1024,)","(1024,)",group of 64 cols,See pkl,[8],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
12 |
+
nncf_module.bert.encoder.layer.1.intermediate.dense,3,"(4096, 1024)","(3852, 1024)","(4096,)","(3852,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
13 |
+
nncf_module.bert.encoder.layer.1.output.dense,3,"(1024, 4096)","(1024, 3852)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
14 |
+
nncf_module.bert.encoder.layer.2.attention.self.key,4,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[2, 3, 5]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
15 |
+
nncf_module.bert.encoder.layer.2.attention.self.value,4,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[2, 3, 5]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
16 |
+
nncf_module.bert.encoder.layer.2.attention.output.dense,4,"(1024, 1024)","(1024, 192)","(1024,)","(1024,)",group of 64 cols,See pkl,"[2, 3, 5]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
17 |
+
nncf_module.bert.encoder.layer.2.attention.self.query,4,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[2, 3, 5]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
18 |
+
nncf_module.bert.encoder.layer.2.output.dense,5,"(1024, 4096)","(1024, 3953)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
19 |
+
nncf_module.bert.encoder.layer.2.intermediate.dense,5,"(4096, 1024)","(3953, 1024)","(4096,)","(3953,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
20 |
+
nncf_module.bert.encoder.layer.3.attention.output.dense,6,"(1024, 1024)","(1024, 128)","(1024,)","(1024,)",group of 64 cols,See pkl,"[4, 12]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
21 |
+
nncf_module.bert.encoder.layer.3.attention.self.query,6,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[4, 12]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
22 |
+
nncf_module.bert.encoder.layer.3.attention.self.key,6,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[4, 12]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
23 |
+
nncf_module.bert.encoder.layer.3.attention.self.value,6,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[4, 12]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
24 |
+
nncf_module.bert.encoder.layer.3.intermediate.dense,7,"(4096, 1024)","(3981, 1024)","(4096,)","(3981,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
25 |
+
nncf_module.bert.encoder.layer.3.output.dense,7,"(1024, 4096)","(1024, 3981)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
26 |
+
nncf_module.bert.encoder.layer.4.attention.self.key,8,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[1, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
27 |
+
nncf_module.bert.encoder.layer.4.attention.self.value,8,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[1, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
28 |
+
nncf_module.bert.encoder.layer.4.attention.output.dense,8,"(1024, 1024)","(1024, 128)","(1024,)","(1024,)",group of 64 cols,See pkl,"[1, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
29 |
+
nncf_module.bert.encoder.layer.4.attention.self.query,8,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[1, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
30 |
+
nncf_module.bert.encoder.layer.4.output.dense,9,"(1024, 4096)","(1024, 3991)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
31 |
+
nncf_module.bert.encoder.layer.4.intermediate.dense,9,"(4096, 1024)","(3991, 1024)","(4096,)","(3991,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
32 |
+
nncf_module.bert.encoder.layer.5.attention.output.dense,10,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
33 |
+
nncf_module.bert.encoder.layer.5.attention.self.value,10,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
34 |
+
nncf_module.bert.encoder.layer.5.attention.self.query,10,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
35 |
+
nncf_module.bert.encoder.layer.5.attention.self.key,10,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
36 |
+
nncf_module.bert.encoder.layer.5.output.dense,11,"(1024, 4096)","(1024, 4037)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
37 |
+
nncf_module.bert.encoder.layer.5.intermediate.dense,11,"(4096, 1024)","(4037, 1024)","(4096,)","(4037,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
38 |
+
nncf_module.bert.encoder.layer.6.attention.self.query,12,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
39 |
+
nncf_module.bert.encoder.layer.6.attention.self.value,12,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
40 |
+
nncf_module.bert.encoder.layer.6.attention.self.key,12,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
41 |
+
nncf_module.bert.encoder.layer.6.attention.output.dense,12,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
42 |
+
nncf_module.bert.encoder.layer.6.intermediate.dense,13,"(4096, 1024)","(4051, 1024)","(4096,)","(4051,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
43 |
+
nncf_module.bert.encoder.layer.6.output.dense,13,"(1024, 4096)","(1024, 4051)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
44 |
+
nncf_module.bert.encoder.layer.7.attention.self.key,14,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[1, 6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
45 |
+
nncf_module.bert.encoder.layer.7.attention.self.query,14,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[1, 6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
46 |
+
nncf_module.bert.encoder.layer.7.attention.output.dense,14,"(1024, 1024)","(1024, 192)","(1024,)","(1024,)",group of 64 cols,See pkl,"[1, 6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
47 |
+
nncf_module.bert.encoder.layer.7.attention.self.value,14,"(1024, 1024)","(192, 1024)","(1024,)","(192,)",group of 64 rows,See pkl,"[1, 6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
48 |
+
nncf_module.bert.encoder.layer.7.intermediate.dense,15,"(4096, 1024)","(4036, 1024)","(4096,)","(4036,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
49 |
+
nncf_module.bert.encoder.layer.7.output.dense,15,"(1024, 4096)","(1024, 4036)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
50 |
+
nncf_module.bert.encoder.layer.8.attention.self.key,16,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
51 |
+
nncf_module.bert.encoder.layer.8.attention.self.query,16,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
52 |
+
nncf_module.bert.encoder.layer.8.attention.output.dense,16,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
53 |
+
nncf_module.bert.encoder.layer.8.attention.self.value,16,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
54 |
+
nncf_module.bert.encoder.layer.8.intermediate.dense,17,"(4096, 1024)","(4031, 1024)","(4096,)","(4031,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
55 |
+
nncf_module.bert.encoder.layer.8.output.dense,17,"(1024, 4096)","(1024, 4031)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
56 |
+
nncf_module.bert.encoder.layer.9.attention.self.value,18,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[12],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
57 |
+
nncf_module.bert.encoder.layer.9.attention.output.dense,18,"(1024, 1024)","(1024, 64)","(1024,)","(1024,)",group of 64 cols,See pkl,[12],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
58 |
+
nncf_module.bert.encoder.layer.9.attention.self.query,18,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[12],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
59 |
+
nncf_module.bert.encoder.layer.9.attention.self.key,18,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[12],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
60 |
+
nncf_module.bert.encoder.layer.9.output.dense,19,"(1024, 4096)","(1024, 4023)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
61 |
+
nncf_module.bert.encoder.layer.9.intermediate.dense,19,"(4096, 1024)","(4023, 1024)","(4096,)","(4023,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
62 |
+
nncf_module.bert.encoder.layer.10.attention.self.value,20,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[0, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
63 |
+
nncf_module.bert.encoder.layer.10.attention.self.key,20,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[0, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
64 |
+
nncf_module.bert.encoder.layer.10.attention.self.query,20,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[0, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
65 |
+
nncf_module.bert.encoder.layer.10.attention.output.dense,20,"(1024, 1024)","(1024, 128)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
66 |
+
nncf_module.bert.encoder.layer.10.output.dense,21,"(1024, 4096)","(1024, 3998)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
67 |
+
nncf_module.bert.encoder.layer.10.intermediate.dense,21,"(4096, 1024)","(3998, 1024)","(4096,)","(3998,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
68 |
+
nncf_module.bert.encoder.layer.11.attention.self.key,22,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[1, 4, 9, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
69 |
+
nncf_module.bert.encoder.layer.11.attention.self.query,22,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[1, 4, 9, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
70 |
+
nncf_module.bert.encoder.layer.11.attention.self.value,22,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[1, 4, 9, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
71 |
+
nncf_module.bert.encoder.layer.11.attention.output.dense,22,"(1024, 1024)","(1024, 256)","(1024,)","(1024,)",group of 64 cols,See pkl,"[1, 4, 9, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
72 |
+
nncf_module.bert.encoder.layer.11.intermediate.dense,23,"(4096, 1024)","(3999, 1024)","(4096,)","(3999,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
73 |
+
nncf_module.bert.encoder.layer.11.output.dense,23,"(1024, 4096)","(1024, 3999)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
74 |
+
nncf_module.bert.encoder.layer.12.attention.output.dense,24,"(1024, 1024)","(1024, 384)","(1024,)","(1024,)",group of 64 cols,See pkl,"[1, 4, 5, 7, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
75 |
+
nncf_module.bert.encoder.layer.12.attention.self.value,24,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[1, 4, 5, 7, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
76 |
+
nncf_module.bert.encoder.layer.12.attention.self.query,24,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[1, 4, 5, 7, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
77 |
+
nncf_module.bert.encoder.layer.12.attention.self.key,24,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[1, 4, 5, 7, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
78 |
+
nncf_module.bert.encoder.layer.12.intermediate.dense,25,"(4096, 1024)","(3970, 1024)","(4096,)","(3970,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
79 |
+
nncf_module.bert.encoder.layer.12.output.dense,25,"(1024, 4096)","(1024, 3970)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
80 |
+
nncf_module.bert.encoder.layer.13.attention.self.key,26,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[4, 7, 9, 13, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
81 |
+
nncf_module.bert.encoder.layer.13.attention.output.dense,26,"(1024, 1024)","(1024, 320)","(1024,)","(1024,)",group of 64 cols,See pkl,"[4, 7, 9, 13, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
82 |
+
nncf_module.bert.encoder.layer.13.attention.self.value,26,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[4, 7, 9, 13, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
83 |
+
nncf_module.bert.encoder.layer.13.attention.self.query,26,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[4, 7, 9, 13, 14]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
84 |
+
nncf_module.bert.encoder.layer.13.output.dense,27,"(1024, 4096)","(1024, 3924)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
85 |
+
nncf_module.bert.encoder.layer.13.intermediate.dense,27,"(4096, 1024)","(3924, 1024)","(4096,)","(3924,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
86 |
+
nncf_module.bert.encoder.layer.14.attention.output.dense,28,"(1024, 1024)","(1024, 320)","(1024,)","(1024,)",group of 64 cols,See pkl,"[2, 3, 4, 7, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
87 |
+
nncf_module.bert.encoder.layer.14.attention.self.query,28,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 3, 4, 7, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
88 |
+
nncf_module.bert.encoder.layer.14.attention.self.key,28,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 3, 4, 7, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
89 |
+
nncf_module.bert.encoder.layer.14.attention.self.value,28,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 3, 4, 7, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
90 |
+
nncf_module.bert.encoder.layer.14.output.dense,29,"(1024, 4096)","(1024, 3817)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
91 |
+
nncf_module.bert.encoder.layer.14.intermediate.dense,29,"(4096, 1024)","(3817, 1024)","(4096,)","(3817,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
92 |
+
nncf_module.bert.encoder.layer.15.attention.self.query,30,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[2, 7, 8, 10, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
93 |
+
nncf_module.bert.encoder.layer.15.attention.output.dense,30,"(1024, 1024)","(1024, 384)","(1024,)","(1024,)",group of 64 cols,See pkl,"[2, 7, 8, 10, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
94 |
+
nncf_module.bert.encoder.layer.15.attention.self.value,30,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[2, 7, 8, 10, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
95 |
+
nncf_module.bert.encoder.layer.15.attention.self.key,30,"(1024, 1024)","(384, 1024)","(1024,)","(384,)",group of 64 rows,See pkl,"[2, 7, 8, 10, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
96 |
+
nncf_module.bert.encoder.layer.15.intermediate.dense,31,"(4096, 1024)","(3616, 1024)","(4096,)","(3616,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
97 |
+
nncf_module.bert.encoder.layer.15.output.dense,31,"(1024, 4096)","(1024, 3616)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
98 |
+
nncf_module.bert.encoder.layer.16.attention.output.dense,32,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
99 |
+
nncf_module.bert.encoder.layer.16.attention.self.query,32,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
100 |
+
nncf_module.bert.encoder.layer.16.attention.self.key,32,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
101 |
+
nncf_module.bert.encoder.layer.16.attention.self.value,32,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
102 |
+
nncf_module.bert.encoder.layer.16.intermediate.dense,33,"(4096, 1024)","(3490, 1024)","(4096,)","(3490,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
103 |
+
nncf_module.bert.encoder.layer.16.output.dense,33,"(1024, 4096)","(1024, 3490)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
104 |
+
nncf_module.bert.encoder.layer.17.attention.self.key,34,"(1024, 1024)","(512, 1024)","(1024,)","(512,)",group of 64 rows,See pkl,"[3, 5, 6, 7, 10, 11, 13, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
105 |
+
nncf_module.bert.encoder.layer.17.attention.self.query,34,"(1024, 1024)","(512, 1024)","(1024,)","(512,)",group of 64 rows,See pkl,"[3, 5, 6, 7, 10, 11, 13, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
106 |
+
nncf_module.bert.encoder.layer.17.attention.self.value,34,"(1024, 1024)","(512, 1024)","(1024,)","(512,)",group of 64 rows,See pkl,"[3, 5, 6, 7, 10, 11, 13, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
107 |
+
nncf_module.bert.encoder.layer.17.attention.output.dense,34,"(1024, 1024)","(1024, 512)","(1024,)","(1024,)",group of 64 cols,See pkl,"[3, 5, 6, 7, 10, 11, 13, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
108 |
+
nncf_module.bert.encoder.layer.17.output.dense,35,"(1024, 4096)","(1024, 3342)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
109 |
+
nncf_module.bert.encoder.layer.17.intermediate.dense,35,"(4096, 1024)","(3342, 1024)","(4096,)","(3342,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
110 |
+
nncf_module.bert.encoder.layer.18.attention.output.dense,36,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
111 |
+
nncf_module.bert.encoder.layer.18.attention.self.query,36,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
112 |
+
nncf_module.bert.encoder.layer.18.attention.self.value,36,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
113 |
+
nncf_module.bert.encoder.layer.18.attention.self.key,36,"(1024, 1024)","(1024, 1024)","(1024,)","(1024,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
114 |
+
nncf_module.bert.encoder.layer.18.output.dense,37,"(1024, 4096)","(1024, 3130)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
115 |
+
nncf_module.bert.encoder.layer.18.intermediate.dense,37,"(4096, 1024)","(3130, 1024)","(4096,)","(3130,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
116 |
+
nncf_module.bert.encoder.layer.19.attention.self.key,38,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 7, 9, 10, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
117 |
+
nncf_module.bert.encoder.layer.19.attention.self.query,38,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 7, 9, 10, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
118 |
+
nncf_module.bert.encoder.layer.19.attention.output.dense,38,"(1024, 1024)","(1024, 320)","(1024,)","(1024,)",group of 64 cols,See pkl,"[2, 7, 9, 10, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
119 |
+
nncf_module.bert.encoder.layer.19.attention.self.value,38,"(1024, 1024)","(320, 1024)","(1024,)","(320,)",group of 64 rows,See pkl,"[2, 7, 9, 10, 13]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
120 |
+
nncf_module.bert.encoder.layer.19.intermediate.dense,39,"(4096, 1024)","(2369, 1024)","(4096,)","(2369,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
121 |
+
nncf_module.bert.encoder.layer.19.output.dense,39,"(1024, 4096)","(1024, 2369)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
122 |
+
nncf_module.bert.encoder.layer.20.attention.self.key,40,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[5, 6, 9, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
123 |
+
nncf_module.bert.encoder.layer.20.attention.self.query,40,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[5, 6, 9, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
124 |
+
nncf_module.bert.encoder.layer.20.attention.output.dense,40,"(1024, 1024)","(1024, 256)","(1024,)","(1024,)",group of 64 cols,See pkl,"[5, 6, 9, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
125 |
+
nncf_module.bert.encoder.layer.20.attention.self.value,40,"(1024, 1024)","(256, 1024)","(1024,)","(256,)",group of 64 rows,See pkl,"[5, 6, 9, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
126 |
+
nncf_module.bert.encoder.layer.20.intermediate.dense,41,"(4096, 1024)","(1102, 1024)","(4096,)","(1102,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
127 |
+
nncf_module.bert.encoder.layer.20.output.dense,41,"(1024, 4096)","(1024, 1102)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
128 |
+
nncf_module.bert.encoder.layer.21.attention.self.key,42,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
129 |
+
nncf_module.bert.encoder.layer.21.attention.self.query,42,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
130 |
+
nncf_module.bert.encoder.layer.21.attention.output.dense,42,"(1024, 1024)","(1024, 128)","(1024,)","(1024,)",group of 64 cols,See pkl,"[6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
131 |
+
nncf_module.bert.encoder.layer.21.attention.self.value,42,"(1024, 1024)","(128, 1024)","(1024,)","(128,)",group of 64 rows,See pkl,"[6, 7]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
132 |
+
nncf_module.bert.encoder.layer.21.output.dense,43,"(1024, 4096)","(1024, 452)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
133 |
+
nncf_module.bert.encoder.layer.21.intermediate.dense,43,"(4096, 1024)","(452, 1024)","(4096,)","(452,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
134 |
+
nncf_module.bert.encoder.layer.22.attention.self.value,44,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[14],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
135 |
+
nncf_module.bert.encoder.layer.22.attention.output.dense,44,"(1024, 1024)","(1024, 64)","(1024,)","(1024,)",group of 64 cols,See pkl,[14],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
136 |
+
nncf_module.bert.encoder.layer.22.attention.self.query,44,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[14],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
137 |
+
nncf_module.bert.encoder.layer.22.attention.self.key,44,"(1024, 1024)","(64, 1024)","(1024,)","(64,)",group of 64 rows,See pkl,[14],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
138 |
+
nncf_module.bert.encoder.layer.22.intermediate.dense,45,"(4096, 1024)","(215, 1024)","(4096,)","(215,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
139 |
+
nncf_module.bert.encoder.layer.22.output.dense,45,"(1024, 4096)","(1024, 215)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
140 |
+
nncf_module.bert.encoder.layer.23.attention.self.value,46,"(1024, 1024)","(832, 1024)","(1024,)","(832,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
141 |
+
nncf_module.bert.encoder.layer.23.attention.self.key,46,"(1024, 1024)","(832, 1024)","(1024,)","(832,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
142 |
+
nncf_module.bert.encoder.layer.23.attention.self.query,46,"(1024, 1024)","(832, 1024)","(1024,)","(832,)",group of 64 rows,See pkl,"[0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
143 |
+
nncf_module.bert.encoder.layer.23.attention.output.dense,46,"(1024, 1024)","(1024, 832)","(1024,)","(1024,)",group of 64 cols,See pkl,"[0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
144 |
+
nncf_module.bert.encoder.layer.23.output.dense,47,"(1024, 4096)","(1024, 351)","(1024,)","(1024,)",col,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
145 |
+
nncf_module.bert.encoder.layer.23.intermediate.dense,47,"(4096, 1024)","(351, 1024)","(4096,)","(351,)",row,See pkl,,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
ir/sparsity_structures.md
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
| | pt_module_name | block_id | orig_w_shape | final_w_shape | orig_b_shape | final_b_shape | prune_by | id_to_keep | head_id_to_keep | nncf_graph_node |
|
2 |
+
|----:|:---------------------------------------------------------|-----------:|:---------------|:----------------|:---------------|:----------------|:-----------------|:-------------|:-------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
3 |
+
| 0 | nncf_module.bert.encoder.layer.0.attention.output.dense | 0 | (1024, 1024) | (1024, 320) | (1024,) | (1024,) | group of 64 cols | See pkl | [2, 4, 6, 10, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
4 |
+
| 1 | nncf_module.bert.encoder.layer.0.attention.self.value | 0 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 4, 6, 10, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
5 |
+
| 2 | nncf_module.bert.encoder.layer.0.attention.self.key | 0 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 4, 6, 10, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
6 |
+
| 3 | nncf_module.bert.encoder.layer.0.attention.self.query | 0 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 4, 6, 10, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
7 |
+
| 4 | nncf_module.bert.encoder.layer.0.output.dense | 1 | (1024, 4096) | (1024, 3843) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
8 |
+
| 5 | nncf_module.bert.encoder.layer.0.intermediate.dense | 1 | (4096, 1024) | (3843, 1024) | (4096,) | (3843,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
9 |
+
| 6 | nncf_module.bert.encoder.layer.1.attention.self.key | 2 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
10 |
+
| 7 | nncf_module.bert.encoder.layer.1.attention.self.value | 2 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
11 |
+
| 8 | nncf_module.bert.encoder.layer.1.attention.self.query | 2 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
12 |
+
| 9 | nncf_module.bert.encoder.layer.1.attention.output.dense | 2 | (1024, 1024) | (1024, 64) | (1024,) | (1024,) | group of 64 cols | See pkl | [8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
13 |
+
| 10 | nncf_module.bert.encoder.layer.1.intermediate.dense | 3 | (4096, 1024) | (3852, 1024) | (4096,) | (3852,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
14 |
+
| 11 | nncf_module.bert.encoder.layer.1.output.dense | 3 | (1024, 4096) | (1024, 3852) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
15 |
+
| 12 | nncf_module.bert.encoder.layer.2.attention.self.key | 4 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [2, 3, 5] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
16 |
+
| 13 | nncf_module.bert.encoder.layer.2.attention.self.value | 4 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [2, 3, 5] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
17 |
+
| 14 | nncf_module.bert.encoder.layer.2.attention.output.dense | 4 | (1024, 1024) | (1024, 192) | (1024,) | (1024,) | group of 64 cols | See pkl | [2, 3, 5] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
18 |
+
| 15 | nncf_module.bert.encoder.layer.2.attention.self.query | 4 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [2, 3, 5] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
19 |
+
| 16 | nncf_module.bert.encoder.layer.2.output.dense | 5 | (1024, 4096) | (1024, 3953) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
20 |
+
| 17 | nncf_module.bert.encoder.layer.2.intermediate.dense | 5 | (4096, 1024) | (3953, 1024) | (4096,) | (3953,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
21 |
+
| 18 | nncf_module.bert.encoder.layer.3.attention.output.dense | 6 | (1024, 1024) | (1024, 128) | (1024,) | (1024,) | group of 64 cols | See pkl | [4, 12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
22 |
+
| 19 | nncf_module.bert.encoder.layer.3.attention.self.query | 6 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [4, 12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
23 |
+
| 20 | nncf_module.bert.encoder.layer.3.attention.self.key | 6 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [4, 12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
24 |
+
| 21 | nncf_module.bert.encoder.layer.3.attention.self.value | 6 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [4, 12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
25 |
+
| 22 | nncf_module.bert.encoder.layer.3.intermediate.dense | 7 | (4096, 1024) | (3981, 1024) | (4096,) | (3981,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
26 |
+
| 23 | nncf_module.bert.encoder.layer.3.output.dense | 7 | (1024, 4096) | (1024, 3981) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
27 |
+
| 24 | nncf_module.bert.encoder.layer.4.attention.self.key | 8 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [1, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
28 |
+
| 25 | nncf_module.bert.encoder.layer.4.attention.self.value | 8 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [1, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
29 |
+
| 26 | nncf_module.bert.encoder.layer.4.attention.output.dense | 8 | (1024, 1024) | (1024, 128) | (1024,) | (1024,) | group of 64 cols | See pkl | [1, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
30 |
+
| 27 | nncf_module.bert.encoder.layer.4.attention.self.query | 8 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [1, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
31 |
+
| 28 | nncf_module.bert.encoder.layer.4.output.dense | 9 | (1024, 4096) | (1024, 3991) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
32 |
+
| 29 | nncf_module.bert.encoder.layer.4.intermediate.dense | 9 | (4096, 1024) | (3991, 1024) | (4096,) | (3991,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
33 |
+
| 30 | nncf_module.bert.encoder.layer.5.attention.output.dense | 10 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
34 |
+
| 31 | nncf_module.bert.encoder.layer.5.attention.self.value | 10 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
35 |
+
| 32 | nncf_module.bert.encoder.layer.5.attention.self.query | 10 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
36 |
+
| 33 | nncf_module.bert.encoder.layer.5.attention.self.key | 10 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
37 |
+
| 34 | nncf_module.bert.encoder.layer.5.output.dense | 11 | (1024, 4096) | (1024, 4037) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
38 |
+
| 35 | nncf_module.bert.encoder.layer.5.intermediate.dense | 11 | (4096, 1024) | (4037, 1024) | (4096,) | (4037,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
39 |
+
| 36 | nncf_module.bert.encoder.layer.6.attention.self.query | 12 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
40 |
+
| 37 | nncf_module.bert.encoder.layer.6.attention.self.value | 12 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
41 |
+
| 38 | nncf_module.bert.encoder.layer.6.attention.self.key | 12 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
42 |
+
| 39 | nncf_module.bert.encoder.layer.6.attention.output.dense | 12 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
43 |
+
| 40 | nncf_module.bert.encoder.layer.6.intermediate.dense | 13 | (4096, 1024) | (4051, 1024) | (4096,) | (4051,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
44 |
+
| 41 | nncf_module.bert.encoder.layer.6.output.dense | 13 | (1024, 4096) | (1024, 4051) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
45 |
+
| 42 | nncf_module.bert.encoder.layer.7.attention.self.key | 14 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [1, 6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
46 |
+
| 43 | nncf_module.bert.encoder.layer.7.attention.self.query | 14 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [1, 6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
47 |
+
| 44 | nncf_module.bert.encoder.layer.7.attention.output.dense | 14 | (1024, 1024) | (1024, 192) | (1024,) | (1024,) | group of 64 cols | See pkl | [1, 6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
48 |
+
| 45 | nncf_module.bert.encoder.layer.7.attention.self.value | 14 | (1024, 1024) | (192, 1024) | (1024,) | (192,) | group of 64 rows | See pkl | [1, 6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
49 |
+
| 46 | nncf_module.bert.encoder.layer.7.intermediate.dense | 15 | (4096, 1024) | (4036, 1024) | (4096,) | (4036,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
50 |
+
| 47 | nncf_module.bert.encoder.layer.7.output.dense | 15 | (1024, 4096) | (1024, 4036) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
51 |
+
| 48 | nncf_module.bert.encoder.layer.8.attention.self.key | 16 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
52 |
+
| 49 | nncf_module.bert.encoder.layer.8.attention.self.query | 16 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
53 |
+
| 50 | nncf_module.bert.encoder.layer.8.attention.output.dense | 16 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
54 |
+
| 51 | nncf_module.bert.encoder.layer.8.attention.self.value | 16 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
55 |
+
| 52 | nncf_module.bert.encoder.layer.8.intermediate.dense | 17 | (4096, 1024) | (4031, 1024) | (4096,) | (4031,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
56 |
+
| 53 | nncf_module.bert.encoder.layer.8.output.dense | 17 | (1024, 4096) | (1024, 4031) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
57 |
+
| 54 | nncf_module.bert.encoder.layer.9.attention.self.value | 18 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
58 |
+
| 55 | nncf_module.bert.encoder.layer.9.attention.output.dense | 18 | (1024, 1024) | (1024, 64) | (1024,) | (1024,) | group of 64 cols | See pkl | [12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
59 |
+
| 56 | nncf_module.bert.encoder.layer.9.attention.self.query | 18 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
60 |
+
| 57 | nncf_module.bert.encoder.layer.9.attention.self.key | 18 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [12] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
61 |
+
| 58 | nncf_module.bert.encoder.layer.9.output.dense | 19 | (1024, 4096) | (1024, 4023) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
62 |
+
| 59 | nncf_module.bert.encoder.layer.9.intermediate.dense | 19 | (4096, 1024) | (4023, 1024) | (4096,) | (4023,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
63 |
+
| 60 | nncf_module.bert.encoder.layer.10.attention.self.value | 20 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [0, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
64 |
+
| 61 | nncf_module.bert.encoder.layer.10.attention.self.key | 20 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [0, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
65 |
+
| 62 | nncf_module.bert.encoder.layer.10.attention.self.query | 20 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [0, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
66 |
+
| 63 | nncf_module.bert.encoder.layer.10.attention.output.dense | 20 | (1024, 1024) | (1024, 128) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
67 |
+
| 64 | nncf_module.bert.encoder.layer.10.output.dense | 21 | (1024, 4096) | (1024, 3998) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
68 |
+
| 65 | nncf_module.bert.encoder.layer.10.intermediate.dense | 21 | (4096, 1024) | (3998, 1024) | (4096,) | (3998,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
69 |
+
| 66 | nncf_module.bert.encoder.layer.11.attention.self.key | 22 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [1, 4, 9, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
70 |
+
| 67 | nncf_module.bert.encoder.layer.11.attention.self.query | 22 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [1, 4, 9, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
71 |
+
| 68 | nncf_module.bert.encoder.layer.11.attention.self.value | 22 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [1, 4, 9, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
72 |
+
| 69 | nncf_module.bert.encoder.layer.11.attention.output.dense | 22 | (1024, 1024) | (1024, 256) | (1024,) | (1024,) | group of 64 cols | See pkl | [1, 4, 9, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
73 |
+
| 70 | nncf_module.bert.encoder.layer.11.intermediate.dense | 23 | (4096, 1024) | (3999, 1024) | (4096,) | (3999,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
74 |
+
| 71 | nncf_module.bert.encoder.layer.11.output.dense | 23 | (1024, 4096) | (1024, 3999) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
75 |
+
| 72 | nncf_module.bert.encoder.layer.12.attention.output.dense | 24 | (1024, 1024) | (1024, 384) | (1024,) | (1024,) | group of 64 cols | See pkl | [1, 4, 5, 7, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
76 |
+
| 73 | nncf_module.bert.encoder.layer.12.attention.self.value | 24 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [1, 4, 5, 7, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
77 |
+
| 74 | nncf_module.bert.encoder.layer.12.attention.self.query | 24 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [1, 4, 5, 7, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
78 |
+
| 75 | nncf_module.bert.encoder.layer.12.attention.self.key | 24 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [1, 4, 5, 7, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
79 |
+
| 76 | nncf_module.bert.encoder.layer.12.intermediate.dense | 25 | (4096, 1024) | (3970, 1024) | (4096,) | (3970,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
80 |
+
| 77 | nncf_module.bert.encoder.layer.12.output.dense | 25 | (1024, 4096) | (1024, 3970) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[12]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
81 |
+
| 78 | nncf_module.bert.encoder.layer.13.attention.self.key | 26 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [4, 7, 9, 13, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
82 |
+
| 79 | nncf_module.bert.encoder.layer.13.attention.output.dense | 26 | (1024, 1024) | (1024, 320) | (1024,) | (1024,) | group of 64 cols | See pkl | [4, 7, 9, 13, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
83 |
+
| 80 | nncf_module.bert.encoder.layer.13.attention.self.value | 26 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [4, 7, 9, 13, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
84 |
+
| 81 | nncf_module.bert.encoder.layer.13.attention.self.query | 26 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [4, 7, 9, 13, 14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
85 |
+
| 82 | nncf_module.bert.encoder.layer.13.output.dense | 27 | (1024, 4096) | (1024, 3924) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
86 |
+
| 83 | nncf_module.bert.encoder.layer.13.intermediate.dense | 27 | (4096, 1024) | (3924, 1024) | (4096,) | (3924,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[13]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
87 |
+
| 84 | nncf_module.bert.encoder.layer.14.attention.output.dense | 28 | (1024, 1024) | (1024, 320) | (1024,) | (1024,) | group of 64 cols | See pkl | [2, 3, 4, 7, 9] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
88 |
+
| 85 | nncf_module.bert.encoder.layer.14.attention.self.query | 28 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 3, 4, 7, 9] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
89 |
+
| 86 | nncf_module.bert.encoder.layer.14.attention.self.key | 28 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 3, 4, 7, 9] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
90 |
+
| 87 | nncf_module.bert.encoder.layer.14.attention.self.value | 28 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 3, 4, 7, 9] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
91 |
+
| 88 | nncf_module.bert.encoder.layer.14.output.dense | 29 | (1024, 4096) | (1024, 3817) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
92 |
+
| 89 | nncf_module.bert.encoder.layer.14.intermediate.dense | 29 | (4096, 1024) | (3817, 1024) | (4096,) | (3817,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[14]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
93 |
+
| 90 | nncf_module.bert.encoder.layer.15.attention.self.query | 30 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [2, 7, 8, 10, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
94 |
+
| 91 | nncf_module.bert.encoder.layer.15.attention.output.dense | 30 | (1024, 1024) | (1024, 384) | (1024,) | (1024,) | group of 64 cols | See pkl | [2, 7, 8, 10, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
95 |
+
| 92 | nncf_module.bert.encoder.layer.15.attention.self.value | 30 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [2, 7, 8, 10, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
96 |
+
| 93 | nncf_module.bert.encoder.layer.15.attention.self.key | 30 | (1024, 1024) | (384, 1024) | (1024,) | (384,) | group of 64 rows | See pkl | [2, 7, 8, 10, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
97 |
+
| 94 | nncf_module.bert.encoder.layer.15.intermediate.dense | 31 | (4096, 1024) | (3616, 1024) | (4096,) | (3616,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
98 |
+
| 95 | nncf_module.bert.encoder.layer.15.output.dense | 31 | (1024, 4096) | (1024, 3616) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[15]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
99 |
+
| 96 | nncf_module.bert.encoder.layer.16.attention.output.dense | 32 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
100 |
+
| 97 | nncf_module.bert.encoder.layer.16.attention.self.query | 32 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
101 |
+
| 98 | nncf_module.bert.encoder.layer.16.attention.self.key | 32 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
102 |
+
| 99 | nncf_module.bert.encoder.layer.16.attention.self.value | 32 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
103 |
+
| 100 | nncf_module.bert.encoder.layer.16.intermediate.dense | 33 | (4096, 1024) | (3490, 1024) | (4096,) | (3490,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
104 |
+
| 101 | nncf_module.bert.encoder.layer.16.output.dense | 33 | (1024, 4096) | (1024, 3490) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[16]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
105 |
+
| 102 | nncf_module.bert.encoder.layer.17.attention.self.key | 34 | (1024, 1024) | (512, 1024) | (1024,) | (512,) | group of 64 rows | See pkl | [3, 5, 6, 7, 10, 11, 13, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
106 |
+
| 103 | nncf_module.bert.encoder.layer.17.attention.self.query | 34 | (1024, 1024) | (512, 1024) | (1024,) | (512,) | group of 64 rows | See pkl | [3, 5, 6, 7, 10, 11, 13, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
107 |
+
| 104 | nncf_module.bert.encoder.layer.17.attention.self.value | 34 | (1024, 1024) | (512, 1024) | (1024,) | (512,) | group of 64 rows | See pkl | [3, 5, 6, 7, 10, 11, 13, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
108 |
+
| 105 | nncf_module.bert.encoder.layer.17.attention.output.dense | 34 | (1024, 1024) | (1024, 512) | (1024,) | (1024,) | group of 64 cols | See pkl | [3, 5, 6, 7, 10, 11, 13, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
109 |
+
| 106 | nncf_module.bert.encoder.layer.17.output.dense | 35 | (1024, 4096) | (1024, 3342) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
110 |
+
| 107 | nncf_module.bert.encoder.layer.17.intermediate.dense | 35 | (4096, 1024) | (3342, 1024) | (4096,) | (3342,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[17]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
111 |
+
| 108 | nncf_module.bert.encoder.layer.18.attention.output.dense | 36 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
112 |
+
| 109 | nncf_module.bert.encoder.layer.18.attention.self.query | 36 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
113 |
+
| 110 | nncf_module.bert.encoder.layer.18.attention.self.value | 36 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
114 |
+
| 111 | nncf_module.bert.encoder.layer.18.attention.self.key | 36 | (1024, 1024) | (1024, 1024) | (1024,) | (1024,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
115 |
+
| 112 | nncf_module.bert.encoder.layer.18.output.dense | 37 | (1024, 4096) | (1024, 3130) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
116 |
+
| 113 | nncf_module.bert.encoder.layer.18.intermediate.dense | 37 | (4096, 1024) | (3130, 1024) | (4096,) | (3130,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[18]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
117 |
+
| 114 | nncf_module.bert.encoder.layer.19.attention.self.key | 38 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 7, 9, 10, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
118 |
+
| 115 | nncf_module.bert.encoder.layer.19.attention.self.query | 38 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 7, 9, 10, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
119 |
+
| 116 | nncf_module.bert.encoder.layer.19.attention.output.dense | 38 | (1024, 1024) | (1024, 320) | (1024,) | (1024,) | group of 64 cols | See pkl | [2, 7, 9, 10, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
120 |
+
| 117 | nncf_module.bert.encoder.layer.19.attention.self.value | 38 | (1024, 1024) | (320, 1024) | (1024,) | (320,) | group of 64 rows | See pkl | [2, 7, 9, 10, 13] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
121 |
+
| 118 | nncf_module.bert.encoder.layer.19.intermediate.dense | 39 | (4096, 1024) | (2369, 1024) | (4096,) | (2369,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
122 |
+
| 119 | nncf_module.bert.encoder.layer.19.output.dense | 39 | (1024, 4096) | (1024, 2369) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[19]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
123 |
+
| 120 | nncf_module.bert.encoder.layer.20.attention.self.key | 40 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [5, 6, 9, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
124 |
+
| 121 | nncf_module.bert.encoder.layer.20.attention.self.query | 40 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [5, 6, 9, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
125 |
+
| 122 | nncf_module.bert.encoder.layer.20.attention.output.dense | 40 | (1024, 1024) | (1024, 256) | (1024,) | (1024,) | group of 64 cols | See pkl | [5, 6, 9, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
126 |
+
| 123 | nncf_module.bert.encoder.layer.20.attention.self.value | 40 | (1024, 1024) | (256, 1024) | (1024,) | (256,) | group of 64 rows | See pkl | [5, 6, 9, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
127 |
+
| 124 | nncf_module.bert.encoder.layer.20.intermediate.dense | 41 | (4096, 1024) | (1102, 1024) | (4096,) | (1102,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
128 |
+
| 125 | nncf_module.bert.encoder.layer.20.output.dense | 41 | (1024, 4096) | (1024, 1102) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[20]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
129 |
+
| 126 | nncf_module.bert.encoder.layer.21.attention.self.key | 42 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
130 |
+
| 127 | nncf_module.bert.encoder.layer.21.attention.self.query | 42 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
131 |
+
| 128 | nncf_module.bert.encoder.layer.21.attention.output.dense | 42 | (1024, 1024) | (1024, 128) | (1024,) | (1024,) | group of 64 cols | See pkl | [6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
132 |
+
| 129 | nncf_module.bert.encoder.layer.21.attention.self.value | 42 | (1024, 1024) | (128, 1024) | (1024,) | (128,) | group of 64 rows | See pkl | [6, 7] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
133 |
+
| 130 | nncf_module.bert.encoder.layer.21.output.dense | 43 | (1024, 4096) | (1024, 452) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
134 |
+
| 131 | nncf_module.bert.encoder.layer.21.intermediate.dense | 43 | (4096, 1024) | (452, 1024) | (4096,) | (452,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[21]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
135 |
+
| 132 | nncf_module.bert.encoder.layer.22.attention.self.value | 44 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
136 |
+
| 133 | nncf_module.bert.encoder.layer.22.attention.output.dense | 44 | (1024, 1024) | (1024, 64) | (1024,) | (1024,) | group of 64 cols | See pkl | [14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
137 |
+
| 134 | nncf_module.bert.encoder.layer.22.attention.self.query | 44 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
138 |
+
| 135 | nncf_module.bert.encoder.layer.22.attention.self.key | 44 | (1024, 1024) | (64, 1024) | (1024,) | (64,) | group of 64 rows | See pkl | [14] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
139 |
+
| 136 | nncf_module.bert.encoder.layer.22.intermediate.dense | 45 | (4096, 1024) | (215, 1024) | (4096,) | (215,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
140 |
+
| 137 | nncf_module.bert.encoder.layer.22.output.dense | 45 | (1024, 4096) | (1024, 215) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[22]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
141 |
+
| 138 | nncf_module.bert.encoder.layer.23.attention.self.value | 46 | (1024, 1024) | (832, 1024) | (1024,) | (832,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
|
142 |
+
| 139 | nncf_module.bert.encoder.layer.23.attention.self.key | 46 | (1024, 1024) | (832, 1024) | (1024,) | (832,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
|
143 |
+
| 140 | nncf_module.bert.encoder.layer.23.attention.self.query | 46 | (1024, 1024) | (832, 1024) | (1024,) | (832,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
|
144 |
+
| 141 | nncf_module.bert.encoder.layer.23.attention.output.dense | 46 | (1024, 1024) | (1024, 832) | (1024,) | (1024,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
|
145 |
+
| 142 | nncf_module.bert.encoder.layer.23.output.dense | 47 | (1024, 4096) | (1024, 351) | (1024,) | (1024,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
|
146 |
+
| 143 | nncf_module.bert.encoder.layer.23.intermediate.dense | 47 | (4096, 1024) | (351, 1024) | (4096,) | (351,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[23]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
|
ir/sparsity_structures.pkl
ADDED
Binary file (615 kB). View file
|
|
ir/squad-BertForQuestionAnswering.cropped.8bit.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84af2a2d59eeff528ab10adc9f01ad3d0322868052c94b92aeb90bf9a62f1c78
|
3 |
+
size 233108484
|
ir/squad-BertForQuestionAnswering.cropped.8bit.mapping
ADDED
The diff for this file is too large to render.
See raw diff
|
|
ir/squad-BertForQuestionAnswering.cropped.8bit.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ace675dbbcdb65094e664b1736adb7b8efcee8f3ff07e097b9db693970df877f
|
3 |
+
size 929594779
|
ir/squad-BertForQuestionAnswering.cropped.8bit.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
nncf-mvmt-p3.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"input_info": [
|
3 |
+
{
|
4 |
+
"sample_size": [1, 384],
|
5 |
+
"type": "long"
|
6 |
+
},
|
7 |
+
{
|
8 |
+
"sample_size": [1, 384],
|
9 |
+
"type": "long"
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"sample_size": [1, 384],
|
13 |
+
"type": "long"
|
14 |
+
}
|
15 |
+
],
|
16 |
+
"compression":
|
17 |
+
[
|
18 |
+
{
|
19 |
+
"algorithm": "movement_sparsity",
|
20 |
+
"params": {
|
21 |
+
"schedule": "threshold_polynomial_decay",
|
22 |
+
"power": 3,
|
23 |
+
"init_importance_threshold": 0.0,
|
24 |
+
"final_importance_threshold": 0.1,
|
25 |
+
"warmup_start_epoch": 1,
|
26 |
+
"warmup_end_epoch": 10,
|
27 |
+
"steps_per_epoch": 2767, // 5533,
|
28 |
+
"importance_regularization_factor": 0.025,
|
29 |
+
"update_per_optimizer_step": true,
|
30 |
+
},
|
31 |
+
"sparse_structure_by_scopes": [
|
32 |
+
["block", [32, 32], "{re}.*BertAttention*"],
|
33 |
+
["per_dim", [0], "{re}.*BertIntermediate*"],
|
34 |
+
["per_dim", [1], "{re}.*BertOutput*"]
|
35 |
+
],
|
36 |
+
"ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*qa_outputs*"]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"algorithm": "quantization",
|
40 |
+
"initializer": {
|
41 |
+
"range": {
|
42 |
+
"num_init_samples": 32,
|
43 |
+
"type": "percentile",
|
44 |
+
"params":
|
45 |
+
{
|
46 |
+
"min_percentile": 0.01,
|
47 |
+
"max_percentile": 99.99
|
48 |
+
}
|
49 |
+
},
|
50 |
+
|
51 |
+
"batchnorm_adaptation": {
|
52 |
+
// "num_bn_adaptation_samples": 2
|
53 |
+
"num_bn_adaptation_samples": 200
|
54 |
+
}
|
55 |
+
},
|
56 |
+
"activations":
|
57 |
+
{
|
58 |
+
"mode": "symmetric"
|
59 |
+
},
|
60 |
+
"weights":
|
61 |
+
{
|
62 |
+
"mode": "symmetric",
|
63 |
+
"signed": true,
|
64 |
+
"per_channel": false
|
65 |
+
}
|
66 |
+
}
|
67 |
+
]
|
68 |
+
}
|
original_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
|
|