File size: 29,303 Bytes
17658f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
| | pt_module_name | block_id | orig_w_shape | final_w_shape | orig_b_shape | final_b_shape | prune_by | id_to_keep | head_id_to_keep | nncf_graph_node |
|---:|:---------------------------------------------------------|-----------:|:---------------|:----------------|:---------------|:----------------|:-----------------|:-------------|:---------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | nncf_module.bert.encoder.layer.0.attention.self.query | 0 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 1 | nncf_module.bert.encoder.layer.0.attention.self.key | 0 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 2 | nncf_module.bert.encoder.layer.0.attention.output.dense | 0 | (768, 768) | (768, 256) | (768,) | (768,) | group of 64 cols | See pkl | [3, 7, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 3 | nncf_module.bert.encoder.layer.0.attention.self.value | 0 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 8, 10] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 4 | nncf_module.bert.encoder.layer.0.intermediate.dense | 1 | (3072, 768) | (2199, 768) | (3072,) | (2199,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 5 | nncf_module.bert.encoder.layer.0.output.dense | 1 | (768, 3072) | (768, 2199) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 6 | nncf_module.bert.encoder.layer.1.attention.output.dense | 2 | (768, 768) | (768, 320) | (768,) | (768,) | group of 64 cols | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 7 | nncf_module.bert.encoder.layer.1.attention.self.key | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 8 | nncf_module.bert.encoder.layer.1.attention.self.query | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 9 | nncf_module.bert.encoder.layer.1.attention.self.value | 2 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 4, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 10 | nncf_module.bert.encoder.layer.1.intermediate.dense | 3 | (3072, 768) | (2102, 768) | (3072,) | (2102,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 11 | nncf_module.bert.encoder.layer.1.output.dense | 3 | (768, 3072) | (768, 2102) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 12 | nncf_module.bert.encoder.layer.2.attention.self.query | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 13 | nncf_module.bert.encoder.layer.2.attention.output.dense | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 14 | nncf_module.bert.encoder.layer.2.attention.self.key | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 15 | nncf_module.bert.encoder.layer.2.attention.self.value | 4 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 16 | nncf_module.bert.encoder.layer.2.output.dense | 5 | (768, 3072) | (768, 2304) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 17 | nncf_module.bert.encoder.layer.2.intermediate.dense | 5 | (3072, 768) | (2304, 768) | (3072,) | (2304,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 18 | nncf_module.bert.encoder.layer.3.attention.self.value | 6 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 19 | nncf_module.bert.encoder.layer.3.attention.self.query | 6 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 20 | nncf_module.bert.encoder.layer.3.attention.output.dense | 6 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 21 | nncf_module.bert.encoder.layer.3.attention.self.key | 6 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 22 | nncf_module.bert.encoder.layer.3.intermediate.dense | 7 | (3072, 768) | (2243, 768) | (3072,) | (2243,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 23 | nncf_module.bert.encoder.layer.3.output.dense | 7 | (768, 3072) | (768, 2243) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 24 | nncf_module.bert.encoder.layer.4.attention.self.query | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 25 | nncf_module.bert.encoder.layer.4.attention.output.dense | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 26 | nncf_module.bert.encoder.layer.4.attention.self.key | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 27 | nncf_module.bert.encoder.layer.4.attention.self.value | 8 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 28 | nncf_module.bert.encoder.layer.4.intermediate.dense | 9 | (3072, 768) | (2042, 768) | (3072,) | (2042,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 29 | nncf_module.bert.encoder.layer.4.output.dense | 9 | (768, 3072) | (768, 2042) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 30 | nncf_module.bert.encoder.layer.5.attention.self.key | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 31 | nncf_module.bert.encoder.layer.5.attention.self.query | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 32 | nncf_module.bert.encoder.layer.5.attention.self.value | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 33 | nncf_module.bert.encoder.layer.5.attention.output.dense | 10 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 34 | nncf_module.bert.encoder.layer.5.intermediate.dense | 11 | (3072, 768) | (2007, 768) | (3072,) | (2007,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 35 | nncf_module.bert.encoder.layer.5.output.dense | 11 | (768, 3072) | (768, 2007) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 36 | nncf_module.bert.encoder.layer.6.attention.self.value | 12 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [0, 1, 7, 8, 9, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 37 | nncf_module.bert.encoder.layer.6.attention.self.query | 12 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [0, 1, 7, 8, 9, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 38 | nncf_module.bert.encoder.layer.6.attention.self.key | 12 | (768, 768) | (384, 768) | (768,) | (384,) | group of 64 rows | See pkl | [0, 1, 7, 8, 9, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 39 | nncf_module.bert.encoder.layer.6.attention.output.dense | 12 | (768, 768) | (768, 384) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 7, 8, 9, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 40 | nncf_module.bert.encoder.layer.6.intermediate.dense | 13 | (3072, 768) | (1610, 768) | (3072,) | (1610,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 41 | nncf_module.bert.encoder.layer.6.output.dense | 13 | (768, 3072) | (768, 1610) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 42 | nncf_module.bert.encoder.layer.7.attention.output.dense | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 43 | nncf_module.bert.encoder.layer.7.attention.self.query | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 44 | nncf_module.bert.encoder.layer.7.attention.self.value | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 45 | nncf_module.bert.encoder.layer.7.attention.self.key | 14 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 46 | nncf_module.bert.encoder.layer.7.intermediate.dense | 15 | (3072, 768) | (1262, 768) | (3072,) | (1262,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 47 | nncf_module.bert.encoder.layer.7.output.dense | 15 | (768, 3072) | (768, 1262) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 48 | nncf_module.bert.encoder.layer.8.attention.self.value | 16 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 49 | nncf_module.bert.encoder.layer.8.attention.self.query | 16 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 50 | nncf_module.bert.encoder.layer.8.attention.self.key | 16 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 rows | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 51 | nncf_module.bert.encoder.layer.8.attention.output.dense | 16 | (768, 768) | (768, 768) | (768,) | (768,) | group of 64 cols | See pkl | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 52 | nncf_module.bert.encoder.layer.8.output.dense | 17 | (768, 3072) | (768, 794) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 53 | nncf_module.bert.encoder.layer.8.intermediate.dense | 17 | (3072, 768) | (794, 768) | (3072,) | (794,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 54 | nncf_module.bert.encoder.layer.9.attention.self.query | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 55 | nncf_module.bert.encoder.layer.9.attention.self.key | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 56 | nncf_module.bert.encoder.layer.9.attention.self.value | 18 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 57 | nncf_module.bert.encoder.layer.9.attention.output.dense | 18 | (768, 768) | (768, 320) | (768,) | (768,) | group of 64 cols | See pkl | [0, 2, 6, 8, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 58 | nncf_module.bert.encoder.layer.9.intermediate.dense | 19 | (3072, 768) | (305, 768) | (3072,) | (305,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 59 | nncf_module.bert.encoder.layer.9.output.dense | 19 | (768, 3072) | (768, 305) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 60 | nncf_module.bert.encoder.layer.10.attention.self.value | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 61 | nncf_module.bert.encoder.layer.10.attention.self.query | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 62 | nncf_module.bert.encoder.layer.10.attention.output.dense | 20 | (768, 768) | (768, 256) | (768,) | (768,) | group of 64 cols | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 63 | nncf_module.bert.encoder.layer.10.attention.self.key | 20 | (768, 768) | (256, 768) | (768,) | (256,) | group of 64 rows | See pkl | [3, 7, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 64 | nncf_module.bert.encoder.layer.10.intermediate.dense | 21 | (3072, 768) | (305, 768) | (3072,) | (305,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 65 | nncf_module.bert.encoder.layer.10.output.dense | 21 | (768, 3072) | (768, 305) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0 |
| 66 | nncf_module.bert.encoder.layer.11.attention.self.query | 22 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 2, 3, 4, 8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
| 67 | nncf_module.bert.encoder.layer.11.attention.output.dense | 22 | (768, 768) | (768, 320) | (768,) | (768,) | group of 64 cols | See pkl | [1, 2, 3, 4, 8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0 |
| 68 | nncf_module.bert.encoder.layer.11.attention.self.value | 22 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 2, 3, 4, 8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
| 69 | nncf_module.bert.encoder.layer.11.attention.self.key | 22 | (768, 768) | (320, 768) | (768,) | (320,) | group of 64 rows | See pkl | [1, 2, 3, 4, 8] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0 |
| 70 | nncf_module.bert.encoder.layer.11.intermediate.dense | 23 | (3072, 768) | (364, 768) | (3072,) | (364,) | row | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0 |
| 71 | nncf_module.bert.encoder.layer.11.output.dense | 23 | (768, 3072) | (768, 364) | (768,) | (768,) | col | See pkl | | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0 | |