helenai committed on
Commit
2b96c47
1 Parent(s): d87fc97

Quantized model with overflow_fix enabled

Browse files
README.md CHANGED
@@ -5,19 +5,30 @@ tags:
5
  datasets:
6
  - squad
7
  model-index:
8
- - name: bert-base-uncased-squad-v1-jpqd-ov-int8
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
- # bert-base-uncased-squad-v1-jpqd-ov-int8
16
 
17
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
18
- It was compressed using [NNCF](https://github.com/openvinotoolkit/nncf) with [Optimum
19
- Intel](https://github.com/huggingface/optimum-intel#openvino) following the [JPQD question-answering
20
- example](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino/question-answering#joint-pruning-quantization-and-distillation-jpqd-for-bert-on-squad10).
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ### Training hyperparameters
23
 
@@ -33,13 +44,7 @@ The following hyperparameters were used during training:
33
 
34
  ### Training results
35
 
36
- ```
37
- ***** eval metrics *****
38
- epoch = 8.0
39
- eval_exact_match = 83.141
40
- eval_f1 = 89.5906
41
- eval_samples = 10784
42
- ```
43
 
44
  ### Framework versions
45
 
 
5
  datasets:
6
  - squad
7
  model-index:
8
+ - name: jpqd_bert_squad_overflowfix
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
+ # jpqd_bert_squad_overflowfix
16
 
17
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
18
+
19
+ ## Model description
20
+
21
+ More information needed
22
+
23
+ ## Intended uses & limitations
24
+
25
+ More information needed
26
+
27
+ ## Training and evaluation data
28
+
29
+ More information needed
30
+
31
+ ## Training procedure
32
 
33
  ### Training hyperparameters
34
 
 
44
 
45
  ### Training results
46
 
47
+
 
 
 
 
 
 
48
 
49
  ### Framework versions
50
 
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_exact_match": 83.14096499526963,
4
- "eval_f1": 89.59061048191492,
5
  "eval_samples": 10784,
6
- "train_loss": 2.368001897127539,
7
- "train_runtime": 49077.3955,
8
  "train_samples": 88524,
9
- "train_samples_per_second": 14.43,
10
- "train_steps_per_second": 0.902
11
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_exact_match": 83.33964049195838,
4
+ "eval_f1": 89.80725863442484,
5
  "eval_samples": 10784,
6
+ "train_loss": 2.369025745212132,
7
+ "train_runtime": 49814.833,
8
  "train_samples": 88524,
9
+ "train_samples_per_second": 14.216,
10
+ "train_steps_per_second": 0.889
11
  }
openvino_config.json CHANGED
@@ -56,7 +56,7 @@
56
  "type": "percentile"
57
  }
58
  },
59
- "overflow_fix": "disable",
60
  "preset": "mixed",
61
  "scope_overrides": {
62
  "activations": {
 
56
  "type": "percentile"
57
  }
58
  },
59
+ "overflow_fix": "enable",
60
  "preset": "mixed",
61
  "scope_overrides": {
62
  "activations": {
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81f4e1eba20221fd9fcc6bb8820422413f8413123d7418b0a303fc262449ce59
3
- size 75477404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cead9787a2b86a186358962622f9cd81c9d7b83ce860c5422b7585c088bc94ac
3
+ size 75452788
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
structured_sparsity.csv CHANGED
@@ -3,71 +3,71 @@
3
  1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
4
  2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
5
  3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
6
- 4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2066, 768)","(3072,)","(2066,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
7
- 5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2066)","(768,)","(768,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
8
- 6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
9
- 7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
10
- 8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
11
- 9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
12
- 10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2067, 768)","(3072,)","(2067,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
13
- 11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2067)","(768,)","(768,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
14
  12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
15
  13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
16
  14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
17
  15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
18
- 16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2082, 768)","(3072,)","(2082,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
19
- 17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2082)","(768,)","(768,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
20
  18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
21
  19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
22
  20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
23
  21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
24
- 22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2136, 768)","(3072,)","(2136,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
25
- 23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2136)","(768,)","(768,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
26
- 24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
27
- 25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
28
- 26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
29
- 27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 640)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
30
- 28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2023, 768)","(3072,)","(2023,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
31
- 29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2023)","(768,)","(768,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
32
  30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
33
  31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
34
  32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
35
  33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
36
- 34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(2011, 768)","(3072,)","(2011,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
37
- 35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 2011)","(768,)","(768,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
38
- 36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
39
- 37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
40
- 38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
41
- 39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
42
- 40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1871, 768)","(3072,)","(1871,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
43
- 41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1871)","(768,)","(768,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
44
  42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
45
  43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
46
  44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
47
  45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
48
- 46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1858, 768)","(3072,)","(1858,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
49
- 47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1858)","(768,)","(768,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
50
- 48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
51
- 49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
52
- 50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
53
- 51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
54
- 52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1637, 768)","(3072,)","(1637,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
55
- 53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1637)","(768,)","(768,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
56
- 54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
57
- 55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
58
- 56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
59
- 57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
60
- 58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1257, 768)","(3072,)","(1257,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
61
- 59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1257)","(768,)","(768,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
62
  60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
63
  61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
64
  62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
65
  63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
66
- 64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1159, 768)","(3072,)","(1159,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
67
- 65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1159)","(768,)","(768,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
68
- 66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
69
- 67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
70
- 68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
71
- 69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
72
- 70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1017, 768)","(3072,)","(1017,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
73
- 71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1017)","(768,)","(768,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
 
3
  1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
4
  2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
5
  3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
6
+ 4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2089, 768)","(3072,)","(2089,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
7
+ 5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2089)","(768,)","(768,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
8
+ 6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
9
+ 7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
10
+ 8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
11
+ 9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
12
+ 10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2042, 768)","(3072,)","(2042,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
13
+ 11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2042)","(768,)","(768,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
14
  12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
15
  13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
16
  14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
17
  15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
18
+ 16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2103, 768)","(3072,)","(2103,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
19
+ 17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2103)","(768,)","(768,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
20
  18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
21
  19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
22
  20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
23
  21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
24
+ 22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2125, 768)","(3072,)","(2125,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
25
+ 23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2125)","(768,)","(768,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
26
+ 24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
27
+ 25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
28
+ 26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
29
+ 27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 704)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
30
+ 28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2049, 768)","(3072,)","(2049,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
31
+ 29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2049)","(768,)","(768,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
32
  30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
33
  31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
34
  32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
35
  33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
36
+ 34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(1998, 768)","(3072,)","(1998,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
37
+ 35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 1998)","(768,)","(768,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
38
+ 36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
39
+ 37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
40
+ 38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
41
+ 39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
42
+ 40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1874, 768)","(3072,)","(1874,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
43
+ 41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1874)","(768,)","(768,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
44
  42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
45
  43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
46
  44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
47
  45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
48
+ 46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1806, 768)","(3072,)","(1806,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
49
+ 47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1806)","(768,)","(768,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
50
+ 48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
51
+ 49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
52
+ 50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
53
+ 51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
54
+ 52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1654, 768)","(3072,)","(1654,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
55
+ 53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1654)","(768,)","(768,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
56
+ 54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
57
+ 55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
58
+ 56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
59
+ 57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
60
+ 58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1311, 768)","(3072,)","(1311,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
61
+ 59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1311)","(768,)","(768,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
62
  60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
63
  61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
64
  62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
65
  63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
66
+ 64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1090, 768)","(3072,)","(1090,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
67
+ 65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1090)","(768,)","(768,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
68
+ 66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
69
+ 67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
70
+ 68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
71
+ 69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 320)","(768,)","(768,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
72
+ 70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1027, 768)","(3072,)","(1027,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
73
+ 71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1027)","(768,)","(768,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 384,
6
- "strategy": "OnlySecond",
7
- "stride": 128
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 384
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5552aa8097d9d47b0343cdacd36e5bc50157c344cb9488c095d5eb9a1bc83f5
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:838f7a4735c59774d578e9a43a91763c34b74e016c13d43a3202f814296076cf
3
  size 3579