Vui Seng Chua
committed on
Commit
•
592f522
1
Parent(s):
73501a1
Update reports
Browse files
- README.md +2 -2
- XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv +0 -1
- XP_linear_layer_sparsity_20M_params_50.00_sparsity.md +1 -2
- all_results.json +3 -9
- compressed_graph.dot +2 -2
- eval_XP_results.json +2 -3
- eval_nbest_predictions.json +2 -2
- eval_predictions.json +0 -0
README.md
CHANGED
@@ -4,8 +4,8 @@ This model is a downstream optimization of [```vuiseng9/bert-base-squadv1-block-
|
|
4 |
3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
|
5 |
|
6 |
```
|
7 |
-
eval_exact_match = 80.
|
8 |
-
eval_f1 = 87.
|
9 |
eval_samples = 10784
|
10 |
```
|
11 |
|
|
|
4 |
3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
|
5 |
|
6 |
```
|
7 |
+
eval_exact_match = 80.2081
|
8 |
+
eval_f1 = 87.5921
|
9 |
eval_samples = 10784
|
10 |
```
|
11 |
|
XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv
CHANGED
@@ -71,4 +71,3 @@
|
|
71 |
187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
|
72 |
191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
|
73 |
193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
|
74 |
-
197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
|
|
|
71 |
187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
|
72 |
191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
|
73 |
193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
|
|
XP_linear_layer_sparsity_20M_params_50.00_sparsity.md
CHANGED
@@ -71,5 +71,4 @@
|
|
71 |
| 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
|
72 |
| 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
|
73 |
| 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
|
74 |
-
| 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
|
75 |
-
| 197 | nncf_module.qa_outputs | NNCFLinear | weight | [2, 768] | 1536 | 1536 | 0 |
|
|
|
71 |
| 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
|
72 |
| 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
|
73 |
| 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
|
74 |
+
| 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
|
|
all_results.json
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"eval_samples": 10784,
|
6 |
-
"train_loss": 0.26923960941476244,
|
7 |
-
"train_runtime": 49132.7191,
|
8 |
-
"train_samples": 88524,
|
9 |
-
"train_samples_per_second": 9.009,
|
10 |
-
"train_steps_per_second": 0.563
|
11 |
}
|
|
|
1 |
{
|
2 |
+
"eval_exact_match": 80.2081362346263,
|
3 |
+
"eval_f1": 87.59209878290773,
|
4 |
+
"eval_samples": 10784
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
}
|
compressed_graph.dot
CHANGED
@@ -1396,7 +1396,7 @@ strict digraph {
|
|
1396 |
"670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
|
1397 |
"670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
|
1398 |
"671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
|
1399 |
-
"672 BertForQuestionAnswering/contiguous_0" -> "
|
1400 |
"673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
|
1401 |
-
"674 BertForQuestionAnswering/contiguous_1" -> "
|
1402 |
}
|
|
|
1396 |
"670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
|
1397 |
"670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
|
1398 |
"671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
|
1399 |
+
"672 BertForQuestionAnswering/contiguous_0" -> "676 /nncf_model_output_1" [label="(1, 384)", style=solid];
|
1400 |
"673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
|
1401 |
+
"674 BertForQuestionAnswering/contiguous_1" -> "675 /nncf_model_output_0" [label="(1, 384)", style=solid];
|
1402 |
}
|
eval_XP_results.json
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"eval_f1": 87.47131878910791,
|
5 |
"eval_samples": 10784
|
6 |
}
|
|
|
1 |
{
|
2 |
+
"eval_exact_match": 80.2081362346263,
|
3 |
+
"eval_f1": 87.59209878290773,
|
|
|
4 |
"eval_samples": 10784
|
5 |
}
|
eval_nbest_predictions.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3cfc998d363339e4ee4b039b32712d6d951766a2c52eaef5554d372d95cd3f
|
3 |
+
size 48938782
|
eval_predictions.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|