Vui Seng Chua committed on
Commit
592f522
1 Parent(s): 73501a1

Update reports

Browse files
README.md CHANGED
@@ -4,8 +4,8 @@ This model is a downstream optimization of [```vuiseng9/bert-base-squadv1-block-
4
  3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
5
 
6
  ```
7
- eval_exact_match = 80.3217
8
- eval_f1 = 87.635
9
  eval_samples = 10784
10
  ```
11
 
 
4
  3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
5
 
6
  ```
7
+ eval_exact_match = 80.2081
8
+ eval_f1 = 87.5921
9
  eval_samples = 10784
10
  ```
11
 
XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv CHANGED
@@ -71,4 +71,3 @@
71
  187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
72
  191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
73
  193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
74
- 197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
 
71
  187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
72
  191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
73
  193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
 
XP_linear_layer_sparsity_20M_params_50.00_sparsity.md CHANGED
@@ -71,5 +71,4 @@
71
  | 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
72
  | 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
73
  | 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
74
- | 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
75
- | 197 | nncf_module.qa_outputs | NNCFLinear | weight | [2, 768] | 1536 | 1536 | 0 |
 
71
  | 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
72
  | 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
73
  | 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
74
+ | 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
 
all_results.json CHANGED
@@ -1,11 +1,5 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_exact_match": 80.10406811731315,
4
- "eval_f1": 87.47131878910791,
5
- "eval_samples": 10784,
6
- "train_loss": 0.26923960941476244,
7
- "train_runtime": 49132.7191,
8
- "train_samples": 88524,
9
- "train_samples_per_second": 9.009,
10
- "train_steps_per_second": 0.563
11
  }
 
1
  {
2
+ "eval_exact_match": 80.2081362346263,
3
+ "eval_f1": 87.59209878290773,
4
+ "eval_samples": 10784
 
 
 
 
 
 
5
  }
compressed_graph.dot CHANGED
@@ -1396,7 +1396,7 @@ strict digraph {
1396
  "670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
1397
  "670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
1398
  "671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
1399
- "672 BertForQuestionAnswering/contiguous_0" -> "675 /nncf_model_output_0" [label="(1, 384)", style=solid];
1400
  "673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
1401
- "674 BertForQuestionAnswering/contiguous_1" -> "676 /nncf_model_output_1" [label="(1, 384)", style=solid];
1402
  }
 
1396
  "670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
1397
  "670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
1398
  "671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
1399
+ "672 BertForQuestionAnswering/contiguous_0" -> "676 /nncf_model_output_1" [label="(1, 384)", style=solid];
1400
  "673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
1401
+ "674 BertForQuestionAnswering/contiguous_1" -> "675 /nncf_model_output_0" [label="(1, 384)", style=solid];
1402
  }
eval_XP_results.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_exact_match": 80.10406811731315,
4
- "eval_f1": 87.47131878910791,
5
  "eval_samples": 10784
6
  }
 
1
  {
2
+ "eval_exact_match": 80.2081362346263,
3
+ "eval_f1": 87.59209878290773,
 
4
  "eval_samples": 10784
5
  }
eval_nbest_predictions.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7763073ce09e59016bc913ac5fa0c0ffa03cedde11804e2a7ae97b297d2584d1
3
- size 48946987
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa3cfc998d363339e4ee4b039b32712d6d951766a2c52eaef5554d372d95cd3f
3
+ size 48938782
eval_predictions.json CHANGED
The diff for this file is too large to render. See raw diff