Vui Seng Chua
committed on
Commit
•
592f522
1
Parent(s):
73501a1
Update reports
Browse files
- README.md +2 -2
- XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv +0 -1
- XP_linear_layer_sparsity_20M_params_50.00_sparsity.md +1 -2
- all_results.json +3 -9
- compressed_graph.dot +2 -2
- eval_XP_results.json +2 -3
- eval_nbest_predictions.json +2 -2
- eval_predictions.json +0 -0
README.md
CHANGED
@@ -4,8 +4,8 @@ This model is a downstream optimization of [```vuiseng9/bert-base-squadv1-block-
|
|
4 |
3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
|
5 |
|
6 |
```
|
7 |
-
eval_exact_match = 80.
|
8 |
-
eval_f1 = 87.
|
9 |
eval_samples = 10784
|
10 |
```
|
11 |
|
|
|
4 |
3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
|
5 |
|
6 |
```
|
7 |
+
eval_exact_match = 80.2081
|
8 |
+
eval_f1 = 87.5921
|
9 |
eval_samples = 10784
|
10 |
```
|
11 |
|
XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv
CHANGED
@@ -71,4 +71,3 @@
|
|
71 |
187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
|
72 |
191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
|
73 |
193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
|
74 |
-
197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
|
|
|
71 |
187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
|
72 |
191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
|
73 |
193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
|
|
XP_linear_layer_sparsity_20M_params_50.00_sparsity.md
CHANGED
@@ -71,5 +71,4 @@
|
|
71 |
| 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
|
72 |
| 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
|
73 |
| 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
|
74 |
-
| 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
|
75 |
-
| 197 | nncf_module.qa_outputs | NNCFLinear | weight | [2, 768] | 1536 | 1536 | 0 |
|
|
|
71 |
| 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
|
72 |
| 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
|
73 |
| 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
|
74 |
+
| 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
|
|
all_results.json
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"eval_samples": 10784,
|
6 |
-
"train_loss": 0.26923960941476244,
|
7 |
-
"train_runtime": 49132.7191,
|
8 |
-
"train_samples": 88524,
|
9 |
-
"train_samples_per_second": 9.009,
|
10 |
-
"train_steps_per_second": 0.563
|
11 |
}
|
|
|
1 |
{
|
2 |
+
"eval_exact_match": 80.2081362346263,
|
3 |
+
"eval_f1": 87.59209878290773,
|
4 |
+
"eval_samples": 10784
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
}
|
compressed_graph.dot
CHANGED
@@ -1396,7 +1396,7 @@ strict digraph {
|
|
1396 |
"670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
|
1397 |
"670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
|
1398 |
"671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
|
1399 |
-
"672 BertForQuestionAnswering/contiguous_0" -> "
|
1400 |
"673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
|
1401 |
-
"674 BertForQuestionAnswering/contiguous_1" -> "
|
1402 |
}
|
|
|
1396 |
"670 BertForQuestionAnswering/split_0" -> "671 BertForQuestionAnswering/squeeze_0" [label="(1, 384, 1)", style=solid];
|
1397 |
"670 BertForQuestionAnswering/split_0" -> "673 BertForQuestionAnswering/squeeze_1" [label="(1, 384, 1)", style=solid];
|
1398 |
"671 BertForQuestionAnswering/squeeze_0" -> "672 BertForQuestionAnswering/contiguous_0" [label="(1, 384)", style=solid];
|
1399 |
+
"672 BertForQuestionAnswering/contiguous_0" -> "676 /nncf_model_output_1" [label="(1, 384)", style=solid];
|
1400 |
"673 BertForQuestionAnswering/squeeze_1" -> "674 BertForQuestionAnswering/contiguous_1" [label="(1, 384)", style=solid];
|
1401 |
+
"674 BertForQuestionAnswering/contiguous_1" -> "675 /nncf_model_output_0" [label="(1, 384)", style=solid];
|
1402 |
}
|
eval_XP_results.json
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"eval_f1": 87.47131878910791,
|
5 |
"eval_samples": 10784
|
6 |
}
|
|
|
1 |
{
|
2 |
+
"eval_exact_match": 80.2081362346263,
|
3 |
+
"eval_f1": 87.59209878290773,
|
|
|
4 |
"eval_samples": 10784
|
5 |
}
|
eval_nbest_predictions.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3cfc998d363339e4ee4b039b32712d6d951766a2c52eaef5554d372d95cd3f
|
3 |
+
size 48938782
|
eval_predictions.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|