Initial revision.
Browse files- config.json +22 -0
- data_args.json +16 -0
- eval_metrics.json +4 -0
- evaluate_timing.json +1 -0
- model_args.json +6 -0
- pytorch_model.bin +3 -0
- sparse_args.json +27 -0
- sparsity_report.json +1 -0
- special_tokens_map.json +1 -0
- speed_report.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-uncased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForQuestionAnswering"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"type_vocab_size": 2,
|
21 |
+
"vocab_size": 30522
|
22 |
+
}
|
data_args.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_cache_dir": "dataset_cache",
|
3 |
+
"dataset_config_name": null,
|
4 |
+
"dataset_name": "squad",
|
5 |
+
"doc_stride": 128,
|
6 |
+
"max_answer_length": 30,
|
7 |
+
"max_seq_length": 384,
|
8 |
+
"n_best_size": 20,
|
9 |
+
"null_score_diff_threshold": 0.0,
|
10 |
+
"overwrite_cache": 0,
|
11 |
+
"pad_to_max_length": true,
|
12 |
+
"preprocessing_num_workers": null,
|
13 |
+
"train_file": null,
|
14 |
+
"validation_file": null,
|
15 |
+
"version_2_with_negative": false
|
16 |
+
}
|
eval_metrics.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"exact_match": 78.26868495742669,
|
3 |
+
"f1": 86.30683282660192
|
4 |
+
}
|
evaluate_timing.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"eval_elapsed_time": 80.22872724197805}
|
model_args.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cache_dir": null,
|
3 |
+
"config_name": null,
|
4 |
+
"model_name_or_path": "bert-base-uncased",
|
5 |
+
"tokenizer_name": null
|
6 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e19fbe64099a36780c4d41f362e41cd98f20e1c1b44611d7f80710753ad59f4b
|
3 |
+
size 435853395
|
sparse_args.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"ampere_pruning_method": "disabled",
|
3 |
+
"attention_block_cols": 768,
|
4 |
+
"attention_block_rows": 64,
|
5 |
+
"attention_lambda": 0.00156,
|
6 |
+
"attention_output_with_dense": 0,
|
7 |
+
"attention_pruning_method": "sigmoied_threshold",
|
8 |
+
"dense_block_cols": 1,
|
9 |
+
"dense_block_rows": 1,
|
10 |
+
"dense_lambda": 1.0,
|
11 |
+
"dense_pruning_method": "sigmoied_threshold:1d_alt",
|
12 |
+
"distil_alpha_ce": 0.1,
|
13 |
+
"distil_alpha_teacher": 0.9,
|
14 |
+
"distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1",
|
15 |
+
"distil_temperature": 2.0,
|
16 |
+
"final_ampere_temperature": 20.0,
|
17 |
+
"final_threshold": 0.1,
|
18 |
+
"final_warmup": 10,
|
19 |
+
"initial_ampere_temperature": 0.0,
|
20 |
+
"initial_threshold": 0,
|
21 |
+
"initial_warmup": 1,
|
22 |
+
"mask_init": "constant",
|
23 |
+
"mask_scale": 0.0,
|
24 |
+
"mask_scores_learning_rate": 0.01,
|
25 |
+
"regularization": "l1",
|
26 |
+
"regularization_final_lambda": 10.0
|
27 |
+
}
|
sparsity_report.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
speed_report.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"timings": {"eval_elapsed_time": 12.045548059046268}, "metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1d1b7aa97766771f559f054a999e70f936f9a8600a3f4eb1568203c26ddde75
|
3 |
+
size 1839
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|