tdiggelm committed on
Commit
650b3cf
1 Parent(s): 06071be

Initial import

README.md ADDED
@@ -0,0 +1,94 @@
+ ---
+ license: apache-2.0
+ language: en
+ tags:
+ - generated_from_trainer
+ datasets:
+ - squad_v2
+ model-index:
+ - name: distilroberta-base-squad_v2
+   results: []
+ ---
+
+ # distilroberta-base-squad_v2
+
+ This model is a fine-tuned version of [distilroberta-base](https://huggingface.co/distilroberta-base) on the squad_v2 dataset.
+
+ ## Model description
+
+ This model is fine-tuned for extractive question answering on the Stanford Question Answering Dataset, [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/).
+
+ For convenience, the model is provided for the `PyTorch`, `TensorFlow` and `ONNX` frameworks.
+
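+ Below is a minimal sketch of querying the exported `model.onnx` with `onnxruntime`. The input/output layout (`input_ids`/`attention_mask` in, `start_logits`/`end_logits` out) is an assumption based on the standard `transformers` question-answering ONNX export, not something this repository documents:
+
+ ```python
+ # Hypothetical ONNX usage; assumes `pip install onnxruntime` and the
+ # standard transformers QA export layout for model.onnx.
+ import onnxruntime as ort
+ from transformers import AutoTokenizer
+
+ tokenizer = AutoTokenizer.from_pretrained("squirro/distilroberta-base-squad_v2")
+ session = ort.InferenceSession("model.onnx")
+
+ inputs = tokenizer(
+     "What's your name?",
+     "My name is Clara and I live in Berkeley.",
+     return_tensors="np",
+ )
+ # Run the network, then take the arg-max start/end token positions.
+ start_logits, end_logits = session.run(
+     None,
+     {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"]},
+ )
+ start, end = int(start_logits.argmax()), int(end_logits.argmax())
+ print(tokenizer.decode(inputs["input_ids"][0][start : end + 1]))
+ ```
+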
+ ## Intended uses & limitations
+
+ Since SQuAD2.0 includes unanswerable questions, this model can handle question-context pairs where the context does not contain the answer. Make sure to specify `handle_impossible_answer=True` when using `QuestionAnsweringPipeline`.
+
+ __Example usage:__
+
+ ```python
+ >>> from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
+ >>> model = AutoModelForQuestionAnswering.from_pretrained("squirro/distilroberta-base-squad_v2")
+ >>> tokenizer = AutoTokenizer.from_pretrained("squirro/distilroberta-base-squad_v2")
+ >>> qa_model = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+ >>> qa_model(
+ ...     question="What's your name?",
+ ...     context="My name is Clara and I live in Berkeley.",
+ ...     handle_impossible_answer=True,  # important!
+ ... )
+ {'score': 0.9027367830276489, 'start': 11, 'end': 16, 'answer': 'Clara'}
+ ```
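+
+ With `handle_impossible_answer=True` the pipeline can also return the empty answer when the context does not answer the question. A hypothetical illustration (the exact score will vary):
+
+ ```python
+ >>> qa_model(
+ ...     question="What is the capital of France?",
+ ...     context="My name is Clara and I live in Berkeley.",
+ ...     handle_impossible_answer=True,
+ ... )
+ {'score': ..., 'start': 0, 'end': 0, 'answer': ''}
+ ```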
+
+ ## Training and evaluation data
+
+ Training and evaluation were done on [SQuAD2.0](https://huggingface.co/datasets/squad_v2).
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a hypothetical `TrainingArguments` sketch follows the list):
+ - learning_rate: 5e-05
+ - train_batch_size: 64
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: tpu
+ - num_devices: 8
+ - total_train_batch_size: 512
+ - total_eval_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 3.0
+
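+ Note that total_train_batch_size is the per-device train_batch_size times num_devices (64 × 8 = 512), and likewise for total_eval_batch_size (8 × 8 = 64). As a rough, hypothetical reconstruction (the actual training script is not part of this repository), the settings map onto `transformers.TrainingArguments` roughly as follows:
+
+ ```python
+ # Hypothetical reconstruction of the training configuration; output_dir
+ # is a placeholder, not the directory actually used.
+ from transformers import TrainingArguments
+
+ args = TrainingArguments(
+     output_dir="distilroberta-base-squad_v2",
+     learning_rate=5e-05,
+     per_device_train_batch_size=64,  # x 8 TPU devices = 512 total
+     per_device_eval_batch_size=8,    # x 8 TPU devices = 64 total
+     seed=42,
+     lr_scheduler_type="linear",
+     num_train_epochs=3.0,
+     adam_beta1=0.9,
+     adam_beta2=0.999,
+     adam_epsilon=1e-08,
+ )
+ ```
+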
+ ### Training results
+
+ | Metric | Value |
+ |:-------------------------|-------------:|
+ | epoch | 3 |
+ | eval_HasAns_exact | 67.5776 |
+ | eval_HasAns_f1 | 74.3594 |
+ | eval_HasAns_total | 5928 |
+ | eval_NoAns_exact | 62.91 |
+ | eval_NoAns_f1 | 62.91 |
+ | eval_NoAns_total | 5945 |
+ | eval_best_exact | 65.2489 |
+ | eval_best_exact_thresh | 0 |
+ | eval_best_f1 | 68.6349 |
+ | eval_best_f1_thresh | 0 |
+ | eval_exact | 65.2405 |
+ | eval_f1 | 68.6265 |
+ | eval_samples | 12165 |
+ | eval_total | 11873 |
+ | train_loss | 1.40336 |
+ | train_runtime | 1365.28 |
+ | train_samples | 131823 |
+ | train_samples_per_second | 289.662 |
+ | train_steps_per_second | 0.567 |
+
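+ The exact-match and F1 figures follow the official SQuAD2.0 evaluation. A minimal, hypothetical sketch of computing them with the `datasets` library (`load_metric` in the Datasets version listed below; the IDs and predictions here are made up for illustration):
+
+ ```python
+ # Illustrative metric computation only; not the original evaluation run.
+ from datasets import load_metric
+
+ metric = load_metric("squad_v2")
+ predictions = [
+     {"id": "q1", "prediction_text": "Clara", "no_answer_probability": 0.0},
+ ]
+ references = [
+     {"id": "q1", "answers": {"text": ["Clara"], "answer_start": [11]}},
+ ]
+ print(metric.compute(predictions=predictions, references=references))
+ ```
+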
+ ### Framework versions
+
+ - Transformers 4.17.0.dev0
+ - PyTorch 1.9.0+cu111
+ - Datasets 1.18.3
+ - Tokenizers 0.11.6
+
all_results.json ADDED
@@ -0,0 +1,22 @@
+ {
+     "epoch": 3.0,
+     "eval_HasAns_exact": 67.57759784075573,
+     "eval_HasAns_f1": 74.35942116486743,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 62.91000841042893,
+     "eval_NoAns_f1": 62.91000841042893,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 65.24888402257223,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 68.63494050916684,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 65.24046155141919,
+     "eval_f1": 68.6265180380136,
+     "eval_samples": 12165,
+     "eval_total": 11873,
+     "train_loss": 1.403361870028868,
+     "train_runtime": 1365.2765,
+     "train_samples": 131823,
+     "train_samples_per_second": 289.662,
+     "train_steps_per_second": 0.567
+ }
config.json ADDED
@@ -0,0 +1,27 @@
+ {
+     "_name_or_path": "./distilroberta-base-squad_v2/",
+     "architectures": [
+         "RobertaForQuestionAnswering"
+     ],
+     "attention_probs_dropout_prob": 0.1,
+     "bos_token_id": 0,
+     "classifier_dropout": null,
+     "eos_token_id": 2,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.1,
+     "hidden_size": 768,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "layer_norm_eps": 1e-05,
+     "max_position_embeddings": 514,
+     "model_type": "roberta",
+     "num_attention_heads": 12,
+     "num_hidden_layers": 6,
+     "pad_token_id": 1,
+     "position_embedding_type": "absolute",
+     "torch_dtype": "float32",
+     "transformers_version": "4.17.0",
+     "type_vocab_size": 1,
+     "use_cache": true,
+     "vocab_size": 50265
+ }
eval_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+     "epoch": 3.0,
+     "eval_HasAns_exact": 67.57759784075573,
+     "eval_HasAns_f1": 74.35942116486743,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 62.91000841042893,
+     "eval_NoAns_f1": 62.91000841042893,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 65.24888402257223,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 68.63494050916684,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 65.24046155141919,
+     "eval_f1": 68.6265180380136,
+     "eval_samples": 12165,
+     "eval_total": 11873
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f323a5dec09e89692e1734d2c48e27f764681e0f04fff74d396768271fd4ad11
+ size 326157245
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e09041fdc00c057cf9d19459de8885f534aeaf78970e01a886933f43b27f256b
+ size 326154289
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6295f48bc710694c362ce7572dfd8e66f35913d7f75cd8f4a3ae04e996173c7d
+ size 326264448
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"errors": "replace", "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilroberta-base", "tokenizer_class": "RobertaTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 3.0,
+     "train_loss": 1.403361870028868,
+     "train_runtime": 1365.2765,
+     "train_samples": 131823,
+     "train_samples_per_second": 289.662,
+     "train_steps_per_second": 0.567
+ }
trainer_state.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "best_metric": null,
+     "best_model_checkpoint": null,
+     "epoch": 3.0,
+     "global_step": 774,
+     "is_hyper_param_search": false,
+     "is_local_process_zero": true,
+     "is_world_process_zero": true,
+     "log_history": [
+         {
+             "epoch": 1.94,
+             "learning_rate": 1.7700258397932818e-05,
+             "loss": 1.5796,
+             "step": 500
+         },
+         {
+             "epoch": 3.0,
+             "step": 774,
+             "total_flos": 4854023972388864.0,
+             "train_loss": 1.403361870028868,
+             "train_runtime": 1365.2765,
+             "train_samples_per_second": 289.662,
+             "train_steps_per_second": 0.567
+         }
+     ],
+     "max_steps": 774,
+     "num_train_epochs": 3,
+     "total_flos": 4854023972388864.0,
+     "trial_name": null,
+     "trial_params": null
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04013c80b52dd2596cc4123eb08f6353ab42307060d89a6aa05bebf401a5617b
+ size 3055
vocab.json ADDED
The diff for this file is too large to render. See raw diff