josecannete committed on
Commit 7c8cb32
1 Parent(s): b4bc653

adding model fine-tuned on NER

all_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "epoch": 3.0,
+   "eval_accuracy": 0.9777703135973618,
+   "eval_f1": 0.8632724446171783,
+   "eval_loss": 0.09008093178272247,
+   "eval_precision": 0.8535198278512143,
+   "eval_recall": 0.8732505110866489,
+   "eval_runtime": 2.5664,
+   "eval_samples": 1916,
+   "eval_samples_per_second": 746.578,
+   "eval_steps_per_second": 46.759,
+   "train_loss": 0.08574806804925451,
+   "train_runtime": 168.5849,
+   "train_samples": 8324,
+   "train_samples_per_second": 148.127,
+   "train_steps_per_second": 9.271
+ }
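As a quick sanity check (not part of the commit), the reported eval_f1 is the harmonic mean of the reported precision and recall. A minimal Python sketch using the values above:

# Values copied from all_results.json above.
precision = 0.8535198278512143
recall = 0.8732505110866489
f1 = 2 * precision * recall / (precision + recall)
print(round(f1, 6))  # 0.863272, matching eval_f1 = 0.8632724446171783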
config.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "_name_or_path": "bertin-project/bertin-roberta-base-spanish",
+   "architectures": [
+     "RobertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "finetuning_task": "ner",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4",
+     "5": "LABEL_5",
+     "6": "LABEL_6",
+     "7": "LABEL_7",
+     "8": "LABEL_8"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3,
+     "LABEL_4": 4,
+     "LABEL_5": 5,
+     "LABEL_6": 6,
+     "LABEL_7": 7,
+     "LABEL_8": 8
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.15.0",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50262
+ }
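The config above declares a 9-way token-classification head on top of bertin-project/bertin-roberta-base-spanish, with generic label names LABEL_0 through LABEL_8. A minimal loading sketch; the local directory name is an assumption for illustration, not part of this commit:

from transformers import AutoConfig, AutoModelForTokenClassification, AutoTokenizer

model_dir = "./bertin-base-ner-es"  # hypothetical local path holding this commit's files

config = AutoConfig.from_pretrained(model_dir)
print(config.num_labels)   # 9, named LABEL_0 ... LABEL_8 in this checkpoint
print(config.id2label)

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForTokenClassification.from_pretrained(model_dir)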
eval_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "epoch": 3.0,
+   "eval_accuracy": 0.9777703135973618,
+   "eval_f1": 0.8632724446171783,
+   "eval_loss": 0.09008093178272247,
+   "eval_precision": 0.8535198278512143,
+   "eval_recall": 0.8732505110866489,
+   "eval_runtime": 2.5664,
+   "eval_samples": 1916,
+   "eval_samples_per_second": 746.578,
+   "eval_steps_per_second": 46.759
+ }
merges.txt ADDED
The diff for this file is too large to render.
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:157b08174a2976a55b86f7a0c9898fdd9488f7c6e2afd43f07f129bc2203f2a2
+ size 496323313
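pytorch_model.bin is stored as a Git LFS pointer; the oid line records the SHA-256 of the actual ~496 MB weights file. A small sketch to verify a downloaded copy against that digest (the local file path is assumed):

import hashlib

# Expected digest, copied from the LFS pointer's oid line above.
EXPECTED = "157b08174a2976a55b86f7a0c9898fdd9488f7c6e2afd43f07f129bc2203f2a2"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print(h.hexdigest() == EXPECTED)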
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "bertin-project/bertin-roberta-base-spanish", "do_lower_case": false, "tokenizer_class": "RobertaTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 3.0,
+   "train_loss": 0.08574806804925451,
+   "train_runtime": 168.5849,
+   "train_samples": 8324,
+   "train_samples_per_second": 148.127,
+   "train_steps_per_second": 9.271
+ }
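The throughput figures follow directly from the sample count, epoch count, and runtime (1563 optimizer steps in total, per trainer_state.json below); a quick arithmetic check:

train_samples, epochs, runtime_s, steps = 8324, 3.0, 168.5849, 1563
print(train_samples * epochs / runtime_s)  # ~148.13 samples/s (reported 148.127)
print(steps / runtime_s)                   # ~9.27 steps/s (reported 9.271)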
trainer_state.json ADDED
@@ -0,0 +1,223 @@
+ {
+   "best_metric": 0.09008093178272247,
+   "best_model_checkpoint": "/home/jcanete/ft-data/all_results/ner/bertin_roberta_base/epochs_3_bs_16_lr_3e-5/checkpoint-900",
+   "epoch": 3.0,
+   "global_step": 1563,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.19,
+       "eval_accuracy": 0.8860423219029588,
+       "eval_f1": 0.348936885452559,
+       "eval_loss": 0.3453126847743988,
+       "eval_precision": 0.3747292418772563,
+       "eval_recall": 0.3264664255386067,
+       "eval_runtime": 2.6554,
+       "eval_samples_per_second": 721.552,
+       "eval_steps_per_second": 45.191,
+       "step": 100
+     },
+     {
+       "epoch": 0.38,
+       "eval_accuracy": 0.9502274878622248,
+       "eval_f1": 0.6605220354736272,
+       "eval_loss": 0.1896633505821228,
+       "eval_precision": 0.6507936507936508,
+       "eval_recall": 0.6705456832835351,
+       "eval_runtime": 2.5793,
+       "eval_samples_per_second": 742.845,
+       "eval_steps_per_second": 46.525,
+       "step": 200
+     },
+     {
+       "epoch": 0.58,
+       "eval_accuracy": 0.9694952517634127,
+       "eval_f1": 0.8140789988267502,
+       "eval_loss": 0.11993961036205292,
+       "eval_precision": 0.8098350451291628,
+       "eval_recall": 0.818367667872307,
+       "eval_runtime": 2.5794,
+       "eval_samples_per_second": 742.807,
+       "eval_steps_per_second": 46.522,
+       "step": 300
+     },
+     {
+       "epoch": 0.77,
+       "eval_accuracy": 0.9709762130141378,
+       "eval_f1": 0.8182973316391361,
+       "eval_loss": 0.11212530732154846,
+       "eval_precision": 0.8265682656826568,
+       "eval_recall": 0.8101902814908004,
+       "eval_runtime": 2.5886,
+       "eval_samples_per_second": 740.159,
+       "eval_steps_per_second": 46.357,
+       "step": 400
+     },
+     {
+       "epoch": 0.96,
+       "learning_rate": 2.0460652591170824e-05,
+       "loss": 0.2024,
+       "step": 500
+     },
+     {
+       "epoch": 0.96,
+       "eval_accuracy": 0.9743350942013497,
+       "eval_f1": 0.8378357811429876,
+       "eval_loss": 0.09800967574119568,
+       "eval_precision": 0.8117074609259806,
+       "eval_recall": 0.8657021544267967,
+       "eval_runtime": 2.5773,
+       "eval_samples_per_second": 743.42,
+       "eval_steps_per_second": 46.561,
+       "step": 500
+     },
+     {
+       "epoch": 1.15,
+       "eval_accuracy": 0.9757091819597545,
+       "eval_f1": 0.8524022961390265,
+       "eval_loss": 0.09717094898223877,
+       "eval_precision": 0.8524693299779805,
+       "eval_recall": 0.8523352728416418,
+       "eval_runtime": 2.6535,
+       "eval_samples_per_second": 722.067,
+       "eval_steps_per_second": 45.223,
+       "step": 600
+     },
+     {
+       "epoch": 1.34,
+       "eval_accuracy": 0.9756633790344743,
+       "eval_f1": 0.8469219278245145,
+       "eval_loss": 0.10494286566972733,
+       "eval_precision": 0.8468553459119497,
+       "eval_recall": 0.8469885202075798,
+       "eval_runtime": 2.5616,
+       "eval_samples_per_second": 747.968,
+       "eval_steps_per_second": 46.846,
+       "step": 700
+     },
+     {
+       "epoch": 1.54,
+       "eval_accuracy": 0.9754343644080735,
+       "eval_f1": 0.8522736197282652,
+       "eval_loss": 0.1033582091331482,
+       "eval_precision": 0.8512707875745215,
+       "eval_recall": 0.8532788174241233,
+       "eval_runtime": 2.5767,
+       "eval_samples_per_second": 743.6,
+       "eval_steps_per_second": 46.572,
+       "step": 800
+     },
+     {
+       "epoch": 1.73,
+       "eval_accuracy": 0.9777703135973618,
+       "eval_f1": 0.8632724446171783,
+       "eval_loss": 0.09008093178272247,
+       "eval_precision": 0.8535198278512143,
+       "eval_recall": 0.8732505110866489,
+       "eval_runtime": 2.5668,
+       "eval_samples_per_second": 746.461,
+       "eval_steps_per_second": 46.751,
+       "step": 900
+     },
+     {
+       "epoch": 1.92,
+       "learning_rate": 1.0863723608445298e-05,
+       "loss": 0.0435,
+       "step": 1000
+     },
+     {
+       "epoch": 1.92,
+       "eval_accuracy": 0.9767779168829582,
+       "eval_f1": 0.8591637849732788,
+       "eval_loss": 0.09683168679475784,
+       "eval_precision": 0.8587588373919874,
+       "eval_recall": 0.8595691146406668,
+       "eval_runtime": 2.5807,
+       "eval_samples_per_second": 742.428,
+       "eval_steps_per_second": 46.499,
+       "step": 1000
+     },
+     {
+       "epoch": 2.11,
+       "eval_accuracy": 0.9784726251183242,
+       "eval_f1": 0.8643483709273184,
+       "eval_loss": 0.09452169388532639,
+       "eval_precision": 0.8609767514432829,
+       "eval_recall": 0.8677465010221733,
+       "eval_runtime": 2.5589,
+       "eval_samples_per_second": 748.766,
+       "eval_steps_per_second": 46.896,
+       "step": 1100
+     },
+     {
+       "epoch": 2.3,
+       "eval_accuracy": 0.9770527344346392,
+       "eval_f1": 0.8597122302158274,
+       "eval_loss": 0.10257242619991302,
+       "eval_precision": 0.8550318867631047,
+       "eval_recall": 0.864444094983488,
+       "eval_runtime": 2.7283,
+       "eval_samples_per_second": 702.266,
+       "eval_steps_per_second": 43.983,
+       "step": 1200
+     },
+     {
+       "epoch": 2.5,
+       "eval_accuracy": 0.9772054108522398,
+       "eval_f1": 0.8668813319720412,
+       "eval_loss": 0.10338085889816284,
+       "eval_precision": 0.8658613115782868,
+       "eval_recall": 0.8679037584525869,
+       "eval_runtime": 2.5673,
+       "eval_samples_per_second": 746.324,
+       "eval_steps_per_second": 46.743,
+       "step": 1300
+     },
+     {
+       "epoch": 2.69,
+       "eval_accuracy": 0.9771290726434395,
+       "eval_f1": 0.8576551294044278,
+       "eval_loss": 0.09793581068515778,
+       "eval_precision": 0.8503632709846962,
+       "eval_recall": 0.8650731247051423,
+       "eval_runtime": 2.5716,
+       "eval_samples_per_second": 745.057,
+       "eval_steps_per_second": 46.663,
+       "step": 1400
+     },
+     {
+       "epoch": 2.88,
+       "learning_rate": 1.2667946257197696e-06,
+       "loss": 0.02,
+       "step": 1500
+     },
+     {
+       "epoch": 2.88,
+       "eval_accuracy": 0.9782130752084033,
+       "eval_f1": 0.868623450007848,
+       "eval_loss": 0.09656020998954773,
+       "eval_precision": 0.8669904433651888,
+       "eval_recall": 0.8702626199087907,
+       "eval_runtime": 2.5726,
+       "eval_samples_per_second": 744.76,
+       "eval_steps_per_second": 46.645,
+       "step": 1500
+     },
+     {
+       "epoch": 3.0,
+       "step": 1563,
+       "total_flos": 1130082001089408.0,
+       "train_loss": 0.08574806804925451,
+       "train_runtime": 168.5849,
+       "train_samples_per_second": 148.127,
+       "train_steps_per_second": 9.271
+     }
+   ],
+   "max_steps": 1563,
+   "num_train_epochs": 3,
+   "total_flos": 1130082001089408.0,
+   "trial_name": null,
+   "trial_params": null
+ }
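trainer_state.json records best_metric = 0.09008... at checkpoint-900, which matches the step-900 eval entry in log_history. A small sketch that recovers the same checkpoint by scanning log_history for the lowest eval_loss (local file path assumed):

import json

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

evals = [entry for entry in state["log_history"] if "eval_loss" in entry]
best = min(evals, key=lambda entry: entry["eval_loss"])
print(best["step"], best["eval_loss"])  # 900 0.09008093178272247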
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:afc5fafbd9fa4e91e4c2ea2b8aa9dbf18388a9937a8d1e4ecf44addddbb44539
+ size 2991
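training_args.bin is a pickled transformers TrainingArguments object (the best_model_checkpoint path suggests 3 epochs, batch size 16, learning rate 3e-5). A sketch for inspecting a local copy; this assumes transformers is installed so the pickled class resolves, and recent PyTorch versions may require weights_only=False:

import torch

args = torch.load("training_args.bin", weights_only=False)  # assumed local path
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)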
vocab.json ADDED
The diff for this file is too large to render.