ivlcic committed on
Commit
59e509a
1 Parent(s): 4ef40e3

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "xlm-roberta-base",
3
+ "architectures": [
4
+ "XLMRobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-PER",
16
+ "2": "I-PER",
17
+ "3": "B-LOC",
18
+ "4": "I-LOC",
19
+ "5": "B-ORG",
20
+ "6": "I-ORG",
21
+ "7": "B-MISC",
22
+ "8": "I-MISC"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 3072,
26
+ "label2id": {
27
+ "B-LOC": 3,
28
+ "B-MISC": 7,
29
+ "B-ORG": 5,
30
+ "B-PER": 1,
31
+ "I-LOC": 4,
32
+ "I-MISC": 8,
33
+ "I-ORG": 6,
34
+ "I-PER": 2,
35
+ "O": 0
36
+ },
37
+ "layer_norm_eps": 1e-05,
38
+ "max_position_embeddings": 514,
39
+ "model_type": "xlm-roberta",
40
+ "num_attention_heads": 12,
41
+ "num_hidden_layers": 12,
42
+ "output_past": true,
43
+ "pad_token_id": 1,
44
+ "position_embedding_type": "absolute",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.32.0",
47
+ "type_vocab_size": 1,
48
+ "use_cache": true,
49
+ "vocab_size": 250002
50
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b27282a42cb7fd8c011e72809c17cc0d4b3b34705799f8b5328caea1f7670bb
3
+ size 2219845765
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7dfef1f0465b0406b087677aee6de8e233f0d37bf75c5adcb1f7547456c87b9
3
+ size 1109908201
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d2a64961ceecc9cce644a70292037b4d6fff45e3dec5f1a2356cd3fd6f4638
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9abe8ae02edaedb0af455d76563d84148137be1587c88455653f6dd45a155616
3
+ size 627
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93bf61272f75c0a0b96b85fa262d2242e8a46008d76095386e98675f0bdd119
3
+ size 17082925
tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "model_max_length": 512,
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "tokenizer_class": "XLMRobertaTokenizer",
18
+ "unk_token": "<unk>"
19
+ }
trainer_state.json ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9324748546966902,
3
+ "best_model_checkpoint": "/home/nikola/projects/neuroticla/result/ner/xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk/checkpoint-433760",
4
+ "epoch": 40.0,
5
+ "eval_steps": 500,
6
+ "global_step": 433760,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 1.95e-05,
14
+ "loss": 0.1128,
15
+ "step": 10844
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_accuracy": 0.9769370487250145,
20
+ "eval_f1": 0.9010081585905575,
21
+ "eval_loss": 0.09421534836292267,
22
+ "eval_precision": 0.9111733920553665,
23
+ "eval_recall": 0.8910672334713056,
24
+ "eval_runtime": 89.8183,
25
+ "eval_samples_per_second": 301.776,
26
+ "eval_steps_per_second": 15.097,
27
+ "step": 10844
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "learning_rate": 1.9e-05,
32
+ "loss": 0.0676,
33
+ "step": 21688
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_accuracy": 0.9799811843427059,
38
+ "eval_f1": 0.9143064683983713,
39
+ "eval_loss": 0.08378221094608307,
40
+ "eval_precision": 0.9133559397719819,
41
+ "eval_recall": 0.9152589775142633,
42
+ "eval_runtime": 89.3428,
43
+ "eval_samples_per_second": 303.382,
44
+ "eval_steps_per_second": 15.177,
45
+ "step": 21688
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "learning_rate": 1.8500000000000002e-05,
50
+ "loss": 0.0507,
51
+ "step": 32532
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.9804305567434127,
56
+ "eval_f1": 0.9163790332723132,
57
+ "eval_loss": 0.08407577127218246,
58
+ "eval_precision": 0.9157519899455383,
59
+ "eval_recall": 0.9170069358988702,
60
+ "eval_runtime": 90.9562,
61
+ "eval_samples_per_second": 298.001,
62
+ "eval_steps_per_second": 14.908,
63
+ "step": 32532
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "learning_rate": 1.8e-05,
68
+ "loss": 0.0401,
69
+ "step": 43376
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "eval_accuracy": 0.9814757099721534,
74
+ "eval_f1": 0.9221617781244994,
75
+ "eval_loss": 0.08777288347482681,
76
+ "eval_precision": 0.9186871140101311,
77
+ "eval_recall": 0.9256628258194429,
78
+ "eval_runtime": 89.4976,
79
+ "eval_samples_per_second": 302.857,
80
+ "eval_steps_per_second": 15.151,
81
+ "step": 43376
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "learning_rate": 1.7500000000000002e-05,
86
+ "loss": 0.032,
87
+ "step": 54220
88
+ },
89
+ {
90
+ "epoch": 5.0,
91
+ "eval_accuracy": 0.9822106512855675,
92
+ "eval_f1": 0.9250753982350196,
93
+ "eval_loss": 0.09507809579372406,
94
+ "eval_precision": 0.921564273720146,
95
+ "eval_recall": 0.9286133795726591,
96
+ "eval_runtime": 90.924,
97
+ "eval_samples_per_second": 298.106,
98
+ "eval_steps_per_second": 14.914,
99
+ "step": 54220
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "learning_rate": 1.7e-05,
104
+ "loss": 0.0264,
105
+ "step": 65064
106
+ },
107
+ {
108
+ "epoch": 6.0,
109
+ "eval_accuracy": 0.9809553077403671,
110
+ "eval_f1": 0.9212521094420008,
111
+ "eval_loss": 0.1004280373454094,
112
+ "eval_precision": 0.9113936176035908,
113
+ "eval_recall": 0.9313262109855689,
114
+ "eval_runtime": 90.6899,
115
+ "eval_samples_per_second": 298.875,
116
+ "eval_steps_per_second": 14.952,
117
+ "step": 65064
118
+ },
119
+ {
120
+ "epoch": 7.0,
121
+ "learning_rate": 1.65e-05,
122
+ "loss": 0.0215,
123
+ "step": 75908
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.9824875226679385,
128
+ "eval_f1": 0.92642423737521,
129
+ "eval_loss": 0.1009296178817749,
130
+ "eval_precision": 0.9199227905694195,
131
+ "eval_recall": 0.9330182347018682,
132
+ "eval_runtime": 90.3482,
133
+ "eval_samples_per_second": 300.006,
134
+ "eval_steps_per_second": 15.009,
135
+ "step": 75908
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "learning_rate": 1.6000000000000003e-05,
140
+ "loss": 0.0177,
141
+ "step": 86752
142
+ },
143
+ {
144
+ "epoch": 8.0,
145
+ "eval_accuracy": 0.9821120792750898,
146
+ "eval_f1": 0.9245811025699784,
147
+ "eval_loss": 0.11745402961969376,
148
+ "eval_precision": 0.9183000675890037,
149
+ "eval_recall": 0.9309486519744938,
150
+ "eval_runtime": 89.2897,
151
+ "eval_samples_per_second": 303.563,
152
+ "eval_steps_per_second": 15.187,
153
+ "step": 86752
154
+ },
155
+ {
156
+ "epoch": 9.0,
157
+ "learning_rate": 1.55e-05,
158
+ "loss": 0.015,
159
+ "step": 97596
160
+ },
161
+ {
162
+ "epoch": 9.0,
163
+ "eval_accuracy": 0.9825324599080091,
164
+ "eval_f1": 0.926065441605738,
165
+ "eval_loss": 0.11713194102048874,
166
+ "eval_precision": 0.9205632012380479,
167
+ "eval_recall": 0.9316338516612597,
168
+ "eval_runtime": 89.7586,
169
+ "eval_samples_per_second": 301.977,
170
+ "eval_steps_per_second": 15.107,
171
+ "step": 97596
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "learning_rate": 1.5000000000000002e-05,
176
+ "loss": 0.0133,
177
+ "step": 108440
178
+ },
179
+ {
180
+ "epoch": 10.0,
181
+ "eval_accuracy": 0.9820569949162935,
182
+ "eval_f1": 0.9250865136228494,
183
+ "eval_loss": 0.13051354885101318,
184
+ "eval_precision": 0.9212697796330453,
185
+ "eval_recall": 0.9289350039154268,
186
+ "eval_runtime": 89.1452,
187
+ "eval_samples_per_second": 304.054,
188
+ "eval_steps_per_second": 15.211,
189
+ "step": 108440
190
+ },
191
+ {
192
+ "epoch": 11.0,
193
+ "learning_rate": 1.45e-05,
194
+ "loss": 0.0112,
195
+ "step": 119284
196
+ },
197
+ {
198
+ "epoch": 11.0,
199
+ "eval_accuracy": 0.9825614516757967,
200
+ "eval_f1": 0.9272675473035418,
201
+ "eval_loss": 0.1286834180355072,
202
+ "eval_precision": 0.9215873147419164,
203
+ "eval_recall": 0.9330182347018682,
204
+ "eval_runtime": 89.577,
205
+ "eval_samples_per_second": 302.589,
206
+ "eval_steps_per_second": 15.138,
207
+ "step": 119284
208
+ },
209
+ {
210
+ "epoch": 12.0,
211
+ "learning_rate": 1.4e-05,
212
+ "loss": 0.01,
213
+ "step": 130128
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "eval_accuracy": 0.982439686251089,
218
+ "eval_f1": 0.9262543854764159,
219
+ "eval_loss": 0.13614533841609955,
220
+ "eval_precision": 0.9221972721224219,
221
+ "eval_recall": 0.9303473542901891,
222
+ "eval_runtime": 88.3792,
223
+ "eval_samples_per_second": 306.69,
224
+ "eval_steps_per_second": 15.343,
225
+ "step": 130128
226
+ },
227
+ {
228
+ "epoch": 13.0,
229
+ "learning_rate": 1.3500000000000001e-05,
230
+ "loss": 0.0086,
231
+ "step": 140972
232
+ },
233
+ {
234
+ "epoch": 13.0,
235
+ "eval_accuracy": 0.9824309887207527,
236
+ "eval_f1": 0.9259202794101342,
237
+ "eval_loss": 0.13529813289642334,
238
+ "eval_precision": 0.9177794263105836,
239
+ "eval_recall": 0.9342068464034008,
240
+ "eval_runtime": 88.5532,
241
+ "eval_samples_per_second": 306.087,
242
+ "eval_steps_per_second": 15.313,
243
+ "step": 140972
244
+ },
245
+ {
246
+ "epoch": 14.0,
247
+ "learning_rate": 1.3000000000000001e-05,
248
+ "loss": 0.0079,
249
+ "step": 151816
250
+ },
251
+ {
252
+ "epoch": 14.0,
253
+ "eval_accuracy": 0.9827180072218493,
254
+ "eval_f1": 0.9273193405173312,
255
+ "eval_loss": 0.14126552641391754,
256
+ "eval_precision": 0.9215123103053067,
257
+ "eval_recall": 0.9332000223738673,
258
+ "eval_runtime": 89.1586,
259
+ "eval_samples_per_second": 304.009,
260
+ "eval_steps_per_second": 15.209,
261
+ "step": 151816
262
+ },
263
+ {
264
+ "epoch": 15.0,
265
+ "learning_rate": 1.25e-05,
266
+ "loss": 0.007,
267
+ "step": 162660
268
+ },
269
+ {
270
+ "epoch": 15.0,
271
+ "eval_accuracy": 0.982528111142841,
272
+ "eval_f1": 0.9268309701909121,
273
+ "eval_loss": 0.1539030820131302,
274
+ "eval_precision": 0.9249721448467967,
275
+ "eval_recall": 0.9286972815751202,
276
+ "eval_runtime": 88.721,
277
+ "eval_samples_per_second": 305.508,
278
+ "eval_steps_per_second": 15.284,
279
+ "step": 162660
280
+ },
281
+ {
282
+ "epoch": 16.0,
283
+ "learning_rate": 1.2e-05,
284
+ "loss": 0.0064,
285
+ "step": 173504
286
+ },
287
+ {
288
+ "epoch": 16.0,
289
+ "eval_accuracy": 0.982581745913248,
290
+ "eval_f1": 0.9264010897364617,
291
+ "eval_loss": 0.15460434556007385,
292
+ "eval_precision": 0.9208715596330275,
293
+ "eval_recall": 0.9319974270052579,
294
+ "eval_runtime": 88.6075,
295
+ "eval_samples_per_second": 305.9,
296
+ "eval_steps_per_second": 15.303,
297
+ "step": 173504
298
+ },
299
+ {
300
+ "epoch": 17.0,
301
+ "learning_rate": 1.15e-05,
302
+ "loss": 0.0057,
303
+ "step": 184348
304
+ },
305
+ {
306
+ "epoch": 17.0,
307
+ "eval_accuracy": 0.982916600831194,
308
+ "eval_f1": 0.9276149465203501,
309
+ "eval_loss": 0.1507822722196579,
310
+ "eval_precision": 0.9214963846111387,
311
+ "eval_recall": 0.9338153037252489,
312
+ "eval_runtime": 89.1007,
313
+ "eval_samples_per_second": 304.206,
314
+ "eval_steps_per_second": 15.219,
315
+ "step": 184348
316
+ },
317
+ {
318
+ "epoch": 18.0,
319
+ "learning_rate": 1.1000000000000001e-05,
320
+ "loss": 0.0053,
321
+ "step": 195192
322
+ },
323
+ {
324
+ "epoch": 18.0,
325
+ "eval_accuracy": 0.9827933858180969,
326
+ "eval_f1": 0.927893329629501,
327
+ "eval_loss": 0.15079163014888763,
328
+ "eval_precision": 0.9216781866092739,
329
+ "eval_recall": 0.934192862736324,
330
+ "eval_runtime": 90.146,
331
+ "eval_samples_per_second": 300.679,
332
+ "eval_steps_per_second": 15.042,
333
+ "step": 195192
334
+ },
335
+ {
336
+ "epoch": 19.0,
337
+ "learning_rate": 1.0500000000000001e-05,
338
+ "loss": 0.0048,
339
+ "step": 206036
340
+ },
341
+ {
342
+ "epoch": 19.0,
343
+ "eval_accuracy": 0.9825150648473366,
344
+ "eval_f1": 0.9270733168889446,
345
+ "eval_loss": 0.16124233603477478,
346
+ "eval_precision": 0.9239187800338879,
347
+ "eval_recall": 0.930249468620651,
348
+ "eval_runtime": 89.3088,
349
+ "eval_samples_per_second": 303.497,
350
+ "eval_steps_per_second": 15.183,
351
+ "step": 206036
352
+ },
353
+ {
354
+ "epoch": 20.0,
355
+ "learning_rate": 1e-05,
356
+ "loss": 0.0043,
357
+ "step": 216880
358
+ },
359
+ {
360
+ "epoch": 20.0,
361
+ "eval_accuracy": 0.9827440998128582,
362
+ "eval_f1": 0.9272743748997957,
363
+ "eval_loss": 0.1557987779378891,
364
+ "eval_precision": 0.9244839808186809,
365
+ "eval_recall": 0.9300816646157288,
366
+ "eval_runtime": 88.3664,
367
+ "eval_samples_per_second": 306.734,
368
+ "eval_steps_per_second": 15.345,
369
+ "step": 216880
370
+ },
371
+ {
372
+ "epoch": 21.0,
373
+ "learning_rate": 9.5e-06,
374
+ "loss": 0.0041,
375
+ "step": 227724
376
+ },
377
+ {
378
+ "epoch": 21.0,
379
+ "eval_accuracy": 0.9828412222349464,
380
+ "eval_f1": 0.92726855170398,
381
+ "eval_loss": 0.1576606184244156,
382
+ "eval_precision": 0.9229714190715008,
383
+ "eval_recall": 0.9316058843271059,
384
+ "eval_runtime": 88.9473,
385
+ "eval_samples_per_second": 304.731,
386
+ "eval_steps_per_second": 15.245,
387
+ "step": 227724
388
+ },
389
+ {
390
+ "epoch": 22.0,
391
+ "learning_rate": 9e-06,
392
+ "loss": 0.0036,
393
+ "step": 238568
394
+ },
395
+ {
396
+ "epoch": 22.0,
397
+ "eval_accuracy": 0.9831050473218129,
398
+ "eval_f1": 0.9286320918900104,
399
+ "eval_loss": 0.16860993206501007,
400
+ "eval_precision": 0.9246028445479497,
401
+ "eval_recall": 0.9326966103591006,
402
+ "eval_runtime": 88.4212,
403
+ "eval_samples_per_second": 306.544,
404
+ "eval_steps_per_second": 15.336,
405
+ "step": 238568
406
+ },
407
+ {
408
+ "epoch": 23.0,
409
+ "learning_rate": 8.5e-06,
410
+ "loss": 0.0033,
411
+ "step": 249412
412
+ },
413
+ {
414
+ "epoch": 23.0,
415
+ "eval_accuracy": 0.9828832602982384,
416
+ "eval_f1": 0.9276945586288581,
417
+ "eval_loss": 0.16733527183532715,
418
+ "eval_precision": 0.9244355577772224,
419
+ "eval_recall": 0.9309766193086475,
420
+ "eval_runtime": 88.9341,
421
+ "eval_samples_per_second": 304.776,
422
+ "eval_steps_per_second": 15.247,
423
+ "step": 249412
424
+ },
425
+ {
426
+ "epoch": 24.0,
427
+ "learning_rate": 8.000000000000001e-06,
428
+ "loss": 0.0031,
429
+ "step": 260256
430
+ },
431
+ {
432
+ "epoch": 24.0,
433
+ "eval_accuracy": 0.9835123816592278,
434
+ "eval_f1": 0.9298053519496189,
435
+ "eval_loss": 0.16969779133796692,
436
+ "eval_precision": 0.9274573913043478,
437
+ "eval_recall": 0.9321652310101801,
438
+ "eval_runtime": 87.785,
439
+ "eval_samples_per_second": 308.766,
440
+ "eval_steps_per_second": 15.447,
441
+ "step": 260256
442
+ },
443
+ {
444
+ "epoch": 25.0,
445
+ "learning_rate": 7.500000000000001e-06,
446
+ "loss": 0.0029,
447
+ "step": 271100
448
+ },
449
+ {
450
+ "epoch": 25.0,
451
+ "eval_accuracy": 0.983328283933777,
452
+ "eval_f1": 0.9295600197618832,
453
+ "eval_loss": 0.16894972324371338,
454
+ "eval_precision": 0.9251374672779401,
455
+ "eval_recall": 0.9340250587314017,
456
+ "eval_runtime": 88.675,
457
+ "eval_samples_per_second": 305.667,
458
+ "eval_steps_per_second": 15.292,
459
+ "step": 271100
460
+ },
461
+ {
462
+ "epoch": 26.0,
463
+ "learning_rate": 7e-06,
464
+ "loss": 0.0026,
465
+ "step": 281944
466
+ },
467
+ {
468
+ "epoch": 26.0,
469
+ "eval_accuracy": 0.9831079464985917,
470
+ "eval_f1": 0.9281980972530569,
471
+ "eval_loss": 0.1714058518409729,
472
+ "eval_precision": 0.9239348804987877,
473
+ "eval_recall": 0.9325008390200246,
474
+ "eval_runtime": 89.606,
475
+ "eval_samples_per_second": 302.491,
476
+ "eval_steps_per_second": 15.133,
477
+ "step": 281944
478
+ },
479
+ {
480
+ "epoch": 27.0,
481
+ "learning_rate": 6.5000000000000004e-06,
482
+ "loss": 0.0022,
483
+ "step": 292788
484
+ },
485
+ {
486
+ "epoch": 27.0,
487
+ "eval_accuracy": 0.9831311399128217,
488
+ "eval_f1": 0.9291277150061997,
489
+ "eval_loss": 0.16884349286556244,
490
+ "eval_precision": 0.9257100024985425,
491
+ "eval_recall": 0.9325707573554088,
492
+ "eval_runtime": 88.5198,
493
+ "eval_samples_per_second": 306.203,
494
+ "eval_steps_per_second": 15.319,
495
+ "step": 292788
496
+ },
497
+ {
498
+ "epoch": 28.0,
499
+ "learning_rate": 6e-06,
500
+ "loss": 0.002,
501
+ "step": 303632
502
+ },
503
+ {
504
+ "epoch": 28.0,
505
+ "eval_accuracy": 0.9833819187041839,
506
+ "eval_f1": 0.9299846945874496,
507
+ "eval_loss": 0.17876744270324707,
508
+ "eval_precision": 0.9253752007531705,
509
+ "eval_recall": 0.9346403400827833,
510
+ "eval_runtime": 88.8069,
511
+ "eval_samples_per_second": 305.213,
512
+ "eval_steps_per_second": 15.269,
513
+ "step": 303632
514
+ },
515
+ {
516
+ "epoch": 29.0,
517
+ "learning_rate": 5.500000000000001e-06,
518
+ "loss": 0.0019,
519
+ "step": 314476
520
+ },
521
+ {
522
+ "epoch": 29.0,
523
+ "eval_accuracy": 0.9836268991419886,
524
+ "eval_f1": 0.9311766262342112,
525
+ "eval_loss": 0.1778053343296051,
526
+ "eval_precision": 0.9273667859421375,
527
+ "eval_recall": 0.9350178990938584,
528
+ "eval_runtime": 89.2258,
529
+ "eval_samples_per_second": 303.78,
530
+ "eval_steps_per_second": 15.197,
531
+ "step": 314476
532
+ },
533
+ {
534
+ "epoch": 30.0,
535
+ "learning_rate": 5e-06,
536
+ "loss": 0.0018,
537
+ "step": 325320
538
+ },
539
+ {
540
+ "epoch": 30.0,
541
+ "eval_accuracy": 0.9834732427727146,
542
+ "eval_f1": 0.9307018886832106,
543
+ "eval_loss": 0.18135882914066315,
544
+ "eval_precision": 0.9263569113124429,
545
+ "eval_recall": 0.9350878174292426,
546
+ "eval_runtime": 88.7746,
547
+ "eval_samples_per_second": 305.324,
548
+ "eval_steps_per_second": 15.275,
549
+ "step": 325320
550
+ },
551
+ {
552
+ "epoch": 31.0,
553
+ "learning_rate": 4.5e-06,
554
+ "loss": 0.0016,
555
+ "step": 336164
556
+ },
557
+ {
558
+ "epoch": 31.0,
559
+ "eval_accuracy": 0.983370321997069,
560
+ "eval_f1": 0.9299260330348701,
561
+ "eval_loss": 0.18739104270935059,
562
+ "eval_precision": 0.9246295067462951,
563
+ "eval_recall": 0.9352835887683186,
564
+ "eval_runtime": 89.214,
565
+ "eval_samples_per_second": 303.82,
566
+ "eval_steps_per_second": 15.199,
567
+ "step": 336164
568
+ },
569
+ {
570
+ "epoch": 32.0,
571
+ "learning_rate": 4.000000000000001e-06,
572
+ "loss": 0.0014,
573
+ "step": 347008
574
+ },
575
+ {
576
+ "epoch": 32.0,
577
+ "eval_accuracy": 0.9836066049045373,
578
+ "eval_f1": 0.9308149819620296,
579
+ "eval_loss": 0.18249443173408508,
580
+ "eval_precision": 0.927185947775897,
581
+ "eval_recall": 0.9344725360778611,
582
+ "eval_runtime": 89.9015,
583
+ "eval_samples_per_second": 301.497,
584
+ "eval_steps_per_second": 15.083,
585
+ "step": 347008
586
+ },
587
+ {
588
+ "epoch": 33.0,
589
+ "learning_rate": 3.5e-06,
590
+ "loss": 0.0015,
591
+ "step": 357852
592
+ },
593
+ {
594
+ "epoch": 33.0,
595
+ "eval_accuracy": 0.9833732211738477,
596
+ "eval_f1": 0.9303621946290954,
597
+ "eval_loss": 0.18371780216693878,
598
+ "eval_precision": 0.927196466764812,
599
+ "eval_recall": 0.9335496140507887,
600
+ "eval_runtime": 89.8085,
601
+ "eval_samples_per_second": 301.809,
602
+ "eval_steps_per_second": 15.099,
603
+ "step": 357852
604
+ },
605
+ {
606
+ "epoch": 34.0,
607
+ "learning_rate": 3e-06,
608
+ "loss": 0.0013,
609
+ "step": 368696
610
+ },
611
+ {
612
+ "epoch": 34.0,
613
+ "eval_accuracy": 0.9834457005933165,
614
+ "eval_f1": 0.9307165143748607,
615
+ "eval_loss": 0.1884261518716812,
616
+ "eval_precision": 0.9271006771006771,
617
+ "eval_recall": 0.9343606667412462,
618
+ "eval_runtime": 90.3295,
619
+ "eval_samples_per_second": 300.068,
620
+ "eval_steps_per_second": 15.012,
621
+ "step": 368696
622
+ },
623
+ {
624
+ "epoch": 35.0,
625
+ "learning_rate": 2.5e-06,
626
+ "loss": 0.0012,
627
+ "step": 379540
628
+ },
629
+ {
630
+ "epoch": 35.0,
631
+ "eval_accuracy": 0.9836544413213868,
632
+ "eval_f1": 0.9312036572448013,
633
+ "eval_loss": 0.18962261080741882,
634
+ "eval_precision": 0.9281507001555901,
635
+ "eval_recall": 0.9342767647387851,
636
+ "eval_runtime": 89.7245,
637
+ "eval_samples_per_second": 302.092,
638
+ "eval_steps_per_second": 15.113,
639
+ "step": 379540
640
+ },
641
+ {
642
+ "epoch": 36.0,
643
+ "learning_rate": 2.0000000000000003e-06,
644
+ "loss": 0.0011,
645
+ "step": 390384
646
+ },
647
+ {
648
+ "epoch": 36.0,
649
+ "eval_accuracy": 0.9836124032580948,
650
+ "eval_f1": 0.9311286323238515,
651
+ "eval_loss": 0.18686576187610626,
652
+ "eval_precision": 0.9275881210102692,
653
+ "eval_recall": 0.9346962747510907,
654
+ "eval_runtime": 90.2423,
655
+ "eval_samples_per_second": 300.358,
656
+ "eval_steps_per_second": 15.026,
657
+ "step": 390384
658
+ },
659
+ {
660
+ "epoch": 37.0,
661
+ "learning_rate": 1.5e-06,
662
+ "loss": 0.001,
663
+ "step": 401228
664
+ },
665
+ {
666
+ "epoch": 37.0,
667
+ "eval_accuracy": 0.9836442942026612,
668
+ "eval_f1": 0.9311170842443909,
669
+ "eval_loss": 0.18996329605579376,
670
+ "eval_precision": 0.9273587261075817,
671
+ "eval_recall": 0.9349060297572436,
672
+ "eval_runtime": 88.7307,
673
+ "eval_samples_per_second": 305.475,
674
+ "eval_steps_per_second": 15.282,
675
+ "step": 401228
676
+ },
677
+ {
678
+ "epoch": 38.0,
679
+ "learning_rate": 1.0000000000000002e-06,
680
+ "loss": 0.001,
681
+ "step": 412072
682
+ },
683
+ {
684
+ "epoch": 38.0,
685
+ "eval_accuracy": 0.9837356182711919,
686
+ "eval_f1": 0.9316876434183994,
687
+ "eval_loss": 0.19159720838069916,
688
+ "eval_precision": 0.9266231431021605,
689
+ "eval_recall": 0.9368078084796957,
690
+ "eval_runtime": 88.7508,
691
+ "eval_samples_per_second": 305.406,
692
+ "eval_steps_per_second": 15.279,
693
+ "step": 412072
694
+ },
695
+ {
696
+ "epoch": 39.0,
697
+ "learning_rate": 5.000000000000001e-07,
698
+ "loss": 0.001,
699
+ "step": 422916
700
+ },
701
+ {
702
+ "epoch": 39.0,
703
+ "eval_accuracy": 0.9837994001603245,
704
+ "eval_f1": 0.9321045231167601,
705
+ "eval_loss": 0.1949097365140915,
706
+ "eval_precision": 0.927983367983368,
707
+ "eval_recall": 0.9362624454636984,
708
+ "eval_runtime": 88.5519,
709
+ "eval_samples_per_second": 306.092,
710
+ "eval_steps_per_second": 15.313,
711
+ "step": 422916
712
+ },
713
+ {
714
+ "epoch": 40.0,
715
+ "learning_rate": 0.0,
716
+ "loss": 0.0009,
717
+ "step": 433760
718
+ },
719
+ {
720
+ "epoch": 40.0,
721
+ "eval_accuracy": 0.9839153672314747,
722
+ "eval_f1": 0.9324748546966902,
723
+ "eval_loss": 0.19464968144893646,
724
+ "eval_precision": 0.9283328482530179,
725
+ "eval_recall": 0.9366539881418503,
726
+ "eval_runtime": 88.4748,
727
+ "eval_samples_per_second": 306.358,
728
+ "eval_steps_per_second": 15.326,
729
+ "step": 433760
730
+ }
731
+ ],
732
+ "logging_steps": 500,
733
+ "max_steps": 433760,
734
+ "num_train_epochs": 40,
735
+ "save_steps": 500,
736
+ "total_flos": 1.1334359572090675e+18,
737
+ "trial_name": null,
738
+ "trial_params": null
739
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4f1cbb10d26c61bfbf10060a5bb72af92bf04d5852be8acd714f7fbf039678
3
+ size 4155
xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ,LOC.precision,LOC.recall,LOC.f1,LOC.number,MISC.precision,MISC.recall,MISC.f1,MISC.number,ORG.precision,ORG.recall,ORG.f1,ORG.number,PER.precision,PER.recall,PER.f1,PER.number,overall_precision,overall_recall,overall_f1,overall_accuracy,model_name
2
+ 0,0.9410536270144608,0.955128974205159,0.9480390600190536,25005,0.8519650655021834,0.8554516223326513,0.8537047841306884,6842,0.9122568093385214,0.915194691129111,0.9137233887075559,20494,0.9499552728357022,0.9619061996779388,0.955893384007601,19872,0.9269994926711549,0.9362164707185687,0.931585184368627,0.9834613206674987,xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk
xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk": {
3
+ "LOC": {
4
+ "precision": 0.9410536270144608,
5
+ "recall": 0.955128974205159,
6
+ "f1": 0.9480390600190536,
7
+ "number": 25005
8
+ },
9
+ "MISC": {
10
+ "precision": 0.8519650655021834,
11
+ "recall": 0.8554516223326513,
12
+ "f1": 0.8537047841306884,
13
+ "number": 6842
14
+ },
15
+ "ORG": {
16
+ "precision": 0.9122568093385214,
17
+ "recall": 0.915194691129111,
18
+ "f1": 0.9137233887075559,
19
+ "number": 20494
20
+ },
21
+ "PER": {
22
+ "precision": 0.9499552728357022,
23
+ "recall": 0.9619061996779388,
24
+ "f1": 0.955893384007601,
25
+ "number": 19872
26
+ },
27
+ "overall_precision": 0.9269994926711549,
28
+ "overall_recall": 0.9362164707185687,
29
+ "overall_f1": 0.931585184368627,
30
+ "overall_accuracy": 0.9834613206674987
31
+ }
32
+ }