josecannete commited on
Commit
9a86d59
1 Parent(s): a07aa31

adding model finetuned on POS

Browse files
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.9779572239196858,
4
+ "eval_f1": 0.9755450236966825,
5
+ "eval_loss": 0.07586462050676346,
6
+ "eval_precision": 0.9746795063500608,
7
+ "eval_recall": 0.9764120795717823,
8
+ "eval_runtime": 1.8042,
9
+ "eval_samples": 1654,
10
+ "eval_samples_per_second": 916.745,
11
+ "eval_steps_per_second": 57.643,
12
+ "train_loss": 0.08176401986756138,
13
+ "train_runtime": 7371.7605,
14
+ "train_samples": 14305,
15
+ "train_samples_per_second": 7.762,
16
+ "train_steps_per_second": 0.486
17
+ }
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "CenIA/distillbert-base-spanish-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "finetuning_task": "pos",
11
+ "hidden_dim": 3072,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17"
31
+ },
32
+ "initializer_range": 0.02,
33
+ "label2id": {
34
+ "LABEL_0": 0,
35
+ "LABEL_1": 1,
36
+ "LABEL_10": 10,
37
+ "LABEL_11": 11,
38
+ "LABEL_12": 12,
39
+ "LABEL_13": 13,
40
+ "LABEL_14": 14,
41
+ "LABEL_15": 15,
42
+ "LABEL_16": 16,
43
+ "LABEL_17": 17,
44
+ "LABEL_2": 2,
45
+ "LABEL_3": 3,
46
+ "LABEL_4": 4,
47
+ "LABEL_5": 5,
48
+ "LABEL_6": 6,
49
+ "LABEL_7": 7,
50
+ "LABEL_8": 8,
51
+ "LABEL_9": 9
52
+ },
53
+ "max_position_embeddings": 512,
54
+ "model_type": "distilbert",
55
+ "n_heads": 12,
56
+ "n_layers": 6,
57
+ "pad_token_id": 0,
58
+ "qa_dropout": 0.1,
59
+ "seq_classif_dropout": 0.2,
60
+ "sinusoidal_pos_embds": true,
61
+ "tie_weights_": true,
62
+ "torch_dtype": "float32",
63
+ "transformers_version": "4.12.5",
64
+ "vocab_size": 31002
65
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.9779572239196858,
4
+ "eval_f1": 0.9755450236966825,
5
+ "eval_loss": 0.07586462050676346,
6
+ "eval_precision": 0.9746795063500608,
7
+ "eval_recall": 0.9764120795717823,
8
+ "eval_runtime": 1.8042,
9
+ "eval_samples": 1654,
10
+ "eval_samples_per_second": 916.745,
11
+ "eval_steps_per_second": 57.643
12
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cacc37529578a9f0423df64b933c11031f8f4c94fb1e051017f12faabc4ea012
3
+ size 267020853
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "do_basic_tokenize": true, "never_split": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "CenIA/distillbert-base-spanish-uncased", "tokenizer_class": "DistilBertTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.08176401986756138,
4
+ "train_runtime": 7371.7605,
5
+ "train_samples": 14305,
6
+ "train_samples_per_second": 7.762,
7
+ "train_steps_per_second": 0.486
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.07586462050676346,
3
+ "best_model_checkpoint": "/data/jcanete/all_results/pos/distillbeto/epochs_4_bs_16_lr_5e-5/checkpoint-1700",
4
+ "epoch": 4.0,
5
+ "global_step": 3580,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "eval_accuracy": 0.9443306584293053,
13
+ "eval_f1": 0.9341365141420992,
14
+ "eval_loss": 0.212116539478302,
15
+ "eval_precision": 0.9329083665338646,
16
+ "eval_recall": 0.9353678996564672,
17
+ "eval_runtime": 1.788,
18
+ "eval_samples_per_second": 925.041,
19
+ "eval_steps_per_second": 58.165,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.22,
24
+ "eval_accuracy": 0.9563677265554176,
25
+ "eval_f1": 0.9504974205973039,
26
+ "eval_loss": 0.15402881801128387,
27
+ "eval_precision": 0.9497298051805619,
28
+ "eval_recall": 0.9512662778621075,
29
+ "eval_runtime": 1.7688,
30
+ "eval_samples_per_second": 935.077,
31
+ "eval_steps_per_second": 58.796,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.34,
36
+ "eval_accuracy": 0.9625961118759023,
37
+ "eval_f1": 0.9571392947982644,
38
+ "eval_loss": 0.13182665407657623,
39
+ "eval_precision": 0.9560045430091458,
40
+ "eval_recall": 0.958276743628665,
41
+ "eval_runtime": 1.7773,
42
+ "eval_samples_per_second": 930.646,
43
+ "eval_steps_per_second": 58.517,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 0.45,
48
+ "eval_accuracy": 0.9649632340596985,
49
+ "eval_f1": 0.96070983500324,
50
+ "eval_loss": 0.12322123348712921,
51
+ "eval_precision": 0.9590938949379939,
52
+ "eval_recall": 0.9623312295278421,
53
+ "eval_runtime": 1.7902,
54
+ "eval_samples_per_second": 923.933,
55
+ "eval_steps_per_second": 58.095,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 0.56,
60
+ "learning_rate": 4.301675977653631e-05,
61
+ "loss": 0.2881,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 0.56,
66
+ "eval_accuracy": 0.9674814491488433,
67
+ "eval_f1": 0.9628313283082913,
68
+ "eval_loss": 0.11282572150230408,
69
+ "eval_precision": 0.9625526478631455,
70
+ "eval_recall": 0.9631101701685707,
71
+ "eval_runtime": 1.7431,
72
+ "eval_samples_per_second": 948.862,
73
+ "eval_steps_per_second": 59.662,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 0.67,
78
+ "eval_accuracy": 0.9697646308296679,
79
+ "eval_f1": 0.9659112436064888,
80
+ "eval_loss": 0.10294512659311295,
81
+ "eval_precision": 0.9643632164685739,
82
+ "eval_recall": 0.9674642486218743,
83
+ "eval_runtime": 1.7656,
84
+ "eval_samples_per_second": 936.792,
85
+ "eval_steps_per_second": 58.903,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 0.78,
90
+ "eval_accuracy": 0.9717624148003895,
91
+ "eval_f1": 0.9682482698099284,
92
+ "eval_loss": 0.09821932762861252,
93
+ "eval_precision": 0.9668791077474607,
94
+ "eval_recall": 0.9696213150115842,
95
+ "eval_runtime": 1.786,
96
+ "eval_samples_per_second": 926.088,
97
+ "eval_steps_per_second": 58.23,
98
+ "step": 700
99
+ },
100
+ {
101
+ "epoch": 0.89,
102
+ "eval_accuracy": 0.9731894033509049,
103
+ "eval_f1": 0.9696576892779867,
104
+ "eval_loss": 0.09264940768480301,
105
+ "eval_precision": 0.9681806415642858,
106
+ "eval_recall": 0.971139250619158,
107
+ "eval_runtime": 1.7594,
108
+ "eval_samples_per_second": 940.07,
109
+ "eval_steps_per_second": 59.11,
110
+ "step": 800
111
+ },
112
+ {
113
+ "epoch": 1.01,
114
+ "eval_accuracy": 0.9730215223449619,
115
+ "eval_f1": 0.9703566974307807,
116
+ "eval_loss": 0.09371750056743622,
117
+ "eval_precision": 0.9694957832406245,
118
+ "eval_recall": 0.9712191419669249,
119
+ "eval_runtime": 1.7445,
120
+ "eval_samples_per_second": 948.129,
121
+ "eval_steps_per_second": 59.616,
122
+ "step": 900
123
+ },
124
+ {
125
+ "epoch": 1.12,
126
+ "learning_rate": 3.603351955307263e-05,
127
+ "loss": 0.0962,
128
+ "step": 1000
129
+ },
130
+ {
131
+ "epoch": 1.12,
132
+ "eval_accuracy": 0.9754054326293523,
133
+ "eval_f1": 0.9727950199024332,
134
+ "eval_loss": 0.08893433958292007,
135
+ "eval_precision": 0.9717964561200694,
136
+ "eval_recall": 0.973795637932412,
137
+ "eval_runtime": 1.7661,
138
+ "eval_samples_per_second": 936.527,
139
+ "eval_steps_per_second": 58.887,
140
+ "step": 1000
141
+ },
142
+ {
143
+ "epoch": 1.23,
144
+ "eval_accuracy": 0.9751368230198435,
145
+ "eval_f1": 0.972067262112669,
146
+ "eval_loss": 0.09049103409051895,
147
+ "eval_precision": 0.9713983684703912,
148
+ "eval_recall": 0.9727370775744987,
149
+ "eval_runtime": 1.7822,
150
+ "eval_samples_per_second": 928.079,
151
+ "eval_steps_per_second": 58.356,
152
+ "step": 1100
153
+ },
154
+ {
155
+ "epoch": 1.34,
156
+ "eval_accuracy": 0.9766141758721418,
157
+ "eval_f1": 0.9741814481533949,
158
+ "eval_loss": 0.08416619151830673,
159
+ "eval_precision": 0.973191150089695,
160
+ "eval_recall": 0.9751737636813933,
161
+ "eval_runtime": 1.8044,
162
+ "eval_samples_per_second": 916.636,
163
+ "eval_steps_per_second": 57.636,
164
+ "step": 1200
165
+ },
166
+ {
167
+ "epoch": 1.45,
168
+ "eval_accuracy": 0.975892287546587,
169
+ "eval_f1": 0.9735277689115864,
170
+ "eval_loss": 0.08117581903934479,
171
+ "eval_precision": 0.972363911692038,
172
+ "eval_recall": 0.9746944155947911,
173
+ "eval_runtime": 1.7582,
174
+ "eval_samples_per_second": 940.735,
175
+ "eval_steps_per_second": 59.151,
176
+ "step": 1300
177
+ },
178
+ {
179
+ "epoch": 1.56,
180
+ "eval_accuracy": 0.9770674545881879,
181
+ "eval_f1": 0.9749930148086058,
182
+ "eval_loss": 0.08163653314113617,
183
+ "eval_precision": 0.9742541480536057,
184
+ "eval_recall": 0.9757330031157626,
185
+ "eval_runtime": 1.7901,
186
+ "eval_samples_per_second": 923.964,
187
+ "eval_steps_per_second": 58.097,
188
+ "step": 1400
189
+ },
190
+ {
191
+ "epoch": 1.68,
192
+ "learning_rate": 2.9050279329608944e-05,
193
+ "loss": 0.0625,
194
+ "step": 1500
195
+ },
196
+ {
197
+ "epoch": 1.68,
198
+ "eval_accuracy": 0.976513447268576,
199
+ "eval_f1": 0.9742249531181423,
200
+ "eval_loss": 0.08262678235769272,
201
+ "eval_precision": 0.9730989956958394,
202
+ "eval_recall": 0.9753535192138691,
203
+ "eval_runtime": 1.755,
204
+ "eval_samples_per_second": 942.465,
205
+ "eval_steps_per_second": 59.26,
206
+ "step": 1500
207
+ },
208
+ {
209
+ "epoch": 1.79,
210
+ "eval_accuracy": 0.9771513950911593,
211
+ "eval_f1": 0.9745410305248356,
212
+ "eval_loss": 0.08152274787425995,
213
+ "eval_precision": 0.9731532931031049,
214
+ "eval_recall": 0.9759327314851801,
215
+ "eval_runtime": 1.7691,
216
+ "eval_samples_per_second": 934.957,
217
+ "eval_steps_per_second": 58.788,
218
+ "step": 1600
219
+ },
220
+ {
221
+ "epoch": 1.9,
222
+ "eval_accuracy": 0.9779572239196858,
223
+ "eval_f1": 0.9755450236966825,
224
+ "eval_loss": 0.07586462050676346,
225
+ "eval_precision": 0.9746795063500608,
226
+ "eval_recall": 0.9764120795717823,
227
+ "eval_runtime": 1.754,
228
+ "eval_samples_per_second": 942.987,
229
+ "eval_steps_per_second": 59.293,
230
+ "step": 1700
231
+ },
232
+ {
233
+ "epoch": 2.01,
234
+ "eval_accuracy": 0.9781251049256288,
235
+ "eval_f1": 0.9761717152574831,
236
+ "eval_loss": 0.07830089330673218,
237
+ "eval_precision": 0.9749755932338467,
238
+ "eval_recall": 0.9773707757449868,
239
+ "eval_runtime": 1.7982,
240
+ "eval_samples_per_second": 919.825,
241
+ "eval_steps_per_second": 57.837,
242
+ "step": 1800
243
+ },
244
+ {
245
+ "epoch": 2.12,
246
+ "eval_accuracy": 0.9776550381089884,
247
+ "eval_f1": 0.9756199790429619,
248
+ "eval_loss": 0.08235861361026764,
249
+ "eval_precision": 0.9749486407244151,
250
+ "eval_recall": 0.9762922425501318,
251
+ "eval_runtime": 1.7592,
252
+ "eval_samples_per_second": 940.194,
253
+ "eval_steps_per_second": 59.117,
254
+ "step": 1900
255
+ },
256
+ {
257
+ "epoch": 2.23,
258
+ "learning_rate": 2.208100558659218e-05,
259
+ "loss": 0.0492,
260
+ "step": 2000
261
+ },
262
+ {
263
+ "epoch": 2.23,
264
+ "eval_accuracy": 0.9773528522982909,
265
+ "eval_f1": 0.975572275330792,
266
+ "eval_loss": 0.08445031195878983,
267
+ "eval_precision": 0.9748135444502054,
268
+ "eval_recall": 0.9763321882240154,
269
+ "eval_runtime": 1.7661,
270
+ "eval_samples_per_second": 936.546,
271
+ "eval_steps_per_second": 58.888,
272
+ "step": 2000
273
+ },
274
+ {
275
+ "epoch": 2.35,
276
+ "eval_accuracy": 0.9780579525232516,
277
+ "eval_f1": 0.9760987969594797,
278
+ "eval_loss": 0.08256729692220688,
279
+ "eval_precision": 0.9750288971262306,
280
+ "eval_recall": 0.9771710473755693,
281
+ "eval_runtime": 1.7494,
282
+ "eval_samples_per_second": 945.46,
283
+ "eval_steps_per_second": 59.449,
284
+ "step": 2100
285
+ },
286
+ {
287
+ "epoch": 2.46,
288
+ "eval_accuracy": 0.9782761978309774,
289
+ "eval_f1": 0.9764048308214393,
290
+ "eval_loss": 0.08261139690876007,
291
+ "eval_precision": 0.9758788555923547,
292
+ "eval_recall": 0.9769313733322681,
293
+ "eval_runtime": 1.7667,
294
+ "eval_samples_per_second": 936.186,
295
+ "eval_steps_per_second": 58.865,
296
+ "step": 2200
297
+ },
298
+ {
299
+ "epoch": 2.57,
300
+ "eval_accuracy": 0.9783265621327603,
301
+ "eval_f1": 0.9761928518688512,
302
+ "eval_loss": 0.07977774739265442,
303
+ "eval_precision": 0.9753559038162459,
304
+ "eval_recall": 0.977031237516977,
305
+ "eval_runtime": 1.7935,
306
+ "eval_samples_per_second": 922.203,
307
+ "eval_steps_per_second": 57.986,
308
+ "step": 2300
309
+ },
310
+ {
311
+ "epoch": 2.68,
312
+ "eval_accuracy": 0.9788973575529665,
313
+ "eval_f1": 0.9770031227863634,
314
+ "eval_loss": 0.08085376024246216,
315
+ "eval_precision": 0.9760779857664015,
316
+ "eval_recall": 0.9779300151793561,
317
+ "eval_runtime": 1.8062,
318
+ "eval_samples_per_second": 915.718,
319
+ "eval_steps_per_second": 57.578,
320
+ "step": 2400
321
+ },
322
+ {
323
+ "epoch": 2.79,
324
+ "learning_rate": 1.509776536312849e-05,
325
+ "loss": 0.0356,
326
+ "step": 2500
327
+ },
328
+ {
329
+ "epoch": 2.79,
330
+ "eval_accuracy": 0.9785280193398919,
331
+ "eval_f1": 0.9764671847410792,
332
+ "eval_loss": 0.08119603991508484,
333
+ "eval_precision": 0.9754260089686099,
334
+ "eval_recall": 0.9775105856035792,
335
+ "eval_runtime": 1.7562,
336
+ "eval_samples_per_second": 941.793,
337
+ "eval_steps_per_second": 59.218,
338
+ "step": 2500
339
+ },
340
+ {
341
+ "epoch": 2.91,
342
+ "eval_accuracy": 0.9783937145351375,
343
+ "eval_f1": 0.9762508232382701,
344
+ "eval_loss": 0.079347625374794,
345
+ "eval_precision": 0.9754915646312766,
346
+ "eval_recall": 0.9770112646800352,
347
+ "eval_runtime": 1.8127,
348
+ "eval_samples_per_second": 912.464,
349
+ "eval_steps_per_second": 57.374,
350
+ "step": 2600
351
+ },
352
+ {
353
+ "epoch": 3.02,
354
+ "eval_accuracy": 0.9787966289494007,
355
+ "eval_f1": 0.9768903789738371,
356
+ "eval_loss": 0.07749345153570175,
357
+ "eval_precision": 0.9760917248255234,
358
+ "eval_recall": 0.977690341136055,
359
+ "eval_runtime": 1.7581,
360
+ "eval_samples_per_second": 940.794,
361
+ "eval_steps_per_second": 59.155,
362
+ "step": 2700
363
+ },
364
+ {
365
+ "epoch": 3.13,
366
+ "eval_accuracy": 0.9790148742571265,
367
+ "eval_f1": 0.9772217067257326,
368
+ "eval_loss": 0.08272269368171692,
369
+ "eval_precision": 0.9763352538926214,
370
+ "eval_recall": 0.9781097707118319,
371
+ "eval_runtime": 1.7744,
372
+ "eval_samples_per_second": 932.135,
373
+ "eval_steps_per_second": 58.611,
374
+ "step": 2800
375
+ },
376
+ {
377
+ "epoch": 3.24,
378
+ "eval_accuracy": 0.9795353053755498,
379
+ "eval_f1": 0.9775534228536084,
380
+ "eval_loss": 0.08201353996992111,
381
+ "eval_precision": 0.9765596970300977,
382
+ "eval_recall": 0.9785491731245506,
383
+ "eval_runtime": 1.8046,
384
+ "eval_samples_per_second": 916.528,
385
+ "eval_steps_per_second": 57.629,
386
+ "step": 2900
387
+ },
388
+ {
389
+ "epoch": 3.35,
390
+ "learning_rate": 8.114525139664805e-06,
391
+ "loss": 0.0273,
392
+ "step": 3000
393
+ },
394
+ {
395
+ "epoch": 3.35,
396
+ "eval_accuracy": 0.9786959003458349,
397
+ "eval_f1": 0.9767107848515375,
398
+ "eval_loss": 0.08390253037214279,
399
+ "eval_precision": 0.9754945908792063,
400
+ "eval_recall": 0.9779300151793561,
401
+ "eval_runtime": 1.7708,
402
+ "eval_samples_per_second": 934.046,
403
+ "eval_steps_per_second": 58.731,
404
+ "step": 3000
405
+ },
406
+ {
407
+ "epoch": 3.46,
408
+ "eval_accuracy": 0.9789309337541551,
409
+ "eval_f1": 0.9770059880239521,
410
+ "eval_loss": 0.0809033140540123,
411
+ "eval_precision": 0.9763823505944307,
412
+ "eval_recall": 0.9776304226252297,
413
+ "eval_runtime": 1.7599,
414
+ "eval_samples_per_second": 939.83,
415
+ "eval_steps_per_second": 59.094,
416
+ "step": 3100
417
+ },
418
+ {
419
+ "epoch": 3.58,
420
+ "eval_accuracy": 0.9789645099553437,
421
+ "eval_f1": 0.9768119412515964,
422
+ "eval_loss": 0.08214528858661652,
423
+ "eval_precision": 0.97595502033655,
424
+ "eval_recall": 0.9776703682991132,
425
+ "eval_runtime": 1.7652,
426
+ "eval_samples_per_second": 936.992,
427
+ "eval_steps_per_second": 58.916,
428
+ "step": 3200
429
+ },
430
+ {
431
+ "epoch": 3.69,
432
+ "eval_accuracy": 0.9794849410737669,
433
+ "eval_f1": 0.9775280898876404,
434
+ "eval_loss": 0.08165577799081802,
435
+ "eval_precision": 0.9767678379132931,
436
+ "eval_recall": 0.9782895262443078,
437
+ "eval_runtime": 1.7562,
438
+ "eval_samples_per_second": 941.784,
439
+ "eval_steps_per_second": 59.217,
440
+ "step": 3300
441
+ },
442
+ {
443
+ "epoch": 3.8,
444
+ "eval_accuracy": 0.9792499076654467,
445
+ "eval_f1": 0.9771967466693278,
446
+ "eval_loss": 0.0833316445350647,
447
+ "eval_precision": 0.97652432335401,
448
+ "eval_recall": 0.9778700966685308,
449
+ "eval_runtime": 1.7941,
450
+ "eval_samples_per_second": 921.897,
451
+ "eval_steps_per_second": 57.967,
452
+ "step": 3400
453
+ },
454
+ {
455
+ "epoch": 3.91,
456
+ "learning_rate": 1.1312849162011174e-06,
457
+ "loss": 0.0232,
458
+ "step": 3500
459
+ },
460
+ {
461
+ "epoch": 3.91,
462
+ "eval_accuracy": 0.9791827552630695,
463
+ "eval_f1": 0.977121020125121,
464
+ "eval_loss": 0.08293969184160233,
465
+ "eval_precision": 0.9762735519888346,
466
+ "eval_recall": 0.9779699608532396,
467
+ "eval_runtime": 1.7577,
468
+ "eval_samples_per_second": 941.011,
469
+ "eval_steps_per_second": 59.169,
470
+ "step": 3500
471
+ },
472
+ {
473
+ "epoch": 4.0,
474
+ "step": 3580,
475
+ "total_flos": 1136902583504160.0,
476
+ "train_loss": 0.08176401986756138,
477
+ "train_runtime": 7371.7605,
478
+ "train_samples_per_second": 7.762,
479
+ "train_steps_per_second": 0.486
480
+ }
481
+ ],
482
+ "max_steps": 3580,
483
+ "num_train_epochs": 4,
484
+ "total_flos": 1136902583504160.0,
485
+ "trial_name": null,
486
+ "trial_params": null
487
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5101f6dd014fdc65cf785e2a645ba918465c411c3ba7882342edd57c789cba96
3
+ size 2863
vocab.txt ADDED
The diff for this file is too large to render. See raw diff