yashcode00 commited on
Commit
0891c08
·
1 Parent(s): 69dd9e2

yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-large-xlsr-indian-language-classification-featureExtractor
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5192
21
- - Accuracy: 0.7529
22
 
23
  ## Model description
24
 
@@ -51,7 +51,9 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:-----:|:---------------:|:--------:|
54
- | 0.1136 | 52.81 | 10000 | 1.4509 | 0.7108 |
 
 
55
 
56
 
57
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # wav2vec2-large-xlsr-indian-language-classification-featureExtractor
17
 
18
+ This model is a fine-tuned version of [yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor](https://huggingface.co/yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6214
21
+ - Accuracy: 0.8911
22
 
23
  ## Model description
24
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:-----:|:---------------:|:--------:|
54
+ | 0.065 | 26.4 | 5000 | 0.6983 | 0.8568 |
55
+ | 0.0412 | 52.81 | 10000 | 0.5958 | 0.8762 |
56
+ | 0.0173 | 79.21 | 15000 | 0.5708 | 0.8969 |
57
 
58
 
59
  ### Framework versions
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 99.8,
3
- "eval_accuracy": 0.7528877854347229,
4
- "eval_loss": 1.519213080406189,
5
- "eval_runtime": 48.7307,
6
  "eval_samples": 2424,
7
- "eval_samples_per_second": 49.743,
8
- "eval_steps_per_second": 6.218,
9
  "total_flos": 3.6752439370752e+19,
10
- "train_loss": 0.3331463608035335,
11
- "train_runtime": 40683.5838,
12
  "train_samples": 12120,
13
- "train_samples_per_second": 29.791,
14
- "train_steps_per_second": 0.465
15
  }
 
1
  {
2
  "epoch": 99.8,
3
+ "eval_accuracy": 0.8910890817642212,
4
+ "eval_loss": 0.6214143633842468,
5
+ "eval_runtime": 50.0096,
6
  "eval_samples": 2424,
7
+ "eval_samples_per_second": 48.471,
8
+ "eval_steps_per_second": 6.059,
9
  "total_flos": 3.6752439370752e+19,
10
+ "train_loss": 0.05161126141825681,
11
+ "train_runtime": 41136.7074,
12
  "train_samples": 12120,
13
+ "train_samples_per_second": 29.463,
14
+ "train_steps_per_second": 0.459
15
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
 
1
  {
2
+ "_name_or_path": "yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 99.8,
3
- "eval_accuracy": 0.7528877854347229,
4
- "eval_loss": 1.519213080406189,
5
- "eval_runtime": 48.7307,
6
  "eval_samples": 2424,
7
- "eval_samples_per_second": 49.743,
8
- "eval_steps_per_second": 6.218
9
  }
 
1
  {
2
  "epoch": 99.8,
3
+ "eval_accuracy": 0.8910890817642212,
4
+ "eval_loss": 0.6214143633842468,
5
+ "eval_runtime": 50.0096,
6
  "eval_samples": 2424,
7
+ "eval_samples_per_second": 48.471,
8
+ "eval_steps_per_second": 6.059
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:891bea4361102bde3e80265f97be89f91a22bb4eb116a7b395690848cd130186
3
  size 1266146037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e822c5fb55993c9b4c6df450112755bab2c5d2e28b029c06fc99c29c716826
3
  size 1266146037
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 99.8,
3
  "total_flos": 3.6752439370752e+19,
4
- "train_loss": 0.3331463608035335,
5
- "train_runtime": 40683.5838,
6
  "train_samples": 12120,
7
- "train_samples_per_second": 29.791,
8
- "train_steps_per_second": 0.465
9
  }
 
1
  {
2
  "epoch": 99.8,
3
  "total_flos": 3.6752439370752e+19,
4
+ "train_loss": 0.05161126141825681,
5
+ "train_runtime": 41136.7074,
6
  "train_samples": 12120,
7
+ "train_samples_per_second": 29.463,
8
+ "train_steps_per_second": 0.459
9
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 99.8019801980198,
5
- "eval_steps": 10000,
6
  "global_step": 18900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -11,1162 +11,1180 @@
11
  {
12
  "epoch": 0.53,
13
  "learning_rate": 4.974074074074074e-05,
14
- "loss": 2.3707,
15
  "step": 100
16
  },
17
  {
18
  "epoch": 1.06,
19
  "learning_rate": 4.947883597883598e-05,
20
- "loss": 2.3317,
21
  "step": 200
22
  },
23
  {
24
  "epoch": 1.58,
25
- "learning_rate": 4.9216931216931214e-05,
26
- "loss": 2.2586,
27
  "step": 300
28
  },
29
  {
30
  "epoch": 2.11,
31
- "learning_rate": 4.895238095238096e-05,
32
- "loss": 2.1762,
33
  "step": 400
34
  },
35
  {
36
  "epoch": 2.64,
37
- "learning_rate": 4.8690476190476194e-05,
38
- "loss": 2.0583,
39
  "step": 500
40
  },
41
  {
42
  "epoch": 3.17,
43
- "learning_rate": 4.842592592592593e-05,
44
- "loss": 1.9666,
45
  "step": 600
46
  },
47
  {
48
  "epoch": 3.7,
49
- "learning_rate": 4.8161375661375666e-05,
50
- "loss": 1.8805,
51
  "step": 700
52
  },
53
  {
54
  "epoch": 4.22,
55
- "learning_rate": 4.78968253968254e-05,
56
- "loss": 1.7925,
57
  "step": 800
58
  },
59
  {
60
  "epoch": 4.75,
61
- "learning_rate": 4.763227513227513e-05,
62
- "loss": 1.668,
63
  "step": 900
64
  },
65
  {
66
  "epoch": 5.28,
67
- "learning_rate": 4.7367724867724874e-05,
68
- "loss": 1.582,
69
  "step": 1000
70
  },
71
  {
72
  "epoch": 5.81,
73
- "learning_rate": 4.71031746031746e-05,
74
- "loss": 1.517,
75
  "step": 1100
76
  },
77
  {
78
  "epoch": 6.34,
79
- "learning_rate": 4.6838624338624346e-05,
80
- "loss": 1.4208,
81
  "step": 1200
82
  },
83
  {
84
  "epoch": 6.86,
85
- "learning_rate": 4.6574074074074076e-05,
86
- "loss": 1.3843,
87
  "step": 1300
88
  },
89
  {
90
  "epoch": 7.39,
91
- "learning_rate": 4.630952380952381e-05,
92
- "loss": 1.2794,
93
  "step": 1400
94
  },
95
  {
96
  "epoch": 7.92,
97
- "learning_rate": 4.604497354497355e-05,
98
- "loss": 1.2562,
99
  "step": 1500
100
  },
101
  {
102
  "epoch": 8.45,
103
- "learning_rate": 4.5780423280423284e-05,
104
- "loss": 1.1514,
105
  "step": 1600
106
  },
107
  {
108
  "epoch": 8.98,
109
- "learning_rate": 4.551587301587302e-05,
110
- "loss": 1.1387,
111
  "step": 1700
112
  },
113
  {
114
  "epoch": 9.5,
115
- "learning_rate": 4.5251322751322756e-05,
116
- "loss": 1.0741,
117
  "step": 1800
118
  },
119
  {
120
  "epoch": 10.03,
121
- "learning_rate": 4.4986772486772485e-05,
122
- "loss": 1.0015,
123
  "step": 1900
124
  },
125
  {
126
  "epoch": 10.56,
127
- "learning_rate": 4.472222222222223e-05,
128
- "loss": 0.9665,
129
  "step": 2000
130
  },
131
  {
132
  "epoch": 11.09,
133
- "learning_rate": 4.445767195767196e-05,
134
- "loss": 0.902,
135
  "step": 2100
136
  },
137
  {
138
  "epoch": 11.62,
139
- "learning_rate": 4.41931216931217e-05,
140
- "loss": 0.8938,
141
  "step": 2200
142
  },
143
  {
144
  "epoch": 12.15,
145
- "learning_rate": 4.392857142857143e-05,
146
- "loss": 0.8334,
147
  "step": 2300
148
  },
149
  {
150
  "epoch": 12.67,
151
- "learning_rate": 4.3664021164021166e-05,
152
- "loss": 0.787,
153
  "step": 2400
154
  },
155
  {
156
  "epoch": 13.2,
157
- "learning_rate": 4.33994708994709e-05,
158
- "loss": 0.7729,
159
  "step": 2500
160
  },
161
  {
162
  "epoch": 13.73,
163
- "learning_rate": 4.313492063492064e-05,
164
- "loss": 0.7254,
165
  "step": 2600
166
  },
167
  {
168
  "epoch": 14.26,
169
- "learning_rate": 4.2870370370370374e-05,
170
- "loss": 0.6632,
171
  "step": 2700
172
  },
173
  {
174
  "epoch": 14.79,
175
- "learning_rate": 4.260582010582011e-05,
176
- "loss": 0.6737,
177
  "step": 2800
178
  },
179
  {
180
  "epoch": 15.31,
181
- "learning_rate": 4.2341269841269846e-05,
182
- "loss": 0.6641,
183
  "step": 2900
184
  },
185
  {
186
  "epoch": 15.84,
187
- "learning_rate": 4.207671957671958e-05,
188
- "loss": 0.5873,
189
  "step": 3000
190
  },
191
  {
192
  "epoch": 16.37,
193
- "learning_rate": 4.181216931216931e-05,
194
- "loss": 0.5618,
195
  "step": 3100
196
  },
197
  {
198
  "epoch": 16.9,
199
- "learning_rate": 4.1547619047619054e-05,
200
- "loss": 0.5628,
201
  "step": 3200
202
  },
203
  {
204
  "epoch": 17.43,
205
- "learning_rate": 4.1283068783068784e-05,
206
- "loss": 0.5301,
207
  "step": 3300
208
  },
209
  {
210
  "epoch": 17.95,
211
- "learning_rate": 4.101851851851852e-05,
212
- "loss": 0.533,
213
  "step": 3400
214
  },
215
  {
216
  "epoch": 18.48,
217
- "learning_rate": 4.0753968253968256e-05,
218
- "loss": 0.4929,
219
  "step": 3500
220
  },
221
  {
222
  "epoch": 19.01,
223
- "learning_rate": 4.048941798941799e-05,
224
- "loss": 0.4758,
225
  "step": 3600
226
  },
227
  {
228
  "epoch": 19.54,
229
- "learning_rate": 4.022486772486773e-05,
230
- "loss": 0.4542,
231
  "step": 3700
232
  },
233
  {
234
  "epoch": 20.07,
235
- "learning_rate": 3.9960317460317464e-05,
236
- "loss": 0.4134,
237
  "step": 3800
238
  },
239
  {
240
  "epoch": 20.59,
241
- "learning_rate": 3.96984126984127e-05,
242
- "loss": 0.4194,
243
  "step": 3900
244
  },
245
  {
246
  "epoch": 21.12,
247
- "learning_rate": 3.943386243386244e-05,
248
- "loss": 0.4098,
249
  "step": 4000
250
  },
251
  {
252
  "epoch": 21.65,
253
- "learning_rate": 3.916931216931217e-05,
254
- "loss": 0.3907,
255
  "step": 4100
256
  },
257
  {
258
  "epoch": 22.18,
259
- "learning_rate": 3.89047619047619e-05,
260
- "loss": 0.4125,
261
  "step": 4200
262
  },
263
  {
264
  "epoch": 22.71,
265
- "learning_rate": 3.8640211640211645e-05,
266
- "loss": 0.3631,
267
  "step": 4300
268
  },
269
  {
270
  "epoch": 23.23,
271
- "learning_rate": 3.8375661375661375e-05,
272
- "loss": 0.3434,
273
  "step": 4400
274
  },
275
  {
276
  "epoch": 23.76,
277
- "learning_rate": 3.811111111111112e-05,
278
- "loss": 0.3318,
279
  "step": 4500
280
  },
281
  {
282
  "epoch": 24.29,
283
- "learning_rate": 3.784656084656085e-05,
284
- "loss": 0.3506,
285
  "step": 4600
286
  },
287
  {
288
  "epoch": 24.82,
289
- "learning_rate": 3.758201058201058e-05,
290
- "loss": 0.3299,
291
  "step": 4700
292
  },
293
  {
294
  "epoch": 25.35,
295
- "learning_rate": 3.731746031746032e-05,
296
- "loss": 0.2909,
297
  "step": 4800
298
  },
299
  {
300
  "epoch": 25.87,
301
- "learning_rate": 3.7052910052910055e-05,
302
- "loss": 0.312,
303
  "step": 4900
304
  },
305
  {
306
  "epoch": 26.4,
307
- "learning_rate": 3.678835978835979e-05,
308
- "loss": 0.2961,
 
 
 
 
 
 
 
 
 
309
  "step": 5000
310
  },
311
  {
312
  "epoch": 26.93,
313
- "learning_rate": 3.652380952380953e-05,
314
- "loss": 0.311,
315
  "step": 5100
316
  },
317
  {
318
  "epoch": 27.46,
319
- "learning_rate": 3.6259259259259256e-05,
320
- "loss": 0.288,
321
  "step": 5200
322
  },
323
  {
324
  "epoch": 27.99,
325
- "learning_rate": 3.5994708994709e-05,
326
- "loss": 0.2783,
327
  "step": 5300
328
  },
329
  {
330
  "epoch": 28.51,
331
- "learning_rate": 3.573015873015873e-05,
332
- "loss": 0.2649,
333
  "step": 5400
334
  },
335
  {
336
  "epoch": 29.04,
337
- "learning_rate": 3.546560846560847e-05,
338
- "loss": 0.2573,
339
  "step": 5500
340
  },
341
  {
342
  "epoch": 29.57,
343
- "learning_rate": 3.52010582010582e-05,
344
- "loss": 0.2536,
345
  "step": 5600
346
  },
347
  {
348
  "epoch": 30.1,
349
- "learning_rate": 3.493650793650794e-05,
350
- "loss": 0.2528,
351
  "step": 5700
352
  },
353
  {
354
  "epoch": 30.63,
355
- "learning_rate": 3.467195767195767e-05,
356
- "loss": 0.232,
357
  "step": 5800
358
  },
359
  {
360
  "epoch": 31.16,
361
- "learning_rate": 3.440740740740741e-05,
362
- "loss": 0.2334,
363
  "step": 5900
364
  },
365
  {
366
  "epoch": 31.68,
367
- "learning_rate": 3.4142857142857145e-05,
368
- "loss": 0.2231,
369
  "step": 6000
370
  },
371
  {
372
  "epoch": 32.21,
373
- "learning_rate": 3.387830687830688e-05,
374
- "loss": 0.2201,
375
  "step": 6100
376
  },
377
  {
378
  "epoch": 32.74,
379
- "learning_rate": 3.361375661375662e-05,
380
- "loss": 0.2045,
381
  "step": 6200
382
  },
383
  {
384
  "epoch": 33.27,
385
- "learning_rate": 3.334920634920635e-05,
386
- "loss": 0.2221,
387
  "step": 6300
388
  },
389
  {
390
  "epoch": 33.8,
391
- "learning_rate": 3.308465608465608e-05,
392
- "loss": 0.2086,
393
  "step": 6400
394
  },
395
  {
396
  "epoch": 34.32,
397
- "learning_rate": 3.2820105820105826e-05,
398
- "loss": 0.2108,
399
  "step": 6500
400
  },
401
  {
402
  "epoch": 34.85,
403
- "learning_rate": 3.2555555555555555e-05,
404
- "loss": 0.209,
405
  "step": 6600
406
  },
407
  {
408
  "epoch": 35.38,
409
- "learning_rate": 3.229100529100529e-05,
410
- "loss": 0.1968,
411
  "step": 6700
412
  },
413
  {
414
  "epoch": 35.91,
415
- "learning_rate": 3.202645502645503e-05,
416
- "loss": 0.2068,
417
  "step": 6800
418
  },
419
  {
420
  "epoch": 36.44,
421
- "learning_rate": 3.176190476190476e-05,
422
- "loss": 0.1827,
423
  "step": 6900
424
  },
425
  {
426
  "epoch": 36.96,
427
- "learning_rate": 3.14973544973545e-05,
428
- "loss": 0.1966,
429
  "step": 7000
430
  },
431
  {
432
  "epoch": 37.49,
433
- "learning_rate": 3.1232804232804235e-05,
434
- "loss": 0.1794,
435
  "step": 7100
436
  },
437
  {
438
  "epoch": 38.02,
439
- "learning_rate": 3.096825396825397e-05,
440
- "loss": 0.1737,
441
  "step": 7200
442
  },
443
  {
444
  "epoch": 38.55,
445
- "learning_rate": 3.070370370370371e-05,
446
- "loss": 0.1721,
447
  "step": 7300
448
  },
449
  {
450
  "epoch": 39.08,
451
- "learning_rate": 3.043915343915344e-05,
452
- "loss": 0.1928,
453
  "step": 7400
454
  },
455
  {
456
  "epoch": 39.6,
457
- "learning_rate": 3.0174603174603176e-05,
458
- "loss": 0.164,
459
  "step": 7500
460
  },
461
  {
462
  "epoch": 40.13,
463
- "learning_rate": 2.991005291005291e-05,
464
- "loss": 0.1651,
465
  "step": 7600
466
  },
467
  {
468
  "epoch": 40.66,
469
- "learning_rate": 2.964550264550265e-05,
470
- "loss": 0.1533,
471
  "step": 7700
472
  },
473
  {
474
  "epoch": 41.19,
475
- "learning_rate": 2.938095238095238e-05,
476
- "loss": 0.1663,
477
  "step": 7800
478
  },
479
  {
480
  "epoch": 41.72,
481
- "learning_rate": 2.911640211640212e-05,
482
- "loss": 0.1473,
483
  "step": 7900
484
  },
485
  {
486
  "epoch": 42.24,
487
- "learning_rate": 2.8851851851851853e-05,
488
- "loss": 0.1562,
489
  "step": 8000
490
  },
491
  {
492
  "epoch": 42.77,
493
- "learning_rate": 2.858730158730159e-05,
494
- "loss": 0.1372,
495
  "step": 8100
496
  },
497
  {
498
  "epoch": 43.3,
499
- "learning_rate": 2.8322751322751322e-05,
500
- "loss": 0.1479,
501
  "step": 8200
502
  },
503
  {
504
  "epoch": 43.83,
505
- "learning_rate": 2.805820105820106e-05,
506
- "loss": 0.1496,
507
  "step": 8300
508
  },
509
  {
510
  "epoch": 44.36,
511
- "learning_rate": 2.7793650793650794e-05,
512
- "loss": 0.1373,
513
  "step": 8400
514
  },
515
  {
516
  "epoch": 44.88,
517
- "learning_rate": 2.7529100529100534e-05,
518
- "loss": 0.1414,
519
  "step": 8500
520
  },
521
  {
522
  "epoch": 45.41,
523
- "learning_rate": 2.7264550264550266e-05,
524
- "loss": 0.1411,
525
  "step": 8600
526
  },
527
  {
528
  "epoch": 45.94,
529
- "learning_rate": 2.7000000000000002e-05,
530
- "loss": 0.1349,
531
  "step": 8700
532
  },
533
  {
534
  "epoch": 46.47,
535
- "learning_rate": 2.673809523809524e-05,
536
- "loss": 0.1248,
537
  "step": 8800
538
  },
539
  {
540
  "epoch": 47.0,
541
- "learning_rate": 2.6473544973544972e-05,
542
- "loss": 0.1489,
543
  "step": 8900
544
  },
545
  {
546
  "epoch": 47.52,
547
- "learning_rate": 2.620899470899471e-05,
548
- "loss": 0.1311,
549
  "step": 9000
550
  },
551
  {
552
  "epoch": 48.05,
553
- "learning_rate": 2.5944444444444444e-05,
554
- "loss": 0.1282,
555
  "step": 9100
556
  },
557
  {
558
  "epoch": 48.58,
559
- "learning_rate": 2.567989417989418e-05,
560
- "loss": 0.1367,
561
  "step": 9200
562
  },
563
  {
564
  "epoch": 49.11,
565
- "learning_rate": 2.5415343915343913e-05,
566
- "loss": 0.1209,
567
  "step": 9300
568
  },
569
  {
570
  "epoch": 49.64,
571
- "learning_rate": 2.5150793650793652e-05,
572
- "loss": 0.1416,
573
  "step": 9400
574
  },
575
  {
576
  "epoch": 50.17,
577
- "learning_rate": 2.488624338624339e-05,
578
- "loss": 0.132,
579
  "step": 9500
580
  },
581
  {
582
  "epoch": 50.69,
583
- "learning_rate": 2.4621693121693125e-05,
584
- "loss": 0.111,
585
  "step": 9600
586
  },
587
  {
588
  "epoch": 51.22,
589
  "learning_rate": 2.4357142857142857e-05,
590
- "loss": 0.1004,
591
  "step": 9700
592
  },
593
  {
594
  "epoch": 51.75,
595
  "learning_rate": 2.4092592592592593e-05,
596
- "loss": 0.1118,
597
  "step": 9800
598
  },
599
  {
600
  "epoch": 52.28,
601
  "learning_rate": 2.382804232804233e-05,
602
- "loss": 0.1309,
603
  "step": 9900
604
  },
605
  {
606
  "epoch": 52.81,
607
  "learning_rate": 2.3563492063492065e-05,
608
- "loss": 0.1136,
609
  "step": 10000
610
  },
611
  {
612
  "epoch": 52.81,
613
- "eval_accuracy": 0.7108085751533508,
614
- "eval_loss": 1.4509037733078003,
615
- "eval_runtime": 49.9625,
616
- "eval_samples_per_second": 48.516,
617
- "eval_steps_per_second": 6.065,
618
  "step": 10000
619
  },
620
  {
621
  "epoch": 53.33,
622
  "learning_rate": 2.32989417989418e-05,
623
- "loss": 0.1023,
624
  "step": 10100
625
  },
626
  {
627
  "epoch": 53.86,
628
  "learning_rate": 2.3034391534391538e-05,
629
- "loss": 0.1242,
630
  "step": 10200
631
  },
632
  {
633
  "epoch": 54.39,
634
  "learning_rate": 2.276984126984127e-05,
635
- "loss": 0.1162,
636
  "step": 10300
637
  },
638
  {
639
  "epoch": 54.92,
640
  "learning_rate": 2.2505291005291006e-05,
641
- "loss": 0.1228,
642
  "step": 10400
643
  },
644
  {
645
  "epoch": 55.45,
646
  "learning_rate": 2.2240740740740743e-05,
647
- "loss": 0.1087,
648
  "step": 10500
649
  },
650
  {
651
  "epoch": 55.97,
652
  "learning_rate": 2.197619047619048e-05,
653
- "loss": 0.1039,
654
  "step": 10600
655
  },
656
  {
657
  "epoch": 56.5,
658
  "learning_rate": 2.1711640211640215e-05,
659
- "loss": 0.1059,
660
  "step": 10700
661
  },
662
  {
663
  "epoch": 57.03,
664
  "learning_rate": 2.1447089947089947e-05,
665
- "loss": 0.0999,
666
  "step": 10800
667
  },
668
  {
669
  "epoch": 57.56,
670
  "learning_rate": 2.1182539682539683e-05,
671
- "loss": 0.0937,
672
  "step": 10900
673
  },
674
  {
675
  "epoch": 58.09,
676
  "learning_rate": 2.091798941798942e-05,
677
- "loss": 0.0996,
678
  "step": 11000
679
  },
680
  {
681
  "epoch": 58.61,
682
  "learning_rate": 2.0653439153439156e-05,
683
- "loss": 0.0856,
684
  "step": 11100
685
  },
686
  {
687
  "epoch": 59.14,
688
  "learning_rate": 2.0388888888888892e-05,
689
- "loss": 0.1025,
690
  "step": 11200
691
  },
692
  {
693
  "epoch": 59.67,
694
  "learning_rate": 2.0124338624338628e-05,
695
- "loss": 0.0908,
696
  "step": 11300
697
  },
698
  {
699
  "epoch": 60.2,
700
  "learning_rate": 1.985978835978836e-05,
701
- "loss": 0.0873,
702
  "step": 11400
703
  },
704
  {
705
  "epoch": 60.73,
706
  "learning_rate": 1.9595238095238097e-05,
707
- "loss": 0.0959,
708
  "step": 11500
709
  },
710
  {
711
  "epoch": 61.25,
712
  "learning_rate": 1.9330687830687833e-05,
713
- "loss": 0.0967,
714
  "step": 11600
715
  },
716
  {
717
  "epoch": 61.78,
718
  "learning_rate": 1.906613756613757e-05,
719
- "loss": 0.0968,
720
  "step": 11700
721
  },
722
  {
723
  "epoch": 62.31,
724
  "learning_rate": 1.8801587301587305e-05,
725
- "loss": 0.083,
726
  "step": 11800
727
  },
728
  {
729
  "epoch": 62.84,
730
  "learning_rate": 1.8537037037037037e-05,
731
- "loss": 0.0831,
732
  "step": 11900
733
  },
734
  {
735
  "epoch": 63.37,
736
  "learning_rate": 1.8272486772486774e-05,
737
- "loss": 0.0938,
738
  "step": 12000
739
  },
740
  {
741
  "epoch": 63.89,
742
  "learning_rate": 1.800793650793651e-05,
743
- "loss": 0.088,
744
  "step": 12100
745
  },
746
  {
747
  "epoch": 64.42,
748
  "learning_rate": 1.7743386243386246e-05,
749
- "loss": 0.0741,
750
  "step": 12200
751
  },
752
  {
753
  "epoch": 64.95,
754
  "learning_rate": 1.7478835978835982e-05,
755
- "loss": 0.0938,
756
  "step": 12300
757
  },
758
  {
759
  "epoch": 65.48,
760
  "learning_rate": 1.7214285714285715e-05,
761
- "loss": 0.0833,
762
  "step": 12400
763
  },
764
  {
765
  "epoch": 66.01,
766
  "learning_rate": 1.694973544973545e-05,
767
- "loss": 0.0803,
768
  "step": 12500
769
  },
770
  {
771
  "epoch": 66.53,
772
  "learning_rate": 1.6685185185185187e-05,
773
- "loss": 0.0702,
774
  "step": 12600
775
  },
776
  {
777
  "epoch": 67.06,
778
  "learning_rate": 1.6420634920634923e-05,
779
- "loss": 0.0783,
780
  "step": 12700
781
  },
782
  {
783
  "epoch": 67.59,
784
  "learning_rate": 1.615608465608466e-05,
785
- "loss": 0.0858,
786
  "step": 12800
787
  },
788
  {
789
  "epoch": 68.12,
790
  "learning_rate": 1.5891534391534395e-05,
791
- "loss": 0.0737,
792
  "step": 12900
793
  },
794
  {
795
  "epoch": 68.65,
796
  "learning_rate": 1.5626984126984128e-05,
797
- "loss": 0.0751,
798
  "step": 13000
799
  },
800
  {
801
  "epoch": 69.17,
802
  "learning_rate": 1.5362433862433864e-05,
803
- "loss": 0.0703,
804
  "step": 13100
805
  },
806
  {
807
  "epoch": 69.7,
808
- "learning_rate": 1.51005291005291e-05,
809
- "loss": 0.0626,
810
  "step": 13200
811
  },
812
  {
813
  "epoch": 70.23,
814
- "learning_rate": 1.4835978835978837e-05,
815
- "loss": 0.0747,
816
  "step": 13300
817
  },
818
  {
819
  "epoch": 70.76,
820
- "learning_rate": 1.4571428571428573e-05,
821
- "loss": 0.0763,
822
  "step": 13400
823
  },
824
  {
825
  "epoch": 71.29,
826
- "learning_rate": 1.4309523809523811e-05,
827
- "loss": 0.0777,
828
  "step": 13500
829
  },
830
  {
831
  "epoch": 71.82,
832
- "learning_rate": 1.4044973544973547e-05,
833
- "loss": 0.06,
834
  "step": 13600
835
  },
836
  {
837
  "epoch": 72.34,
838
- "learning_rate": 1.3780423280423282e-05,
839
- "loss": 0.0695,
840
  "step": 13700
841
  },
842
  {
843
  "epoch": 72.87,
844
- "learning_rate": 1.3515873015873018e-05,
845
- "loss": 0.0619,
846
  "step": 13800
847
  },
848
  {
849
  "epoch": 73.4,
850
- "learning_rate": 1.325132275132275e-05,
851
- "loss": 0.0689,
852
  "step": 13900
853
  },
854
  {
855
  "epoch": 73.93,
856
- "learning_rate": 1.2986772486772487e-05,
857
- "loss": 0.06,
858
  "step": 14000
859
  },
860
  {
861
  "epoch": 74.46,
862
- "learning_rate": 1.2722222222222221e-05,
863
- "loss": 0.0626,
864
  "step": 14100
865
  },
866
  {
867
  "epoch": 74.98,
868
- "learning_rate": 1.2457671957671959e-05,
869
- "loss": 0.0644,
870
  "step": 14200
871
  },
872
  {
873
  "epoch": 75.51,
874
- "learning_rate": 1.2193121693121693e-05,
875
- "loss": 0.0591,
876
  "step": 14300
877
  },
878
  {
879
  "epoch": 76.04,
880
- "learning_rate": 1.192857142857143e-05,
881
- "loss": 0.0584,
882
  "step": 14400
883
  },
884
  {
885
  "epoch": 76.57,
886
- "learning_rate": 1.1664021164021165e-05,
887
- "loss": 0.0521,
888
  "step": 14500
889
  },
890
  {
891
  "epoch": 77.1,
892
- "learning_rate": 1.13994708994709e-05,
893
- "loss": 0.0609,
894
  "step": 14600
895
  },
896
  {
897
  "epoch": 77.62,
898
- "learning_rate": 1.1134920634920636e-05,
899
- "loss": 0.0589,
900
  "step": 14700
901
  },
902
  {
903
  "epoch": 78.15,
904
- "learning_rate": 1.087037037037037e-05,
905
- "loss": 0.0581,
906
  "step": 14800
907
  },
908
  {
909
  "epoch": 78.68,
910
- "learning_rate": 1.0605820105820106e-05,
911
- "loss": 0.0714,
912
  "step": 14900
913
  },
914
  {
915
  "epoch": 79.21,
916
- "learning_rate": 1.0341269841269842e-05,
917
- "loss": 0.0511,
 
 
 
 
 
 
 
 
 
918
  "step": 15000
919
  },
920
  {
921
  "epoch": 79.74,
922
- "learning_rate": 1.0076719576719577e-05,
923
- "loss": 0.0541,
924
  "step": 15100
925
  },
926
  {
927
  "epoch": 80.26,
928
- "learning_rate": 9.812169312169313e-06,
929
- "loss": 0.0581,
930
  "step": 15200
931
  },
932
  {
933
  "epoch": 80.79,
934
- "learning_rate": 9.547619047619049e-06,
935
- "loss": 0.0535,
936
  "step": 15300
937
  },
938
  {
939
  "epoch": 81.32,
940
- "learning_rate": 9.283068783068783e-06,
941
- "loss": 0.0464,
942
  "step": 15400
943
  },
944
  {
945
  "epoch": 81.85,
946
- "learning_rate": 9.01851851851852e-06,
947
- "loss": 0.0635,
948
  "step": 15500
949
  },
950
  {
951
  "epoch": 82.38,
952
- "learning_rate": 8.753968253968254e-06,
953
- "loss": 0.0508,
954
  "step": 15600
955
  },
956
  {
957
  "epoch": 82.9,
958
- "learning_rate": 8.48941798941799e-06,
959
- "loss": 0.0562,
960
  "step": 15700
961
  },
962
  {
963
  "epoch": 83.43,
964
- "learning_rate": 8.224867724867726e-06,
965
- "loss": 0.0543,
966
  "step": 15800
967
  },
968
  {
969
  "epoch": 83.96,
970
- "learning_rate": 7.96031746031746e-06,
971
- "loss": 0.0525,
972
  "step": 15900
973
  },
974
  {
975
  "epoch": 84.49,
976
- "learning_rate": 7.695767195767196e-06,
977
- "loss": 0.0517,
978
  "step": 16000
979
  },
980
  {
981
  "epoch": 85.02,
982
- "learning_rate": 7.431216931216932e-06,
983
- "loss": 0.0552,
984
  "step": 16100
985
  },
986
  {
987
  "epoch": 85.54,
988
- "learning_rate": 7.166666666666667e-06,
989
- "loss": 0.0443,
990
  "step": 16200
991
  },
992
  {
993
  "epoch": 86.07,
994
- "learning_rate": 6.902116402116403e-06,
995
- "loss": 0.0409,
996
  "step": 16300
997
  },
998
  {
999
  "epoch": 86.6,
1000
- "learning_rate": 6.637566137566138e-06,
1001
- "loss": 0.0418,
1002
  "step": 16400
1003
  },
1004
  {
1005
  "epoch": 87.13,
1006
- "learning_rate": 6.3730158730158735e-06,
1007
- "loss": 0.0537,
1008
  "step": 16500
1009
  },
1010
  {
1011
  "epoch": 87.66,
1012
- "learning_rate": 6.108465608465609e-06,
1013
- "loss": 0.0518,
1014
  "step": 16600
1015
  },
1016
  {
1017
  "epoch": 88.18,
1018
- "learning_rate": 5.843915343915344e-06,
1019
- "loss": 0.057,
1020
  "step": 16700
1021
  },
1022
  {
1023
  "epoch": 88.71,
1024
- "learning_rate": 5.579365079365079e-06,
1025
- "loss": 0.0463,
1026
  "step": 16800
1027
  },
1028
  {
1029
  "epoch": 89.24,
1030
- "learning_rate": 5.3148148148148144e-06,
1031
- "loss": 0.0384,
1032
  "step": 16900
1033
  },
1034
  {
1035
  "epoch": 89.77,
1036
- "learning_rate": 5.0502645502645505e-06,
1037
- "loss": 0.0505,
1038
  "step": 17000
1039
  },
1040
  {
1041
  "epoch": 90.3,
1042
- "learning_rate": 4.785714285714286e-06,
1043
- "loss": 0.0553,
1044
  "step": 17100
1045
  },
1046
  {
1047
  "epoch": 90.83,
1048
- "learning_rate": 4.521164021164021e-06,
1049
- "loss": 0.049,
1050
  "step": 17200
1051
  },
1052
  {
1053
  "epoch": 91.35,
1054
- "learning_rate": 4.256613756613756e-06,
1055
- "loss": 0.0443,
1056
  "step": 17300
1057
  },
1058
  {
1059
  "epoch": 91.88,
1060
- "learning_rate": 3.992063492063492e-06,
1061
- "loss": 0.0453,
1062
  "step": 17400
1063
  },
1064
  {
1065
  "epoch": 92.41,
1066
- "learning_rate": 3.7275132275132275e-06,
1067
- "loss": 0.0432,
1068
  "step": 17500
1069
  },
1070
  {
1071
  "epoch": 92.94,
1072
- "learning_rate": 3.462962962962963e-06,
1073
- "loss": 0.0451,
1074
  "step": 17600
1075
  },
1076
  {
1077
  "epoch": 93.47,
1078
- "learning_rate": 3.1984126984126984e-06,
1079
- "loss": 0.0436,
1080
  "step": 17700
1081
  },
1082
  {
1083
  "epoch": 93.99,
1084
- "learning_rate": 2.933862433862434e-06,
1085
- "loss": 0.0424,
1086
  "step": 17800
1087
  },
1088
  {
1089
  "epoch": 94.52,
1090
- "learning_rate": 2.6693121693121693e-06,
1091
- "loss": 0.0456,
1092
  "step": 17900
1093
  },
1094
  {
1095
  "epoch": 95.05,
1096
- "learning_rate": 2.404761904761905e-06,
1097
- "loss": 0.0426,
1098
  "step": 18000
1099
  },
1100
  {
1101
  "epoch": 95.58,
1102
- "learning_rate": 2.1402116402116402e-06,
1103
- "loss": 0.0411,
1104
  "step": 18100
1105
  },
1106
  {
1107
  "epoch": 96.11,
1108
- "learning_rate": 1.8756613756613757e-06,
1109
- "loss": 0.0539,
1110
  "step": 18200
1111
  },
1112
  {
1113
  "epoch": 96.63,
1114
- "learning_rate": 1.6111111111111111e-06,
1115
- "loss": 0.0451,
1116
  "step": 18300
1117
  },
1118
  {
1119
  "epoch": 97.16,
1120
- "learning_rate": 1.3465608465608466e-06,
1121
- "loss": 0.0426,
1122
  "step": 18400
1123
  },
1124
  {
1125
  "epoch": 97.69,
1126
- "learning_rate": 1.082010582010582e-06,
1127
- "loss": 0.0526,
1128
  "step": 18500
1129
  },
1130
  {
1131
  "epoch": 98.22,
1132
- "learning_rate": 8.174603174603175e-07,
1133
- "loss": 0.0417,
1134
  "step": 18600
1135
  },
1136
  {
1137
  "epoch": 98.75,
1138
- "learning_rate": 5.529100529100529e-07,
1139
- "loss": 0.0413,
1140
  "step": 18700
1141
  },
1142
  {
1143
  "epoch": 99.27,
1144
- "learning_rate": 2.8835978835978836e-07,
1145
- "loss": 0.0512,
1146
  "step": 18800
1147
  },
1148
  {
1149
  "epoch": 99.8,
1150
- "learning_rate": 2.380952380952381e-08,
1151
- "loss": 0.0476,
1152
  "step": 18900
1153
  },
1154
  {
1155
  "epoch": 99.8,
1156
  "step": 18900,
1157
  "total_flos": 3.6752439370752e+19,
1158
- "train_loss": 0.3331463608035335,
1159
- "train_runtime": 40683.5838,
1160
- "train_samples_per_second": 29.791,
1161
- "train_steps_per_second": 0.465
1162
  },
1163
  {
1164
  "epoch": 99.8,
1165
- "eval_accuracy": 0.7528877854347229,
1166
- "eval_loss": 1.519213080406189,
1167
- "eval_runtime": 48.7307,
1168
- "eval_samples_per_second": 49.743,
1169
- "eval_steps_per_second": 6.218,
1170
  "step": 18900
1171
  }
1172
  ],
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 99.8019801980198,
5
+ "eval_steps": 5000,
6
  "global_step": 18900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
11
  {
12
  "epoch": 0.53,
13
  "learning_rate": 4.974074074074074e-05,
14
+ "loss": 0.2238,
15
  "step": 100
16
  },
17
  {
18
  "epoch": 1.06,
19
  "learning_rate": 4.947883597883598e-05,
20
+ "loss": 0.1891,
21
  "step": 200
22
  },
23
  {
24
  "epoch": 1.58,
25
+ "learning_rate": 4.921428571428572e-05,
26
+ "loss": 0.1687,
27
  "step": 300
28
  },
29
  {
30
  "epoch": 2.11,
31
+ "learning_rate": 4.894973544973545e-05,
32
+ "loss": 0.1855,
33
  "step": 400
34
  },
35
  {
36
  "epoch": 2.64,
37
+ "learning_rate": 4.868518518518519e-05,
38
+ "loss": 0.1481,
39
  "step": 500
40
  },
41
  {
42
  "epoch": 3.17,
43
+ "learning_rate": 4.842063492063492e-05,
44
+ "loss": 0.1691,
45
  "step": 600
46
  },
47
  {
48
  "epoch": 3.7,
49
+ "learning_rate": 4.815608465608466e-05,
50
+ "loss": 0.1415,
51
  "step": 700
52
  },
53
  {
54
  "epoch": 4.22,
55
+ "learning_rate": 4.7891534391534393e-05,
56
+ "loss": 0.1427,
57
  "step": 800
58
  },
59
  {
60
  "epoch": 4.75,
61
+ "learning_rate": 4.762698412698413e-05,
62
+ "loss": 0.1249,
63
  "step": 900
64
  },
65
  {
66
  "epoch": 5.28,
67
+ "learning_rate": 4.7362433862433866e-05,
68
+ "loss": 0.1405,
69
  "step": 1000
70
  },
71
  {
72
  "epoch": 5.81,
73
+ "learning_rate": 4.70978835978836e-05,
74
+ "loss": 0.138,
75
  "step": 1100
76
  },
77
  {
78
  "epoch": 6.34,
79
+ "learning_rate": 4.683333333333334e-05,
80
+ "loss": 0.1295,
81
  "step": 1200
82
  },
83
  {
84
  "epoch": 6.86,
85
+ "learning_rate": 4.6568783068783074e-05,
86
+ "loss": 0.1314,
87
  "step": 1300
88
  },
89
  {
90
  "epoch": 7.39,
91
+ "learning_rate": 4.63042328042328e-05,
92
+ "loss": 0.1092,
93
  "step": 1400
94
  },
95
  {
96
  "epoch": 7.92,
97
+ "learning_rate": 4.6039682539682546e-05,
98
+ "loss": 0.1169,
99
  "step": 1500
100
  },
101
  {
102
  "epoch": 8.45,
103
+ "learning_rate": 4.5775132275132275e-05,
104
+ "loss": 0.1195,
105
  "step": 1600
106
  },
107
  {
108
  "epoch": 8.98,
109
+ "learning_rate": 4.551058201058201e-05,
110
+ "loss": 0.1212,
111
  "step": 1700
112
  },
113
  {
114
  "epoch": 9.5,
115
+ "learning_rate": 4.524603174603175e-05,
116
+ "loss": 0.1057,
117
  "step": 1800
118
  },
119
  {
120
  "epoch": 10.03,
121
+ "learning_rate": 4.4981481481481484e-05,
122
+ "loss": 0.1199,
123
  "step": 1900
124
  },
125
  {
126
  "epoch": 10.56,
127
+ "learning_rate": 4.471693121693122e-05,
128
+ "loss": 0.1116,
129
  "step": 2000
130
  },
131
  {
132
  "epoch": 11.09,
133
+ "learning_rate": 4.4452380952380956e-05,
134
+ "loss": 0.0921,
135
  "step": 2100
136
  },
137
  {
138
  "epoch": 11.62,
139
+ "learning_rate": 4.418783068783069e-05,
140
+ "loss": 0.1057,
141
  "step": 2200
142
  },
143
  {
144
  "epoch": 12.15,
145
+ "learning_rate": 4.392328042328043e-05,
146
+ "loss": 0.1082,
147
  "step": 2300
148
  },
149
  {
150
  "epoch": 12.67,
151
+ "learning_rate": 4.365873015873016e-05,
152
+ "loss": 0.1045,
153
  "step": 2400
154
  },
155
  {
156
  "epoch": 13.2,
157
+ "learning_rate": 4.33941798941799e-05,
158
+ "loss": 0.0976,
159
  "step": 2500
160
  },
161
  {
162
  "epoch": 13.73,
163
+ "learning_rate": 4.312962962962963e-05,
164
+ "loss": 0.0971,
165
  "step": 2600
166
  },
167
  {
168
  "epoch": 14.26,
169
+ "learning_rate": 4.286507936507937e-05,
170
+ "loss": 0.0862,
171
  "step": 2700
172
  },
173
  {
174
  "epoch": 14.79,
175
+ "learning_rate": 4.26005291005291e-05,
176
+ "loss": 0.0883,
177
  "step": 2800
178
  },
179
  {
180
  "epoch": 15.31,
181
+ "learning_rate": 4.233597883597884e-05,
182
+ "loss": 0.1122,
183
  "step": 2900
184
  },
185
  {
186
  "epoch": 15.84,
187
+ "learning_rate": 4.2071428571428574e-05,
188
+ "loss": 0.0854,
189
  "step": 3000
190
  },
191
  {
192
  "epoch": 16.37,
193
+ "learning_rate": 4.180687830687831e-05,
194
+ "loss": 0.0942,
195
  "step": 3100
196
  },
197
  {
198
  "epoch": 16.9,
199
+ "learning_rate": 4.1542328042328046e-05,
200
+ "loss": 0.0888,
201
  "step": 3200
202
  },
203
  {
204
  "epoch": 17.43,
205
+ "learning_rate": 4.127777777777778e-05,
206
+ "loss": 0.0917,
207
  "step": 3300
208
  },
209
  {
210
  "epoch": 17.95,
211
+ "learning_rate": 4.101322751322751e-05,
212
+ "loss": 0.0928,
213
  "step": 3400
214
  },
215
  {
216
  "epoch": 18.48,
217
+ "learning_rate": 4.0748677248677254e-05,
218
+ "loss": 0.0851,
219
  "step": 3500
220
  },
221
  {
222
  "epoch": 19.01,
223
+ "learning_rate": 4.0484126984126983e-05,
224
+ "loss": 0.0849,
225
  "step": 3600
226
  },
227
  {
228
  "epoch": 19.54,
229
+ "learning_rate": 4.0219576719576726e-05,
230
+ "loss": 0.0829,
231
  "step": 3700
232
  },
233
  {
234
  "epoch": 20.07,
235
+ "learning_rate": 3.9955026455026456e-05,
236
+ "loss": 0.0709,
237
  "step": 3800
238
  },
239
  {
240
  "epoch": 20.59,
241
+ "learning_rate": 3.969047619047619e-05,
242
+ "loss": 0.069,
243
  "step": 3900
244
  },
245
  {
246
  "epoch": 21.12,
247
+ "learning_rate": 3.942592592592593e-05,
248
+ "loss": 0.0764,
249
  "step": 4000
250
  },
251
  {
252
  "epoch": 21.65,
253
+ "learning_rate": 3.9161375661375664e-05,
254
+ "loss": 0.0676,
255
  "step": 4100
256
  },
257
  {
258
  "epoch": 22.18,
259
+ "learning_rate": 3.88968253968254e-05,
260
+ "loss": 0.0802,
261
  "step": 4200
262
  },
263
  {
264
  "epoch": 22.71,
265
+ "learning_rate": 3.8632275132275136e-05,
266
+ "loss": 0.0651,
267
  "step": 4300
268
  },
269
  {
270
  "epoch": 23.23,
271
+ "learning_rate": 3.837037037037037e-05,
272
+ "loss": 0.0732,
273
  "step": 4400
274
  },
275
  {
276
  "epoch": 23.76,
277
+ "learning_rate": 3.810582010582011e-05,
278
+ "loss": 0.0617,
279
  "step": 4500
280
  },
281
  {
282
  "epoch": 24.29,
283
+ "learning_rate": 3.7841269841269845e-05,
284
+ "loss": 0.0696,
285
  "step": 4600
286
  },
287
  {
288
  "epoch": 24.82,
289
+ "learning_rate": 3.7576719576719574e-05,
290
+ "loss": 0.0739,
291
  "step": 4700
292
  },
293
  {
294
  "epoch": 25.35,
295
+ "learning_rate": 3.731216931216932e-05,
296
+ "loss": 0.0569,
297
  "step": 4800
298
  },
299
  {
300
  "epoch": 25.87,
301
+ "learning_rate": 3.7047619047619047e-05,
302
+ "loss": 0.0696,
303
  "step": 4900
304
  },
305
  {
306
  "epoch": 26.4,
307
+ "learning_rate": 3.678306878306878e-05,
308
+ "loss": 0.065,
309
+ "step": 5000
310
+ },
311
+ {
312
+ "epoch": 26.4,
313
+ "eval_accuracy": 0.8568481802940369,
314
+ "eval_loss": 0.6983007192611694,
315
+ "eval_runtime": 50.8227,
316
+ "eval_samples_per_second": 47.695,
317
+ "eval_steps_per_second": 5.962,
318
  "step": 5000
319
  },
320
  {
321
  "epoch": 26.93,
322
+ "learning_rate": 3.651851851851852e-05,
323
+ "loss": 0.0652,
324
  "step": 5100
325
  },
326
  {
327
  "epoch": 27.46,
328
+ "learning_rate": 3.6253968253968255e-05,
329
+ "loss": 0.0586,
330
  "step": 5200
331
  },
332
  {
333
  "epoch": 27.99,
334
+ "learning_rate": 3.598941798941799e-05,
335
+ "loss": 0.0673,
336
  "step": 5300
337
  },
338
  {
339
  "epoch": 28.51,
340
+ "learning_rate": 3.5727513227513235e-05,
341
+ "loss": 0.0622,
342
  "step": 5400
343
  },
344
  {
345
  "epoch": 29.04,
346
+ "learning_rate": 3.5462962962962964e-05,
347
+ "loss": 0.0618,
348
  "step": 5500
349
  },
350
  {
351
  "epoch": 29.57,
352
+ "learning_rate": 3.51984126984127e-05,
353
+ "loss": 0.0655,
354
  "step": 5600
355
  },
356
  {
357
  "epoch": 30.1,
358
+ "learning_rate": 3.4933862433862436e-05,
359
+ "loss": 0.0625,
360
  "step": 5700
361
  },
362
  {
363
  "epoch": 30.63,
364
+ "learning_rate": 3.466931216931217e-05,
365
+ "loss": 0.0584,
366
  "step": 5800
367
  },
368
  {
369
  "epoch": 31.16,
370
+ "learning_rate": 3.440476190476191e-05,
371
+ "loss": 0.0649,
372
  "step": 5900
373
  },
374
  {
375
  "epoch": 31.68,
376
+ "learning_rate": 3.4140211640211644e-05,
377
+ "loss": 0.0546,
378
  "step": 6000
379
  },
380
  {
381
  "epoch": 32.21,
382
+ "learning_rate": 3.387566137566138e-05,
383
+ "loss": 0.0564,
384
  "step": 6100
385
  },
386
  {
387
  "epoch": 32.74,
388
+ "learning_rate": 3.3611111111111116e-05,
389
+ "loss": 0.0584,
390
  "step": 6200
391
  },
392
  {
393
  "epoch": 33.27,
394
+ "learning_rate": 3.3346560846560846e-05,
395
+ "loss": 0.0516,
396
  "step": 6300
397
  },
398
  {
399
  "epoch": 33.8,
400
+ "learning_rate": 3.308201058201059e-05,
401
+ "loss": 0.051,
402
  "step": 6400
403
  },
404
  {
405
  "epoch": 34.32,
406
+ "learning_rate": 3.281746031746032e-05,
407
+ "loss": 0.0534,
408
  "step": 6500
409
  },
410
  {
411
  "epoch": 34.85,
412
+ "learning_rate": 3.2552910052910054e-05,
413
+ "loss": 0.0498,
414
  "step": 6600
415
  },
416
  {
417
  "epoch": 35.38,
418
+ "learning_rate": 3.228835978835979e-05,
419
+ "loss": 0.0499,
420
  "step": 6700
421
  },
422
  {
423
  "epoch": 35.91,
424
+ "learning_rate": 3.202380952380952e-05,
425
+ "loss": 0.0566,
426
  "step": 6800
427
  },
428
  {
429
  "epoch": 36.44,
430
+ "learning_rate": 3.175925925925926e-05,
431
+ "loss": 0.0424,
432
  "step": 6900
433
  },
434
  {
435
  "epoch": 36.96,
436
+ "learning_rate": 3.149470899470899e-05,
437
+ "loss": 0.0531,
438
  "step": 7000
439
  },
440
  {
441
  "epoch": 37.49,
442
+ "learning_rate": 3.1230158730158734e-05,
443
+ "loss": 0.0563,
444
  "step": 7100
445
  },
446
  {
447
  "epoch": 38.02,
448
+ "learning_rate": 3.0965608465608464e-05,
449
+ "loss": 0.0475,
450
  "step": 7200
451
  },
452
  {
453
  "epoch": 38.55,
454
+ "learning_rate": 3.07010582010582e-05,
455
+ "loss": 0.0381,
456
  "step": 7300
457
  },
458
  {
459
  "epoch": 39.08,
460
+ "learning_rate": 3.0436507936507936e-05,
461
+ "loss": 0.0525,
462
  "step": 7400
463
  },
464
  {
465
  "epoch": 39.6,
466
+ "learning_rate": 3.0171957671957672e-05,
467
+ "loss": 0.058,
468
  "step": 7500
469
  },
470
  {
471
  "epoch": 40.13,
472
+ "learning_rate": 2.9907407407407405e-05,
473
+ "loss": 0.0422,
474
  "step": 7600
475
  },
476
  {
477
  "epoch": 40.66,
478
+ "learning_rate": 2.9642857142857144e-05,
479
+ "loss": 0.0401,
480
  "step": 7700
481
  },
482
  {
483
  "epoch": 41.19,
484
+ "learning_rate": 2.9378306878306877e-05,
485
+ "loss": 0.0434,
486
  "step": 7800
487
  },
488
  {
489
  "epoch": 41.72,
490
+ "learning_rate": 2.9113756613756616e-05,
491
+ "loss": 0.0407,
492
  "step": 7900
493
  },
494
  {
495
  "epoch": 42.24,
496
+ "learning_rate": 2.884920634920635e-05,
497
+ "loss": 0.048,
498
  "step": 8000
499
  },
500
  {
501
  "epoch": 42.77,
502
+ "learning_rate": 2.8584656084656085e-05,
503
+ "loss": 0.032,
504
  "step": 8100
505
  },
506
  {
507
  "epoch": 43.3,
508
+ "learning_rate": 2.8320105820105818e-05,
509
+ "loss": 0.0457,
510
  "step": 8200
511
  },
512
  {
513
  "epoch": 43.83,
514
+ "learning_rate": 2.8055555555555557e-05,
515
+ "loss": 0.0531,
516
  "step": 8300
517
  },
518
  {
519
  "epoch": 44.36,
520
+ "learning_rate": 2.779100529100529e-05,
521
+ "loss": 0.0443,
522
  "step": 8400
523
  },
524
  {
525
  "epoch": 44.88,
526
+ "learning_rate": 2.752645502645503e-05,
527
+ "loss": 0.0404,
528
  "step": 8500
529
  },
530
  {
531
  "epoch": 45.41,
532
+ "learning_rate": 2.7261904761904762e-05,
533
+ "loss": 0.037,
534
  "step": 8600
535
  },
536
  {
537
  "epoch": 45.94,
538
+ "learning_rate": 2.6997354497354498e-05,
539
+ "loss": 0.0461,
540
  "step": 8700
541
  },
542
  {
543
  "epoch": 46.47,
544
+ "learning_rate": 2.673280423280423e-05,
545
+ "loss": 0.0362,
546
  "step": 8800
547
  },
548
  {
549
  "epoch": 47.0,
550
+ "learning_rate": 2.6470899470899475e-05,
551
+ "loss": 0.0417,
552
  "step": 8900
553
  },
554
  {
555
  "epoch": 47.52,
556
+ "learning_rate": 2.6206349206349207e-05,
557
+ "loss": 0.0347,
558
  "step": 9000
559
  },
560
  {
561
  "epoch": 48.05,
562
+ "learning_rate": 2.5941798941798943e-05,
563
+ "loss": 0.0448,
564
  "step": 9100
565
  },
566
  {
567
  "epoch": 48.58,
568
+ "learning_rate": 2.5677248677248676e-05,
569
+ "loss": 0.0368,
570
  "step": 9200
571
  },
572
  {
573
  "epoch": 49.11,
574
+ "learning_rate": 2.5412698412698415e-05,
575
+ "loss": 0.0379,
576
  "step": 9300
577
  },
578
  {
579
  "epoch": 49.64,
580
+ "learning_rate": 2.5148148148148148e-05,
581
+ "loss": 0.0367,
582
  "step": 9400
583
  },
584
  {
585
  "epoch": 50.17,
586
+ "learning_rate": 2.4883597883597884e-05,
587
+ "loss": 0.0331,
588
  "step": 9500
589
  },
590
  {
591
  "epoch": 50.69,
592
+ "learning_rate": 2.461904761904762e-05,
593
+ "loss": 0.0301,
594
  "step": 9600
595
  },
596
  {
597
  "epoch": 51.22,
598
  "learning_rate": 2.4357142857142857e-05,
599
+ "loss": 0.0325,
600
  "step": 9700
601
  },
602
  {
603
  "epoch": 51.75,
604
  "learning_rate": 2.4092592592592593e-05,
605
+ "loss": 0.0397,
606
  "step": 9800
607
  },
608
  {
609
  "epoch": 52.28,
610
  "learning_rate": 2.382804232804233e-05,
611
+ "loss": 0.0396,
612
  "step": 9900
613
  },
614
  {
615
  "epoch": 52.81,
616
  "learning_rate": 2.3563492063492065e-05,
617
+ "loss": 0.0412,
618
  "step": 10000
619
  },
620
  {
621
  "epoch": 52.81,
622
+ "eval_accuracy": 0.8762376308441162,
623
+ "eval_loss": 0.5958317518234253,
624
+ "eval_runtime": 50.4121,
625
+ "eval_samples_per_second": 48.084,
626
+ "eval_steps_per_second": 6.01,
627
  "step": 10000
628
  },
629
  {
630
  "epoch": 53.33,
631
  "learning_rate": 2.32989417989418e-05,
632
+ "loss": 0.0427,
633
  "step": 10100
634
  },
635
  {
636
  "epoch": 53.86,
637
  "learning_rate": 2.3034391534391538e-05,
638
+ "loss": 0.0317,
639
  "step": 10200
640
  },
641
  {
642
  "epoch": 54.39,
643
  "learning_rate": 2.276984126984127e-05,
644
+ "loss": 0.0314,
645
  "step": 10300
646
  },
647
  {
648
  "epoch": 54.92,
649
  "learning_rate": 2.2505291005291006e-05,
650
+ "loss": 0.0384,
651
  "step": 10400
652
  },
653
  {
654
  "epoch": 55.45,
655
  "learning_rate": 2.2240740740740743e-05,
656
+ "loss": 0.0311,
657
  "step": 10500
658
  },
659
  {
660
  "epoch": 55.97,
661
  "learning_rate": 2.197619047619048e-05,
662
+ "loss": 0.0315,
663
  "step": 10600
664
  },
665
  {
666
  "epoch": 56.5,
667
  "learning_rate": 2.1711640211640215e-05,
668
+ "loss": 0.0263,
669
  "step": 10700
670
  },
671
  {
672
  "epoch": 57.03,
673
  "learning_rate": 2.1447089947089947e-05,
674
+ "loss": 0.0319,
675
  "step": 10800
676
  },
677
  {
678
  "epoch": 57.56,
679
  "learning_rate": 2.1182539682539683e-05,
680
+ "loss": 0.025,
681
  "step": 10900
682
  },
683
  {
684
  "epoch": 58.09,
685
  "learning_rate": 2.091798941798942e-05,
686
+ "loss": 0.0323,
687
  "step": 11000
688
  },
689
  {
690
  "epoch": 58.61,
691
  "learning_rate": 2.0653439153439156e-05,
692
+ "loss": 0.034,
693
  "step": 11100
694
  },
695
  {
696
  "epoch": 59.14,
697
  "learning_rate": 2.0388888888888892e-05,
698
+ "loss": 0.0326,
699
  "step": 11200
700
  },
701
  {
702
  "epoch": 59.67,
703
  "learning_rate": 2.0124338624338628e-05,
704
+ "loss": 0.0273,
705
  "step": 11300
706
  },
707
  {
708
  "epoch": 60.2,
709
  "learning_rate": 1.985978835978836e-05,
710
+ "loss": 0.0261,
711
  "step": 11400
712
  },
713
  {
714
  "epoch": 60.73,
715
  "learning_rate": 1.9595238095238097e-05,
716
+ "loss": 0.0297,
717
  "step": 11500
718
  },
719
  {
720
  "epoch": 61.25,
721
  "learning_rate": 1.9330687830687833e-05,
722
+ "loss": 0.0375,
723
  "step": 11600
724
  },
725
  {
726
  "epoch": 61.78,
727
  "learning_rate": 1.906613756613757e-05,
728
+ "loss": 0.0262,
729
  "step": 11700
730
  },
731
  {
732
  "epoch": 62.31,
733
  "learning_rate": 1.8801587301587305e-05,
734
+ "loss": 0.0333,
735
  "step": 11800
736
  },
737
  {
738
  "epoch": 62.84,
739
  "learning_rate": 1.8537037037037037e-05,
740
+ "loss": 0.025,
741
  "step": 11900
742
  },
743
  {
744
  "epoch": 63.37,
745
  "learning_rate": 1.8272486772486774e-05,
746
+ "loss": 0.0245,
747
  "step": 12000
748
  },
749
  {
750
  "epoch": 63.89,
751
  "learning_rate": 1.800793650793651e-05,
752
+ "loss": 0.0261,
753
  "step": 12100
754
  },
755
  {
756
  "epoch": 64.42,
757
  "learning_rate": 1.7743386243386246e-05,
758
+ "loss": 0.0277,
759
  "step": 12200
760
  },
761
  {
762
  "epoch": 64.95,
763
  "learning_rate": 1.7478835978835982e-05,
764
+ "loss": 0.0306,
765
  "step": 12300
766
  },
767
  {
768
  "epoch": 65.48,
769
  "learning_rate": 1.7214285714285715e-05,
770
+ "loss": 0.0287,
771
  "step": 12400
772
  },
773
  {
774
  "epoch": 66.01,
775
  "learning_rate": 1.694973544973545e-05,
776
+ "loss": 0.0222,
777
  "step": 12500
778
  },
779
  {
780
  "epoch": 66.53,
781
  "learning_rate": 1.6685185185185187e-05,
782
+ "loss": 0.0302,
783
  "step": 12600
784
  },
785
  {
786
  "epoch": 67.06,
787
  "learning_rate": 1.6420634920634923e-05,
788
+ "loss": 0.0252,
789
  "step": 12700
790
  },
791
  {
792
  "epoch": 67.59,
793
  "learning_rate": 1.615608465608466e-05,
794
+ "loss": 0.0221,
795
  "step": 12800
796
  },
797
  {
798
  "epoch": 68.12,
799
  "learning_rate": 1.5891534391534395e-05,
800
+ "loss": 0.0383,
801
  "step": 12900
802
  },
803
  {
804
  "epoch": 68.65,
805
  "learning_rate": 1.5626984126984128e-05,
806
+ "loss": 0.0242,
807
  "step": 13000
808
  },
809
  {
810
  "epoch": 69.17,
811
  "learning_rate": 1.5362433862433864e-05,
812
+ "loss": 0.0194,
813
  "step": 13100
814
  },
815
  {
816
  "epoch": 69.7,
817
+ "learning_rate": 1.50978835978836e-05,
818
+ "loss": 0.0245,
819
  "step": 13200
820
  },
821
  {
822
  "epoch": 70.23,
823
+ "learning_rate": 1.4833333333333336e-05,
824
+ "loss": 0.0263,
825
  "step": 13300
826
  },
827
  {
828
  "epoch": 70.76,
829
+ "learning_rate": 1.456878306878307e-05,
830
+ "loss": 0.0276,
831
  "step": 13400
832
  },
833
  {
834
  "epoch": 71.29,
835
+ "learning_rate": 1.4304232804232806e-05,
836
+ "loss": 0.0213,
837
  "step": 13500
838
  },
839
  {
840
  "epoch": 71.82,
841
+ "learning_rate": 1.4042328042328043e-05,
842
+ "loss": 0.0296,
843
  "step": 13600
844
  },
845
  {
846
  "epoch": 72.34,
847
+ "learning_rate": 1.3777777777777778e-05,
848
+ "loss": 0.0231,
849
  "step": 13700
850
  },
851
  {
852
  "epoch": 72.87,
853
+ "learning_rate": 1.3513227513227514e-05,
854
+ "loss": 0.0241,
855
  "step": 13800
856
  },
857
  {
858
  "epoch": 73.4,
859
+ "learning_rate": 1.324867724867725e-05,
860
+ "loss": 0.0283,
861
  "step": 13900
862
  },
863
  {
864
  "epoch": 73.93,
865
+ "learning_rate": 1.2984126984126984e-05,
866
+ "loss": 0.0231,
867
  "step": 14000
868
  },
869
  {
870
  "epoch": 74.46,
871
+ "learning_rate": 1.271957671957672e-05,
872
+ "loss": 0.0179,
873
  "step": 14100
874
  },
875
  {
876
  "epoch": 74.98,
877
+ "learning_rate": 1.2455026455026456e-05,
878
+ "loss": 0.0208,
879
  "step": 14200
880
  },
881
  {
882
  "epoch": 75.51,
883
+ "learning_rate": 1.219047619047619e-05,
884
+ "loss": 0.0277,
885
  "step": 14300
886
  },
887
  {
888
  "epoch": 76.04,
889
+ "learning_rate": 1.1925925925925927e-05,
890
+ "loss": 0.0195,
891
  "step": 14400
892
  },
893
  {
894
  "epoch": 76.57,
895
+ "learning_rate": 1.1661375661375661e-05,
896
+ "loss": 0.0188,
897
  "step": 14500
898
  },
899
  {
900
  "epoch": 77.1,
901
+ "learning_rate": 1.1396825396825397e-05,
902
+ "loss": 0.0184,
903
  "step": 14600
904
  },
905
  {
906
  "epoch": 77.62,
907
+ "learning_rate": 1.1132275132275133e-05,
908
+ "loss": 0.0165,
909
  "step": 14700
910
  },
911
  {
912
  "epoch": 78.15,
913
+ "learning_rate": 1.0867724867724868e-05,
914
+ "loss": 0.0245,
915
  "step": 14800
916
  },
917
  {
918
  "epoch": 78.68,
919
+ "learning_rate": 1.0603174603174604e-05,
920
+ "loss": 0.0331,
921
  "step": 14900
922
  },
923
  {
924
  "epoch": 79.21,
925
+ "learning_rate": 1.033862433862434e-05,
926
+ "loss": 0.0173,
927
+ "step": 15000
928
+ },
929
+ {
930
+ "epoch": 79.21,
931
+ "eval_accuracy": 0.8968647122383118,
932
+ "eval_loss": 0.5708499550819397,
933
+ "eval_runtime": 49.6061,
934
+ "eval_samples_per_second": 48.865,
935
+ "eval_steps_per_second": 6.108,
936
  "step": 15000
937
  },
938
  {
939
  "epoch": 79.74,
940
+ "learning_rate": 1.0074074074074074e-05,
941
+ "loss": 0.0243,
942
  "step": 15100
943
  },
944
  {
945
  "epoch": 80.26,
946
+ "learning_rate": 9.80952380952381e-06,
947
+ "loss": 0.0203,
948
  "step": 15200
949
  },
950
  {
951
  "epoch": 80.79,
952
+ "learning_rate": 9.544973544973545e-06,
953
+ "loss": 0.018,
954
  "step": 15300
955
  },
956
  {
957
  "epoch": 81.32,
958
+ "learning_rate": 9.280423280423281e-06,
959
+ "loss": 0.0239,
960
  "step": 15400
961
  },
962
  {
963
  "epoch": 81.85,
964
+ "learning_rate": 9.015873015873017e-06,
965
+ "loss": 0.0176,
966
  "step": 15500
967
  },
968
  {
969
  "epoch": 82.38,
970
+ "learning_rate": 8.751322751322751e-06,
971
+ "loss": 0.0231,
972
  "step": 15600
973
  },
974
  {
975
  "epoch": 82.9,
976
+ "learning_rate": 8.486772486772487e-06,
977
+ "loss": 0.0181,
978
  "step": 15700
979
  },
980
  {
981
  "epoch": 83.43,
982
+ "learning_rate": 8.222222222222223e-06,
983
+ "loss": 0.0221,
984
  "step": 15800
985
  },
986
  {
987
  "epoch": 83.96,
988
+ "learning_rate": 7.957671957671958e-06,
989
+ "loss": 0.0132,
990
  "step": 15900
991
  },
992
  {
993
  "epoch": 84.49,
994
+ "learning_rate": 7.693121693121694e-06,
995
+ "loss": 0.0127,
996
  "step": 16000
997
  },
998
  {
999
  "epoch": 85.02,
1000
+ "learning_rate": 7.428571428571429e-06,
1001
+ "loss": 0.0178,
1002
  "step": 16100
1003
  },
1004
  {
1005
  "epoch": 85.54,
1006
+ "learning_rate": 7.1640211640211644e-06,
1007
+ "loss": 0.0176,
1008
  "step": 16200
1009
  },
1010
  {
1011
  "epoch": 86.07,
1012
+ "learning_rate": 6.8994708994709e-06,
1013
+ "loss": 0.0169,
1014
  "step": 16300
1015
  },
1016
  {
1017
  "epoch": 86.6,
1018
+ "learning_rate": 6.634920634920636e-06,
1019
+ "loss": 0.0163,
1020
  "step": 16400
1021
  },
1022
  {
1023
  "epoch": 87.13,
1024
+ "learning_rate": 6.370370370370371e-06,
1025
+ "loss": 0.015,
1026
  "step": 16500
1027
  },
1028
  {
1029
  "epoch": 87.66,
1030
+ "learning_rate": 6.105820105820106e-06,
1031
+ "loss": 0.022,
1032
  "step": 16600
1033
  },
1034
  {
1035
  "epoch": 88.18,
1036
+ "learning_rate": 5.841269841269842e-06,
1037
+ "loss": 0.0115,
1038
  "step": 16700
1039
  },
1040
  {
1041
  "epoch": 88.71,
1042
+ "learning_rate": 5.576719576719577e-06,
1043
+ "loss": 0.0148,
1044
  "step": 16800
1045
  },
1046
  {
1047
  "epoch": 89.24,
1048
+ "learning_rate": 5.312169312169312e-06,
1049
+ "loss": 0.0153,
1050
  "step": 16900
1051
  },
1052
  {
1053
  "epoch": 89.77,
1054
+ "learning_rate": 5.047619047619047e-06,
1055
+ "loss": 0.0194,
1056
  "step": 17000
1057
  },
1058
  {
1059
  "epoch": 90.3,
1060
+ "learning_rate": 4.783068783068783e-06,
1061
+ "loss": 0.0165,
1062
  "step": 17100
1063
  },
1064
  {
1065
  "epoch": 90.83,
1066
+ "learning_rate": 4.5185185185185185e-06,
1067
+ "loss": 0.013,
1068
  "step": 17200
1069
  },
1070
  {
1071
  "epoch": 91.35,
1072
+ "learning_rate": 4.253968253968254e-06,
1073
+ "loss": 0.0212,
1074
  "step": 17300
1075
  },
1076
  {
1077
  "epoch": 91.88,
1078
+ "learning_rate": 3.989417989417989e-06,
1079
+ "loss": 0.0192,
1080
  "step": 17400
1081
  },
1082
  {
1083
  "epoch": 92.41,
1084
+ "learning_rate": 3.7248677248677246e-06,
1085
+ "loss": 0.0152,
1086
  "step": 17500
1087
  },
1088
  {
1089
  "epoch": 92.94,
1090
+ "learning_rate": 3.4603174603174603e-06,
1091
+ "loss": 0.02,
1092
  "step": 17600
1093
  },
1094
  {
1095
  "epoch": 93.47,
1096
+ "learning_rate": 3.1957671957671955e-06,
1097
+ "loss": 0.0089,
1098
  "step": 17700
1099
  },
1100
  {
1101
  "epoch": 93.99,
1102
+ "learning_rate": 2.9312169312169316e-06,
1103
+ "loss": 0.0124,
1104
  "step": 17800
1105
  },
1106
  {
1107
  "epoch": 94.52,
1108
+ "learning_rate": 2.666666666666667e-06,
1109
+ "loss": 0.019,
1110
  "step": 17900
1111
  },
1112
  {
1113
  "epoch": 95.05,
1114
+ "learning_rate": 2.402116402116402e-06,
1115
+ "loss": 0.0151,
1116
  "step": 18000
1117
  },
1118
  {
1119
  "epoch": 95.58,
1120
+ "learning_rate": 2.1375661375661377e-06,
1121
+ "loss": 0.0184,
1122
  "step": 18100
1123
  },
1124
  {
1125
  "epoch": 96.11,
1126
+ "learning_rate": 1.873015873015873e-06,
1127
+ "loss": 0.0146,
1128
  "step": 18200
1129
  },
1130
  {
1131
  "epoch": 96.63,
1132
+ "learning_rate": 1.6084656084656084e-06,
1133
+ "loss": 0.0227,
1134
  "step": 18300
1135
  },
1136
  {
1137
  "epoch": 97.16,
1138
+ "learning_rate": 1.343915343915344e-06,
1139
+ "loss": 0.0149,
1140
  "step": 18400
1141
  },
1142
  {
1143
  "epoch": 97.69,
1144
+ "learning_rate": 1.0793650793650795e-06,
1145
+ "loss": 0.015,
1146
  "step": 18500
1147
  },
1148
  {
1149
  "epoch": 98.22,
1150
+ "learning_rate": 8.148148148148147e-07,
1151
+ "loss": 0.0166,
1152
  "step": 18600
1153
  },
1154
  {
1155
  "epoch": 98.75,
1156
+ "learning_rate": 5.502645502645503e-07,
1157
+ "loss": 0.0167,
1158
  "step": 18700
1159
  },
1160
  {
1161
  "epoch": 99.27,
1162
+ "learning_rate": 2.8571428571428575e-07,
1163
+ "loss": 0.0152,
1164
  "step": 18800
1165
  },
1166
  {
1167
  "epoch": 99.8,
1168
+ "learning_rate": 2.1164021164021167e-08,
1169
+ "loss": 0.0164,
1170
  "step": 18900
1171
  },
1172
  {
1173
  "epoch": 99.8,
1174
  "step": 18900,
1175
  "total_flos": 3.6752439370752e+19,
1176
+ "train_loss": 0.05161126141825681,
1177
+ "train_runtime": 41136.7074,
1178
+ "train_samples_per_second": 29.463,
1179
+ "train_steps_per_second": 0.459
1180
  },
1181
  {
1182
  "epoch": 99.8,
1183
+ "eval_accuracy": 0.8910890817642212,
1184
+ "eval_loss": 0.6214143633842468,
1185
+ "eval_runtime": 50.0096,
1186
+ "eval_samples_per_second": 48.471,
1187
+ "eval_steps_per_second": 6.059,
1188
  "step": 18900
1189
  }
1190
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6973a8ed9ca8e0acde38cd1635af8a78516b15c39a24f84c8d726e2276ded6be
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe211c843b5d503caa749faf86af4e798d09fba3908277cc22163c26ef6460de
3
  size 4155