paul commited on
Commit
884b0dc
1 Parent(s): 3cfb633

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.99,
3
- "total_flos": 1.4224524590735456e+19,
4
- "train_loss": 0.3359685759269371,
5
- "train_runtime": 2345.2254,
6
- "train_samples_per_second": 78.345,
7
- "train_steps_per_second": 0.304
8
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "total_flos": 4.6098795710207476e+19,
4
+ "train_loss": 0.3678270006343594,
5
+ "train_runtime": 6722.1212,
6
+ "train_samples_per_second": 88.513,
7
+ "train_steps_per_second": 0.345
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:997c868e1b0b60fabb1fdae800ca24cbacac33a015dfcd4321ce4447cfefb9ec
3
  size 343282353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fffa524f1376fd63a09c60d8acc1a21ee47558b1dd183bb48fe5dc47cfff97f
3
  size 343282353
runs/Jan16_16-52-03_teesta/events.out.tfevents.1673868138.teesta.13849.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7be617a0599adcc55cabc1c9b0d7b51d3d76d1452525c954d9277ff017be527
3
- size 44104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d691ffa8a775bc0dee4e53b8c31f9689293ca9ed5c8bbfc07e704235673cd26
3
+ size 44458
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.99,
3
- "total_flos": 1.4224524590735456e+19,
4
- "train_loss": 0.3359685759269371,
5
- "train_runtime": 2345.2254,
6
- "train_samples_per_second": 78.345,
7
- "train_steps_per_second": 0.304
8
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "total_flos": 4.6098795710207476e+19,
4
+ "train_loss": 0.3678270006343594,
5
+ "train_runtime": 6722.1212,
6
+ "train_samples_per_second": 88.513,
7
+ "train_steps_per_second": 0.345
8
  }
trainer_state.json CHANGED
@@ -1,547 +1,1513 @@
1
  {
2
- "best_metric": 0.6960989202368513,
3
- "best_model_checkpoint": "google-vit-base-patch16-224-face/checkpoint-712",
4
- "epoch": 7.991643454038997,
5
- "global_step": 712,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.11,
12
- "learning_rate": 1.6666666666666667e-05,
13
- "loss": 0.9691,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.22,
18
- "learning_rate": 3.3333333333333335e-05,
19
- "loss": 0.9687,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.33,
24
- "learning_rate": 5e-05,
25
- "loss": 0.9727,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.45,
30
- "learning_rate": 6.666666666666667e-05,
31
- "loss": 0.9245,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.56,
36
- "learning_rate": 8.333333333333333e-05,
37
- "loss": 0.8955,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 0.67,
42
- "learning_rate": 0.0001,
43
- "loss": 0.8887,
44
  "step": 60
45
  },
46
  {
47
- "epoch": 0.78,
48
- "learning_rate": 0.00011666666666666667,
49
- "loss": 0.8827,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 0.89,
54
- "learning_rate": 0.00011850000000000001,
55
- "loss": 0.8364,
56
  "step": 80
57
  },
58
  {
59
- "epoch": 0.99,
60
- "eval_accuracy": 0.648380355276907,
61
- "eval_f1": 0.6385482827611145,
62
- "eval_loss": 0.9452694654464722,
63
- "eval_precision": 0.6461904026185493,
64
- "eval_recall": 0.648380355276907,
65
- "eval_runtime": 32.7545,
66
- "eval_samples_per_second": 175.304,
67
- "eval_steps_per_second": 2.748,
68
- "step": 89
69
- },
70
- {
71
- "epoch": 1.01,
72
- "learning_rate": 0.000116625,
73
- "loss": 0.9165,
74
  "step": 90
75
  },
76
  {
77
- "epoch": 1.12,
78
- "learning_rate": 0.00011475000000000001,
79
- "loss": 0.765,
80
  "step": 100
81
  },
82
  {
83
- "epoch": 1.23,
84
- "learning_rate": 0.000112875,
85
- "loss": 0.7639,
86
  "step": 110
87
  },
88
  {
89
- "epoch": 1.35,
90
- "learning_rate": 0.00011100000000000001,
91
- "loss": 0.7557,
92
  "step": 120
93
  },
94
  {
95
- "epoch": 1.46,
96
- "learning_rate": 0.00010912500000000001,
97
- "loss": 0.7494,
98
  "step": 130
99
  },
100
  {
101
- "epoch": 1.57,
102
- "learning_rate": 0.00010725,
103
- "loss": 0.7363,
104
  "step": 140
105
  },
106
  {
107
- "epoch": 1.68,
108
- "learning_rate": 0.00010537500000000001,
109
- "loss": 0.7147,
110
  "step": 150
111
  },
112
  {
113
- "epoch": 1.79,
114
- "learning_rate": 0.00010350000000000001,
115
- "loss": 0.7283,
116
  "step": 160
117
  },
118
  {
119
- "epoch": 1.9,
120
- "learning_rate": 0.000101625,
121
- "loss": 0.7433,
122
  "step": 170
123
  },
124
  {
125
- "epoch": 1.99,
126
- "eval_accuracy": 0.6778126088470916,
127
- "eval_f1": 0.672969661980768,
128
- "eval_loss": 0.8876001834869385,
129
- "eval_precision": 0.679362397138447,
130
- "eval_recall": 0.6778126088470916,
131
- "eval_runtime": 37.9557,
132
- "eval_samples_per_second": 151.282,
133
- "eval_steps_per_second": 2.371,
134
- "step": 178
135
- },
136
- {
137
- "epoch": 2.02,
138
- "learning_rate": 9.975000000000001e-05,
139
- "loss": 0.7192,
140
  "step": 180
141
  },
142
  {
143
- "epoch": 2.13,
144
- "learning_rate": 9.787500000000001e-05,
145
- "loss": 0.5258,
146
  "step": 190
147
  },
148
  {
149
- "epoch": 2.25,
150
- "learning_rate": 9.6e-05,
151
- "loss": 0.5159,
152
  "step": 200
153
  },
154
  {
155
- "epoch": 2.36,
156
- "learning_rate": 9.412500000000001e-05,
157
- "loss": 0.5151,
158
  "step": 210
159
  },
160
  {
161
- "epoch": 2.47,
162
- "learning_rate": 9.225e-05,
163
- "loss": 0.5134,
164
  "step": 220
165
  },
166
  {
167
- "epoch": 2.58,
168
- "learning_rate": 9.0375e-05,
169
- "loss": 0.4776,
170
  "step": 230
171
  },
172
  {
173
- "epoch": 2.69,
174
- "learning_rate": 8.850000000000001e-05,
175
- "loss": 0.5188,
176
  "step": 240
177
  },
178
  {
179
- "epoch": 2.8,
180
- "learning_rate": 8.6625e-05,
181
- "loss": 0.5312,
182
  "step": 250
183
  },
184
  {
185
- "epoch": 2.91,
186
- "learning_rate": 8.475000000000001e-05,
187
- "loss": 0.4732,
188
  "step": 260
189
  },
190
  {
191
- "epoch": 2.99,
192
- "eval_accuracy": 0.6872169975618252,
193
- "eval_f1": 0.6840932939913709,
194
- "eval_loss": 0.9042980670928955,
195
- "eval_precision": 0.6906579200512205,
196
- "eval_recall": 0.6872169975618252,
197
- "eval_runtime": 32.8137,
198
- "eval_samples_per_second": 174.988,
199
- "eval_steps_per_second": 2.743,
200
- "step": 267
201
- },
202
- {
203
- "epoch": 3.03,
204
- "learning_rate": 8.287500000000001e-05,
205
- "loss": 0.4772,
206
  "step": 270
207
  },
208
  {
209
- "epoch": 3.14,
210
- "learning_rate": 8.1e-05,
211
- "loss": 0.299,
212
  "step": 280
213
  },
214
  {
215
- "epoch": 3.26,
216
- "learning_rate": 7.912500000000001e-05,
217
- "loss": 0.2614,
 
 
 
 
 
 
 
 
 
 
 
 
218
  "step": 290
219
  },
220
  {
221
- "epoch": 3.37,
222
- "learning_rate": 7.725000000000001e-05,
223
- "loss": 0.2679,
224
  "step": 300
225
  },
226
  {
227
- "epoch": 3.48,
228
- "learning_rate": 7.5375e-05,
229
- "loss": 0.2992,
230
  "step": 310
231
  },
232
  {
233
- "epoch": 3.59,
234
- "learning_rate": 7.350000000000001e-05,
235
- "loss": 0.2835,
236
  "step": 320
237
  },
238
  {
239
- "epoch": 3.7,
240
- "learning_rate": 7.1625e-05,
241
- "loss": 0.2752,
242
  "step": 330
243
  },
244
  {
245
- "epoch": 3.81,
246
- "learning_rate": 6.975e-05,
247
- "loss": 0.27,
248
  "step": 340
249
  },
250
  {
251
- "epoch": 3.92,
252
- "learning_rate": 6.787500000000001e-05,
253
- "loss": 0.2861,
254
  "step": 350
255
  },
256
  {
257
- "epoch": 3.99,
258
- "eval_accuracy": 0.6847788227098572,
259
- "eval_f1": 0.6813474424826861,
260
- "eval_loss": 0.9865238666534424,
261
- "eval_precision": 0.6808262458026351,
262
- "eval_recall": 0.6847788227098572,
263
- "eval_runtime": 34.0061,
264
- "eval_samples_per_second": 168.852,
265
- "eval_steps_per_second": 2.647,
266
- "step": 356
267
- },
268
- {
269
- "epoch": 4.04,
270
- "learning_rate": 6.6e-05,
271
- "loss": 0.2485,
272
  "step": 360
273
  },
274
  {
275
- "epoch": 4.16,
276
- "learning_rate": 6.412500000000001e-05,
277
- "loss": 0.1212,
278
  "step": 370
279
  },
280
  {
281
- "epoch": 4.27,
282
- "learning_rate": 6.225000000000001e-05,
283
- "loss": 0.121,
284
  "step": 380
285
  },
286
  {
287
- "epoch": 4.38,
288
- "learning_rate": 6.0375000000000004e-05,
289
- "loss": 0.131,
290
  "step": 390
291
  },
292
  {
293
- "epoch": 4.49,
294
- "learning_rate": 5.85e-05,
295
- "loss": 0.1192,
296
  "step": 400
297
  },
298
  {
299
- "epoch": 4.6,
300
- "learning_rate": 5.6625e-05,
301
- "loss": 0.1137,
302
  "step": 410
303
  },
304
  {
305
- "epoch": 4.71,
306
- "learning_rate": 5.475e-05,
307
- "loss": 0.1334,
308
  "step": 420
309
  },
310
  {
311
- "epoch": 4.82,
312
- "learning_rate": 5.2875e-05,
313
- "loss": 0.1285,
314
  "step": 430
315
  },
316
  {
317
- "epoch": 4.94,
318
- "learning_rate": 5.1e-05,
319
- "loss": 0.1234,
320
  "step": 440
321
  },
322
  {
323
- "epoch": 4.99,
324
- "eval_accuracy": 0.6853012887495646,
325
- "eval_f1": 0.6871627462072427,
326
- "eval_loss": 1.1047998666763306,
327
- "eval_precision": 0.6907009316583976,
328
- "eval_recall": 0.6853012887495646,
329
- "eval_runtime": 32.6937,
330
- "eval_samples_per_second": 175.63,
331
- "eval_steps_per_second": 2.753,
332
- "step": 445
333
- },
334
- {
335
- "epoch": 5.06,
336
- "learning_rate": 4.9125e-05,
337
- "loss": 0.1018,
338
  "step": 450
339
  },
340
  {
341
- "epoch": 5.17,
342
- "learning_rate": 4.7249999999999997e-05,
343
- "loss": 0.053,
344
  "step": 460
345
  },
346
  {
347
- "epoch": 5.28,
348
- "learning_rate": 4.5375e-05,
349
- "loss": 0.0495,
350
  "step": 470
351
  },
352
  {
353
- "epoch": 5.39,
354
- "learning_rate": 4.35e-05,
355
- "loss": 0.052,
356
  "step": 480
357
  },
358
  {
359
- "epoch": 5.5,
360
- "learning_rate": 4.1625e-05,
361
- "loss": 0.0597,
362
  "step": 490
363
  },
364
  {
365
- "epoch": 5.61,
366
- "learning_rate": 3.975e-05,
367
- "loss": 0.0548,
368
  "step": 500
369
  },
370
  {
371
- "epoch": 5.72,
372
- "learning_rate": 3.7875e-05,
373
- "loss": 0.0619,
374
  "step": 510
375
  },
376
  {
377
- "epoch": 5.84,
378
- "learning_rate": 3.6e-05,
379
- "loss": 0.0627,
380
  "step": 520
381
  },
382
  {
383
- "epoch": 5.95,
384
- "learning_rate": 3.4125e-05,
385
- "loss": 0.0599,
386
  "step": 530
387
  },
388
  {
389
- "epoch": 5.99,
390
- "eval_accuracy": 0.6889585510275166,
391
- "eval_f1": 0.6876003709723829,
392
- "eval_loss": 1.2361745834350586,
393
- "eval_precision": 0.6897347831759311,
394
- "eval_recall": 0.6889585510275166,
395
- "eval_runtime": 34.0898,
396
- "eval_samples_per_second": 168.437,
397
- "eval_steps_per_second": 2.64,
398
- "step": 534
399
- },
400
- {
401
- "epoch": 6.07,
402
- "learning_rate": 3.225e-05,
403
- "loss": 0.0431,
404
  "step": 540
405
  },
406
  {
407
- "epoch": 6.18,
408
- "learning_rate": 3.0375e-05,
409
- "loss": 0.033,
410
  "step": 550
411
  },
412
  {
413
- "epoch": 6.29,
414
- "learning_rate": 2.8499999999999998e-05,
415
- "loss": 0.0332,
416
  "step": 560
417
  },
418
  {
419
- "epoch": 6.4,
420
- "learning_rate": 2.6625e-05,
421
- "loss": 0.0315,
422
  "step": 570
423
  },
424
  {
425
- "epoch": 6.51,
426
- "learning_rate": 2.475e-05,
427
- "loss": 0.0305,
428
  "step": 580
429
  },
430
  {
431
- "epoch": 6.62,
432
- "learning_rate": 2.2875e-05,
433
- "loss": 0.0243,
 
 
 
 
 
 
 
 
 
 
 
 
434
  "step": 590
435
  },
436
  {
437
- "epoch": 6.74,
438
- "learning_rate": 2.1e-05,
439
- "loss": 0.0241,
440
  "step": 600
441
  },
442
  {
443
- "epoch": 6.85,
444
- "learning_rate": 1.9125e-05,
445
- "loss": 0.0206,
446
  "step": 610
447
  },
448
  {
449
- "epoch": 6.96,
450
- "learning_rate": 1.725e-05,
451
- "loss": 0.0289,
452
  "step": 620
453
  },
454
  {
455
- "epoch": 6.99,
456
- "eval_accuracy": 0.6931382793451759,
457
- "eval_f1": 0.6921150248707686,
458
- "eval_loss": 1.3140767812728882,
459
- "eval_precision": 0.6925717262836596,
460
- "eval_recall": 0.6931382793451759,
461
- "eval_runtime": 33.0395,
462
- "eval_samples_per_second": 173.792,
463
- "eval_steps_per_second": 2.724,
464
- "step": 623
465
- },
466
- {
467
- "epoch": 7.08,
468
- "learning_rate": 1.5374999999999998e-05,
469
- "loss": 0.0185,
470
  "step": 630
471
  },
472
  {
473
- "epoch": 7.19,
474
- "learning_rate": 1.3500000000000001e-05,
475
- "loss": 0.0132,
476
  "step": 640
477
  },
478
  {
479
- "epoch": 7.3,
480
- "learning_rate": 1.1625000000000001e-05,
481
- "loss": 0.014,
482
  "step": 650
483
  },
484
  {
485
- "epoch": 7.41,
486
- "learning_rate": 9.75e-06,
487
- "loss": 0.0121,
488
  "step": 660
489
  },
490
  {
491
- "epoch": 7.52,
492
- "learning_rate": 7.875e-06,
493
- "loss": 0.0203,
494
  "step": 670
495
  },
496
  {
497
- "epoch": 7.64,
498
- "learning_rate": 6e-06,
499
- "loss": 0.0103,
500
  "step": 680
501
  },
502
  {
503
- "epoch": 7.75,
504
- "learning_rate": 4.125e-06,
505
- "loss": 0.0178,
506
  "step": 690
507
  },
508
  {
509
- "epoch": 7.86,
510
- "learning_rate": 2.25e-06,
511
- "loss": 0.0126,
512
  "step": 700
513
  },
514
  {
515
- "epoch": 7.97,
516
- "learning_rate": 3.75e-07,
517
- "loss": 0.0134,
518
  "step": 710
519
  },
520
  {
521
- "epoch": 7.99,
522
- "eval_accuracy": 0.6960989202368513,
523
- "eval_f1": 0.6957934361657124,
524
- "eval_loss": 1.3257348537445068,
525
- "eval_precision": 0.6966334506335445,
526
- "eval_recall": 0.6960989202368513,
527
- "eval_runtime": 37.8195,
528
- "eval_samples_per_second": 151.827,
529
- "eval_steps_per_second": 2.38,
530
- "step": 712
531
- },
532
- {
533
- "epoch": 7.99,
534
- "step": 712,
535
- "total_flos": 1.4224524590735456e+19,
536
- "train_loss": 0.3359685759269371,
537
- "train_runtime": 2345.2254,
538
- "train_samples_per_second": 78.345,
539
- "train_steps_per_second": 0.304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  }
541
  ],
542
- "max_steps": 712,
543
  "num_train_epochs": 8,
544
- "total_flos": 1.4224524590735456e+19,
545
  "trial_name": null,
546
  "trial_params": null
547
  }
 
1
  {
2
+ "best_metric": 0.728460793804453,
3
+ "best_model_checkpoint": "google-vit-base-patch16-224-face/checkpoint-870",
4
+ "epoch": 7.9974204643164235,
5
+ "global_step": 2320,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.03,
12
+ "learning_rate": 5.172413793103449e-06,
13
+ "loss": 2.0401,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.07,
18
+ "learning_rate": 1.0344827586206898e-05,
19
+ "loss": 1.8797,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.1,
24
+ "learning_rate": 1.5517241379310346e-05,
25
+ "loss": 1.6439,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.14,
30
+ "learning_rate": 2.0689655172413797e-05,
31
+ "loss": 1.4464,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 0.17,
36
+ "learning_rate": 2.586206896551724e-05,
37
+ "loss": 1.26,
38
  "step": 50
39
  },
40
  {
41
+ "epoch": 0.21,
42
+ "learning_rate": 3.103448275862069e-05,
43
+ "loss": 1.1469,
44
  "step": 60
45
  },
46
  {
47
+ "epoch": 0.24,
48
+ "learning_rate": 3.6206896551724136e-05,
49
+ "loss": 1.0549,
50
  "step": 70
51
  },
52
  {
53
+ "epoch": 0.28,
54
+ "learning_rate": 4.137931034482759e-05,
55
+ "loss": 1.047,
56
  "step": 80
57
  },
58
  {
59
+ "epoch": 0.31,
60
+ "learning_rate": 4.655172413793104e-05,
61
+ "loss": 0.9929,
 
 
 
 
 
 
 
 
 
 
 
 
62
  "step": 90
63
  },
64
  {
65
+ "epoch": 0.34,
66
+ "learning_rate": 5.172413793103448e-05,
67
+ "loss": 0.9421,
68
  "step": 100
69
  },
70
  {
71
+ "epoch": 0.38,
72
+ "learning_rate": 5.689655172413793e-05,
73
+ "loss": 1.0011,
74
  "step": 110
75
  },
76
  {
77
+ "epoch": 0.41,
78
+ "learning_rate": 6.206896551724138e-05,
79
+ "loss": 0.9637,
80
  "step": 120
81
  },
82
  {
83
+ "epoch": 0.45,
84
+ "learning_rate": 6.724137931034483e-05,
85
+ "loss": 0.9283,
86
  "step": 130
87
  },
88
  {
89
+ "epoch": 0.48,
90
+ "learning_rate": 7.241379310344827e-05,
91
+ "loss": 0.9275,
92
  "step": 140
93
  },
94
  {
95
+ "epoch": 0.52,
96
+ "learning_rate": 7.758620689655173e-05,
97
+ "loss": 0.9041,
98
  "step": 150
99
  },
100
  {
101
+ "epoch": 0.55,
102
+ "learning_rate": 8.275862068965519e-05,
103
+ "loss": 0.8695,
104
  "step": 160
105
  },
106
  {
107
+ "epoch": 0.58,
108
+ "learning_rate": 8.793103448275862e-05,
109
+ "loss": 0.9382,
110
  "step": 170
111
  },
112
  {
113
+ "epoch": 0.62,
114
+ "learning_rate": 9.310344827586207e-05,
115
+ "loss": 0.8939,
 
 
 
 
 
 
 
 
 
 
 
 
116
  "step": 180
117
  },
118
  {
119
+ "epoch": 0.65,
120
+ "learning_rate": 9.827586206896552e-05,
121
+ "loss": 0.8945,
122
  "step": 190
123
  },
124
  {
125
+ "epoch": 0.69,
126
+ "learning_rate": 0.00010344827586206896,
127
+ "loss": 0.8488,
128
  "step": 200
129
  },
130
  {
131
+ "epoch": 0.72,
132
+ "learning_rate": 0.00010862068965517241,
133
+ "loss": 0.9054,
134
  "step": 210
135
  },
136
  {
137
+ "epoch": 0.76,
138
+ "learning_rate": 0.00011379310344827586,
139
+ "loss": 0.8823,
140
  "step": 220
141
  },
142
  {
143
+ "epoch": 0.79,
144
+ "learning_rate": 0.00011896551724137932,
145
+ "loss": 0.8898,
146
  "step": 230
147
  },
148
  {
149
+ "epoch": 0.83,
150
+ "learning_rate": 0.00011954022988505747,
151
+ "loss": 0.8953,
152
  "step": 240
153
  },
154
  {
155
+ "epoch": 0.86,
156
+ "learning_rate": 0.00011896551724137932,
157
+ "loss": 0.8591,
158
  "step": 250
159
  },
160
  {
161
+ "epoch": 0.89,
162
+ "learning_rate": 0.00011839080459770115,
163
+ "loss": 0.8697,
164
  "step": 260
165
  },
166
  {
167
+ "epoch": 0.93,
168
+ "learning_rate": 0.00011781609195402298,
169
+ "loss": 0.8245,
 
 
 
 
 
 
 
 
 
 
 
 
170
  "step": 270
171
  },
172
  {
173
+ "epoch": 0.96,
174
+ "learning_rate": 0.00011724137931034484,
175
+ "loss": 0.8431,
176
  "step": 280
177
  },
178
  {
179
+ "epoch": 1.0,
180
+ "learning_rate": 0.00011666666666666667,
181
+ "loss": 0.8514,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 1.0,
186
+ "eval_accuracy": 0.7047972464235776,
187
+ "eval_f1": 0.6909131065764619,
188
+ "eval_loss": 0.8464350700378418,
189
+ "eval_precision": 0.7034765556664452,
190
+ "eval_recall": 0.7047972464235776,
191
+ "eval_runtime": 103.9481,
192
+ "eval_samples_per_second": 178.878,
193
+ "eval_steps_per_second": 2.799,
194
  "step": 290
195
  },
196
  {
197
+ "epoch": 1.03,
198
+ "learning_rate": 0.00011609195402298851,
199
+ "loss": 0.8106,
200
  "step": 300
201
  },
202
  {
203
+ "epoch": 1.07,
204
+ "learning_rate": 0.00011551724137931035,
205
+ "loss": 0.766,
206
  "step": 310
207
  },
208
  {
209
+ "epoch": 1.1,
210
+ "learning_rate": 0.00011494252873563218,
211
+ "loss": 0.7461,
212
  "step": 320
213
  },
214
  {
215
+ "epoch": 1.14,
216
+ "learning_rate": 0.00011436781609195404,
217
+ "loss": 0.7814,
218
  "step": 330
219
  },
220
  {
221
+ "epoch": 1.17,
222
+ "learning_rate": 0.00011379310344827586,
223
+ "loss": 0.741,
224
  "step": 340
225
  },
226
  {
227
+ "epoch": 1.21,
228
+ "learning_rate": 0.0001132183908045977,
229
+ "loss": 0.7118,
230
  "step": 350
231
  },
232
  {
233
+ "epoch": 1.24,
234
+ "learning_rate": 0.00011264367816091955,
235
+ "loss": 0.7539,
 
 
 
 
 
 
 
 
 
 
 
 
236
  "step": 360
237
  },
238
  {
239
+ "epoch": 1.28,
240
+ "learning_rate": 0.00011206896551724138,
241
+ "loss": 0.7633,
242
  "step": 370
243
  },
244
  {
245
+ "epoch": 1.31,
246
+ "learning_rate": 0.00011149425287356322,
247
+ "loss": 0.7305,
248
  "step": 380
249
  },
250
  {
251
+ "epoch": 1.34,
252
+ "learning_rate": 0.00011091954022988506,
253
+ "loss": 0.7455,
254
  "step": 390
255
  },
256
  {
257
+ "epoch": 1.38,
258
+ "learning_rate": 0.00011034482758620689,
259
+ "loss": 0.724,
260
  "step": 400
261
  },
262
  {
263
+ "epoch": 1.41,
264
+ "learning_rate": 0.00010977011494252874,
265
+ "loss": 0.7825,
266
  "step": 410
267
  },
268
  {
269
+ "epoch": 1.45,
270
+ "learning_rate": 0.00010919540229885058,
271
+ "loss": 0.7558,
272
  "step": 420
273
  },
274
  {
275
+ "epoch": 1.48,
276
+ "learning_rate": 0.00010862068965517241,
277
+ "loss": 0.744,
278
  "step": 430
279
  },
280
  {
281
+ "epoch": 1.52,
282
+ "learning_rate": 0.00010804597701149426,
283
+ "loss": 0.7328,
284
  "step": 440
285
  },
286
  {
287
+ "epoch": 1.55,
288
+ "learning_rate": 0.00010747126436781609,
289
+ "loss": 0.7385,
 
 
 
 
 
 
 
 
 
 
 
 
290
  "step": 450
291
  },
292
  {
293
+ "epoch": 1.58,
294
+ "learning_rate": 0.00010689655172413793,
295
+ "loss": 0.7443,
296
  "step": 460
297
  },
298
  {
299
+ "epoch": 1.62,
300
+ "learning_rate": 0.00010632183908045978,
301
+ "loss": 0.7126,
302
  "step": 470
303
  },
304
  {
305
+ "epoch": 1.65,
306
+ "learning_rate": 0.0001057471264367816,
307
+ "loss": 0.8166,
308
  "step": 480
309
  },
310
  {
311
+ "epoch": 1.69,
312
+ "learning_rate": 0.00010517241379310345,
313
+ "loss": 0.7516,
314
  "step": 490
315
  },
316
  {
317
+ "epoch": 1.72,
318
+ "learning_rate": 0.00010459770114942529,
319
+ "loss": 0.7389,
320
  "step": 500
321
  },
322
  {
323
+ "epoch": 1.76,
324
+ "learning_rate": 0.00010402298850574713,
325
+ "loss": 0.766,
326
  "step": 510
327
  },
328
  {
329
+ "epoch": 1.79,
330
+ "learning_rate": 0.00010344827586206896,
331
+ "loss": 0.7343,
332
  "step": 520
333
  },
334
  {
335
+ "epoch": 1.83,
336
+ "learning_rate": 0.0001028735632183908,
337
+ "loss": 0.7132,
338
  "step": 530
339
  },
340
  {
341
+ "epoch": 1.86,
342
+ "learning_rate": 0.00010229885057471265,
343
+ "loss": 0.7494,
 
 
 
 
 
 
 
 
 
 
 
 
344
  "step": 540
345
  },
346
  {
347
+ "epoch": 1.89,
348
+ "learning_rate": 0.00010172413793103449,
349
+ "loss": 0.7426,
350
  "step": 550
351
  },
352
  {
353
+ "epoch": 1.93,
354
+ "learning_rate": 0.00010114942528735632,
355
+ "loss": 0.7305,
356
  "step": 560
357
  },
358
  {
359
+ "epoch": 1.96,
360
+ "learning_rate": 0.00010057471264367816,
361
+ "loss": 0.7489,
362
  "step": 570
363
  },
364
  {
365
+ "epoch": 2.0,
366
+ "learning_rate": 0.0001,
367
+ "loss": 0.7202,
368
  "step": 580
369
  },
370
  {
371
+ "epoch": 2.0,
372
+ "eval_accuracy": 0.7282994514359471,
373
+ "eval_f1": 0.7111205455958668,
374
+ "eval_loss": 0.7790911793708801,
375
+ "eval_precision": 0.7297231799712814,
376
+ "eval_recall": 0.7282994514359471,
377
+ "eval_runtime": 104.6701,
378
+ "eval_samples_per_second": 177.644,
379
+ "eval_steps_per_second": 2.78,
380
+ "step": 580
381
+ },
382
+ {
383
+ "epoch": 2.03,
384
+ "learning_rate": 9.942528735632185e-05,
385
+ "loss": 0.6398,
386
  "step": 590
387
  },
388
  {
389
+ "epoch": 2.07,
390
+ "learning_rate": 9.885057471264368e-05,
391
+ "loss": 0.5381,
392
  "step": 600
393
  },
394
  {
395
+ "epoch": 2.1,
396
+ "learning_rate": 9.827586206896552e-05,
397
+ "loss": 0.5646,
398
  "step": 610
399
  },
400
  {
401
+ "epoch": 2.14,
402
+ "learning_rate": 9.770114942528736e-05,
403
+ "loss": 0.5356,
404
  "step": 620
405
  },
406
  {
407
+ "epoch": 2.17,
408
+ "learning_rate": 9.712643678160919e-05,
409
+ "loss": 0.5734,
 
 
 
 
 
 
 
 
 
 
 
 
410
  "step": 630
411
  },
412
  {
413
+ "epoch": 2.21,
414
+ "learning_rate": 9.655172413793103e-05,
415
+ "loss": 0.5097,
416
  "step": 640
417
  },
418
  {
419
+ "epoch": 2.24,
420
+ "learning_rate": 9.597701149425288e-05,
421
+ "loss": 0.5701,
422
  "step": 650
423
  },
424
  {
425
+ "epoch": 2.28,
426
+ "learning_rate": 9.540229885057472e-05,
427
+ "loss": 0.5802,
428
  "step": 660
429
  },
430
  {
431
+ "epoch": 2.31,
432
+ "learning_rate": 9.482758620689656e-05,
433
+ "loss": 0.5794,
434
  "step": 670
435
  },
436
  {
437
+ "epoch": 2.34,
438
+ "learning_rate": 9.425287356321839e-05,
439
+ "loss": 0.5845,
440
  "step": 680
441
  },
442
  {
443
+ "epoch": 2.38,
444
+ "learning_rate": 9.367816091954023e-05,
445
+ "loss": 0.5591,
446
  "step": 690
447
  },
448
  {
449
+ "epoch": 2.41,
450
+ "learning_rate": 9.310344827586207e-05,
451
+ "loss": 0.5567,
452
  "step": 700
453
  },
454
  {
455
+ "epoch": 2.45,
456
+ "learning_rate": 9.25287356321839e-05,
457
+ "loss": 0.5524,
458
  "step": 710
459
  },
460
  {
461
+ "epoch": 2.48,
462
+ "learning_rate": 9.195402298850575e-05,
463
+ "loss": 0.5569,
464
+ "step": 720
465
+ },
466
+ {
467
+ "epoch": 2.52,
468
+ "learning_rate": 9.137931034482759e-05,
469
+ "loss": 0.5582,
470
+ "step": 730
471
+ },
472
+ {
473
+ "epoch": 2.55,
474
+ "learning_rate": 9.080459770114942e-05,
475
+ "loss": 0.5819,
476
+ "step": 740
477
+ },
478
+ {
479
+ "epoch": 2.58,
480
+ "learning_rate": 9.022988505747127e-05,
481
+ "loss": 0.5735,
482
+ "step": 750
483
+ },
484
+ {
485
+ "epoch": 2.62,
486
+ "learning_rate": 8.96551724137931e-05,
487
+ "loss": 0.5534,
488
+ "step": 760
489
+ },
490
+ {
491
+ "epoch": 2.65,
492
+ "learning_rate": 8.908045977011495e-05,
493
+ "loss": 0.5936,
494
+ "step": 770
495
+ },
496
+ {
497
+ "epoch": 2.69,
498
+ "learning_rate": 8.850574712643679e-05,
499
+ "loss": 0.5248,
500
+ "step": 780
501
+ },
502
+ {
503
+ "epoch": 2.72,
504
+ "learning_rate": 8.793103448275862e-05,
505
+ "loss": 0.5574,
506
+ "step": 790
507
+ },
508
+ {
509
+ "epoch": 2.76,
510
+ "learning_rate": 8.735632183908046e-05,
511
+ "loss": 0.5148,
512
+ "step": 800
513
+ },
514
+ {
515
+ "epoch": 2.79,
516
+ "learning_rate": 8.67816091954023e-05,
517
+ "loss": 0.5914,
518
+ "step": 810
519
+ },
520
+ {
521
+ "epoch": 2.83,
522
+ "learning_rate": 8.620689655172413e-05,
523
+ "loss": 0.6162,
524
+ "step": 820
525
+ },
526
+ {
527
+ "epoch": 2.86,
528
+ "learning_rate": 8.563218390804599e-05,
529
+ "loss": 0.5406,
530
+ "step": 830
531
+ },
532
+ {
533
+ "epoch": 2.89,
534
+ "learning_rate": 8.505747126436782e-05,
535
+ "loss": 0.5718,
536
+ "step": 840
537
+ },
538
+ {
539
+ "epoch": 2.93,
540
+ "learning_rate": 8.448275862068965e-05,
541
+ "loss": 0.5683,
542
+ "step": 850
543
+ },
544
+ {
545
+ "epoch": 2.96,
546
+ "learning_rate": 8.39080459770115e-05,
547
+ "loss": 0.5374,
548
+ "step": 860
549
+ },
550
+ {
551
+ "epoch": 3.0,
552
+ "learning_rate": 8.333333333333333e-05,
553
+ "loss": 0.5455,
554
+ "step": 870
555
+ },
556
+ {
557
+ "epoch": 3.0,
558
+ "eval_accuracy": 0.728460793804453,
559
+ "eval_f1": 0.717108375092518,
560
+ "eval_loss": 0.7950025200843811,
561
+ "eval_precision": 0.7174203394442972,
562
+ "eval_recall": 0.728460793804453,
563
+ "eval_runtime": 109.0183,
564
+ "eval_samples_per_second": 170.559,
565
+ "eval_steps_per_second": 2.669,
566
+ "step": 870
567
+ },
568
+ {
569
+ "epoch": 3.03,
570
+ "learning_rate": 8.275862068965519e-05,
571
+ "loss": 0.4109,
572
+ "step": 880
573
+ },
574
+ {
575
+ "epoch": 3.07,
576
+ "learning_rate": 8.218390804597702e-05,
577
+ "loss": 0.3615,
578
+ "step": 890
579
+ },
580
+ {
581
+ "epoch": 3.1,
582
+ "learning_rate": 8.160919540229884e-05,
583
+ "loss": 0.3445,
584
+ "step": 900
585
+ },
586
+ {
587
+ "epoch": 3.14,
588
+ "learning_rate": 8.10344827586207e-05,
589
+ "loss": 0.3407,
590
+ "step": 910
591
+ },
592
+ {
593
+ "epoch": 3.17,
594
+ "learning_rate": 8.045977011494253e-05,
595
+ "loss": 0.3228,
596
+ "step": 920
597
+ },
598
+ {
599
+ "epoch": 3.21,
600
+ "learning_rate": 7.988505747126436e-05,
601
+ "loss": 0.3376,
602
+ "step": 930
603
+ },
604
+ {
605
+ "epoch": 3.24,
606
+ "learning_rate": 7.931034482758621e-05,
607
+ "loss": 0.3616,
608
+ "step": 940
609
+ },
610
+ {
611
+ "epoch": 3.28,
612
+ "learning_rate": 7.873563218390804e-05,
613
+ "loss": 0.3393,
614
+ "step": 950
615
+ },
616
+ {
617
+ "epoch": 3.31,
618
+ "learning_rate": 7.816091954022989e-05,
619
+ "loss": 0.324,
620
+ "step": 960
621
+ },
622
+ {
623
+ "epoch": 3.34,
624
+ "learning_rate": 7.758620689655173e-05,
625
+ "loss": 0.3391,
626
+ "step": 970
627
+ },
628
+ {
629
+ "epoch": 3.38,
630
+ "learning_rate": 7.701149425287356e-05,
631
+ "loss": 0.3589,
632
+ "step": 980
633
+ },
634
+ {
635
+ "epoch": 3.41,
636
+ "learning_rate": 7.643678160919541e-05,
637
+ "loss": 0.328,
638
+ "step": 990
639
+ },
640
+ {
641
+ "epoch": 3.45,
642
+ "learning_rate": 7.586206896551724e-05,
643
+ "loss": 0.3256,
644
+ "step": 1000
645
+ },
646
+ {
647
+ "epoch": 3.48,
648
+ "learning_rate": 7.528735632183907e-05,
649
+ "loss": 0.338,
650
+ "step": 1010
651
+ },
652
+ {
653
+ "epoch": 3.52,
654
+ "learning_rate": 7.471264367816093e-05,
655
+ "loss": 0.3353,
656
+ "step": 1020
657
+ },
658
+ {
659
+ "epoch": 3.55,
660
+ "learning_rate": 7.413793103448276e-05,
661
+ "loss": 0.3147,
662
+ "step": 1030
663
+ },
664
+ {
665
+ "epoch": 3.58,
666
+ "learning_rate": 7.35632183908046e-05,
667
+ "loss": 0.3405,
668
+ "step": 1040
669
+ },
670
+ {
671
+ "epoch": 3.62,
672
+ "learning_rate": 7.298850574712644e-05,
673
+ "loss": 0.3409,
674
+ "step": 1050
675
+ },
676
+ {
677
+ "epoch": 3.65,
678
+ "learning_rate": 7.241379310344827e-05,
679
+ "loss": 0.3611,
680
+ "step": 1060
681
+ },
682
+ {
683
+ "epoch": 3.69,
684
+ "learning_rate": 7.183908045977013e-05,
685
+ "loss": 0.3501,
686
+ "step": 1070
687
+ },
688
+ {
689
+ "epoch": 3.72,
690
+ "learning_rate": 7.126436781609196e-05,
691
+ "loss": 0.3594,
692
+ "step": 1080
693
+ },
694
+ {
695
+ "epoch": 3.76,
696
+ "learning_rate": 7.068965517241379e-05,
697
+ "loss": 0.3507,
698
+ "step": 1090
699
+ },
700
+ {
701
+ "epoch": 3.79,
702
+ "learning_rate": 7.011494252873564e-05,
703
+ "loss": 0.3376,
704
+ "step": 1100
705
+ },
706
+ {
707
+ "epoch": 3.83,
708
+ "learning_rate": 6.954022988505747e-05,
709
+ "loss": 0.3355,
710
+ "step": 1110
711
+ },
712
+ {
713
+ "epoch": 3.86,
714
+ "learning_rate": 6.896551724137931e-05,
715
+ "loss": 0.3356,
716
+ "step": 1120
717
+ },
718
+ {
719
+ "epoch": 3.89,
720
+ "learning_rate": 6.839080459770116e-05,
721
+ "loss": 0.3489,
722
+ "step": 1130
723
+ },
724
+ {
725
+ "epoch": 3.93,
726
+ "learning_rate": 6.781609195402298e-05,
727
+ "loss": 0.3348,
728
+ "step": 1140
729
+ },
730
+ {
731
+ "epoch": 3.96,
732
+ "learning_rate": 6.724137931034483e-05,
733
+ "loss": 0.3436,
734
+ "step": 1150
735
+ },
736
+ {
737
+ "epoch": 4.0,
738
+ "learning_rate": 6.666666666666667e-05,
739
+ "loss": 0.334,
740
+ "step": 1160
741
+ },
742
+ {
743
+ "epoch": 4.0,
744
+ "eval_accuracy": 0.7154996235344735,
745
+ "eval_f1": 0.7144950276664553,
746
+ "eval_loss": 0.894753098487854,
747
+ "eval_precision": 0.715167448955365,
748
+ "eval_recall": 0.7154996235344735,
749
+ "eval_runtime": 102.1557,
750
+ "eval_samples_per_second": 182.016,
751
+ "eval_steps_per_second": 2.849,
752
+ "step": 1160
753
+ },
754
+ {
755
+ "epoch": 4.03,
756
+ "learning_rate": 6.60919540229885e-05,
757
+ "loss": 0.2132,
758
+ "step": 1170
759
+ },
760
+ {
761
+ "epoch": 4.07,
762
+ "learning_rate": 6.551724137931035e-05,
763
+ "loss": 0.1556,
764
+ "step": 1180
765
+ },
766
+ {
767
+ "epoch": 4.1,
768
+ "learning_rate": 6.494252873563218e-05,
769
+ "loss": 0.1607,
770
+ "step": 1190
771
+ },
772
+ {
773
+ "epoch": 4.14,
774
+ "learning_rate": 6.436781609195403e-05,
775
+ "loss": 0.1572,
776
+ "step": 1200
777
+ },
778
+ {
779
+ "epoch": 4.17,
780
+ "learning_rate": 6.379310344827587e-05,
781
+ "loss": 0.1554,
782
+ "step": 1210
783
+ },
784
+ {
785
+ "epoch": 4.21,
786
+ "learning_rate": 6.32183908045977e-05,
787
+ "loss": 0.156,
788
+ "step": 1220
789
+ },
790
+ {
791
+ "epoch": 4.24,
792
+ "learning_rate": 6.264367816091954e-05,
793
+ "loss": 0.1339,
794
+ "step": 1230
795
+ },
796
+ {
797
+ "epoch": 4.28,
798
+ "learning_rate": 6.206896551724138e-05,
799
+ "loss": 0.1565,
800
+ "step": 1240
801
+ },
802
+ {
803
+ "epoch": 4.31,
804
+ "learning_rate": 6.149425287356323e-05,
805
+ "loss": 0.1582,
806
+ "step": 1250
807
+ },
808
+ {
809
+ "epoch": 4.34,
810
+ "learning_rate": 6.0919540229885055e-05,
811
+ "loss": 0.1466,
812
+ "step": 1260
813
+ },
814
+ {
815
+ "epoch": 4.38,
816
+ "learning_rate": 6.0344827586206904e-05,
817
+ "loss": 0.1557,
818
+ "step": 1270
819
+ },
820
+ {
821
+ "epoch": 4.41,
822
+ "learning_rate": 5.977011494252873e-05,
823
+ "loss": 0.1522,
824
+ "step": 1280
825
+ },
826
+ {
827
+ "epoch": 4.45,
828
+ "learning_rate": 5.9195402298850576e-05,
829
+ "loss": 0.1373,
830
+ "step": 1290
831
+ },
832
+ {
833
+ "epoch": 4.48,
834
+ "learning_rate": 5.862068965517242e-05,
835
+ "loss": 0.1515,
836
+ "step": 1300
837
+ },
838
+ {
839
+ "epoch": 4.52,
840
+ "learning_rate": 5.8045977011494254e-05,
841
+ "loss": 0.1473,
842
+ "step": 1310
843
+ },
844
+ {
845
+ "epoch": 4.55,
846
+ "learning_rate": 5.747126436781609e-05,
847
+ "loss": 0.1513,
848
+ "step": 1320
849
+ },
850
+ {
851
+ "epoch": 4.58,
852
+ "learning_rate": 5.689655172413793e-05,
853
+ "loss": 0.1553,
854
+ "step": 1330
855
+ },
856
+ {
857
+ "epoch": 4.62,
858
+ "learning_rate": 5.6321839080459775e-05,
859
+ "loss": 0.158,
860
+ "step": 1340
861
+ },
862
+ {
863
+ "epoch": 4.65,
864
+ "learning_rate": 5.574712643678161e-05,
865
+ "loss": 0.1603,
866
+ "step": 1350
867
+ },
868
+ {
869
+ "epoch": 4.69,
870
+ "learning_rate": 5.5172413793103446e-05,
871
+ "loss": 0.1488,
872
+ "step": 1360
873
+ },
874
+ {
875
+ "epoch": 4.72,
876
+ "learning_rate": 5.459770114942529e-05,
877
+ "loss": 0.1495,
878
+ "step": 1370
879
+ },
880
+ {
881
+ "epoch": 4.76,
882
+ "learning_rate": 5.402298850574713e-05,
883
+ "loss": 0.1598,
884
+ "step": 1380
885
+ },
886
+ {
887
+ "epoch": 4.79,
888
+ "learning_rate": 5.344827586206897e-05,
889
+ "loss": 0.1468,
890
+ "step": 1390
891
+ },
892
+ {
893
+ "epoch": 4.83,
894
+ "learning_rate": 5.28735632183908e-05,
895
+ "loss": 0.1535,
896
+ "step": 1400
897
+ },
898
+ {
899
+ "epoch": 4.86,
900
+ "learning_rate": 5.2298850574712646e-05,
901
+ "loss": 0.1491,
902
+ "step": 1410
903
+ },
904
+ {
905
+ "epoch": 4.89,
906
+ "learning_rate": 5.172413793103448e-05,
907
+ "loss": 0.1436,
908
+ "step": 1420
909
+ },
910
+ {
911
+ "epoch": 4.93,
912
+ "learning_rate": 5.1149425287356324e-05,
913
+ "loss": 0.138,
914
+ "step": 1430
915
+ },
916
+ {
917
+ "epoch": 4.96,
918
+ "learning_rate": 5.057471264367816e-05,
919
+ "loss": 0.1439,
920
+ "step": 1440
921
+ },
922
+ {
923
+ "epoch": 5.0,
924
+ "learning_rate": 5e-05,
925
+ "loss": 0.1644,
926
+ "step": 1450
927
+ },
928
+ {
929
+ "epoch": 5.0,
930
+ "eval_accuracy": 0.7238894266967839,
931
+ "eval_f1": 0.7194230600781256,
932
+ "eval_loss": 1.0820467472076416,
933
+ "eval_precision": 0.7188913807380165,
934
+ "eval_recall": 0.7238894266967839,
935
+ "eval_runtime": 105.8352,
936
+ "eval_samples_per_second": 175.688,
937
+ "eval_steps_per_second": 2.75,
938
+ "step": 1450
939
+ },
940
+ {
941
+ "epoch": 5.03,
942
+ "learning_rate": 4.942528735632184e-05,
943
+ "loss": 0.0688,
944
+ "step": 1460
945
+ },
946
+ {
947
+ "epoch": 5.07,
948
+ "learning_rate": 4.885057471264368e-05,
949
+ "loss": 0.0619,
950
+ "step": 1470
951
+ },
952
+ {
953
+ "epoch": 5.1,
954
+ "learning_rate": 4.8275862068965517e-05,
955
+ "loss": 0.0502,
956
+ "step": 1480
957
+ },
958
+ {
959
+ "epoch": 5.14,
960
+ "learning_rate": 4.770114942528736e-05,
961
+ "loss": 0.0503,
962
+ "step": 1490
963
+ },
964
+ {
965
+ "epoch": 5.17,
966
+ "learning_rate": 4.7126436781609195e-05,
967
+ "loss": 0.0546,
968
+ "step": 1500
969
+ },
970
+ {
971
+ "epoch": 5.21,
972
+ "learning_rate": 4.655172413793104e-05,
973
+ "loss": 0.0529,
974
+ "step": 1510
975
+ },
976
+ {
977
+ "epoch": 5.24,
978
+ "learning_rate": 4.597701149425287e-05,
979
+ "loss": 0.0529,
980
+ "step": 1520
981
+ },
982
+ {
983
+ "epoch": 5.28,
984
+ "learning_rate": 4.540229885057471e-05,
985
+ "loss": 0.0499,
986
+ "step": 1530
987
+ },
988
+ {
989
+ "epoch": 5.31,
990
+ "learning_rate": 4.482758620689655e-05,
991
+ "loss": 0.0525,
992
+ "step": 1540
993
+ },
994
+ {
995
+ "epoch": 5.34,
996
+ "learning_rate": 4.4252873563218394e-05,
997
+ "loss": 0.0582,
998
+ "step": 1550
999
+ },
1000
+ {
1001
+ "epoch": 5.38,
1002
+ "learning_rate": 4.367816091954023e-05,
1003
+ "loss": 0.0598,
1004
+ "step": 1560
1005
+ },
1006
+ {
1007
+ "epoch": 5.41,
1008
+ "learning_rate": 4.3103448275862066e-05,
1009
+ "loss": 0.0587,
1010
+ "step": 1570
1011
+ },
1012
+ {
1013
+ "epoch": 5.45,
1014
+ "learning_rate": 4.252873563218391e-05,
1015
+ "loss": 0.0507,
1016
+ "step": 1580
1017
+ },
1018
+ {
1019
+ "epoch": 5.48,
1020
+ "learning_rate": 4.195402298850575e-05,
1021
+ "loss": 0.0565,
1022
+ "step": 1590
1023
+ },
1024
+ {
1025
+ "epoch": 5.52,
1026
+ "learning_rate": 4.137931034482759e-05,
1027
+ "loss": 0.0609,
1028
+ "step": 1600
1029
+ },
1030
+ {
1031
+ "epoch": 5.55,
1032
+ "learning_rate": 4.080459770114942e-05,
1033
+ "loss": 0.0459,
1034
+ "step": 1610
1035
+ },
1036
+ {
1037
+ "epoch": 5.58,
1038
+ "learning_rate": 4.0229885057471265e-05,
1039
+ "loss": 0.0647,
1040
+ "step": 1620
1041
+ },
1042
+ {
1043
+ "epoch": 5.62,
1044
+ "learning_rate": 3.965517241379311e-05,
1045
+ "loss": 0.0604,
1046
+ "step": 1630
1047
+ },
1048
+ {
1049
+ "epoch": 5.65,
1050
+ "learning_rate": 3.908045977011494e-05,
1051
+ "loss": 0.0648,
1052
+ "step": 1640
1053
+ },
1054
+ {
1055
+ "epoch": 5.69,
1056
+ "learning_rate": 3.850574712643678e-05,
1057
+ "loss": 0.0581,
1058
+ "step": 1650
1059
+ },
1060
+ {
1061
+ "epoch": 5.72,
1062
+ "learning_rate": 3.793103448275862e-05,
1063
+ "loss": 0.0517,
1064
+ "step": 1660
1065
+ },
1066
+ {
1067
+ "epoch": 5.76,
1068
+ "learning_rate": 3.7356321839080464e-05,
1069
+ "loss": 0.0558,
1070
+ "step": 1670
1071
+ },
1072
+ {
1073
+ "epoch": 5.79,
1074
+ "learning_rate": 3.67816091954023e-05,
1075
+ "loss": 0.0464,
1076
+ "step": 1680
1077
+ },
1078
+ {
1079
+ "epoch": 5.83,
1080
+ "learning_rate": 3.6206896551724136e-05,
1081
+ "loss": 0.054,
1082
+ "step": 1690
1083
+ },
1084
+ {
1085
+ "epoch": 5.86,
1086
+ "learning_rate": 3.563218390804598e-05,
1087
+ "loss": 0.0609,
1088
+ "step": 1700
1089
+ },
1090
+ {
1091
+ "epoch": 5.89,
1092
+ "learning_rate": 3.505747126436782e-05,
1093
+ "loss": 0.0507,
1094
+ "step": 1710
1095
+ },
1096
+ {
1097
+ "epoch": 5.93,
1098
+ "learning_rate": 3.4482758620689657e-05,
1099
+ "loss": 0.0521,
1100
+ "step": 1720
1101
+ },
1102
+ {
1103
+ "epoch": 5.96,
1104
+ "learning_rate": 3.390804597701149e-05,
1105
+ "loss": 0.0572,
1106
+ "step": 1730
1107
+ },
1108
+ {
1109
+ "epoch": 6.0,
1110
+ "learning_rate": 3.3333333333333335e-05,
1111
+ "loss": 0.0482,
1112
+ "step": 1740
1113
+ },
1114
+ {
1115
+ "epoch": 6.0,
1116
+ "eval_accuracy": 0.7204474561686566,
1117
+ "eval_f1": 0.71604121882508,
1118
+ "eval_loss": 1.2792034149169922,
1119
+ "eval_precision": 0.7144269552366298,
1120
+ "eval_recall": 0.7204474561686566,
1121
+ "eval_runtime": 99.5538,
1122
+ "eval_samples_per_second": 186.773,
1123
+ "eval_steps_per_second": 2.923,
1124
+ "step": 1740
1125
+ },
1126
+ {
1127
+ "epoch": 6.03,
1128
+ "learning_rate": 3.275862068965518e-05,
1129
+ "loss": 0.0261,
1130
+ "step": 1750
1131
+ },
1132
+ {
1133
+ "epoch": 6.07,
1134
+ "learning_rate": 3.218390804597701e-05,
1135
+ "loss": 0.0242,
1136
+ "step": 1760
1137
+ },
1138
+ {
1139
+ "epoch": 6.1,
1140
+ "learning_rate": 3.160919540229885e-05,
1141
+ "loss": 0.0261,
1142
+ "step": 1770
1143
+ },
1144
+ {
1145
+ "epoch": 6.14,
1146
+ "learning_rate": 3.103448275862069e-05,
1147
+ "loss": 0.0174,
1148
+ "step": 1780
1149
+ },
1150
+ {
1151
+ "epoch": 6.17,
1152
+ "learning_rate": 3.0459770114942527e-05,
1153
+ "loss": 0.0174,
1154
+ "step": 1790
1155
+ },
1156
+ {
1157
+ "epoch": 6.21,
1158
+ "learning_rate": 2.9885057471264367e-05,
1159
+ "loss": 0.0261,
1160
+ "step": 1800
1161
+ },
1162
+ {
1163
+ "epoch": 6.24,
1164
+ "learning_rate": 2.931034482758621e-05,
1165
+ "loss": 0.0233,
1166
+ "step": 1810
1167
+ },
1168
+ {
1169
+ "epoch": 6.28,
1170
+ "learning_rate": 2.8735632183908045e-05,
1171
+ "loss": 0.022,
1172
+ "step": 1820
1173
+ },
1174
+ {
1175
+ "epoch": 6.31,
1176
+ "learning_rate": 2.8160919540229887e-05,
1177
+ "loss": 0.0264,
1178
+ "step": 1830
1179
+ },
1180
+ {
1181
+ "epoch": 6.34,
1182
+ "learning_rate": 2.7586206896551723e-05,
1183
+ "loss": 0.0222,
1184
+ "step": 1840
1185
+ },
1186
+ {
1187
+ "epoch": 6.38,
1188
+ "learning_rate": 2.7011494252873566e-05,
1189
+ "loss": 0.0216,
1190
+ "step": 1850
1191
+ },
1192
+ {
1193
+ "epoch": 6.41,
1194
+ "learning_rate": 2.64367816091954e-05,
1195
+ "loss": 0.0193,
1196
+ "step": 1860
1197
+ },
1198
+ {
1199
+ "epoch": 6.45,
1200
+ "learning_rate": 2.586206896551724e-05,
1201
+ "loss": 0.0222,
1202
+ "step": 1870
1203
+ },
1204
+ {
1205
+ "epoch": 6.48,
1206
+ "learning_rate": 2.528735632183908e-05,
1207
+ "loss": 0.0153,
1208
+ "step": 1880
1209
+ },
1210
+ {
1211
+ "epoch": 6.52,
1212
+ "learning_rate": 2.471264367816092e-05,
1213
+ "loss": 0.0142,
1214
+ "step": 1890
1215
+ },
1216
+ {
1217
+ "epoch": 6.55,
1218
+ "learning_rate": 2.4137931034482758e-05,
1219
+ "loss": 0.025,
1220
+ "step": 1900
1221
+ },
1222
+ {
1223
+ "epoch": 6.58,
1224
+ "learning_rate": 2.3563218390804597e-05,
1225
+ "loss": 0.0197,
1226
+ "step": 1910
1227
+ },
1228
+ {
1229
+ "epoch": 6.62,
1230
+ "learning_rate": 2.2988505747126437e-05,
1231
+ "loss": 0.0212,
1232
+ "step": 1920
1233
+ },
1234
+ {
1235
+ "epoch": 6.65,
1236
+ "learning_rate": 2.2413793103448276e-05,
1237
+ "loss": 0.0181,
1238
+ "step": 1930
1239
+ },
1240
+ {
1241
+ "epoch": 6.69,
1242
+ "learning_rate": 2.1839080459770115e-05,
1243
+ "loss": 0.0228,
1244
+ "step": 1940
1245
+ },
1246
+ {
1247
+ "epoch": 6.72,
1248
+ "learning_rate": 2.1264367816091954e-05,
1249
+ "loss": 0.015,
1250
+ "step": 1950
1251
+ },
1252
+ {
1253
+ "epoch": 6.76,
1254
+ "learning_rate": 2.0689655172413797e-05,
1255
+ "loss": 0.0188,
1256
+ "step": 1960
1257
+ },
1258
+ {
1259
+ "epoch": 6.79,
1260
+ "learning_rate": 2.0114942528735632e-05,
1261
+ "loss": 0.0231,
1262
+ "step": 1970
1263
+ },
1264
+ {
1265
+ "epoch": 6.83,
1266
+ "learning_rate": 1.954022988505747e-05,
1267
+ "loss": 0.0157,
1268
+ "step": 1980
1269
+ },
1270
+ {
1271
+ "epoch": 6.86,
1272
+ "learning_rate": 1.896551724137931e-05,
1273
+ "loss": 0.0155,
1274
+ "step": 1990
1275
+ },
1276
+ {
1277
+ "epoch": 6.89,
1278
+ "learning_rate": 1.839080459770115e-05,
1279
+ "loss": 0.0214,
1280
+ "step": 2000
1281
+ },
1282
+ {
1283
+ "epoch": 6.93,
1284
+ "learning_rate": 1.781609195402299e-05,
1285
+ "loss": 0.0154,
1286
+ "step": 2010
1287
+ },
1288
+ {
1289
+ "epoch": 6.96,
1290
+ "learning_rate": 1.7241379310344828e-05,
1291
+ "loss": 0.0186,
1292
+ "step": 2020
1293
+ },
1294
+ {
1295
+ "epoch": 7.0,
1296
+ "learning_rate": 1.6666666666666667e-05,
1297
+ "loss": 0.0236,
1298
+ "step": 2030
1299
+ },
1300
+ {
1301
+ "epoch": 7.0,
1302
+ "eval_accuracy": 0.7278692051199311,
1303
+ "eval_f1": 0.7209486341359743,
1304
+ "eval_loss": 1.4161986112594604,
1305
+ "eval_precision": 0.7194977934958102,
1306
+ "eval_recall": 0.7278692051199311,
1307
+ "eval_runtime": 103.3877,
1308
+ "eval_samples_per_second": 179.847,
1309
+ "eval_steps_per_second": 2.815,
1310
+ "step": 2030
1311
+ },
1312
+ {
1313
+ "epoch": 7.03,
1314
+ "learning_rate": 1.6091954022988507e-05,
1315
+ "loss": 0.0094,
1316
+ "step": 2040
1317
+ },
1318
+ {
1319
+ "epoch": 7.07,
1320
+ "learning_rate": 1.5517241379310346e-05,
1321
+ "loss": 0.0117,
1322
+ "step": 2050
1323
+ },
1324
+ {
1325
+ "epoch": 7.1,
1326
+ "learning_rate": 1.4942528735632183e-05,
1327
+ "loss": 0.0049,
1328
+ "step": 2060
1329
+ },
1330
+ {
1331
+ "epoch": 7.14,
1332
+ "learning_rate": 1.4367816091954022e-05,
1333
+ "loss": 0.0097,
1334
+ "step": 2070
1335
+ },
1336
+ {
1337
+ "epoch": 7.17,
1338
+ "learning_rate": 1.3793103448275862e-05,
1339
+ "loss": 0.0092,
1340
+ "step": 2080
1341
+ },
1342
+ {
1343
+ "epoch": 7.21,
1344
+ "learning_rate": 1.32183908045977e-05,
1345
+ "loss": 0.0086,
1346
+ "step": 2090
1347
+ },
1348
+ {
1349
+ "epoch": 7.24,
1350
+ "learning_rate": 1.264367816091954e-05,
1351
+ "loss": 0.008,
1352
+ "step": 2100
1353
+ },
1354
+ {
1355
+ "epoch": 7.28,
1356
+ "learning_rate": 1.2068965517241379e-05,
1357
+ "loss": 0.01,
1358
+ "step": 2110
1359
+ },
1360
+ {
1361
+ "epoch": 7.31,
1362
+ "learning_rate": 1.1494252873563218e-05,
1363
+ "loss": 0.0078,
1364
+ "step": 2120
1365
+ },
1366
+ {
1367
+ "epoch": 7.34,
1368
+ "learning_rate": 1.0919540229885057e-05,
1369
+ "loss": 0.0093,
1370
+ "step": 2130
1371
+ },
1372
+ {
1373
+ "epoch": 7.38,
1374
+ "learning_rate": 1.0344827586206898e-05,
1375
+ "loss": 0.0081,
1376
+ "step": 2140
1377
+ },
1378
+ {
1379
+ "epoch": 7.41,
1380
+ "learning_rate": 9.770114942528736e-06,
1381
+ "loss": 0.0057,
1382
+ "step": 2150
1383
+ },
1384
+ {
1385
+ "epoch": 7.45,
1386
+ "learning_rate": 9.195402298850575e-06,
1387
+ "loss": 0.0095,
1388
+ "step": 2160
1389
+ },
1390
+ {
1391
+ "epoch": 7.48,
1392
+ "learning_rate": 8.620689655172414e-06,
1393
+ "loss": 0.0062,
1394
+ "step": 2170
1395
+ },
1396
+ {
1397
+ "epoch": 7.52,
1398
+ "learning_rate": 8.045977011494253e-06,
1399
+ "loss": 0.0124,
1400
+ "step": 2180
1401
+ },
1402
+ {
1403
+ "epoch": 7.55,
1404
+ "learning_rate": 7.471264367816092e-06,
1405
+ "loss": 0.008,
1406
+ "step": 2190
1407
+ },
1408
+ {
1409
+ "epoch": 7.58,
1410
+ "learning_rate": 6.896551724137931e-06,
1411
+ "loss": 0.0067,
1412
+ "step": 2200
1413
+ },
1414
+ {
1415
+ "epoch": 7.62,
1416
+ "learning_rate": 6.32183908045977e-06,
1417
+ "loss": 0.0085,
1418
+ "step": 2210
1419
+ },
1420
+ {
1421
+ "epoch": 7.65,
1422
+ "learning_rate": 5.747126436781609e-06,
1423
+ "loss": 0.0053,
1424
+ "step": 2220
1425
+ },
1426
+ {
1427
+ "epoch": 7.69,
1428
+ "learning_rate": 5.172413793103449e-06,
1429
+ "loss": 0.007,
1430
+ "step": 2230
1431
+ },
1432
+ {
1433
+ "epoch": 7.72,
1434
+ "learning_rate": 4.5977011494252875e-06,
1435
+ "loss": 0.0061,
1436
+ "step": 2240
1437
+ },
1438
+ {
1439
+ "epoch": 7.76,
1440
+ "learning_rate": 4.022988505747127e-06,
1441
+ "loss": 0.0054,
1442
+ "step": 2250
1443
+ },
1444
+ {
1445
+ "epoch": 7.79,
1446
+ "learning_rate": 3.4482758620689654e-06,
1447
+ "loss": 0.0103,
1448
+ "step": 2260
1449
+ },
1450
+ {
1451
+ "epoch": 7.83,
1452
+ "learning_rate": 2.8735632183908046e-06,
1453
+ "loss": 0.0131,
1454
+ "step": 2270
1455
+ },
1456
+ {
1457
+ "epoch": 7.86,
1458
+ "learning_rate": 2.2988505747126437e-06,
1459
+ "loss": 0.0143,
1460
+ "step": 2280
1461
+ },
1462
+ {
1463
+ "epoch": 7.89,
1464
+ "learning_rate": 1.7241379310344827e-06,
1465
+ "loss": 0.0075,
1466
+ "step": 2290
1467
+ },
1468
+ {
1469
+ "epoch": 7.93,
1470
+ "learning_rate": 1.1494252873563219e-06,
1471
+ "loss": 0.0079,
1472
+ "step": 2300
1473
+ },
1474
+ {
1475
+ "epoch": 7.96,
1476
+ "learning_rate": 5.747126436781609e-07,
1477
+ "loss": 0.0077,
1478
+ "step": 2310
1479
+ },
1480
+ {
1481
+ "epoch": 8.0,
1482
+ "learning_rate": 0.0,
1483
+ "loss": 0.0049,
1484
+ "step": 2320
1485
+ },
1486
+ {
1487
+ "epoch": 8.0,
1488
+ "eval_accuracy": 0.7248574809078198,
1489
+ "eval_f1": 0.7195690317790054,
1490
+ "eval_loss": 1.4531140327453613,
1491
+ "eval_precision": 0.717172031675939,
1492
+ "eval_recall": 0.7248574809078198,
1493
+ "eval_runtime": 105.0016,
1494
+ "eval_samples_per_second": 177.083,
1495
+ "eval_steps_per_second": 2.771,
1496
+ "step": 2320
1497
+ },
1498
+ {
1499
+ "epoch": 8.0,
1500
+ "step": 2320,
1501
+ "total_flos": 4.6098795710207476e+19,
1502
+ "train_loss": 0.3678270006343594,
1503
+ "train_runtime": 6722.1212,
1504
+ "train_samples_per_second": 88.513,
1505
+ "train_steps_per_second": 0.345
1506
  }
1507
  ],
1508
+ "max_steps": 2320,
1509
  "num_train_epochs": 8,
1510
+ "total_flos": 4.6098795710207476e+19,
1511
  "trial_name": null,
1512
  "trial_params": null
1513
  }