amyeroberts HF staff commited on
Commit
22a9302
1 Parent(s): 8862926

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 50.0,
3
- "eval_loss": 6389.54150390625,
4
- "eval_runtime": 0.6156,
5
- "eval_samples_per_second": 12.995,
6
- "eval_steps_per_second": 1.624,
7
- "train_loss": 6297.2521484375,
8
- "train_runtime": 886.6987,
9
- "train_samples_per_second": 1.804,
10
- "train_steps_per_second": 0.226
11
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "eval_loss": 6388.880859375,
4
+ "eval_runtime": 0.6142,
5
+ "eval_samples_per_second": 13.026,
6
+ "eval_steps_per_second": 1.628,
7
+ "train_loss": 6407.14462890625,
8
+ "train_runtime": 869.7842,
9
+ "train_samples_per_second": 1.84,
10
+ "train_steps_per_second": 0.23
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 50.0,
3
- "eval_loss": 6389.54150390625,
4
- "eval_runtime": 0.6156,
5
- "eval_samples_per_second": 12.995,
6
- "eval_steps_per_second": 1.624
7
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "eval_loss": 6388.880859375,
4
+ "eval_runtime": 0.6142,
5
+ "eval_samples_per_second": 13.026,
6
+ "eval_steps_per_second": 1.628
7
  }
runs/Jul20_11-08-08_amy-2-gpu/events.out.tfevents.1689852185.amy-2-gpu.149744.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4afae3a6e0edf71618c4506307a2497aa0aef78b9dde5fcfdaed93ac74aacf01
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 50.0,
3
- "train_loss": 6297.2521484375,
4
- "train_runtime": 886.6987,
5
- "train_samples_per_second": 1.804,
6
- "train_steps_per_second": 0.226
7
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "train_loss": 6407.14462890625,
4
+ "train_runtime": 869.7842,
5
+ "train_samples_per_second": 1.84,
6
+ "train_steps_per_second": 0.23
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 6389.54150390625,
3
- "best_model_checkpoint": "./coco_outputs/checkpoint-196",
4
  "epoch": 50.0,
5
  "global_step": 200,
6
  "is_hyper_param_search": false,
@@ -9,532 +9,532 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 6398.52734375,
13
- "eval_runtime": 0.5289,
14
- "eval_samples_per_second": 15.126,
15
- "eval_steps_per_second": 1.891,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
- "eval_loss": 6397.9267578125,
21
- "eval_runtime": 0.5329,
22
- "eval_samples_per_second": 15.013,
23
- "eval_steps_per_second": 1.877,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
  "learning_rate": 1.9e-05,
29
- "loss": 6175.8695,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 6397.248046875,
35
- "eval_runtime": 0.5325,
36
- "eval_samples_per_second": 15.023,
37
- "eval_steps_per_second": 1.878,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
- "eval_loss": 6396.56494140625,
43
- "eval_runtime": 0.5866,
44
- "eval_samples_per_second": 13.638,
45
- "eval_steps_per_second": 1.705,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
  "learning_rate": 1.8e-05,
51
- "loss": 6292.1676,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
- "eval_loss": 6396.06689453125,
57
- "eval_runtime": 0.5343,
58
- "eval_samples_per_second": 14.974,
59
- "eval_steps_per_second": 1.872,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
- "eval_loss": 6395.65966796875,
65
- "eval_runtime": 0.5271,
66
- "eval_samples_per_second": 15.178,
67
- "eval_steps_per_second": 1.897,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_loss": 6395.27392578125,
73
- "eval_runtime": 0.5241,
74
- "eval_samples_per_second": 15.264,
75
- "eval_steps_per_second": 1.908,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
  "learning_rate": 1.7e-05,
81
- "loss": 6230.1191,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
- "eval_loss": 6394.890625,
87
- "eval_runtime": 0.5365,
88
- "eval_samples_per_second": 14.911,
89
- "eval_steps_per_second": 1.864,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
- "eval_loss": 6394.66796875,
95
- "eval_runtime": 0.5263,
96
- "eval_samples_per_second": 15.201,
97
- "eval_steps_per_second": 1.9,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
  "learning_rate": 1.6000000000000003e-05,
103
- "loss": 6089.0629,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
- "eval_loss": 6394.51513671875,
109
- "eval_runtime": 0.5595,
110
- "eval_samples_per_second": 14.298,
111
- "eval_steps_per_second": 1.787,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 11.0,
116
- "eval_loss": 6394.369140625,
117
- "eval_runtime": 0.5347,
118
- "eval_samples_per_second": 14.961,
119
- "eval_steps_per_second": 1.87,
120
  "step": 44
121
  },
122
  {
123
  "epoch": 12.0,
124
- "eval_loss": 6394.39306640625,
125
- "eval_runtime": 0.6066,
126
- "eval_samples_per_second": 13.188,
127
- "eval_steps_per_second": 1.648,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 12.5,
132
  "learning_rate": 1.5000000000000002e-05,
133
- "loss": 6266.6961,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 13.0,
138
- "eval_loss": 6394.3232421875,
139
- "eval_runtime": 0.5315,
140
- "eval_samples_per_second": 15.053,
141
- "eval_steps_per_second": 1.882,
142
  "step": 52
143
  },
144
  {
145
  "epoch": 14.0,
146
- "eval_loss": 6394.1572265625,
147
- "eval_runtime": 0.5445,
148
- "eval_samples_per_second": 14.694,
149
- "eval_steps_per_second": 1.837,
150
  "step": 56
151
  },
152
  {
153
  "epoch": 15.0,
154
  "learning_rate": 1.4e-05,
155
- "loss": 6383.0688,
156
  "step": 60
157
  },
158
  {
159
  "epoch": 15.0,
160
- "eval_loss": 6394.15576171875,
161
- "eval_runtime": 0.5377,
162
- "eval_samples_per_second": 14.877,
163
- "eval_steps_per_second": 1.86,
164
  "step": 60
165
  },
166
  {
167
  "epoch": 16.0,
168
- "eval_loss": 6393.953125,
169
- "eval_runtime": 0.5413,
170
- "eval_samples_per_second": 14.779,
171
- "eval_steps_per_second": 1.847,
172
  "step": 64
173
  },
174
  {
175
  "epoch": 17.0,
176
- "eval_loss": 6393.77197265625,
177
- "eval_runtime": 0.5527,
178
- "eval_samples_per_second": 14.475,
179
- "eval_steps_per_second": 1.809,
180
  "step": 68
181
  },
182
  {
183
  "epoch": 17.5,
184
  "learning_rate": 1.3000000000000001e-05,
185
- "loss": 6323.732,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 18.0,
190
- "eval_loss": 6393.60498046875,
191
- "eval_runtime": 0.5502,
192
- "eval_samples_per_second": 14.54,
193
- "eval_steps_per_second": 1.817,
194
  "step": 72
195
  },
196
  {
197
  "epoch": 19.0,
198
- "eval_loss": 6393.5107421875,
199
- "eval_runtime": 0.5415,
200
- "eval_samples_per_second": 14.774,
201
- "eval_steps_per_second": 1.847,
202
  "step": 76
203
  },
204
  {
205
  "epoch": 20.0,
206
  "learning_rate": 1.2e-05,
207
- "loss": 6262.8367,
208
  "step": 80
209
  },
210
  {
211
  "epoch": 20.0,
212
- "eval_loss": 6393.23046875,
213
- "eval_runtime": 0.5438,
214
- "eval_samples_per_second": 14.71,
215
- "eval_steps_per_second": 1.839,
216
  "step": 80
217
  },
218
  {
219
  "epoch": 21.0,
220
- "eval_loss": 6393.0869140625,
221
- "eval_runtime": 0.5423,
222
- "eval_samples_per_second": 14.753,
223
- "eval_steps_per_second": 1.844,
224
  "step": 84
225
  },
226
  {
227
  "epoch": 22.0,
228
- "eval_loss": 6392.791015625,
229
- "eval_runtime": 0.5523,
230
- "eval_samples_per_second": 14.486,
231
- "eval_steps_per_second": 1.811,
232
  "step": 88
233
  },
234
  {
235
  "epoch": 22.5,
236
  "learning_rate": 1.1000000000000001e-05,
237
- "loss": 6294.5953,
238
  "step": 90
239
  },
240
  {
241
  "epoch": 23.0,
242
- "eval_loss": 6392.60791015625,
243
- "eval_runtime": 0.5553,
244
- "eval_samples_per_second": 14.407,
245
- "eval_steps_per_second": 1.801,
246
  "step": 92
247
  },
248
  {
249
  "epoch": 24.0,
250
- "eval_loss": 6392.46337890625,
251
- "eval_runtime": 0.542,
252
- "eval_samples_per_second": 14.759,
253
- "eval_steps_per_second": 1.845,
254
  "step": 96
255
  },
256
  {
257
  "epoch": 25.0,
258
  "learning_rate": 1e-05,
259
- "loss": 6454.1902,
260
  "step": 100
261
  },
262
  {
263
  "epoch": 25.0,
264
- "eval_loss": 6392.30859375,
265
- "eval_runtime": 0.5462,
266
- "eval_samples_per_second": 14.647,
267
- "eval_steps_per_second": 1.831,
268
  "step": 100
269
  },
270
  {
271
  "epoch": 26.0,
272
- "eval_loss": 6392.1982421875,
273
- "eval_runtime": 0.5465,
274
- "eval_samples_per_second": 14.64,
275
- "eval_steps_per_second": 1.83,
276
  "step": 104
277
  },
278
  {
279
  "epoch": 27.0,
280
- "eval_loss": 6391.966796875,
281
- "eval_runtime": 0.5565,
282
- "eval_samples_per_second": 14.375,
283
- "eval_steps_per_second": 1.797,
284
  "step": 108
285
  },
286
  {
287
  "epoch": 27.5,
288
  "learning_rate": 9e-06,
289
- "loss": 6325.2328,
290
  "step": 110
291
  },
292
  {
293
  "epoch": 28.0,
294
- "eval_loss": 6391.6533203125,
295
- "eval_runtime": 0.5394,
296
- "eval_samples_per_second": 14.833,
297
- "eval_steps_per_second": 1.854,
298
  "step": 112
299
  },
300
  {
301
  "epoch": 29.0,
302
- "eval_loss": 6391.34814453125,
303
- "eval_runtime": 0.5379,
304
- "eval_samples_per_second": 14.872,
305
- "eval_steps_per_second": 1.859,
306
  "step": 116
307
  },
308
  {
309
  "epoch": 30.0,
310
  "learning_rate": 8.000000000000001e-06,
311
- "loss": 6471.3637,
312
  "step": 120
313
  },
314
  {
315
  "epoch": 30.0,
316
- "eval_loss": 6391.04736328125,
317
- "eval_runtime": 0.5998,
318
- "eval_samples_per_second": 13.338,
319
- "eval_steps_per_second": 1.667,
320
  "step": 120
321
  },
322
  {
323
  "epoch": 31.0,
324
- "eval_loss": 6390.94091796875,
325
- "eval_runtime": 0.5467,
326
- "eval_samples_per_second": 14.632,
327
- "eval_steps_per_second": 1.829,
328
  "step": 124
329
  },
330
  {
331
  "epoch": 32.0,
332
- "eval_loss": 6390.81982421875,
333
- "eval_runtime": 0.5421,
334
- "eval_samples_per_second": 14.759,
335
- "eval_steps_per_second": 1.845,
336
  "step": 128
337
  },
338
  {
339
  "epoch": 32.5,
340
  "learning_rate": 7e-06,
341
- "loss": 6308.2656,
342
  "step": 130
343
  },
344
  {
345
  "epoch": 33.0,
346
- "eval_loss": 6390.673828125,
347
- "eval_runtime": 0.5411,
348
- "eval_samples_per_second": 14.785,
349
- "eval_steps_per_second": 1.848,
350
  "step": 132
351
  },
352
  {
353
  "epoch": 34.0,
354
- "eval_loss": 6390.6689453125,
355
- "eval_runtime": 0.5495,
356
- "eval_samples_per_second": 14.56,
357
- "eval_steps_per_second": 1.82,
358
  "step": 136
359
  },
360
  {
361
  "epoch": 35.0,
362
  "learning_rate": 6e-06,
363
- "loss": 6352.0371,
364
  "step": 140
365
  },
366
  {
367
  "epoch": 35.0,
368
- "eval_loss": 6390.62451171875,
369
- "eval_runtime": 0.5561,
370
- "eval_samples_per_second": 14.385,
371
- "eval_steps_per_second": 1.798,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 36.0,
376
- "eval_loss": 6390.5791015625,
377
- "eval_runtime": 0.5601,
378
- "eval_samples_per_second": 14.282,
379
- "eval_steps_per_second": 1.785,
380
  "step": 144
381
  },
382
  {
383
  "epoch": 37.0,
384
- "eval_loss": 6390.419921875,
385
- "eval_runtime": 0.5447,
386
- "eval_samples_per_second": 14.686,
387
- "eval_steps_per_second": 1.836,
388
  "step": 148
389
  },
390
  {
391
  "epoch": 37.5,
392
  "learning_rate": 5e-06,
393
- "loss": 6247.1492,
394
  "step": 150
395
  },
396
  {
397
  "epoch": 38.0,
398
- "eval_loss": 6390.23046875,
399
- "eval_runtime": 0.5405,
400
- "eval_samples_per_second": 14.8,
401
- "eval_steps_per_second": 1.85,
402
  "step": 152
403
  },
404
  {
405
  "epoch": 39.0,
406
- "eval_loss": 6390.07763671875,
407
- "eval_runtime": 0.5527,
408
- "eval_samples_per_second": 14.473,
409
- "eval_steps_per_second": 1.809,
410
  "step": 156
411
  },
412
  {
413
  "epoch": 40.0,
414
  "learning_rate": 4.000000000000001e-06,
415
- "loss": 6385.748,
416
  "step": 160
417
  },
418
  {
419
  "epoch": 40.0,
420
- "eval_loss": 6389.9873046875,
421
- "eval_runtime": 0.5425,
422
- "eval_samples_per_second": 14.746,
423
- "eval_steps_per_second": 1.843,
424
  "step": 160
425
  },
426
  {
427
  "epoch": 41.0,
428
- "eval_loss": 6389.91845703125,
429
- "eval_runtime": 0.5483,
430
- "eval_samples_per_second": 14.591,
431
- "eval_steps_per_second": 1.824,
432
  "step": 164
433
  },
434
  {
435
  "epoch": 42.0,
436
- "eval_loss": 6389.828125,
437
- "eval_runtime": 0.5625,
438
- "eval_samples_per_second": 14.221,
439
- "eval_steps_per_second": 1.778,
440
  "step": 168
441
  },
442
  {
443
  "epoch": 42.5,
444
  "learning_rate": 3e-06,
445
- "loss": 6262.0703,
446
  "step": 170
447
  },
448
  {
449
  "epoch": 43.0,
450
- "eval_loss": 6389.75,
451
- "eval_runtime": 0.5518,
452
- "eval_samples_per_second": 14.498,
453
- "eval_steps_per_second": 1.812,
454
  "step": 172
455
  },
456
  {
457
  "epoch": 44.0,
458
- "eval_loss": 6389.67919921875,
459
- "eval_runtime": 0.5455,
460
- "eval_samples_per_second": 14.666,
461
- "eval_steps_per_second": 1.833,
462
  "step": 176
463
  },
464
  {
465
  "epoch": 45.0,
466
  "learning_rate": 2.0000000000000003e-06,
467
- "loss": 6256.2105,
468
  "step": 180
469
  },
470
  {
471
  "epoch": 45.0,
472
- "eval_loss": 6389.6376953125,
473
- "eval_runtime": 0.5593,
474
- "eval_samples_per_second": 14.304,
475
- "eval_steps_per_second": 1.788,
476
  "step": 180
477
  },
478
  {
479
  "epoch": 46.0,
480
- "eval_loss": 6389.6201171875,
481
- "eval_runtime": 0.555,
482
- "eval_samples_per_second": 14.413,
483
- "eval_steps_per_second": 1.802,
484
  "step": 184
485
  },
486
  {
487
  "epoch": 47.0,
488
- "eval_loss": 6389.57275390625,
489
- "eval_runtime": 0.5436,
490
- "eval_samples_per_second": 14.718,
491
- "eval_steps_per_second": 1.84,
492
  "step": 188
493
  },
494
  {
495
  "epoch": 47.5,
496
  "learning_rate": 1.0000000000000002e-06,
497
- "loss": 6353.4496,
498
  "step": 190
499
  },
500
  {
501
  "epoch": 48.0,
502
- "eval_loss": 6389.5517578125,
503
- "eval_runtime": 0.55,
504
- "eval_samples_per_second": 14.545,
505
- "eval_steps_per_second": 1.818,
506
  "step": 192
507
  },
508
  {
509
  "epoch": 49.0,
510
- "eval_loss": 6389.54150390625,
511
- "eval_runtime": 0.5412,
512
- "eval_samples_per_second": 14.782,
513
- "eval_steps_per_second": 1.848,
514
  "step": 196
515
  },
516
  {
517
  "epoch": 50.0,
518
  "learning_rate": 0.0,
519
- "loss": 6211.1777,
520
  "step": 200
521
  },
522
  {
523
  "epoch": 50.0,
524
- "eval_loss": 6389.552734375,
525
- "eval_runtime": 0.5488,
526
- "eval_samples_per_second": 14.577,
527
- "eval_steps_per_second": 1.822,
528
  "step": 200
529
  },
530
  {
531
  "epoch": 50.0,
532
  "step": 200,
533
  "total_flos": 7.65002115072e+17,
534
- "train_loss": 6297.2521484375,
535
- "train_runtime": 886.6987,
536
- "train_samples_per_second": 1.804,
537
- "train_steps_per_second": 0.226
538
  }
539
  ],
540
  "max_steps": 200,
 
1
  {
2
+ "best_metric": 6388.880859375,
3
+ "best_model_checkpoint": "./coco_outputs/checkpoint-200",
4
  "epoch": 50.0,
5
  "global_step": 200,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_loss": 6398.4208984375,
13
+ "eval_runtime": 0.5249,
14
+ "eval_samples_per_second": 15.242,
15
+ "eval_steps_per_second": 1.905,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
+ "eval_loss": 6397.8583984375,
21
+ "eval_runtime": 0.5335,
22
+ "eval_samples_per_second": 14.995,
23
+ "eval_steps_per_second": 1.874,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
  "learning_rate": 1.9e-05,
29
+ "loss": 6370.2805,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
+ "eval_loss": 6397.47900390625,
35
+ "eval_runtime": 0.5389,
36
+ "eval_samples_per_second": 14.844,
37
+ "eval_steps_per_second": 1.855,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
+ "eval_loss": 6396.83203125,
43
+ "eval_runtime": 0.5405,
44
+ "eval_samples_per_second": 14.802,
45
+ "eval_steps_per_second": 1.85,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
  "learning_rate": 1.8e-05,
51
+ "loss": 6424.3547,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
+ "eval_loss": 6396.29296875,
57
+ "eval_runtime": 0.5443,
58
+ "eval_samples_per_second": 14.697,
59
+ "eval_steps_per_second": 1.837,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
+ "eval_loss": 6395.72314453125,
65
+ "eval_runtime": 0.5415,
66
+ "eval_samples_per_second": 14.775,
67
+ "eval_steps_per_second": 1.847,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_loss": 6395.1376953125,
73
+ "eval_runtime": 0.5498,
74
+ "eval_samples_per_second": 14.55,
75
+ "eval_steps_per_second": 1.819,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
  "learning_rate": 1.7e-05,
81
+ "loss": 6477.4051,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
+ "eval_loss": 6394.81884765625,
87
+ "eval_runtime": 0.5491,
88
+ "eval_samples_per_second": 14.568,
89
+ "eval_steps_per_second": 1.821,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
+ "eval_loss": 6394.478515625,
95
+ "eval_runtime": 0.5553,
96
+ "eval_samples_per_second": 14.407,
97
+ "eval_steps_per_second": 1.801,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
  "learning_rate": 1.6000000000000003e-05,
103
+ "loss": 6381.9848,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
+ "eval_loss": 6394.22119140625,
109
+ "eval_runtime": 0.5529,
110
+ "eval_samples_per_second": 14.469,
111
+ "eval_steps_per_second": 1.809,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 11.0,
116
+ "eval_loss": 6394.06640625,
117
+ "eval_runtime": 0.5511,
118
+ "eval_samples_per_second": 14.516,
119
+ "eval_steps_per_second": 1.814,
120
  "step": 44
121
  },
122
  {
123
  "epoch": 12.0,
124
+ "eval_loss": 6393.92529296875,
125
+ "eval_runtime": 0.5545,
126
+ "eval_samples_per_second": 14.426,
127
+ "eval_steps_per_second": 1.803,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 12.5,
132
  "learning_rate": 1.5000000000000002e-05,
133
+ "loss": 6343.784,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 13.0,
138
+ "eval_loss": 6393.734375,
139
+ "eval_runtime": 0.552,
140
+ "eval_samples_per_second": 14.494,
141
+ "eval_steps_per_second": 1.812,
142
  "step": 52
143
  },
144
  {
145
  "epoch": 14.0,
146
+ "eval_loss": 6393.61279296875,
147
+ "eval_runtime": 0.5519,
148
+ "eval_samples_per_second": 14.496,
149
+ "eval_steps_per_second": 1.812,
150
  "step": 56
151
  },
152
  {
153
  "epoch": 15.0,
154
  "learning_rate": 1.4e-05,
155
+ "loss": 6458.8668,
156
  "step": 60
157
  },
158
  {
159
  "epoch": 15.0,
160
+ "eval_loss": 6393.43798828125,
161
+ "eval_runtime": 0.5498,
162
+ "eval_samples_per_second": 14.551,
163
+ "eval_steps_per_second": 1.819,
164
  "step": 60
165
  },
166
  {
167
  "epoch": 16.0,
168
+ "eval_loss": 6393.2392578125,
169
+ "eval_runtime": 0.5467,
170
+ "eval_samples_per_second": 14.633,
171
+ "eval_steps_per_second": 1.829,
172
  "step": 64
173
  },
174
  {
175
  "epoch": 17.0,
176
+ "eval_loss": 6393.10400390625,
177
+ "eval_runtime": 0.546,
178
+ "eval_samples_per_second": 14.651,
179
+ "eval_steps_per_second": 1.831,
180
  "step": 68
181
  },
182
  {
183
  "epoch": 17.5,
184
  "learning_rate": 1.3000000000000001e-05,
185
+ "loss": 6414.077,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 18.0,
190
+ "eval_loss": 6392.95751953125,
191
+ "eval_runtime": 0.5445,
192
+ "eval_samples_per_second": 14.693,
193
+ "eval_steps_per_second": 1.837,
194
  "step": 72
195
  },
196
  {
197
  "epoch": 19.0,
198
+ "eval_loss": 6392.830078125,
199
+ "eval_runtime": 0.553,
200
+ "eval_samples_per_second": 14.467,
201
+ "eval_steps_per_second": 1.808,
202
  "step": 76
203
  },
204
  {
205
  "epoch": 20.0,
206
  "learning_rate": 1.2e-05,
207
+ "loss": 6417.8516,
208
  "step": 80
209
  },
210
  {
211
  "epoch": 20.0,
212
+ "eval_loss": 6392.3056640625,
213
+ "eval_runtime": 0.5433,
214
+ "eval_samples_per_second": 14.725,
215
+ "eval_steps_per_second": 1.841,
216
  "step": 80
217
  },
218
  {
219
  "epoch": 21.0,
220
+ "eval_loss": 6391.8310546875,
221
+ "eval_runtime": 0.5461,
222
+ "eval_samples_per_second": 14.65,
223
+ "eval_steps_per_second": 1.831,
224
  "step": 84
225
  },
226
  {
227
  "epoch": 22.0,
228
+ "eval_loss": 6391.55322265625,
229
+ "eval_runtime": 0.5499,
230
+ "eval_samples_per_second": 14.547,
231
+ "eval_steps_per_second": 1.818,
232
  "step": 88
233
  },
234
  {
235
  "epoch": 22.5,
236
  "learning_rate": 1.1000000000000001e-05,
237
+ "loss": 6333.3547,
238
  "step": 90
239
  },
240
  {
241
  "epoch": 23.0,
242
+ "eval_loss": 6391.34033203125,
243
+ "eval_runtime": 0.5549,
244
+ "eval_samples_per_second": 14.418,
245
+ "eval_steps_per_second": 1.802,
246
  "step": 92
247
  },
248
  {
249
  "epoch": 24.0,
250
+ "eval_loss": 6391.193359375,
251
+ "eval_runtime": 0.5535,
252
+ "eval_samples_per_second": 14.454,
253
+ "eval_steps_per_second": 1.807,
254
  "step": 96
255
  },
256
  {
257
  "epoch": 25.0,
258
  "learning_rate": 1e-05,
259
+ "loss": 6455.1539,
260
  "step": 100
261
  },
262
  {
263
  "epoch": 25.0,
264
+ "eval_loss": 6390.97412109375,
265
+ "eval_runtime": 0.5532,
266
+ "eval_samples_per_second": 14.46,
267
+ "eval_steps_per_second": 1.808,
268
  "step": 100
269
  },
270
  {
271
  "epoch": 26.0,
272
+ "eval_loss": 6390.82275390625,
273
+ "eval_runtime": 0.562,
274
+ "eval_samples_per_second": 14.234,
275
+ "eval_steps_per_second": 1.779,
276
  "step": 104
277
  },
278
  {
279
  "epoch": 27.0,
280
+ "eval_loss": 6390.7607421875,
281
+ "eval_runtime": 0.5645,
282
+ "eval_samples_per_second": 14.171,
283
+ "eval_steps_per_second": 1.771,
284
  "step": 108
285
  },
286
  {
287
  "epoch": 27.5,
288
  "learning_rate": 9e-06,
289
+ "loss": 6399.7898,
290
  "step": 110
291
  },
292
  {
293
  "epoch": 28.0,
294
+ "eval_loss": 6390.66552734375,
295
+ "eval_runtime": 0.558,
296
+ "eval_samples_per_second": 14.338,
297
+ "eval_steps_per_second": 1.792,
298
  "step": 112
299
  },
300
  {
301
  "epoch": 29.0,
302
+ "eval_loss": 6390.5859375,
303
+ "eval_runtime": 0.5544,
304
+ "eval_samples_per_second": 14.429,
305
+ "eval_steps_per_second": 1.804,
306
  "step": 116
307
  },
308
  {
309
  "epoch": 30.0,
310
  "learning_rate": 8.000000000000001e-06,
311
+ "loss": 6410.9336,
312
  "step": 120
313
  },
314
  {
315
  "epoch": 30.0,
316
+ "eval_loss": 6390.49072265625,
317
+ "eval_runtime": 0.5598,
318
+ "eval_samples_per_second": 14.291,
319
+ "eval_steps_per_second": 1.786,
320
  "step": 120
321
  },
322
  {
323
  "epoch": 31.0,
324
+ "eval_loss": 6390.3388671875,
325
+ "eval_runtime": 0.562,
326
+ "eval_samples_per_second": 14.236,
327
+ "eval_steps_per_second": 1.779,
328
  "step": 124
329
  },
330
  {
331
  "epoch": 32.0,
332
+ "eval_loss": 6390.19775390625,
333
+ "eval_runtime": 0.546,
334
+ "eval_samples_per_second": 14.652,
335
+ "eval_steps_per_second": 1.831,
336
  "step": 128
337
  },
338
  {
339
  "epoch": 32.5,
340
  "learning_rate": 7e-06,
341
+ "loss": 6409.2,
342
  "step": 130
343
  },
344
  {
345
  "epoch": 33.0,
346
+ "eval_loss": 6390.0341796875,
347
+ "eval_runtime": 0.5441,
348
+ "eval_samples_per_second": 14.704,
349
+ "eval_steps_per_second": 1.838,
350
  "step": 132
351
  },
352
  {
353
  "epoch": 34.0,
354
+ "eval_loss": 6389.96240234375,
355
+ "eval_runtime": 0.5472,
356
+ "eval_samples_per_second": 14.619,
357
+ "eval_steps_per_second": 1.827,
358
  "step": 136
359
  },
360
  {
361
  "epoch": 35.0,
362
  "learning_rate": 6e-06,
363
+ "loss": 6406.6211,
364
  "step": 140
365
  },
366
  {
367
  "epoch": 35.0,
368
+ "eval_loss": 6389.9111328125,
369
+ "eval_runtime": 0.5514,
370
+ "eval_samples_per_second": 14.509,
371
+ "eval_steps_per_second": 1.814,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 36.0,
376
+ "eval_loss": 6389.6875,
377
+ "eval_runtime": 0.5453,
378
+ "eval_samples_per_second": 14.672,
379
+ "eval_steps_per_second": 1.834,
380
  "step": 144
381
  },
382
  {
383
  "epoch": 37.0,
384
+ "eval_loss": 6389.4755859375,
385
+ "eval_runtime": 0.5552,
386
+ "eval_samples_per_second": 14.409,
387
+ "eval_steps_per_second": 1.801,
388
  "step": 148
389
  },
390
  {
391
  "epoch": 37.5,
392
  "learning_rate": 5e-06,
393
+ "loss": 6371.1539,
394
  "step": 150
395
  },
396
  {
397
  "epoch": 38.0,
398
+ "eval_loss": 6389.3515625,
399
+ "eval_runtime": 0.5423,
400
+ "eval_samples_per_second": 14.751,
401
+ "eval_steps_per_second": 1.844,
402
  "step": 152
403
  },
404
  {
405
  "epoch": 39.0,
406
+ "eval_loss": 6389.26953125,
407
+ "eval_runtime": 0.5522,
408
+ "eval_samples_per_second": 14.487,
409
+ "eval_steps_per_second": 1.811,
410
  "step": 156
411
  },
412
  {
413
  "epoch": 40.0,
414
  "learning_rate": 4.000000000000001e-06,
415
+ "loss": 6409.1055,
416
  "step": 160
417
  },
418
  {
419
  "epoch": 40.0,
420
+ "eval_loss": 6389.24951171875,
421
+ "eval_runtime": 0.5572,
422
+ "eval_samples_per_second": 14.356,
423
+ "eval_steps_per_second": 1.795,
424
  "step": 160
425
  },
426
  {
427
  "epoch": 41.0,
428
+ "eval_loss": 6389.208984375,
429
+ "eval_runtime": 0.5378,
430
+ "eval_samples_per_second": 14.876,
431
+ "eval_steps_per_second": 1.86,
432
  "step": 164
433
  },
434
  {
435
  "epoch": 42.0,
436
+ "eval_loss": 6389.10986328125,
437
+ "eval_runtime": 0.539,
438
+ "eval_samples_per_second": 14.841,
439
+ "eval_steps_per_second": 1.855,
440
  "step": 168
441
  },
442
  {
443
  "epoch": 42.5,
444
  "learning_rate": 3e-06,
445
+ "loss": 6453.5285,
446
  "step": 170
447
  },
448
  {
449
  "epoch": 43.0,
450
+ "eval_loss": 6389.04052734375,
451
+ "eval_runtime": 0.54,
452
+ "eval_samples_per_second": 14.815,
453
+ "eval_steps_per_second": 1.852,
454
  "step": 172
455
  },
456
  {
457
  "epoch": 44.0,
458
+ "eval_loss": 6388.99365234375,
459
+ "eval_runtime": 0.5364,
460
+ "eval_samples_per_second": 14.915,
461
+ "eval_steps_per_second": 1.864,
462
  "step": 176
463
  },
464
  {
465
  "epoch": 45.0,
466
  "learning_rate": 2.0000000000000003e-06,
467
+ "loss": 6391.1004,
468
  "step": 180
469
  },
470
  {
471
  "epoch": 45.0,
472
+ "eval_loss": 6388.9541015625,
473
+ "eval_runtime": 0.5495,
474
+ "eval_samples_per_second": 14.558,
475
+ "eval_steps_per_second": 1.82,
476
  "step": 180
477
  },
478
  {
479
  "epoch": 46.0,
480
+ "eval_loss": 6388.923828125,
481
+ "eval_runtime": 0.5496,
482
+ "eval_samples_per_second": 14.557,
483
+ "eval_steps_per_second": 1.82,
484
  "step": 184
485
  },
486
  {
487
  "epoch": 47.0,
488
+ "eval_loss": 6388.90771484375,
489
+ "eval_runtime": 0.5379,
490
+ "eval_samples_per_second": 14.871,
491
+ "eval_steps_per_second": 1.859,
492
  "step": 188
493
  },
494
  {
495
  "epoch": 47.5,
496
  "learning_rate": 1.0000000000000002e-06,
497
+ "loss": 6416.6641,
498
  "step": 190
499
  },
500
  {
501
  "epoch": 48.0,
502
+ "eval_loss": 6388.89111328125,
503
+ "eval_runtime": 0.5385,
504
+ "eval_samples_per_second": 14.857,
505
+ "eval_steps_per_second": 1.857,
506
  "step": 192
507
  },
508
  {
509
  "epoch": 49.0,
510
+ "eval_loss": 6388.8828125,
511
+ "eval_runtime": 0.5466,
512
+ "eval_samples_per_second": 14.635,
513
+ "eval_steps_per_second": 1.829,
514
  "step": 196
515
  },
516
  {
517
  "epoch": 50.0,
518
  "learning_rate": 0.0,
519
+ "loss": 6397.6828,
520
  "step": 200
521
  },
522
  {
523
  "epoch": 50.0,
524
+ "eval_loss": 6388.880859375,
525
+ "eval_runtime": 0.5601,
526
+ "eval_samples_per_second": 14.283,
527
+ "eval_steps_per_second": 1.785,
528
  "step": 200
529
  },
530
  {
531
  "epoch": 50.0,
532
  "step": 200,
533
  "total_flos": 7.65002115072e+17,
534
+ "train_loss": 6407.14462890625,
535
+ "train_runtime": 869.7842,
536
+ "train_samples_per_second": 1.84,
537
+ "train_steps_per_second": 0.23
538
  }
539
  ],
540
  "max_steps": 200,