amyeroberts HF staff commited on
Commit
d7f94bc
1 Parent(s): ac112ba

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 6719.021484375,
4
- "eval_runtime": 17.2616,
5
- "eval_samples_per_second": 14.831,
6
- "eval_steps_per_second": 1.854,
7
- "train_loss": 6244.167712402344,
8
- "train_runtime": 818.3257,
9
- "train_samples_per_second": 6.257,
10
- "train_steps_per_second": 0.782
11
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 6729.03466796875,
4
+ "eval_runtime": 16.97,
5
+ "eval_samples_per_second": 15.085,
6
+ "eval_steps_per_second": 1.886,
7
+ "train_loss": 6251.508282470703,
8
+ "train_runtime": 838.0786,
9
+ "train_samples_per_second": 6.109,
10
+ "train_steps_per_second": 0.764
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 6719.021484375,
4
- "eval_runtime": 17.2616,
5
- "eval_samples_per_second": 14.831,
6
- "eval_steps_per_second": 1.854
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 6729.03466796875,
4
+ "eval_runtime": 16.97,
5
+ "eval_samples_per_second": 15.085,
6
+ "eval_steps_per_second": 1.886
7
  }
runs/Jul19_10-09-01_amy-2-gpu/events.out.tfevents.1689762220.amy-2-gpu.72833.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb8f53bea92c273574b0a3dd53e94d58fcd45fd1de84c546b47fb6c520c3c03
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 6244.167712402344,
4
- "train_runtime": 818.3257,
5
- "train_samples_per_second": 6.257,
6
- "train_steps_per_second": 0.782
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 6251.508282470703,
4
+ "train_runtime": 838.0786,
5
+ "train_samples_per_second": 6.109,
6
+ "train_steps_per_second": 0.764
7
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 6719.021484375,
3
  "best_model_checkpoint": "./coco_outputs/checkpoint-640",
4
  "epoch": 5.0,
5
  "global_step": 640,
@@ -9,436 +9,436 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
- "learning_rate": 1.96875e-05,
13
- "loss": 6346.0008,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.16,
18
- "learning_rate": 1.9375e-05,
19
- "loss": 6400.0195,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
- "learning_rate": 1.9062500000000003e-05,
25
- "loss": 6295.0742,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
- "learning_rate": 1.8750000000000002e-05,
31
- "loss": 6280.2168,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.39,
36
- "learning_rate": 1.84375e-05,
37
- "loss": 6138.5656,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.47,
42
- "learning_rate": 1.8125e-05,
43
- "loss": 6088.3699,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.55,
48
- "learning_rate": 1.7812500000000003e-05,
49
- "loss": 6224.8137,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
- "learning_rate": 1.7500000000000002e-05,
55
- "loss": 6147.6773,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.7,
60
- "learning_rate": 1.71875e-05,
61
- "loss": 6426.423,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.78,
66
- "learning_rate": 1.6875e-05,
67
- "loss": 6195.2883,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.86,
72
- "learning_rate": 1.6562500000000003e-05,
73
- "loss": 6409.0055,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.94,
78
- "learning_rate": 1.6250000000000002e-05,
79
- "loss": 6457.3832,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
- "eval_loss": 6720.3701171875,
85
- "eval_runtime": 16.7008,
86
- "eval_samples_per_second": 15.329,
87
- "eval_steps_per_second": 1.916,
88
  "step": 128
89
  },
90
  {
91
  "epoch": 1.02,
92
- "learning_rate": 1.59375e-05,
93
- "loss": 6196.2672,
94
  "step": 130
95
  },
96
  {
97
  "epoch": 1.09,
98
- "learning_rate": 1.5625e-05,
99
- "loss": 6187.7188,
100
  "step": 140
101
  },
102
  {
103
  "epoch": 1.17,
104
- "learning_rate": 1.5312500000000003e-05,
105
- "loss": 6357.3324,
106
  "step": 150
107
  },
108
  {
109
  "epoch": 1.25,
110
- "learning_rate": 1.5000000000000002e-05,
111
- "loss": 6108.043,
112
  "step": 160
113
  },
114
  {
115
  "epoch": 1.33,
116
- "learning_rate": 1.4687500000000001e-05,
117
- "loss": 6499.2922,
118
  "step": 170
119
  },
120
  {
121
  "epoch": 1.41,
122
- "learning_rate": 1.4375e-05,
123
- "loss": 6227.1672,
124
  "step": 180
125
  },
126
  {
127
  "epoch": 1.48,
128
- "learning_rate": 1.4062500000000001e-05,
129
- "loss": 6416.4754,
130
  "step": 190
131
  },
132
  {
133
  "epoch": 1.56,
134
- "learning_rate": 1.375e-05,
135
- "loss": 6024.552,
136
  "step": 200
137
  },
138
  {
139
  "epoch": 1.64,
140
- "learning_rate": 1.3437500000000001e-05,
141
- "loss": 5964.7684,
142
  "step": 210
143
  },
144
  {
145
  "epoch": 1.72,
146
- "learning_rate": 1.3125e-05,
147
- "loss": 5876.4055,
148
  "step": 220
149
  },
150
  {
151
  "epoch": 1.8,
152
- "learning_rate": 1.2812500000000001e-05,
153
- "loss": 6149.1523,
154
  "step": 230
155
  },
156
  {
157
  "epoch": 1.88,
158
- "learning_rate": 1.25e-05,
159
- "loss": 6330.3543,
160
  "step": 240
161
  },
162
  {
163
  "epoch": 1.95,
164
- "learning_rate": 1.2187500000000001e-05,
165
- "loss": 6234.0246,
166
  "step": 250
167
  },
168
  {
169
  "epoch": 2.0,
170
- "eval_loss": 6719.166015625,
171
- "eval_runtime": 16.7578,
172
- "eval_samples_per_second": 15.276,
173
- "eval_steps_per_second": 1.91,
174
  "step": 256
175
  },
176
  {
177
  "epoch": 2.03,
178
- "learning_rate": 1.1875e-05,
179
- "loss": 6279.7289,
180
  "step": 260
181
  },
182
  {
183
  "epoch": 2.11,
184
- "learning_rate": 1.1562500000000002e-05,
185
- "loss": 6253.7887,
186
  "step": 270
187
  },
188
  {
189
  "epoch": 2.19,
190
- "learning_rate": 1.125e-05,
191
- "loss": 6329.3629,
192
  "step": 280
193
  },
194
  {
195
  "epoch": 2.27,
196
- "learning_rate": 1.0937500000000002e-05,
197
- "loss": 6185.052,
198
  "step": 290
199
  },
200
  {
201
  "epoch": 2.34,
202
- "learning_rate": 1.0625e-05,
203
- "loss": 5918.475,
204
  "step": 300
205
  },
206
  {
207
  "epoch": 2.42,
208
- "learning_rate": 1.0312500000000002e-05,
209
- "loss": 6114.2844,
210
  "step": 310
211
  },
212
  {
213
  "epoch": 2.5,
214
- "learning_rate": 1e-05,
215
- "loss": 6149.1941,
216
  "step": 320
217
  },
218
  {
219
  "epoch": 2.58,
220
- "learning_rate": 9.6875e-06,
221
- "loss": 6154.8313,
222
  "step": 330
223
  },
224
  {
225
  "epoch": 2.66,
226
- "learning_rate": 9.375000000000001e-06,
227
- "loss": 6146.2449,
228
  "step": 340
229
  },
230
  {
231
  "epoch": 2.73,
232
- "learning_rate": 9.0625e-06,
233
- "loss": 6099.1227,
234
  "step": 350
235
  },
236
  {
237
  "epoch": 2.81,
238
- "learning_rate": 8.750000000000001e-06,
239
- "loss": 6083.9359,
240
  "step": 360
241
  },
242
  {
243
  "epoch": 2.89,
244
- "learning_rate": 8.4375e-06,
245
- "loss": 6283.482,
246
  "step": 370
247
  },
248
  {
249
  "epoch": 2.97,
250
- "learning_rate": 8.125000000000001e-06,
251
- "loss": 6335.5293,
252
  "step": 380
253
  },
254
  {
255
  "epoch": 3.0,
256
- "eval_loss": 6719.03076171875,
257
- "eval_runtime": 16.7226,
258
- "eval_samples_per_second": 15.309,
259
- "eval_steps_per_second": 1.914,
260
  "step": 384
261
  },
262
  {
263
  "epoch": 3.05,
264
- "learning_rate": 7.8125e-06,
265
- "loss": 6466.4867,
266
  "step": 390
267
  },
268
  {
269
  "epoch": 3.12,
270
- "learning_rate": 7.500000000000001e-06,
271
- "loss": 6327.5625,
272
  "step": 400
273
  },
274
  {
275
  "epoch": 3.2,
276
- "learning_rate": 7.1875e-06,
277
- "loss": 6213.9516,
278
  "step": 410
279
  },
280
  {
281
  "epoch": 3.28,
282
- "learning_rate": 6.875e-06,
283
- "loss": 6337.8066,
284
  "step": 420
285
  },
286
  {
287
  "epoch": 3.36,
288
- "learning_rate": 6.5625e-06,
289
- "loss": 6188.782,
290
  "step": 430
291
  },
292
  {
293
  "epoch": 3.44,
294
- "learning_rate": 6.25e-06,
295
- "loss": 6432.0715,
296
  "step": 440
297
  },
298
  {
299
  "epoch": 3.52,
300
- "learning_rate": 5.9375e-06,
301
- "loss": 6178.6059,
302
  "step": 450
303
  },
304
  {
305
  "epoch": 3.59,
306
- "learning_rate": 5.625e-06,
307
- "loss": 6356.5617,
308
  "step": 460
309
  },
310
  {
311
  "epoch": 3.67,
312
- "learning_rate": 5.3125e-06,
313
- "loss": 6572.6281,
314
  "step": 470
315
  },
316
  {
317
  "epoch": 3.75,
318
- "learning_rate": 5e-06,
319
- "loss": 6376.5836,
320
  "step": 480
321
  },
322
  {
323
  "epoch": 3.83,
324
- "learning_rate": 4.6875000000000004e-06,
325
- "loss": 6186.7969,
326
  "step": 490
327
  },
328
  {
329
  "epoch": 3.91,
330
- "learning_rate": 4.3750000000000005e-06,
331
- "loss": 6402.0125,
332
  "step": 500
333
  },
334
  {
335
  "epoch": 3.98,
336
- "learning_rate": 4.0625000000000005e-06,
337
- "loss": 6520.8047,
338
  "step": 510
339
  },
340
  {
341
  "epoch": 4.0,
342
- "eval_loss": 6719.060546875,
343
- "eval_runtime": 16.8687,
344
- "eval_samples_per_second": 15.176,
345
- "eval_steps_per_second": 1.897,
346
  "step": 512
347
  },
348
  {
349
  "epoch": 4.06,
350
- "learning_rate": 3.7500000000000005e-06,
351
- "loss": 6016.2836,
352
  "step": 520
353
  },
354
  {
355
  "epoch": 4.14,
356
- "learning_rate": 3.4375e-06,
357
- "loss": 5934.718,
358
  "step": 530
359
  },
360
  {
361
  "epoch": 4.22,
362
- "learning_rate": 3.125e-06,
363
- "loss": 6232.1816,
364
  "step": 540
365
  },
366
  {
367
  "epoch": 4.3,
368
- "learning_rate": 2.8125e-06,
369
- "loss": 6584.9633,
370
  "step": 550
371
  },
372
  {
373
  "epoch": 4.38,
374
- "learning_rate": 2.5e-06,
375
- "loss": 6533.6297,
376
  "step": 560
377
  },
378
  {
379
  "epoch": 4.45,
380
- "learning_rate": 2.1875000000000002e-06,
381
- "loss": 6380.1676,
382
  "step": 570
383
  },
384
  {
385
  "epoch": 4.53,
386
- "learning_rate": 1.8750000000000003e-06,
387
- "loss": 6350.5738,
388
  "step": 580
389
  },
390
  {
391
  "epoch": 4.61,
392
- "learning_rate": 1.5625e-06,
393
- "loss": 5988.5664,
394
  "step": 590
395
  },
396
  {
397
  "epoch": 4.69,
398
- "learning_rate": 1.25e-06,
399
- "loss": 6169.0441,
400
  "step": 600
401
  },
402
  {
403
  "epoch": 4.77,
404
- "learning_rate": 9.375000000000001e-07,
405
- "loss": 6024.5156,
406
  "step": 610
407
  },
408
  {
409
  "epoch": 4.84,
410
- "learning_rate": 6.25e-07,
411
- "loss": 6515.2469,
412
  "step": 620
413
  },
414
  {
415
  "epoch": 4.92,
416
- "learning_rate": 3.125e-07,
417
- "loss": 6284.4926,
418
  "step": 630
419
  },
420
  {
421
  "epoch": 5.0,
422
  "learning_rate": 0.0,
423
- "loss": 5738.273,
424
  "step": 640
425
  },
426
  {
427
  "epoch": 5.0,
428
- "eval_loss": 6719.021484375,
429
- "eval_runtime": 17.1732,
430
- "eval_samples_per_second": 14.907,
431
- "eval_steps_per_second": 1.863,
432
  "step": 640
433
  },
434
  {
435
  "epoch": 5.0,
436
  "step": 640,
437
  "total_flos": 2.4480067682304e+18,
438
- "train_loss": 6244.167712402344,
439
- "train_runtime": 818.3257,
440
- "train_samples_per_second": 6.257,
441
- "train_steps_per_second": 0.782
442
  }
443
  ],
444
  "max_steps": 640,
 
1
  {
2
+ "best_metric": 6729.03466796875,
3
  "best_model_checkpoint": "./coco_outputs/checkpoint-640",
4
  "epoch": 5.0,
5
  "global_step": 640,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
+ "learning_rate": 1.96875e-07,
13
+ "loss": 6346.4363,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.16,
18
+ "learning_rate": 1.9375e-07,
19
+ "loss": 6401.3863,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
+ "learning_rate": 1.90625e-07,
25
+ "loss": 6297.2812,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
+ "learning_rate": 1.875e-07,
31
+ "loss": 6283.0867,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.39,
36
+ "learning_rate": 1.8437499999999998e-07,
37
+ "loss": 6141.998,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.47,
42
+ "learning_rate": 1.8124999999999999e-07,
43
+ "loss": 6092.3895,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.55,
48
+ "learning_rate": 1.78125e-07,
49
+ "loss": 6228.5742,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
+ "learning_rate": 1.75e-07,
55
+ "loss": 6152.1148,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.7,
60
+ "learning_rate": 1.71875e-07,
61
+ "loss": 6431.5375,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.78,
66
+ "learning_rate": 1.6875e-07,
67
+ "loss": 6201.1875,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.86,
72
+ "learning_rate": 1.65625e-07,
73
+ "loss": 6414.7582,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.94,
78
+ "learning_rate": 1.6249999999999998e-07,
79
+ "loss": 6463.6664,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_loss": 6729.2021484375,
85
+ "eval_runtime": 17.4768,
86
+ "eval_samples_per_second": 14.648,
87
+ "eval_steps_per_second": 1.831,
88
  "step": 128
89
  },
90
  {
91
  "epoch": 1.02,
92
+ "learning_rate": 1.5937499999999998e-07,
93
+ "loss": 6203.2496,
94
  "step": 130
95
  },
96
  {
97
  "epoch": 1.09,
98
+ "learning_rate": 1.5624999999999999e-07,
99
+ "loss": 6195.1863,
100
  "step": 140
101
  },
102
  {
103
  "epoch": 1.17,
104
+ "learning_rate": 1.53125e-07,
105
+ "loss": 6365.1023,
106
  "step": 150
107
  },
108
  {
109
  "epoch": 1.25,
110
+ "learning_rate": 1.5e-07,
111
+ "loss": 6115.4832,
112
  "step": 160
113
  },
114
  {
115
  "epoch": 1.33,
116
+ "learning_rate": 1.46875e-07,
117
+ "loss": 6507.116,
118
  "step": 170
119
  },
120
  {
121
  "epoch": 1.41,
122
+ "learning_rate": 1.4375e-07,
123
+ "loss": 6235.0008,
124
  "step": 180
125
  },
126
  {
127
  "epoch": 1.48,
128
+ "learning_rate": 1.40625e-07,
129
+ "loss": 6424.7523,
130
  "step": 190
131
  },
132
  {
133
  "epoch": 1.56,
134
+ "learning_rate": 1.375e-07,
135
+ "loss": 6032.757,
136
  "step": 200
137
  },
138
  {
139
  "epoch": 1.64,
140
+ "learning_rate": 1.3437499999999998e-07,
141
+ "loss": 5972.9164,
142
  "step": 210
143
  },
144
  {
145
  "epoch": 1.72,
146
+ "learning_rate": 1.3125e-07,
147
+ "loss": 5884.6973,
148
  "step": 220
149
  },
150
  {
151
  "epoch": 1.8,
152
+ "learning_rate": 1.28125e-07,
153
+ "loss": 6157.3324,
154
  "step": 230
155
  },
156
  {
157
  "epoch": 1.88,
158
+ "learning_rate": 1.25e-07,
159
+ "loss": 6338.2691,
160
  "step": 240
161
  },
162
  {
163
  "epoch": 1.95,
164
+ "learning_rate": 1.21875e-07,
165
+ "loss": 6242.4801,
166
  "step": 250
167
  },
168
  {
169
  "epoch": 2.0,
170
+ "eval_loss": 6729.1201171875,
171
+ "eval_runtime": 17.3704,
172
+ "eval_samples_per_second": 14.738,
173
+ "eval_steps_per_second": 1.842,
174
  "step": 256
175
  },
176
  {
177
  "epoch": 2.03,
178
+ "learning_rate": 1.1875e-07,
179
+ "loss": 6287.9883,
180
  "step": 260
181
  },
182
  {
183
  "epoch": 2.11,
184
+ "learning_rate": 1.1562499999999999e-07,
185
+ "loss": 6261.4707,
186
  "step": 270
187
  },
188
  {
189
  "epoch": 2.19,
190
+ "learning_rate": 1.125e-07,
191
+ "loss": 6337.7199,
192
  "step": 280
193
  },
194
  {
195
  "epoch": 2.27,
196
+ "learning_rate": 1.09375e-07,
197
+ "loss": 6193.377,
198
  "step": 290
199
  },
200
  {
201
  "epoch": 2.34,
202
+ "learning_rate": 1.0624999999999999e-07,
203
+ "loss": 5926.5789,
204
  "step": 300
205
  },
206
  {
207
  "epoch": 2.42,
208
+ "learning_rate": 1.0312499999999999e-07,
209
+ "loss": 6122.5051,
210
  "step": 310
211
  },
212
  {
213
  "epoch": 2.5,
214
+ "learning_rate": 1e-07,
215
+ "loss": 6157.1656,
216
  "step": 320
217
  },
218
  {
219
  "epoch": 2.58,
220
+ "learning_rate": 9.6875e-08,
221
+ "loss": 6162.8938,
222
  "step": 330
223
  },
224
  {
225
  "epoch": 2.66,
226
+ "learning_rate": 9.375e-08,
227
+ "loss": 6154.5023,
228
  "step": 340
229
  },
230
  {
231
  "epoch": 2.73,
232
+ "learning_rate": 9.062499999999999e-08,
233
+ "loss": 6107.1387,
234
  "step": 350
235
  },
236
  {
237
  "epoch": 2.81,
238
+ "learning_rate": 8.75e-08,
239
+ "loss": 6092.2199,
240
  "step": 360
241
  },
242
  {
243
  "epoch": 2.89,
244
+ "learning_rate": 8.4375e-08,
245
+ "loss": 6291.2375,
246
  "step": 370
247
  },
248
  {
249
  "epoch": 2.97,
250
+ "learning_rate": 8.124999999999999e-08,
251
+ "loss": 6344.1949,
252
  "step": 380
253
  },
254
  {
255
  "epoch": 3.0,
256
+ "eval_loss": 6729.05859375,
257
+ "eval_runtime": 17.488,
258
+ "eval_samples_per_second": 14.639,
259
+ "eval_steps_per_second": 1.83,
260
  "step": 384
261
  },
262
  {
263
  "epoch": 3.05,
264
+ "learning_rate": 7.812499999999999e-08,
265
+ "loss": 6475.0023,
266
  "step": 390
267
  },
268
  {
269
  "epoch": 3.12,
270
+ "learning_rate": 7.5e-08,
271
+ "loss": 6335.7672,
272
  "step": 400
273
  },
274
  {
275
  "epoch": 3.2,
276
+ "learning_rate": 7.1875e-08,
277
+ "loss": 6222.3957,
278
  "step": 410
279
  },
280
  {
281
  "epoch": 3.28,
282
+ "learning_rate": 6.875e-08,
283
+ "loss": 6346.1539,
284
  "step": 420
285
  },
286
  {
287
  "epoch": 3.36,
288
+ "learning_rate": 6.5625e-08,
289
+ "loss": 6196.7348,
290
  "step": 430
291
  },
292
  {
293
  "epoch": 3.44,
294
+ "learning_rate": 6.25e-08,
295
+ "loss": 6440.0867,
296
  "step": 440
297
  },
298
  {
299
  "epoch": 3.52,
300
+ "learning_rate": 5.9375e-08,
301
+ "loss": 6186.3539,
302
  "step": 450
303
  },
304
  {
305
  "epoch": 3.59,
306
+ "learning_rate": 5.625e-08,
307
+ "loss": 6364.8961,
308
  "step": 460
309
  },
310
  {
311
  "epoch": 3.67,
312
+ "learning_rate": 5.3124999999999994e-08,
313
+ "loss": 6580.9484,
314
  "step": 470
315
  },
316
  {
317
  "epoch": 3.75,
318
+ "learning_rate": 5e-08,
319
+ "loss": 6384.6832,
320
  "step": 480
321
  },
322
  {
323
  "epoch": 3.83,
324
+ "learning_rate": 4.6875e-08,
325
+ "loss": 6195.5973,
326
  "step": 490
327
  },
328
  {
329
  "epoch": 3.91,
330
+ "learning_rate": 4.375e-08,
331
+ "loss": 6409.9172,
332
  "step": 500
333
  },
334
  {
335
  "epoch": 3.98,
336
+ "learning_rate": 4.0624999999999995e-08,
337
+ "loss": 6528.6664,
338
  "step": 510
339
  },
340
  {
341
  "epoch": 4.0,
342
+ "eval_loss": 6729.0419921875,
343
+ "eval_runtime": 17.1328,
344
+ "eval_samples_per_second": 14.942,
345
+ "eval_steps_per_second": 1.868,
346
  "step": 512
347
  },
348
  {
349
  "epoch": 4.06,
350
+ "learning_rate": 3.75e-08,
351
+ "loss": 6024.7227,
352
  "step": 520
353
  },
354
  {
355
  "epoch": 4.14,
356
+ "learning_rate": 3.4375e-08,
357
+ "loss": 5942.5199,
358
  "step": 530
359
  },
360
  {
361
  "epoch": 4.22,
362
+ "learning_rate": 3.125e-08,
363
+ "loss": 6240.5281,
364
  "step": 540
365
  },
366
  {
367
  "epoch": 4.3,
368
+ "learning_rate": 2.8125e-08,
369
+ "loss": 6593.8445,
370
  "step": 550
371
  },
372
  {
373
  "epoch": 4.38,
374
+ "learning_rate": 2.5e-08,
375
+ "loss": 6542.1891,
376
  "step": 560
377
  },
378
  {
379
  "epoch": 4.45,
380
+ "learning_rate": 2.1875e-08,
381
+ "loss": 6388.3813,
382
  "step": 570
383
  },
384
  {
385
  "epoch": 4.53,
386
+ "learning_rate": 1.875e-08,
387
+ "loss": 6358.7457,
388
  "step": 580
389
  },
390
  {
391
  "epoch": 4.61,
392
+ "learning_rate": 1.5625e-08,
393
+ "loss": 5996.3961,
394
  "step": 590
395
  },
396
  {
397
  "epoch": 4.69,
398
+ "learning_rate": 1.25e-08,
399
+ "loss": 6177.6547,
400
  "step": 600
401
  },
402
  {
403
  "epoch": 4.77,
404
+ "learning_rate": 9.375e-09,
405
+ "loss": 6032.8859,
406
  "step": 610
407
  },
408
  {
409
  "epoch": 4.84,
410
+ "learning_rate": 6.25e-09,
411
+ "loss": 6523.9477,
412
  "step": 620
413
  },
414
  {
415
  "epoch": 4.92,
416
+ "learning_rate": 3.125e-09,
417
+ "loss": 6293.6355,
418
  "step": 630
419
  },
420
  {
421
  "epoch": 5.0,
422
  "learning_rate": 0.0,
423
+ "loss": 5746.1234,
424
  "step": 640
425
  },
426
  {
427
  "epoch": 5.0,
428
+ "eval_loss": 6729.03466796875,
429
+ "eval_runtime": 17.3893,
430
+ "eval_samples_per_second": 14.722,
431
+ "eval_steps_per_second": 1.84,
432
  "step": 640
433
  },
434
  {
435
  "epoch": 5.0,
436
  "step": 640,
437
  "total_flos": 2.4480067682304e+18,
438
+ "train_loss": 6251.508282470703,
439
+ "train_runtime": 838.0786,
440
+ "train_samples_per_second": 6.109,
441
+ "train_steps_per_second": 0.764
442
  }
443
  ],
444
  "max_steps": 640,