duyhngoc commited on
Commit
8d1ad21
1 Parent(s): c93187e

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +571 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.98,
3
+ "eval_loss": 3.5894014835357666,
4
+ "eval_runtime": 450.0701,
5
+ "eval_samples": 760,
6
+ "eval_samples_per_second": 1.689,
7
+ "eval_steps_per_second": 0.211,
8
+ "eval_wer": 1.0,
9
+ "train_loss": 3.4999793866615154,
10
+ "train_runtime": 312015.5793,
11
+ "train_samples": 11660,
12
+ "train_samples_per_second": 0.561,
13
+ "train_steps_per_second": 0.017
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.98,
3
+ "eval_loss": 3.5894014835357666,
4
+ "eval_runtime": 450.0701,
5
+ "eval_samples": 760,
6
+ "eval_samples_per_second": 1.689,
7
+ "eval_steps_per_second": 0.211,
8
+ "eval_wer": 1.0
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.98,
3
+ "train_loss": 3.4999793866615154,
4
+ "train_runtime": 312015.5793,
5
+ "train_samples": 11660,
6
+ "train_samples_per_second": 0.561,
7
+ "train_steps_per_second": 0.017
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 14.979423868312757,
5
+ "global_step": 5460,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.27,
12
+ "eval_loss": 3.9210281372070312,
13
+ "eval_runtime": 448.3805,
14
+ "eval_samples_per_second": 1.695,
15
+ "eval_steps_per_second": 0.212,
16
+ "eval_wer": 1.0,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.55,
21
+ "eval_loss": 3.437487840652466,
22
+ "eval_runtime": 462.5222,
23
+ "eval_samples_per_second": 1.643,
24
+ "eval_steps_per_second": 0.205,
25
+ "eval_wer": 1.0,
26
+ "step": 200
27
+ },
28
+ {
29
+ "epoch": 0.82,
30
+ "eval_loss": 3.4355854988098145,
31
+ "eval_runtime": 445.9044,
32
+ "eval_samples_per_second": 1.704,
33
+ "eval_steps_per_second": 0.213,
34
+ "eval_wer": 1.0,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 1.1,
39
+ "eval_loss": 3.404470920562744,
40
+ "eval_runtime": 449.0136,
41
+ "eval_samples_per_second": 1.693,
42
+ "eval_steps_per_second": 0.212,
43
+ "eval_wer": 1.0,
44
+ "step": 400
45
+ },
46
+ {
47
+ "epoch": 1.37,
48
+ "learning_rate": 0.0003,
49
+ "loss": 4.1866,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 1.37,
54
+ "eval_loss": 3.469388961791992,
55
+ "eval_runtime": 449.2348,
56
+ "eval_samples_per_second": 1.692,
57
+ "eval_steps_per_second": 0.211,
58
+ "eval_wer": 1.0,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 1.65,
63
+ "eval_loss": 3.626600503921509,
64
+ "eval_runtime": 464.0791,
65
+ "eval_samples_per_second": 1.638,
66
+ "eval_steps_per_second": 0.205,
67
+ "eval_wer": 1.0,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 1.92,
72
+ "eval_loss": 3.5694096088409424,
73
+ "eval_runtime": 447.3938,
74
+ "eval_samples_per_second": 1.699,
75
+ "eval_steps_per_second": 0.212,
76
+ "eval_wer": 1.0,
77
+ "step": 700
78
+ },
79
+ {
80
+ "epoch": 2.19,
81
+ "eval_loss": 3.5733487606048584,
82
+ "eval_runtime": 461.0959,
83
+ "eval_samples_per_second": 1.648,
84
+ "eval_steps_per_second": 0.206,
85
+ "eval_wer": 1.0,
86
+ "step": 800
87
+ },
88
+ {
89
+ "epoch": 2.47,
90
+ "eval_loss": 3.638136625289917,
91
+ "eval_runtime": 444.2877,
92
+ "eval_samples_per_second": 1.711,
93
+ "eval_steps_per_second": 0.214,
94
+ "eval_wer": 1.0,
95
+ "step": 900
96
+ },
97
+ {
98
+ "epoch": 2.74,
99
+ "learning_rate": 0.000269758064516129,
100
+ "loss": 3.4376,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 2.74,
105
+ "eval_loss": 3.6604056358337402,
106
+ "eval_runtime": 462.2298,
107
+ "eval_samples_per_second": 1.644,
108
+ "eval_steps_per_second": 0.206,
109
+ "eval_wer": 1.0,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 3.02,
114
+ "eval_loss": 3.5868148803710938,
115
+ "eval_runtime": 450.5266,
116
+ "eval_samples_per_second": 1.687,
117
+ "eval_steps_per_second": 0.211,
118
+ "eval_wer": 1.0,
119
+ "step": 1100
120
+ },
121
+ {
122
+ "epoch": 3.29,
123
+ "eval_loss": 3.4987645149230957,
124
+ "eval_runtime": 449.645,
125
+ "eval_samples_per_second": 1.69,
126
+ "eval_steps_per_second": 0.211,
127
+ "eval_wer": 1.0,
128
+ "step": 1200
129
+ },
130
+ {
131
+ "epoch": 3.57,
132
+ "eval_loss": 3.540862798690796,
133
+ "eval_runtime": 450.1553,
134
+ "eval_samples_per_second": 1.688,
135
+ "eval_steps_per_second": 0.211,
136
+ "eval_wer": 1.0,
137
+ "step": 1300
138
+ },
139
+ {
140
+ "epoch": 3.84,
141
+ "eval_loss": 3.488347053527832,
142
+ "eval_runtime": 451.4108,
143
+ "eval_samples_per_second": 1.684,
144
+ "eval_steps_per_second": 0.21,
145
+ "eval_wer": 1.0,
146
+ "step": 1400
147
+ },
148
+ {
149
+ "epoch": 4.12,
150
+ "learning_rate": 0.00023951612903225802,
151
+ "loss": 3.4365,
152
+ "step": 1500
153
+ },
154
+ {
155
+ "epoch": 4.12,
156
+ "eval_loss": 3.61248517036438,
157
+ "eval_runtime": 448.6902,
158
+ "eval_samples_per_second": 1.694,
159
+ "eval_steps_per_second": 0.212,
160
+ "eval_wer": 1.0,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 4.39,
165
+ "eval_loss": 3.6123130321502686,
166
+ "eval_runtime": 447.7696,
167
+ "eval_samples_per_second": 1.697,
168
+ "eval_steps_per_second": 0.212,
169
+ "eval_wer": 1.0,
170
+ "step": 1600
171
+ },
172
+ {
173
+ "epoch": 4.66,
174
+ "eval_loss": 3.5978219509124756,
175
+ "eval_runtime": 448.356,
176
+ "eval_samples_per_second": 1.695,
177
+ "eval_steps_per_second": 0.212,
178
+ "eval_wer": 1.0,
179
+ "step": 1700
180
+ },
181
+ {
182
+ "epoch": 4.94,
183
+ "eval_loss": 3.5693321228027344,
184
+ "eval_runtime": 449.3628,
185
+ "eval_samples_per_second": 1.691,
186
+ "eval_steps_per_second": 0.211,
187
+ "eval_wer": 1.0,
188
+ "step": 1800
189
+ },
190
+ {
191
+ "epoch": 5.21,
192
+ "eval_loss": 3.565913677215576,
193
+ "eval_runtime": 448.6809,
194
+ "eval_samples_per_second": 1.694,
195
+ "eval_steps_per_second": 0.212,
196
+ "eval_wer": 1.0,
197
+ "step": 1900
198
+ },
199
+ {
200
+ "epoch": 5.49,
201
+ "learning_rate": 0.0002092741935483871,
202
+ "loss": 3.4339,
203
+ "step": 2000
204
+ },
205
+ {
206
+ "epoch": 5.49,
207
+ "eval_loss": 3.6234426498413086,
208
+ "eval_runtime": 452.867,
209
+ "eval_samples_per_second": 1.678,
210
+ "eval_steps_per_second": 0.21,
211
+ "eval_wer": 1.0,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 5.76,
216
+ "eval_loss": 3.5997350215911865,
217
+ "eval_runtime": 465.9828,
218
+ "eval_samples_per_second": 1.631,
219
+ "eval_steps_per_second": 0.204,
220
+ "eval_wer": 1.0,
221
+ "step": 2100
222
+ },
223
+ {
224
+ "epoch": 6.04,
225
+ "eval_loss": 3.6529293060302734,
226
+ "eval_runtime": 469.5273,
227
+ "eval_samples_per_second": 1.619,
228
+ "eval_steps_per_second": 0.202,
229
+ "eval_wer": 1.0,
230
+ "step": 2200
231
+ },
232
+ {
233
+ "epoch": 6.31,
234
+ "eval_loss": 3.57804274559021,
235
+ "eval_runtime": 466.8404,
236
+ "eval_samples_per_second": 1.628,
237
+ "eval_steps_per_second": 0.203,
238
+ "eval_wer": 1.0,
239
+ "step": 2300
240
+ },
241
+ {
242
+ "epoch": 6.58,
243
+ "eval_loss": 3.5843563079833984,
244
+ "eval_runtime": 462.6509,
245
+ "eval_samples_per_second": 1.643,
246
+ "eval_steps_per_second": 0.205,
247
+ "eval_wer": 1.0,
248
+ "step": 2400
249
+ },
250
+ {
251
+ "epoch": 6.86,
252
+ "learning_rate": 0.00017903225806451613,
253
+ "loss": 3.4333,
254
+ "step": 2500
255
+ },
256
+ {
257
+ "epoch": 6.86,
258
+ "eval_loss": 3.5792107582092285,
259
+ "eval_runtime": 465.647,
260
+ "eval_samples_per_second": 1.632,
261
+ "eval_steps_per_second": 0.204,
262
+ "eval_wer": 1.0,
263
+ "step": 2500
264
+ },
265
+ {
266
+ "epoch": 7.13,
267
+ "eval_loss": 3.5468063354492188,
268
+ "eval_runtime": 451.1451,
269
+ "eval_samples_per_second": 1.685,
270
+ "eval_steps_per_second": 0.211,
271
+ "eval_wer": 1.0,
272
+ "step": 2600
273
+ },
274
+ {
275
+ "epoch": 7.41,
276
+ "eval_loss": 3.56913161277771,
277
+ "eval_runtime": 448.1922,
278
+ "eval_samples_per_second": 1.696,
279
+ "eval_steps_per_second": 0.212,
280
+ "eval_wer": 1.0,
281
+ "step": 2700
282
+ },
283
+ {
284
+ "epoch": 7.68,
285
+ "eval_loss": 3.5407586097717285,
286
+ "eval_runtime": 447.5005,
287
+ "eval_samples_per_second": 1.698,
288
+ "eval_steps_per_second": 0.212,
289
+ "eval_wer": 1.0,
290
+ "step": 2800
291
+ },
292
+ {
293
+ "epoch": 7.96,
294
+ "eval_loss": 3.5482022762298584,
295
+ "eval_runtime": 449.8306,
296
+ "eval_samples_per_second": 1.69,
297
+ "eval_steps_per_second": 0.211,
298
+ "eval_wer": 1.0,
299
+ "step": 2900
300
+ },
301
+ {
302
+ "epoch": 8.23,
303
+ "learning_rate": 0.00014879032258064516,
304
+ "loss": 3.4294,
305
+ "step": 3000
306
+ },
307
+ {
308
+ "epoch": 8.23,
309
+ "eval_loss": 3.6070499420166016,
310
+ "eval_runtime": 456.2387,
311
+ "eval_samples_per_second": 1.666,
312
+ "eval_steps_per_second": 0.208,
313
+ "eval_wer": 1.0,
314
+ "step": 3000
315
+ },
316
+ {
317
+ "epoch": 8.5,
318
+ "eval_loss": 3.5904996395111084,
319
+ "eval_runtime": 449.7992,
320
+ "eval_samples_per_second": 1.69,
321
+ "eval_steps_per_second": 0.211,
322
+ "eval_wer": 1.0,
323
+ "step": 3100
324
+ },
325
+ {
326
+ "epoch": 8.78,
327
+ "eval_loss": 3.601818799972534,
328
+ "eval_runtime": 451.1158,
329
+ "eval_samples_per_second": 1.685,
330
+ "eval_steps_per_second": 0.211,
331
+ "eval_wer": 1.0,
332
+ "step": 3200
333
+ },
334
+ {
335
+ "epoch": 9.05,
336
+ "eval_loss": 3.632572889328003,
337
+ "eval_runtime": 465.9266,
338
+ "eval_samples_per_second": 1.631,
339
+ "eval_steps_per_second": 0.204,
340
+ "eval_wer": 1.0,
341
+ "step": 3300
342
+ },
343
+ {
344
+ "epoch": 9.33,
345
+ "eval_loss": 3.6213583946228027,
346
+ "eval_runtime": 464.4391,
347
+ "eval_samples_per_second": 1.636,
348
+ "eval_steps_per_second": 0.205,
349
+ "eval_wer": 1.0,
350
+ "step": 3400
351
+ },
352
+ {
353
+ "epoch": 9.6,
354
+ "learning_rate": 0.00011854838709677418,
355
+ "loss": 3.4293,
356
+ "step": 3500
357
+ },
358
+ {
359
+ "epoch": 9.6,
360
+ "eval_loss": 3.6371841430664062,
361
+ "eval_runtime": 464.9582,
362
+ "eval_samples_per_second": 1.635,
363
+ "eval_steps_per_second": 0.204,
364
+ "eval_wer": 1.0,
365
+ "step": 3500
366
+ },
367
+ {
368
+ "epoch": 9.88,
369
+ "eval_loss": 3.62145733833313,
370
+ "eval_runtime": 461.084,
371
+ "eval_samples_per_second": 1.648,
372
+ "eval_steps_per_second": 0.206,
373
+ "eval_wer": 1.0,
374
+ "step": 3600
375
+ },
376
+ {
377
+ "epoch": 10.15,
378
+ "eval_loss": 3.5106494426727295,
379
+ "eval_runtime": 456.7101,
380
+ "eval_samples_per_second": 1.664,
381
+ "eval_steps_per_second": 0.208,
382
+ "eval_wer": 1.0,
383
+ "step": 3700
384
+ },
385
+ {
386
+ "epoch": 10.43,
387
+ "eval_loss": 3.5065886974334717,
388
+ "eval_runtime": 450.4612,
389
+ "eval_samples_per_second": 1.687,
390
+ "eval_steps_per_second": 0.211,
391
+ "eval_wer": 1.0,
392
+ "step": 3800
393
+ },
394
+ {
395
+ "epoch": 10.7,
396
+ "eval_loss": 3.53520131111145,
397
+ "eval_runtime": 457.5107,
398
+ "eval_samples_per_second": 1.661,
399
+ "eval_steps_per_second": 0.208,
400
+ "eval_wer": 1.0,
401
+ "step": 3900
402
+ },
403
+ {
404
+ "epoch": 10.97,
405
+ "learning_rate": 8.830645161290322e-05,
406
+ "loss": 3.4295,
407
+ "step": 4000
408
+ },
409
+ {
410
+ "epoch": 10.97,
411
+ "eval_loss": 3.5129199028015137,
412
+ "eval_runtime": 459.6878,
413
+ "eval_samples_per_second": 1.653,
414
+ "eval_steps_per_second": 0.207,
415
+ "eval_wer": 1.0,
416
+ "step": 4000
417
+ },
418
+ {
419
+ "epoch": 11.25,
420
+ "eval_loss": 3.638355016708374,
421
+ "eval_runtime": 446.4108,
422
+ "eval_samples_per_second": 1.702,
423
+ "eval_steps_per_second": 0.213,
424
+ "eval_wer": 1.0,
425
+ "step": 4100
426
+ },
427
+ {
428
+ "epoch": 11.52,
429
+ "eval_loss": 3.601942539215088,
430
+ "eval_runtime": 446.1008,
431
+ "eval_samples_per_second": 1.704,
432
+ "eval_steps_per_second": 0.213,
433
+ "eval_wer": 1.0,
434
+ "step": 4200
435
+ },
436
+ {
437
+ "epoch": 11.8,
438
+ "eval_loss": 3.5876448154449463,
439
+ "eval_runtime": 447.6952,
440
+ "eval_samples_per_second": 1.698,
441
+ "eval_steps_per_second": 0.212,
442
+ "eval_wer": 1.0,
443
+ "step": 4300
444
+ },
445
+ {
446
+ "epoch": 12.07,
447
+ "eval_loss": 3.6206564903259277,
448
+ "eval_runtime": 452.4358,
449
+ "eval_samples_per_second": 1.68,
450
+ "eval_steps_per_second": 0.21,
451
+ "eval_wer": 1.0,
452
+ "step": 4400
453
+ },
454
+ {
455
+ "epoch": 12.35,
456
+ "learning_rate": 5.806451612903225e-05,
457
+ "loss": 3.4252,
458
+ "step": 4500
459
+ },
460
+ {
461
+ "epoch": 12.35,
462
+ "eval_loss": 3.599799633026123,
463
+ "eval_runtime": 465.361,
464
+ "eval_samples_per_second": 1.633,
465
+ "eval_steps_per_second": 0.204,
466
+ "eval_wer": 1.0,
467
+ "step": 4500
468
+ },
469
+ {
470
+ "epoch": 12.62,
471
+ "eval_loss": 3.621551513671875,
472
+ "eval_runtime": 464.2483,
473
+ "eval_samples_per_second": 1.637,
474
+ "eval_steps_per_second": 0.205,
475
+ "eval_wer": 1.0,
476
+ "step": 4600
477
+ },
478
+ {
479
+ "epoch": 12.89,
480
+ "eval_loss": 3.6072838306427,
481
+ "eval_runtime": 451.6809,
482
+ "eval_samples_per_second": 1.683,
483
+ "eval_steps_per_second": 0.21,
484
+ "eval_wer": 1.0,
485
+ "step": 4700
486
+ },
487
+ {
488
+ "epoch": 13.17,
489
+ "eval_loss": 3.5566837787628174,
490
+ "eval_runtime": 462.9799,
491
+ "eval_samples_per_second": 1.642,
492
+ "eval_steps_per_second": 0.205,
493
+ "eval_wer": 1.0,
494
+ "step": 4800
495
+ },
496
+ {
497
+ "epoch": 13.44,
498
+ "eval_loss": 3.5745246410369873,
499
+ "eval_runtime": 462.9458,
500
+ "eval_samples_per_second": 1.642,
501
+ "eval_steps_per_second": 0.205,
502
+ "eval_wer": 1.0,
503
+ "step": 4900
504
+ },
505
+ {
506
+ "epoch": 13.72,
507
+ "learning_rate": 2.7822580645161288e-05,
508
+ "loss": 3.4274,
509
+ "step": 5000
510
+ },
511
+ {
512
+ "epoch": 13.72,
513
+ "eval_loss": 3.57381010055542,
514
+ "eval_runtime": 463.0152,
515
+ "eval_samples_per_second": 1.641,
516
+ "eval_steps_per_second": 0.205,
517
+ "eval_wer": 1.0,
518
+ "step": 5000
519
+ },
520
+ {
521
+ "epoch": 13.99,
522
+ "eval_loss": 3.59135103225708,
523
+ "eval_runtime": 451.4651,
524
+ "eval_samples_per_second": 1.683,
525
+ "eval_steps_per_second": 0.21,
526
+ "eval_wer": 1.0,
527
+ "step": 5100
528
+ },
529
+ {
530
+ "epoch": 14.27,
531
+ "eval_loss": 3.6004159450531006,
532
+ "eval_runtime": 448.4459,
533
+ "eval_samples_per_second": 1.695,
534
+ "eval_steps_per_second": 0.212,
535
+ "eval_wer": 1.0,
536
+ "step": 5200
537
+ },
538
+ {
539
+ "epoch": 14.54,
540
+ "eval_loss": 3.596832036972046,
541
+ "eval_runtime": 447.5291,
542
+ "eval_samples_per_second": 1.698,
543
+ "eval_steps_per_second": 0.212,
544
+ "eval_wer": 1.0,
545
+ "step": 5300
546
+ },
547
+ {
548
+ "epoch": 14.81,
549
+ "eval_loss": 3.590834856033325,
550
+ "eval_runtime": 448.4053,
551
+ "eval_samples_per_second": 1.695,
552
+ "eval_steps_per_second": 0.212,
553
+ "eval_wer": 1.0,
554
+ "step": 5400
555
+ },
556
+ {
557
+ "epoch": 14.98,
558
+ "step": 5460,
559
+ "total_flos": 7.431985836399704e+18,
560
+ "train_loss": 3.4999793866615154,
561
+ "train_runtime": 312015.5793,
562
+ "train_samples_per_second": 0.561,
563
+ "train_steps_per_second": 0.017
564
+ }
565
+ ],
566
+ "max_steps": 5460,
567
+ "num_train_epochs": 15,
568
+ "total_flos": 7.431985836399704e+18,
569
+ "trial_name": null,
570
+ "trial_params": null
571
+ }