amyeroberts HF staff committed on
Commit
33f9027
1 Parent(s): d6fb424

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 6388.880859375,
4
- "eval_runtime": 0.6142,
5
- "eval_samples_per_second": 13.026,
6
- "eval_steps_per_second": 1.628,
7
- "train_loss": 6407.14462890625,
8
- "train_runtime": 869.7842,
9
- "train_samples_per_second": 1.84,
10
- "train_steps_per_second": 0.23
11
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 5.264073848724365,
4
+ "eval_runtime": 0.6226,
5
+ "eval_samples_per_second": 12.849,
6
+ "eval_steps_per_second": 1.606,
7
+ "train_loss": 5.157479572296142,
8
+ "train_runtime": 182.2038,
9
+ "train_samples_per_second": 1.756,
10
+ "train_steps_per_second": 0.22
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 6388.880859375,
4
- "eval_runtime": 0.6142,
5
- "eval_samples_per_second": 13.026,
6
- "eval_steps_per_second": 1.628
7
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 5.264073848724365,
4
+ "eval_runtime": 0.6226,
5
+ "eval_samples_per_second": 12.849,
6
+ "eval_steps_per_second": 1.606
7
  }
runs/Jul20_14-57-59_amy-2-gpu/events.out.tfevents.1689865315.amy-2-gpu.188071.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63eca56ab9e430055bb83367472d2fce4f82f8c81668ac72f0364a2b802822f8
3
+ size 354
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 6407.14462890625,
4
- "train_runtime": 869.7842,
5
- "train_samples_per_second": 1.84,
6
- "train_steps_per_second": 0.23
7
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 5.157479572296142,
4
+ "train_runtime": 182.2038,
5
+ "train_samples_per_second": 1.756,
6
+ "train_steps_per_second": 0.22
7
  }
trainer_state.json CHANGED
@@ -1,545 +1,129 @@
1
  {
2
- "best_metric": 6388.880859375,
3
- "best_model_checkpoint": "./coco_outputs/checkpoint-200",
4
- "epoch": 50.0,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 6398.4208984375,
13
- "eval_runtime": 0.5249,
14
- "eval_samples_per_second": 15.242,
15
- "eval_steps_per_second": 1.905,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
- "eval_loss": 6397.8583984375,
21
- "eval_runtime": 0.5335,
22
- "eval_samples_per_second": 14.995,
23
- "eval_steps_per_second": 1.874,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
- "learning_rate": 1.9e-05,
29
- "loss": 6370.2805,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 6397.47900390625,
35
- "eval_runtime": 0.5389,
36
- "eval_samples_per_second": 14.844,
37
- "eval_steps_per_second": 1.855,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
- "eval_loss": 6396.83203125,
43
- "eval_runtime": 0.5405,
44
- "eval_samples_per_second": 14.802,
45
- "eval_steps_per_second": 1.85,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
- "learning_rate": 1.8e-05,
51
- "loss": 6424.3547,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
- "eval_loss": 6396.29296875,
57
- "eval_runtime": 0.5443,
58
- "eval_samples_per_second": 14.697,
59
- "eval_steps_per_second": 1.837,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
- "eval_loss": 6395.72314453125,
65
- "eval_runtime": 0.5415,
66
- "eval_samples_per_second": 14.775,
67
- "eval_steps_per_second": 1.847,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_loss": 6395.1376953125,
73
- "eval_runtime": 0.5498,
74
- "eval_samples_per_second": 14.55,
75
- "eval_steps_per_second": 1.819,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
- "learning_rate": 1.7e-05,
81
- "loss": 6477.4051,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
- "eval_loss": 6394.81884765625,
87
- "eval_runtime": 0.5491,
88
- "eval_samples_per_second": 14.568,
89
- "eval_steps_per_second": 1.821,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
- "eval_loss": 6394.478515625,
95
- "eval_runtime": 0.5553,
96
- "eval_samples_per_second": 14.407,
97
- "eval_steps_per_second": 1.801,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
- "learning_rate": 1.6000000000000003e-05,
103
- "loss": 6381.9848,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
- "eval_loss": 6394.22119140625,
109
- "eval_runtime": 0.5529,
110
- "eval_samples_per_second": 14.469,
111
- "eval_steps_per_second": 1.809,
112
  "step": 40
113
  },
114
  {
115
- "epoch": 11.0,
116
- "eval_loss": 6394.06640625,
117
- "eval_runtime": 0.5511,
118
- "eval_samples_per_second": 14.516,
119
- "eval_steps_per_second": 1.814,
120
- "step": 44
121
- },
122
- {
123
- "epoch": 12.0,
124
- "eval_loss": 6393.92529296875,
125
- "eval_runtime": 0.5545,
126
- "eval_samples_per_second": 14.426,
127
- "eval_steps_per_second": 1.803,
128
- "step": 48
129
- },
130
- {
131
- "epoch": 12.5,
132
- "learning_rate": 1.5000000000000002e-05,
133
- "loss": 6343.784,
134
- "step": 50
135
- },
136
- {
137
- "epoch": 13.0,
138
- "eval_loss": 6393.734375,
139
- "eval_runtime": 0.552,
140
- "eval_samples_per_second": 14.494,
141
- "eval_steps_per_second": 1.812,
142
- "step": 52
143
- },
144
- {
145
- "epoch": 14.0,
146
- "eval_loss": 6393.61279296875,
147
- "eval_runtime": 0.5519,
148
- "eval_samples_per_second": 14.496,
149
- "eval_steps_per_second": 1.812,
150
- "step": 56
151
- },
152
- {
153
- "epoch": 15.0,
154
- "learning_rate": 1.4e-05,
155
- "loss": 6458.8668,
156
- "step": 60
157
- },
158
- {
159
- "epoch": 15.0,
160
- "eval_loss": 6393.43798828125,
161
- "eval_runtime": 0.5498,
162
- "eval_samples_per_second": 14.551,
163
- "eval_steps_per_second": 1.819,
164
- "step": 60
165
- },
166
- {
167
- "epoch": 16.0,
168
- "eval_loss": 6393.2392578125,
169
- "eval_runtime": 0.5467,
170
- "eval_samples_per_second": 14.633,
171
- "eval_steps_per_second": 1.829,
172
- "step": 64
173
- },
174
- {
175
- "epoch": 17.0,
176
- "eval_loss": 6393.10400390625,
177
- "eval_runtime": 0.546,
178
- "eval_samples_per_second": 14.651,
179
- "eval_steps_per_second": 1.831,
180
- "step": 68
181
- },
182
- {
183
- "epoch": 17.5,
184
- "learning_rate": 1.3000000000000001e-05,
185
- "loss": 6414.077,
186
- "step": 70
187
- },
188
- {
189
- "epoch": 18.0,
190
- "eval_loss": 6392.95751953125,
191
- "eval_runtime": 0.5445,
192
- "eval_samples_per_second": 14.693,
193
- "eval_steps_per_second": 1.837,
194
- "step": 72
195
- },
196
- {
197
- "epoch": 19.0,
198
- "eval_loss": 6392.830078125,
199
- "eval_runtime": 0.553,
200
- "eval_samples_per_second": 14.467,
201
- "eval_steps_per_second": 1.808,
202
- "step": 76
203
- },
204
- {
205
- "epoch": 20.0,
206
- "learning_rate": 1.2e-05,
207
- "loss": 6417.8516,
208
- "step": 80
209
- },
210
- {
211
- "epoch": 20.0,
212
- "eval_loss": 6392.3056640625,
213
- "eval_runtime": 0.5433,
214
- "eval_samples_per_second": 14.725,
215
- "eval_steps_per_second": 1.841,
216
- "step": 80
217
- },
218
- {
219
- "epoch": 21.0,
220
- "eval_loss": 6391.8310546875,
221
- "eval_runtime": 0.5461,
222
- "eval_samples_per_second": 14.65,
223
- "eval_steps_per_second": 1.831,
224
- "step": 84
225
- },
226
- {
227
- "epoch": 22.0,
228
- "eval_loss": 6391.55322265625,
229
- "eval_runtime": 0.5499,
230
- "eval_samples_per_second": 14.547,
231
- "eval_steps_per_second": 1.818,
232
- "step": 88
233
- },
234
- {
235
- "epoch": 22.5,
236
- "learning_rate": 1.1000000000000001e-05,
237
- "loss": 6333.3547,
238
- "step": 90
239
- },
240
- {
241
- "epoch": 23.0,
242
- "eval_loss": 6391.34033203125,
243
- "eval_runtime": 0.5549,
244
- "eval_samples_per_second": 14.418,
245
- "eval_steps_per_second": 1.802,
246
- "step": 92
247
- },
248
- {
249
- "epoch": 24.0,
250
- "eval_loss": 6391.193359375,
251
- "eval_runtime": 0.5535,
252
- "eval_samples_per_second": 14.454,
253
- "eval_steps_per_second": 1.807,
254
- "step": 96
255
- },
256
- {
257
- "epoch": 25.0,
258
- "learning_rate": 1e-05,
259
- "loss": 6455.1539,
260
- "step": 100
261
- },
262
- {
263
- "epoch": 25.0,
264
- "eval_loss": 6390.97412109375,
265
- "eval_runtime": 0.5532,
266
- "eval_samples_per_second": 14.46,
267
- "eval_steps_per_second": 1.808,
268
- "step": 100
269
- },
270
- {
271
- "epoch": 26.0,
272
- "eval_loss": 6390.82275390625,
273
- "eval_runtime": 0.562,
274
- "eval_samples_per_second": 14.234,
275
- "eval_steps_per_second": 1.779,
276
- "step": 104
277
- },
278
- {
279
- "epoch": 27.0,
280
- "eval_loss": 6390.7607421875,
281
- "eval_runtime": 0.5645,
282
- "eval_samples_per_second": 14.171,
283
- "eval_steps_per_second": 1.771,
284
- "step": 108
285
- },
286
- {
287
- "epoch": 27.5,
288
- "learning_rate": 9e-06,
289
- "loss": 6399.7898,
290
- "step": 110
291
- },
292
- {
293
- "epoch": 28.0,
294
- "eval_loss": 6390.66552734375,
295
- "eval_runtime": 0.558,
296
- "eval_samples_per_second": 14.338,
297
- "eval_steps_per_second": 1.792,
298
- "step": 112
299
- },
300
- {
301
- "epoch": 29.0,
302
- "eval_loss": 6390.5859375,
303
- "eval_runtime": 0.5544,
304
- "eval_samples_per_second": 14.429,
305
- "eval_steps_per_second": 1.804,
306
- "step": 116
307
- },
308
- {
309
- "epoch": 30.0,
310
- "learning_rate": 8.000000000000001e-06,
311
- "loss": 6410.9336,
312
- "step": 120
313
- },
314
- {
315
- "epoch": 30.0,
316
- "eval_loss": 6390.49072265625,
317
- "eval_runtime": 0.5598,
318
- "eval_samples_per_second": 14.291,
319
- "eval_steps_per_second": 1.786,
320
- "step": 120
321
- },
322
- {
323
- "epoch": 31.0,
324
- "eval_loss": 6390.3388671875,
325
- "eval_runtime": 0.562,
326
- "eval_samples_per_second": 14.236,
327
- "eval_steps_per_second": 1.779,
328
- "step": 124
329
- },
330
- {
331
- "epoch": 32.0,
332
- "eval_loss": 6390.19775390625,
333
- "eval_runtime": 0.546,
334
- "eval_samples_per_second": 14.652,
335
- "eval_steps_per_second": 1.831,
336
- "step": 128
337
- },
338
- {
339
- "epoch": 32.5,
340
- "learning_rate": 7e-06,
341
- "loss": 6409.2,
342
- "step": 130
343
- },
344
- {
345
- "epoch": 33.0,
346
- "eval_loss": 6390.0341796875,
347
- "eval_runtime": 0.5441,
348
- "eval_samples_per_second": 14.704,
349
- "eval_steps_per_second": 1.838,
350
- "step": 132
351
- },
352
- {
353
- "epoch": 34.0,
354
- "eval_loss": 6389.96240234375,
355
- "eval_runtime": 0.5472,
356
- "eval_samples_per_second": 14.619,
357
- "eval_steps_per_second": 1.827,
358
- "step": 136
359
- },
360
- {
361
- "epoch": 35.0,
362
- "learning_rate": 6e-06,
363
- "loss": 6406.6211,
364
- "step": 140
365
- },
366
- {
367
- "epoch": 35.0,
368
- "eval_loss": 6389.9111328125,
369
- "eval_runtime": 0.5514,
370
- "eval_samples_per_second": 14.509,
371
- "eval_steps_per_second": 1.814,
372
- "step": 140
373
- },
374
- {
375
- "epoch": 36.0,
376
- "eval_loss": 6389.6875,
377
- "eval_runtime": 0.5453,
378
- "eval_samples_per_second": 14.672,
379
- "eval_steps_per_second": 1.834,
380
- "step": 144
381
- },
382
- {
383
- "epoch": 37.0,
384
- "eval_loss": 6389.4755859375,
385
- "eval_runtime": 0.5552,
386
- "eval_samples_per_second": 14.409,
387
- "eval_steps_per_second": 1.801,
388
- "step": 148
389
- },
390
- {
391
- "epoch": 37.5,
392
- "learning_rate": 5e-06,
393
- "loss": 6371.1539,
394
- "step": 150
395
- },
396
- {
397
- "epoch": 38.0,
398
- "eval_loss": 6389.3515625,
399
- "eval_runtime": 0.5423,
400
- "eval_samples_per_second": 14.751,
401
- "eval_steps_per_second": 1.844,
402
- "step": 152
403
- },
404
- {
405
- "epoch": 39.0,
406
- "eval_loss": 6389.26953125,
407
- "eval_runtime": 0.5522,
408
- "eval_samples_per_second": 14.487,
409
- "eval_steps_per_second": 1.811,
410
- "step": 156
411
- },
412
- {
413
- "epoch": 40.0,
414
- "learning_rate": 4.000000000000001e-06,
415
- "loss": 6409.1055,
416
- "step": 160
417
- },
418
- {
419
- "epoch": 40.0,
420
- "eval_loss": 6389.24951171875,
421
- "eval_runtime": 0.5572,
422
- "eval_samples_per_second": 14.356,
423
- "eval_steps_per_second": 1.795,
424
- "step": 160
425
- },
426
- {
427
- "epoch": 41.0,
428
- "eval_loss": 6389.208984375,
429
- "eval_runtime": 0.5378,
430
- "eval_samples_per_second": 14.876,
431
- "eval_steps_per_second": 1.86,
432
- "step": 164
433
- },
434
- {
435
- "epoch": 42.0,
436
- "eval_loss": 6389.10986328125,
437
- "eval_runtime": 0.539,
438
- "eval_samples_per_second": 14.841,
439
- "eval_steps_per_second": 1.855,
440
- "step": 168
441
- },
442
- {
443
- "epoch": 42.5,
444
- "learning_rate": 3e-06,
445
- "loss": 6453.5285,
446
- "step": 170
447
- },
448
- {
449
- "epoch": 43.0,
450
- "eval_loss": 6389.04052734375,
451
- "eval_runtime": 0.54,
452
- "eval_samples_per_second": 14.815,
453
- "eval_steps_per_second": 1.852,
454
- "step": 172
455
- },
456
- {
457
- "epoch": 44.0,
458
- "eval_loss": 6388.99365234375,
459
- "eval_runtime": 0.5364,
460
- "eval_samples_per_second": 14.915,
461
- "eval_steps_per_second": 1.864,
462
- "step": 176
463
- },
464
- {
465
- "epoch": 45.0,
466
- "learning_rate": 2.0000000000000003e-06,
467
- "loss": 6391.1004,
468
- "step": 180
469
- },
470
- {
471
- "epoch": 45.0,
472
- "eval_loss": 6388.9541015625,
473
- "eval_runtime": 0.5495,
474
- "eval_samples_per_second": 14.558,
475
- "eval_steps_per_second": 1.82,
476
- "step": 180
477
- },
478
- {
479
- "epoch": 46.0,
480
- "eval_loss": 6388.923828125,
481
- "eval_runtime": 0.5496,
482
- "eval_samples_per_second": 14.557,
483
- "eval_steps_per_second": 1.82,
484
- "step": 184
485
- },
486
- {
487
- "epoch": 47.0,
488
- "eval_loss": 6388.90771484375,
489
- "eval_runtime": 0.5379,
490
- "eval_samples_per_second": 14.871,
491
- "eval_steps_per_second": 1.859,
492
- "step": 188
493
- },
494
- {
495
- "epoch": 47.5,
496
- "learning_rate": 1.0000000000000002e-06,
497
- "loss": 6416.6641,
498
- "step": 190
499
- },
500
- {
501
- "epoch": 48.0,
502
- "eval_loss": 6388.89111328125,
503
- "eval_runtime": 0.5385,
504
- "eval_samples_per_second": 14.857,
505
- "eval_steps_per_second": 1.857,
506
- "step": 192
507
- },
508
- {
509
- "epoch": 49.0,
510
- "eval_loss": 6388.8828125,
511
- "eval_runtime": 0.5466,
512
- "eval_samples_per_second": 14.635,
513
- "eval_steps_per_second": 1.829,
514
- "step": 196
515
- },
516
- {
517
- "epoch": 50.0,
518
- "learning_rate": 0.0,
519
- "loss": 6397.6828,
520
- "step": 200
521
- },
522
- {
523
- "epoch": 50.0,
524
- "eval_loss": 6388.880859375,
525
- "eval_runtime": 0.5601,
526
- "eval_samples_per_second": 14.283,
527
- "eval_steps_per_second": 1.785,
528
- "step": 200
529
- },
530
- {
531
- "epoch": 50.0,
532
- "step": 200,
533
- "total_flos": 7.65002115072e+17,
534
- "train_loss": 6407.14462890625,
535
- "train_runtime": 869.7842,
536
- "train_samples_per_second": 1.84,
537
- "train_steps_per_second": 0.23
538
  }
539
  ],
540
- "max_steps": 200,
541
- "num_train_epochs": 50,
542
- "total_flos": 7.65002115072e+17,
543
  "trial_name": null,
544
  "trial_params": null
545
  }
 
1
  {
2
+ "best_metric": 5.264073371887207,
3
+ "best_model_checkpoint": "./coco_outputs/checkpoint-32",
4
+ "epoch": 10.0,
5
+ "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_loss": 5.533133506774902,
13
+ "eval_runtime": 0.5515,
14
+ "eval_samples_per_second": 14.505,
15
+ "eval_steps_per_second": 1.813,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
+ "eval_loss": 5.527740478515625,
21
+ "eval_runtime": 0.5553,
22
+ "eval_samples_per_second": 14.406,
23
+ "eval_steps_per_second": 1.801,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
+ "learning_rate": 1.5000000000000002e-05,
29
+ "loss": 5.4377,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
+ "eval_loss": 5.444962024688721,
35
+ "eval_runtime": 0.5512,
36
+ "eval_samples_per_second": 14.513,
37
+ "eval_steps_per_second": 1.814,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
+ "eval_loss": 5.395962715148926,
43
+ "eval_runtime": 0.5633,
44
+ "eval_samples_per_second": 14.203,
45
+ "eval_steps_per_second": 1.775,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
+ "learning_rate": 1e-05,
51
+ "loss": 5.1582,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
+ "eval_loss": 5.334904670715332,
57
+ "eval_runtime": 0.5523,
58
+ "eval_samples_per_second": 14.484,
59
+ "eval_steps_per_second": 1.81,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
+ "eval_loss": 5.314438819885254,
65
+ "eval_runtime": 0.552,
66
+ "eval_samples_per_second": 14.493,
67
+ "eval_steps_per_second": 1.812,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_loss": 5.273761749267578,
73
+ "eval_runtime": 0.5505,
74
+ "eval_samples_per_second": 14.531,
75
+ "eval_steps_per_second": 1.816,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
+ "learning_rate": 5e-06,
81
+ "loss": 5.0556,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
+ "eval_loss": 5.264073371887207,
87
+ "eval_runtime": 0.5581,
88
+ "eval_samples_per_second": 14.334,
89
+ "eval_steps_per_second": 1.792,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
+ "eval_loss": 5.284775733947754,
95
+ "eval_runtime": 0.5423,
96
+ "eval_samples_per_second": 14.752,
97
+ "eval_steps_per_second": 1.844,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
+ "learning_rate": 0.0,
103
+ "loss": 4.9784,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
+ "eval_loss": 5.279171943664551,
109
+ "eval_runtime": 0.5633,
110
+ "eval_samples_per_second": 14.202,
111
+ "eval_steps_per_second": 1.775,
112
  "step": 40
113
  },
114
  {
115
+ "epoch": 10.0,
116
+ "step": 40,
117
+ "total_flos": 1.530004230144e+17,
118
+ "train_loss": 5.157479572296142,
119
+ "train_runtime": 182.2038,
120
+ "train_samples_per_second": 1.756,
121
+ "train_steps_per_second": 0.22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  }
123
  ],
124
+ "max_steps": 40,
125
+ "num_train_epochs": 10,
126
+ "total_flos": 1.530004230144e+17,
127
  "trial_name": null,
128
  "trial_params": null
129
  }