p4b committed on
Commit
5f7a912
1 Parent(s): e86dcd8

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 24.04,
3
- "eval_loss": 0.31787109375,
4
- "eval_runtime": 743.378,
5
- "eval_samples_per_second": 2.984,
6
- "eval_steps_per_second": 0.094,
7
- "eval_wer": 27.47628083491461,
8
- "train_loss": 0.31971333821614584,
9
- "train_runtime": 22038.1709,
10
- "train_samples_per_second": 4.356,
11
- "train_steps_per_second": 0.068
12
  }
 
1
  {
2
+ "epoch": 15.02,
3
+ "eval_loss": 0.25927734375,
4
+ "eval_runtime": 704.255,
5
+ "eval_samples_per_second": 3.149,
6
+ "eval_steps_per_second": 0.099,
7
+ "eval_wer": 19.97153700189753,
8
+ "train_loss": 0.5692690700954861,
9
+ "train_runtime": 21274.2497,
10
+ "train_samples_per_second": 2.707,
11
+ "train_steps_per_second": 0.042
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 24.04,
3
- "eval_loss": 0.31787109375,
4
- "eval_runtime": 743.378,
5
- "eval_samples_per_second": 2.984,
6
- "eval_steps_per_second": 0.094,
7
- "eval_wer": 27.47628083491461
8
  }
 
1
  {
2
+ "epoch": 15.02,
3
+ "eval_loss": 0.25927734375,
4
+ "eval_runtime": 704.255,
5
+ "eval_samples_per_second": 3.149,
6
+ "eval_steps_per_second": 0.099,
7
+ "eval_wer": 19.97153700189753
8
  }
runs/Dec19_09-48-52_nipa2022-65403/events.out.tfevents.1671433867.nipa2022-65403.3427.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19d9ca51c3c6769bf3220cdc12c5aa68144d421209f9aeedf7e49bd5959578c8
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 24.04,
3
- "train_loss": 0.31971333821614584,
4
- "train_runtime": 22038.1709,
5
- "train_samples_per_second": 4.356,
6
- "train_steps_per_second": 0.068
7
  }
 
1
  {
2
+ "epoch": 15.02,
3
+ "train_loss": 0.5692690700954861,
4
+ "train_runtime": 21274.2497,
5
+ "train_samples_per_second": 2.707,
6
+ "train_steps_per_second": 0.042
7
  }
trainer_state.json CHANGED
@@ -1,448 +1,277 @@
1
  {
2
- "best_metric": 27.47628083491461,
3
- "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 24.04,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.02,
12
- "learning_rate": 3.45e-08,
13
- "loss": 1.3486,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 0.03,
18
- "learning_rate": 7.2e-08,
19
- "loss": 1.1313,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 1.01,
24
- "learning_rate": 1.095e-07,
25
- "loss": 1.2018,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 1.03,
30
- "learning_rate": 1.4699999999999998e-07,
31
- "loss": 1.0324,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 2.0,
36
- "learning_rate": 1.845e-07,
37
- "loss": 0.7448,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 2.02,
42
- "learning_rate": 2.2199999999999998e-07,
43
- "loss": 0.6496,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 2.04,
48
- "learning_rate": 2.5949999999999996e-07,
49
- "loss": 0.4192,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 3.01,
54
- "learning_rate": 2.9699999999999997e-07,
55
- "loss": 0.5148,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 3.01,
60
- "eval_loss": 0.4189453125,
61
- "eval_runtime": 732.3243,
62
- "eval_samples_per_second": 3.029,
63
- "eval_steps_per_second": 0.096,
64
- "eval_wer": 39.34535104364326,
65
  "step": 200
66
  },
67
  {
68
- "epoch": 3.03,
69
- "learning_rate": 2.997683575414969e-07,
70
- "loss": 0.3818,
71
  "step": 225
72
  },
73
  {
74
- "epoch": 4.01,
75
- "learning_rate": 2.9899197858556995e-07,
76
- "loss": 0.3758,
77
  "step": 250
78
  },
79
  {
80
- "epoch": 4.02,
81
- "learning_rate": 2.9767194405746293e-07,
82
- "loss": 0.3957,
83
  "step": 275
84
  },
85
  {
86
- "epoch": 4.04,
87
- "learning_rate": 2.958130706200157e-07,
88
- "loss": 0.2837,
89
  "step": 300
90
  },
91
  {
92
- "epoch": 5.02,
93
- "learning_rate": 2.934221411008129e-07,
94
- "loss": 0.4078,
95
  "step": 325
96
  },
97
  {
98
- "epoch": 5.03,
99
- "learning_rate": 2.905078797423817e-07,
100
- "loss": 0.27,
101
  "step": 350
102
  },
103
  {
104
- "epoch": 6.01,
105
- "learning_rate": 2.870809203683774e-07,
106
- "loss": 0.3376,
107
  "step": 375
108
  },
109
  {
110
- "epoch": 6.03,
111
- "learning_rate": 2.831537675819134e-07,
112
- "loss": 0.3041,
113
  "step": 400
114
  },
115
  {
116
- "epoch": 6.03,
117
- "eval_loss": 0.33349609375,
118
- "eval_runtime": 737.3023,
119
- "eval_samples_per_second": 3.008,
120
- "eval_steps_per_second": 0.095,
121
- "eval_wer": 29.573055028462996,
122
  "step": 400
123
  },
124
  {
125
- "epoch": 7.0,
126
- "learning_rate": 2.78740751137621e-07,
127
- "loss": 0.259,
128
  "step": 425
129
  },
130
  {
131
- "epoch": 7.02,
132
- "learning_rate": 2.738579736539286e-07,
133
- "loss": 0.334,
134
  "step": 450
135
  },
136
  {
137
- "epoch": 7.04,
138
- "learning_rate": 2.6852325185635355e-07,
139
- "loss": 0.2305,
140
  "step": 475
141
  },
142
  {
143
- "epoch": 8.01,
144
- "learning_rate": 2.6275605156620243e-07,
145
- "loss": 0.3156,
146
  "step": 500
147
  },
148
  {
149
- "epoch": 8.03,
150
- "learning_rate": 2.5657741667190066e-07,
151
- "loss": 0.2408,
152
  "step": 525
153
  },
154
  {
155
- "epoch": 9.01,
156
- "learning_rate": 2.500098923421254e-07,
157
- "loss": 0.2577,
158
  "step": 550
159
  },
160
  {
161
- "epoch": 9.02,
162
- "learning_rate": 2.4307744276092984e-07,
163
- "loss": 0.2818,
164
  "step": 575
165
  },
166
  {
167
- "epoch": 9.04,
168
- "learning_rate": 2.3580536368503515e-07,
169
- "loss": 0.1961,
170
  "step": 600
171
  },
172
  {
173
- "epoch": 9.04,
174
- "eval_loss": 0.318603515625,
175
- "eval_runtime": 746.4809,
176
- "eval_samples_per_second": 2.971,
177
- "eval_steps_per_second": 0.094,
178
- "eval_wer": 27.779886148007588,
179
  "step": 600
180
  },
181
  {
182
- "epoch": 10.02,
183
- "learning_rate": 2.2822019014235677e-07,
184
- "loss": 0.3018,
185
  "step": 625
186
  },
187
  {
188
- "epoch": 10.03,
189
- "learning_rate": 2.2034959960856529e-07,
190
- "loss": 0.2028,
191
  "step": 650
192
  },
193
  {
194
- "epoch": 11.01,
195
- "learning_rate": 2.1222231101497956e-07,
196
- "loss": 0.2588,
197
  "step": 675
198
  },
199
  {
200
- "epoch": 11.03,
201
- "learning_rate": 2.0386797995630034e-07,
202
- "loss": 0.2308,
203
  "step": 700
204
  },
205
  {
206
- "epoch": 12.0,
207
- "learning_rate": 1.953170904805618e-07,
208
- "loss": 0.2022,
209
  "step": 725
210
  },
211
  {
212
- "epoch": 12.02,
213
- "learning_rate": 1.8660084385614562e-07,
214
- "loss": 0.2688,
215
  "step": 750
216
  },
217
  {
218
- "epoch": 12.04,
219
- "learning_rate": 1.7775104472173675e-07,
220
- "loss": 0.1752,
221
  "step": 775
222
  },
223
  {
224
- "epoch": 13.01,
225
- "learning_rate": 1.6879998503464563e-07,
226
- "loss": 0.2579,
227
  "step": 800
228
  },
229
  {
230
- "epoch": 13.01,
231
- "eval_loss": 0.316650390625,
232
- "eval_runtime": 737.1707,
233
- "eval_samples_per_second": 3.009,
234
- "eval_steps_per_second": 0.095,
235
- "eval_wer": 27.571157495256166,
236
  "step": 800
237
  },
238
  {
239
- "epoch": 13.03,
240
- "learning_rate": 1.5978032624095823e-07,
241
- "loss": 0.195,
242
  "step": 825
243
  },
244
  {
245
- "epoch": 14.01,
246
- "learning_rate": 1.5072498009746107e-07,
247
- "loss": 0.2044,
248
  "step": 850
249
  },
250
  {
251
- "epoch": 14.02,
252
- "learning_rate": 1.416669885802099e-07,
253
- "loss": 0.2308,
254
  "step": 875
255
  },
256
  {
257
- "epoch": 14.04,
258
- "learning_rate": 1.3263940331794248e-07,
259
- "loss": 0.1651,
260
  "step": 900
261
  },
262
  {
263
  "epoch": 15.02,
264
- "learning_rate": 1.2367516499026965e-07,
265
- "loss": 0.2565,
266
- "step": 925
267
- },
268
- {
269
- "epoch": 15.03,
270
- "learning_rate": 1.1480698313070749e-07,
271
- "loss": 0.1693,
272
- "step": 950
273
- },
274
- {
275
- "epoch": 16.01,
276
- "learning_rate": 1.0606721677313713e-07,
277
- "loss": 0.2152,
278
- "step": 975
279
- },
280
- {
281
- "epoch": 16.03,
282
- "learning_rate": 9.748775637719916e-08,
283
- "loss": 0.2034,
284
- "step": 1000
285
- },
286
- {
287
- "epoch": 16.03,
288
- "eval_loss": 0.31787109375,
289
- "eval_runtime": 744.5018,
290
- "eval_samples_per_second": 2.979,
291
- "eval_steps_per_second": 0.094,
292
- "eval_wer": 27.47628083491461,
293
- "step": 1000
294
- },
295
- {
296
- "epoch": 17.0,
297
- "learning_rate": 8.976315836886941e-08,
298
- "loss": 0.1694,
299
- "step": 1025
300
- },
301
- {
302
- "epoch": 17.02,
303
- "learning_rate": 8.157864194541929e-08,
304
- "loss": 0.227,
305
- "step": 1050
306
- },
307
- {
308
- "epoch": 17.04,
309
- "learning_rate": 7.36437876374443e-08,
310
- "loss": 0.1607,
311
- "step": 1075
312
- },
313
- {
314
- "epoch": 18.01,
315
- "learning_rate": 6.598754886780604e-08,
316
- "loss": 0.2253,
317
- "step": 1100
318
- },
319
- {
320
- "epoch": 18.03,
321
- "learning_rate": 5.863786242148148e-08,
322
- "loss": 0.1737,
323
- "step": 1125
324
- },
325
- {
326
- "epoch": 19.01,
327
- "learning_rate": 5.1621546507264605e-08,
328
- "loss": 0.1891,
329
- "step": 1150
330
- },
331
- {
332
- "epoch": 19.02,
333
- "learning_rate": 4.496420290103045e-08,
334
- "loss": 0.2066,
335
- "step": 1175
336
- },
337
- {
338
- "epoch": 19.04,
339
- "learning_rate": 3.869012352763169e-08,
340
- "loss": 0.1478,
341
- "step": 1200
342
- },
343
- {
344
- "epoch": 19.04,
345
- "eval_loss": 0.3193359375,
346
- "eval_runtime": 742.6411,
347
- "eval_samples_per_second": 2.987,
348
- "eval_steps_per_second": 0.094,
349
- "eval_wer": 27.523719165085385,
350
- "step": 1200
351
- },
352
- {
353
- "epoch": 20.02,
354
- "learning_rate": 3.28222018222991e-08,
355
- "loss": 0.2362,
356
- "step": 1225
357
- },
358
- {
359
- "epoch": 20.03,
360
- "learning_rate": 2.738184919497899e-08,
361
- "loss": 0.1564,
362
- "step": 1250
363
- },
364
- {
365
- "epoch": 21.01,
366
- "learning_rate": 2.2388916902420573e-08,
367
- "loss": 0.2014,
368
- "step": 1275
369
- },
370
- {
371
- "epoch": 21.03,
372
- "learning_rate": 1.7861623613092718e-08,
373
- "loss": 0.1918,
374
- "step": 1300
375
- },
376
- {
377
- "epoch": 22.0,
378
- "learning_rate": 1.381648892923838e-08,
379
- "loss": 0.1614,
380
- "step": 1325
381
- },
382
- {
383
- "epoch": 22.02,
384
- "learning_rate": 1.0268273108637776e-08,
385
- "loss": 0.2187,
386
- "step": 1350
387
- },
388
- {
389
- "epoch": 22.04,
390
- "learning_rate": 7.229923206028154e-09,
391
- "loss": 0.1492,
392
- "step": 1375
393
- },
394
- {
395
- "epoch": 23.01,
396
- "learning_rate": 4.712525830705338e-09,
397
- "loss": 0.2169,
398
- "step": 1400
399
- },
400
- {
401
- "epoch": 23.01,
402
- "eval_loss": 0.31982421875,
403
- "eval_runtime": 739.4797,
404
- "eval_samples_per_second": 2.999,
405
- "eval_steps_per_second": 0.095,
406
- "eval_wer": 27.504743833017077,
407
- "step": 1400
408
- },
409
- {
410
- "epoch": 23.03,
411
- "learning_rate": 2.7252666926886157e-09,
412
- "loss": 0.1702,
413
- "step": 1425
414
- },
415
- {
416
- "epoch": 24.01,
417
- "learning_rate": 1.2753970850610251e-09,
418
- "loss": 0.1815,
419
- "step": 1450
420
- },
421
- {
422
- "epoch": 24.02,
423
- "learning_rate": 3.682074247873257e-10,
424
- "loss": 0.2022,
425
- "step": 1475
426
- },
427
- {
428
- "epoch": 24.04,
429
- "learning_rate": 7.0079485561924665e-12,
430
- "loss": 0.1448,
431
- "step": 1500
432
- },
433
- {
434
- "epoch": 24.04,
435
- "step": 1500,
436
- "total_flos": 2.0191356490809868e+20,
437
- "train_loss": 0.31971333821614584,
438
- "train_runtime": 22038.1709,
439
- "train_samples_per_second": 4.356,
440
- "train_steps_per_second": 0.068
441
  }
442
  ],
443
- "max_steps": 1500,
444
  "num_train_epochs": 9223372036854775807,
445
- "total_flos": 2.0191356490809868e+20,
446
  "trial_name": null,
447
  "trial_params": null
448
  }
 
1
  {
2
+ "best_metric": 19.97153700189753,
3
+ "best_model_checkpoint": "./checkpoint-800",
4
+ "epoch": 15.016666666666667,
5
+ "global_step": 900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.03,
12
+ "learning_rate": 2e-08,
13
+ "loss": 1.2498,
14
  "step": 25
15
  },
16
  {
17
+ "epoch": 0.06,
18
+ "learning_rate": 4.5e-08,
19
+ "loss": 0.9037,
20
  "step": 50
21
  },
22
  {
23
+ "epoch": 1.02,
24
+ "learning_rate": 6.999999999999999e-08,
25
+ "loss": 1.0907,
26
  "step": 75
27
  },
28
  {
29
+ "epoch": 1.05,
30
+ "learning_rate": 9.499999999999999e-08,
31
+ "loss": 0.927,
32
  "step": 100
33
  },
34
  {
35
+ "epoch": 2.01,
36
+ "learning_rate": 9.983007697636658e-08,
37
+ "loss": 0.8001,
38
  "step": 125
39
  },
40
  {
41
+ "epoch": 2.04,
42
+ "learning_rate": 9.918643144747679e-08,
43
+ "loss": 0.8719,
44
  "step": 150
45
  },
46
  {
47
+ "epoch": 2.06,
48
+ "learning_rate": 9.812276182268236e-08,
49
+ "loss": 0.5706,
50
  "step": 175
51
  },
52
  {
53
+ "epoch": 3.03,
54
+ "learning_rate": 9.656074673794016e-08,
55
+ "loss": 0.7919,
56
  "step": 200
57
  },
58
  {
59
+ "epoch": 3.03,
60
+ "eval_loss": 0.279296875,
61
+ "eval_runtime": 701.0908,
62
+ "eval_samples_per_second": 3.164,
63
+ "eval_steps_per_second": 0.1,
64
+ "eval_wer": 22.58064516129032,
65
  "step": 200
66
  },
67
  {
68
+ "epoch": 3.05,
69
+ "learning_rate": 9.455032620941839e-08,
70
+ "loss": 0.5192,
71
  "step": 225
72
  },
73
  {
74
+ "epoch": 4.02,
75
+ "learning_rate": 9.200467769494708e-08,
76
+ "loss": 0.6054,
77
  "step": 250
78
  },
79
  {
80
+ "epoch": 4.04,
81
+ "learning_rate": 8.914398527981508e-08,
82
+ "loss": 0.5903,
83
  "step": 275
84
  },
85
  {
86
+ "epoch": 5.01,
87
+ "learning_rate": 8.590631488815944e-08,
88
+ "loss": 0.469,
89
  "step": 300
90
  },
91
  {
92
+ "epoch": 5.03,
93
+ "learning_rate": 8.23228470557425e-08,
94
+ "loss": 0.6128,
95
  "step": 325
96
  },
97
  {
98
+ "epoch": 5.06,
99
+ "learning_rate": 7.84280925367132e-08,
100
+ "loss": 0.4026,
101
  "step": 350
102
  },
103
  {
104
+ "epoch": 6.02,
105
+ "learning_rate": 7.408768370508576e-08,
106
+ "loss": 0.5923,
107
  "step": 375
108
  },
109
  {
110
+ "epoch": 6.05,
111
+ "learning_rate": 6.967704118273256e-08,
112
+ "loss": 0.4409,
113
  "step": 400
114
  },
115
  {
116
+ "epoch": 6.05,
117
+ "eval_loss": 0.26513671875,
118
+ "eval_runtime": 702.5872,
119
+ "eval_samples_per_second": 3.157,
120
+ "eval_steps_per_second": 0.1,
121
+ "eval_wer": 20.60721062618596,
122
  "step": 400
123
  },
124
  {
125
+ "epoch": 7.01,
126
+ "learning_rate": 6.507689799722478e-08,
127
+ "loss": 0.4854,
128
  "step": 425
129
  },
130
  {
131
+ "epoch": 7.04,
132
+ "learning_rate": 6.03315560421329e-08,
133
+ "loss": 0.4989,
134
  "step": 450
135
  },
136
  {
137
+ "epoch": 8.0,
138
+ "learning_rate": 5.5291509972967234e-08,
139
+ "loss": 0.3982,
140
  "step": 475
141
  },
142
  {
143
+ "epoch": 8.03,
144
+ "learning_rate": 5.0392695044435566e-08,
145
+ "loss": 0.5425,
146
  "step": 500
147
  },
148
  {
149
+ "epoch": 8.06,
150
+ "learning_rate": 4.5490098247957034e-08,
151
+ "loss": 0.3773,
152
  "step": 525
153
  },
154
  {
155
+ "epoch": 9.02,
156
+ "learning_rate": 4.0630934270713755e-08,
157
+ "loss": 0.5168,
158
  "step": 550
159
  },
160
  {
161
+ "epoch": 9.05,
162
+ "learning_rate": 3.586199951809582e-08,
163
+ "loss": 0.4259,
164
  "step": 575
165
  },
166
  {
167
+ "epoch": 10.01,
168
+ "learning_rate": 3.1047378773808274e-08,
169
+ "loss": 0.4393,
170
  "step": 600
171
  },
172
  {
173
+ "epoch": 10.01,
174
+ "eval_loss": 0.260009765625,
175
+ "eval_runtime": 701.0779,
176
+ "eval_samples_per_second": 3.164,
177
+ "eval_steps_per_second": 0.1,
178
+ "eval_wer": 20.06641366223909,
179
  "step": 600
180
  },
181
  {
182
+ "epoch": 10.04,
183
+ "learning_rate": 2.6603509286971338e-08,
184
+ "loss": 0.497,
185
  "step": 625
186
  },
187
  {
188
+ "epoch": 11.0,
189
+ "learning_rate": 2.238496079552367e-08,
190
+ "loss": 0.3649,
191
  "step": 650
192
  },
193
  {
194
+ "epoch": 11.03,
195
+ "learning_rate": 1.8432360227531113e-08,
196
+ "loss": 0.5444,
197
  "step": 675
198
  },
199
  {
200
+ "epoch": 11.06,
201
+ "learning_rate": 1.4783773287174683e-08,
202
+ "loss": 0.3612,
203
  "step": 700
204
  },
205
  {
206
+ "epoch": 12.02,
207
+ "learning_rate": 1.134947733186315e-08,
208
+ "loss": 0.4897,
209
  "step": 725
210
  },
211
  {
212
+ "epoch": 12.05,
213
+ "learning_rate": 8.426519384872732e-09,
214
+ "loss": 0.4383,
215
  "step": 750
216
  },
217
  {
218
+ "epoch": 13.01,
219
+ "learning_rate": 5.903936782582253e-09,
220
+ "loss": 0.4205,
221
  "step": 775
222
  },
223
  {
224
+ "epoch": 13.04,
225
+ "learning_rate": 3.8060233744356625e-09,
226
+ "loss": 0.4975,
227
  "step": 800
228
  },
229
  {
230
+ "epoch": 13.04,
231
+ "eval_loss": 0.25927734375,
232
+ "eval_runtime": 700.0881,
233
+ "eval_samples_per_second": 3.168,
234
+ "eval_steps_per_second": 0.1,
235
+ "eval_wer": 19.97153700189753,
236
  "step": 800
237
  },
238
  {
239
+ "epoch": 13.06,
240
+ "learning_rate": 2.152983213389559e-09,
241
+ "loss": 0.3587,
242
  "step": 825
243
  },
244
  {
245
+ "epoch": 14.03,
246
+ "learning_rate": 9.228083061983804e-10,
247
+ "loss": 0.5509,
248
  "step": 850
249
  },
250
  {
251
+ "epoch": 14.05,
252
+ "learning_rate": 2.2190176984600017e-10,
253
+ "loss": 0.3727,
254
  "step": 875
255
  },
256
  {
257
+ "epoch": 15.02,
258
+ "learning_rate": 3.8553092647219954e-13,
259
+ "loss": 0.4754,
260
  "step": 900
261
  },
262
  {
263
  "epoch": 15.02,
264
+ "step": 900,
265
+ "total_flos": 1.2318638198947106e+20,
266
+ "train_loss": 0.5692690700954861,
267
+ "train_runtime": 21274.2497,
268
+ "train_samples_per_second": 2.707,
269
+ "train_steps_per_second": 0.042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  }
271
  ],
272
+ "max_steps": 900,
273
  "num_train_epochs": 9223372036854775807,
274
+ "total_flos": 1.2318638198947106e+20,
275
  "trial_name": null,
276
  "trial_params": null
277
  }