charsiu commited on
Commit
4f069a1
1 Parent(s): 9e3de7f

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +1096 -0
trainer_state.json ADDED
@@ -0,0 +1,1096 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.646302250803858,
5
+ "global_step": 135000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 0.0003,
13
+ "loss": 2.0973,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.14,
18
+ "learning_rate": 0.0002999616623572683,
19
+ "loss": 0.5502,
20
+ "step": 2000
21
+ },
22
+ {
23
+ "epoch": 0.21,
24
+ "learning_rate": 0.00029984666902607135,
25
+ "loss": 0.2896,
26
+ "step": 3000
27
+ },
28
+ {
29
+ "epoch": 0.29,
30
+ "learning_rate": 0.0002996550787873857,
31
+ "loss": 0.2299,
32
+ "step": 4000
33
+ },
34
+ {
35
+ "epoch": 0.36,
36
+ "learning_rate": 0.0002993869895761197,
37
+ "loss": 0.2005,
38
+ "step": 5000
39
+ },
40
+ {
41
+ "epoch": 0.36,
42
+ "eval_cer": 0.2034279967643908,
43
+ "eval_loss": 0.2542494535446167,
44
+ "eval_runtime": 84.4226,
45
+ "eval_samples_per_second": 58.634,
46
+ "eval_steps_per_second": 0.118,
47
+ "eval_wer": 0.5028282828282828,
48
+ "step": 5000
49
+ },
50
+ {
51
+ "epoch": 0.43,
52
+ "learning_rate": 0.000299042538431052,
53
+ "loss": 0.1818,
54
+ "step": 6000
55
+ },
56
+ {
57
+ "epoch": 0.5,
58
+ "learning_rate": 0.00029862190142478177,
59
+ "loss": 0.168,
60
+ "step": 7000
61
+ },
62
+ {
63
+ "epoch": 0.57,
64
+ "learning_rate": 0.00029812529357372587,
65
+ "loss": 0.1575,
66
+ "step": 8000
67
+ },
68
+ {
69
+ "epoch": 0.64,
70
+ "learning_rate": 0.00029755296872820933,
71
+ "loss": 0.1488,
72
+ "step": 9000
73
+ },
74
+ {
75
+ "epoch": 0.71,
76
+ "learning_rate": 0.0002969052194427048,
77
+ "loss": 0.1409,
78
+ "step": 10000
79
+ },
80
+ {
81
+ "epoch": 0.71,
82
+ "eval_cer": 0.15823877921339718,
83
+ "eval_loss": 0.17229020595550537,
84
+ "eval_runtime": 83.5893,
85
+ "eval_samples_per_second": 59.218,
86
+ "eval_steps_per_second": 0.12,
87
+ "eval_wer": 0.41454545454545455,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 0.79,
92
+ "learning_rate": 0.0002961823768262882,
93
+ "loss": 0.1342,
94
+ "step": 11000
95
+ },
96
+ {
97
+ "epoch": 0.86,
98
+ "learning_rate": 0.0002953848103733858,
99
+ "loss": 0.128,
100
+ "step": 12000
101
+ },
102
+ {
103
+ "epoch": 0.93,
104
+ "learning_rate": 0.00029451292777490066,
105
+ "loss": 0.1221,
106
+ "step": 13000
107
+ },
108
+ {
109
+ "epoch": 1.0,
110
+ "learning_rate": 0.0002935671747098137,
111
+ "loss": 0.1175,
112
+ "step": 14000
113
+ },
114
+ {
115
+ "epoch": 1.07,
116
+ "learning_rate": 0.00029254803461736643,
117
+ "loss": 0.1114,
118
+ "step": 15000
119
+ },
120
+ {
121
+ "epoch": 1.07,
122
+ "eval_cer": 0.1419514221376883,
123
+ "eval_loss": 0.13527829945087433,
124
+ "eval_runtime": 83.6072,
125
+ "eval_samples_per_second": 59.205,
126
+ "eval_steps_per_second": 0.12,
127
+ "eval_wer": 0.37757575757575756,
128
+ "step": 15000
129
+ },
130
+ {
131
+ "epoch": 1.14,
132
+ "learning_rate": 0.00029145602844994243,
133
+ "loss": 0.1085,
134
+ "step": 16000
135
+ },
136
+ {
137
+ "epoch": 1.21,
138
+ "learning_rate": 0.0002902917144067724,
139
+ "loss": 0.1043,
140
+ "step": 17000
141
+ },
142
+ {
143
+ "epoch": 1.29,
144
+ "learning_rate": 0.00028905568764860047,
145
+ "loss": 0.1012,
146
+ "step": 18000
147
+ },
148
+ {
149
+ "epoch": 1.36,
150
+ "learning_rate": 0.00028774857999345685,
151
+ "loss": 0.099,
152
+ "step": 19000
153
+ },
154
+ {
155
+ "epoch": 1.43,
156
+ "learning_rate": 0.0002863710595936922,
157
+ "loss": 0.096,
158
+ "step": 20000
159
+ },
160
+ {
161
+ "epoch": 1.43,
162
+ "eval_cer": 0.13088913666076388,
163
+ "eval_loss": 0.11353930085897446,
164
+ "eval_runtime": 83.6733,
165
+ "eval_samples_per_second": 59.159,
166
+ "eval_steps_per_second": 0.12,
167
+ "eval_wer": 0.35434343434343435,
168
+ "step": 20000
169
+ },
170
+ {
171
+ "epoch": 1.5,
172
+ "learning_rate": 0.0002849238305944389,
173
+ "loss": 0.0934,
174
+ "step": 21000
175
+ },
176
+ {
177
+ "epoch": 1.57,
178
+ "learning_rate": 0.00028340763277367477,
179
+ "loss": 0.0916,
180
+ "step": 22000
181
+ },
182
+ {
183
+ "epoch": 1.64,
184
+ "learning_rate": 0.0002818232411640713,
185
+ "loss": 0.0891,
186
+ "step": 23000
187
+ },
188
+ {
189
+ "epoch": 1.71,
190
+ "learning_rate": 0.00028017146565682144,
191
+ "loss": 0.0882,
192
+ "step": 24000
193
+ },
194
+ {
195
+ "epoch": 1.79,
196
+ "learning_rate": 0.00027845315058764886,
197
+ "loss": 0.0852,
198
+ "step": 25000
199
+ },
200
+ {
201
+ "epoch": 1.79,
202
+ "eval_cer": 0.12312804704750661,
203
+ "eval_loss": 0.10132956504821777,
204
+ "eval_runtime": 83.2401,
205
+ "eval_samples_per_second": 59.467,
206
+ "eval_steps_per_second": 0.12,
207
+ "eval_wer": 0.3373737373737374,
208
+ "step": 25000
209
+ },
210
+ {
211
+ "epoch": 1.86,
212
+ "learning_rate": 0.00027666917430520975,
213
+ "loss": 0.0836,
214
+ "step": 26000
215
+ },
216
+ {
217
+ "epoch": 1.93,
218
+ "learning_rate": 0.00027482044872210895,
219
+ "loss": 0.0819,
220
+ "step": 27000
221
+ },
222
+ {
223
+ "epoch": 2.0,
224
+ "learning_rate": 0.0002729079188487587,
225
+ "loss": 0.0806,
226
+ "step": 28000
227
+ },
228
+ {
229
+ "epoch": 2.07,
230
+ "learning_rate": 0.00027093256231031885,
231
+ "loss": 0.0773,
232
+ "step": 29000
233
+ },
234
+ {
235
+ "epoch": 2.14,
236
+ "learning_rate": 0.00026889538884696597,
237
+ "loss": 0.0769,
238
+ "step": 30000
239
+ },
240
+ {
241
+ "epoch": 2.14,
242
+ "eval_cer": 0.1182309088126626,
243
+ "eval_loss": 0.09128542244434357,
244
+ "eval_runtime": 83.8171,
245
+ "eval_samples_per_second": 59.057,
246
+ "eval_steps_per_second": 0.119,
247
+ "eval_wer": 0.3333333333333333,
248
+ "step": 30000
249
+ },
250
+ {
251
+ "epoch": 2.22,
252
+ "learning_rate": 0.0002667974397977457,
253
+ "loss": 0.0757,
254
+ "step": 31000
255
+ },
256
+ {
257
+ "epoch": 2.29,
258
+ "learning_rate": 0.0002646397875682729,
259
+ "loss": 0.0745,
260
+ "step": 32000
261
+ },
262
+ {
263
+ "epoch": 2.36,
264
+ "learning_rate": 0.00026242353508255185,
265
+ "loss": 0.0737,
266
+ "step": 33000
267
+ },
268
+ {
269
+ "epoch": 2.43,
270
+ "learning_rate": 0.0002601498152191957,
271
+ "loss": 0.073,
272
+ "step": 34000
273
+ },
274
+ {
275
+ "epoch": 2.5,
276
+ "learning_rate": 0.0002578197902323352,
277
+ "loss": 0.072,
278
+ "step": 35000
279
+ },
280
+ {
281
+ "epoch": 2.5,
282
+ "eval_cer": 0.11523578408867319,
283
+ "eval_loss": 0.0870400071144104,
284
+ "eval_runtime": 83.456,
285
+ "eval_samples_per_second": 59.313,
286
+ "eval_steps_per_second": 0.12,
287
+ "eval_wer": 0.32666666666666666,
288
+ "step": 35000
289
+ },
290
+ {
291
+ "epoch": 2.57,
292
+ "learning_rate": 0.00025543465115751026,
293
+ "loss": 0.0707,
294
+ "step": 36000
295
+ },
296
+ {
297
+ "epoch": 2.64,
298
+ "learning_rate": 0.0002529956172028505,
299
+ "loss": 0.07,
300
+ "step": 37000
301
+ },
302
+ {
303
+ "epoch": 2.72,
304
+ "learning_rate": 0.0002505039351258541,
305
+ "loss": 0.0692,
306
+ "step": 38000
307
+ },
308
+ {
309
+ "epoch": 2.79,
310
+ "learning_rate": 0.0002479608785960846,
311
+ "loss": 0.0682,
312
+ "step": 39000
313
+ },
314
+ {
315
+ "epoch": 2.86,
316
+ "learning_rate": 0.0002453677475441111,
317
+ "loss": 0.0672,
318
+ "step": 40000
319
+ },
320
+ {
321
+ "epoch": 2.86,
322
+ "eval_cer": 0.11036050807809186,
323
+ "eval_loss": 0.08000758290290833,
324
+ "eval_runtime": 84.1634,
325
+ "eval_samples_per_second": 58.814,
326
+ "eval_steps_per_second": 0.119,
327
+ "eval_wer": 0.31292929292929295,
328
+ "step": 40000
329
+ },
330
+ {
331
+ "epoch": 2.93,
332
+ "learning_rate": 0.00024272586749702474,
333
+ "loss": 0.0666,
334
+ "step": 41000
335
+ },
336
+ {
337
+ "epoch": 3.0,
338
+ "learning_rate": 0.0002400365889008706,
339
+ "loss": 0.0662,
340
+ "step": 42000
341
+ },
342
+ {
343
+ "epoch": 3.07,
344
+ "learning_rate": 0.00023730128643034235,
345
+ "loss": 0.0631,
346
+ "step": 43000
347
+ },
348
+ {
349
+ "epoch": 3.14,
350
+ "learning_rate": 0.00023452135828609167,
351
+ "loss": 0.0635,
352
+ "step": 44000
353
+ },
354
+ {
355
+ "epoch": 3.22,
356
+ "learning_rate": 0.0002316982254800121,
357
+ "loss": 0.0625,
358
+ "step": 45000
359
+ },
360
+ {
361
+ "epoch": 3.22,
362
+ "eval_cer": 0.10987953914431255,
363
+ "eval_loss": 0.0780956819653511,
364
+ "eval_runtime": 83.8176,
365
+ "eval_samples_per_second": 59.057,
366
+ "eval_steps_per_second": 0.119,
367
+ "eval_wer": 0.30646464646464644,
368
+ "step": 45000
369
+ },
370
+ {
371
+ "epoch": 3.29,
372
+ "learning_rate": 0.00022883333110886237,
373
+ "loss": 0.0622,
374
+ "step": 46000
375
+ },
376
+ {
377
+ "epoch": 3.36,
378
+ "learning_rate": 0.00022592813961660067,
379
+ "loss": 0.0615,
380
+ "step": 47000
381
+ },
382
+ {
383
+ "epoch": 3.43,
384
+ "learning_rate": 0.00022298413604580696,
385
+ "loss": 0.0608,
386
+ "step": 48000
387
+ },
388
+ {
389
+ "epoch": 3.5,
390
+ "learning_rate": 0.00022000282527857588,
391
+ "loss": 0.0604,
392
+ "step": 49000
393
+ },
394
+ {
395
+ "epoch": 3.57,
396
+ "learning_rate": 0.0002169857312672683,
397
+ "loss": 0.0597,
398
+ "step": 50000
399
+ },
400
+ {
401
+ "epoch": 3.57,
402
+ "eval_cer": 0.108261734548873,
403
+ "eval_loss": 0.07320722192525864,
404
+ "eval_runtime": 84.043,
405
+ "eval_samples_per_second": 58.898,
406
+ "eval_steps_per_second": 0.119,
407
+ "eval_wer": 0.30484848484848487,
408
+ "step": 50000
409
+ },
410
+ {
411
+ "epoch": 3.64,
412
+ "learning_rate": 0.00021393439625551483,
413
+ "loss": 0.0598,
414
+ "step": 51000
415
+ },
416
+ {
417
+ "epoch": 3.72,
418
+ "learning_rate": 0.00021085037998986924,
419
+ "loss": 0.0588,
420
+ "step": 52000
421
+ },
422
+ {
423
+ "epoch": 3.79,
424
+ "learning_rate": 0.00020773525892251514,
425
+ "loss": 0.0587,
426
+ "step": 53000
427
+ },
428
+ {
429
+ "epoch": 3.86,
430
+ "learning_rate": 0.00020459062540543316,
431
+ "loss": 0.0582,
432
+ "step": 54000
433
+ },
434
+ {
435
+ "epoch": 3.93,
436
+ "learning_rate": 0.00020141808687644067,
437
+ "loss": 0.0572,
438
+ "step": 55000
439
+ },
440
+ {
441
+ "epoch": 3.93,
442
+ "eval_cer": 0.10843663234297457,
443
+ "eval_loss": 0.0711025819182396,
444
+ "eval_runtime": 83.6072,
445
+ "eval_samples_per_second": 59.205,
446
+ "eval_steps_per_second": 0.12,
447
+ "eval_wer": 0.30262626262626263,
448
+ "step": 55000
449
+ },
450
+ {
451
+ "epoch": 4.0,
452
+ "learning_rate": 0.00019821926503751995,
453
+ "loss": 0.0568,
454
+ "step": 56000
455
+ },
456
+ {
457
+ "epoch": 4.07,
458
+ "learning_rate": 0.00019499579502585537,
459
+ "loss": 0.0549,
460
+ "step": 57000
461
+ },
462
+ {
463
+ "epoch": 4.14,
464
+ "learning_rate": 0.00019174932457800242,
465
+ "loss": 0.0544,
466
+ "step": 58000
467
+ },
468
+ {
469
+ "epoch": 4.22,
470
+ "learning_rate": 0.0001884815131876167,
471
+ "loss": 0.0542,
472
+ "step": 59000
473
+ },
474
+ {
475
+ "epoch": 4.29,
476
+ "learning_rate": 0.00018519403125717278,
477
+ "loss": 0.0537,
478
+ "step": 60000
479
+ },
480
+ {
481
+ "epoch": 4.29,
482
+ "eval_cer": 0.10559454318882402,
483
+ "eval_loss": 0.06793416291475296,
484
+ "eval_runtime": 83.7622,
485
+ "eval_samples_per_second": 59.096,
486
+ "eval_steps_per_second": 0.119,
487
+ "eval_wer": 0.29474747474747476,
488
+ "step": 60000
489
+ },
490
+ {
491
+ "epoch": 4.36,
492
+ "learning_rate": 0.00018188855924410722,
493
+ "loss": 0.0539,
494
+ "step": 61000
495
+ },
496
+ {
497
+ "epoch": 4.43,
498
+ "learning_rate": 0.00017856678680182127,
499
+ "loss": 0.0534,
500
+ "step": 62000
501
+ },
502
+ {
503
+ "epoch": 4.5,
504
+ "learning_rate": 0.0001752304119159834,
505
+ "loss": 0.053,
506
+ "step": 63000
507
+ },
508
+ {
509
+ "epoch": 4.57,
510
+ "learning_rate": 0.00017188114003657205,
511
+ "loss": 0.0528,
512
+ "step": 64000
513
+ },
514
+ {
515
+ "epoch": 4.64,
516
+ "learning_rate": 0.00016852068320610358,
517
+ "loss": 0.052,
518
+ "step": 65000
519
+ },
520
+ {
521
+ "epoch": 4.64,
522
+ "eval_cer": 0.10421722306027416,
523
+ "eval_loss": 0.06535057723522186,
524
+ "eval_runtime": 83.4596,
525
+ "eval_samples_per_second": 59.31,
526
+ "eval_steps_per_second": 0.12,
527
+ "eval_wer": 0.2911111111111111,
528
+ "step": 65000
529
+ },
530
+ {
531
+ "epoch": 4.72,
532
+ "learning_rate": 0.00016515075918448972,
533
+ "loss": 0.0519,
534
+ "step": 66000
535
+ },
536
+ {
537
+ "epoch": 4.79,
538
+ "learning_rate": 0.00016177309057097285,
539
+ "loss": 0.0511,
540
+ "step": 67000
541
+ },
542
+ {
543
+ "epoch": 4.86,
544
+ "learning_rate": 0.00015838940392358722,
545
+ "loss": 0.0512,
546
+ "step": 68000
547
+ },
548
+ {
549
+ "epoch": 4.93,
550
+ "learning_rate": 0.00015500142887659688,
551
+ "loss": 0.0506,
552
+ "step": 69000
553
+ },
554
+ {
555
+ "epoch": 5.0,
556
+ "learning_rate": 0.00015161089725636095,
557
+ "loss": 0.0506,
558
+ "step": 70000
559
+ },
560
+ {
561
+ "epoch": 5.0,
562
+ "eval_cer": 0.10323342296845281,
563
+ "eval_loss": 0.06469610333442688,
564
+ "eval_runtime": 83.521,
565
+ "eval_samples_per_second": 59.267,
566
+ "eval_steps_per_second": 0.12,
567
+ "eval_wer": 0.2909090909090909,
568
+ "step": 70000
569
+ },
570
+ {
571
+ "epoch": 5.07,
572
+ "learning_rate": 0.00014821954219607845,
573
+ "loss": 0.0483,
574
+ "step": 71000
575
+ },
576
+ {
577
+ "epoch": 5.14,
578
+ "learning_rate": 0.0001448290972498651,
579
+ "loss": 0.0483,
580
+ "step": 72000
581
+ },
582
+ {
583
+ "epoch": 5.22,
584
+ "learning_rate": 0.00014144129550661485,
585
+ "loss": 0.048,
586
+ "step": 73000
587
+ },
588
+ {
589
+ "epoch": 5.29,
590
+ "learning_rate": 0.0001380578687040995,
591
+ "loss": 0.0478,
592
+ "step": 74000
593
+ },
594
+ {
595
+ "epoch": 5.36,
596
+ "learning_rate": 0.00013468054634375843,
597
+ "loss": 0.0481,
598
+ "step": 75000
599
+ },
600
+ {
601
+ "epoch": 5.36,
602
+ "eval_cer": 0.10148444502743709,
603
+ "eval_loss": 0.06252569705247879,
604
+ "eval_runtime": 83.6066,
605
+ "eval_samples_per_second": 59.206,
606
+ "eval_steps_per_second": 0.12,
607
+ "eval_wer": 0.2886868686868687,
608
+ "step": 75000
609
+ },
610
+ {
611
+ "epoch": 5.43,
612
+ "learning_rate": 0.00013131105480663235,
613
+ "loss": 0.0476,
614
+ "step": 76000
615
+ },
616
+ {
617
+ "epoch": 5.5,
618
+ "learning_rate": 0.000127951116470891,
619
+ "loss": 0.0472,
620
+ "step": 77000
621
+ },
622
+ {
623
+ "epoch": 5.57,
624
+ "learning_rate": 0.00012460244883140783,
625
+ "loss": 0.0469,
626
+ "step": 78000
627
+ },
628
+ {
629
+ "epoch": 5.64,
630
+ "learning_rate": 0.0001212667636218309,
631
+ "loss": 0.0465,
632
+ "step": 79000
633
+ },
634
+ {
635
+ "epoch": 5.72,
636
+ "learning_rate": 0.00011794576593959775,
637
+ "loss": 0.0462,
638
+ "step": 80000
639
+ },
640
+ {
641
+ "epoch": 5.72,
642
+ "eval_cer": 0.10179051616711485,
643
+ "eval_loss": 0.06165655702352524,
644
+ "eval_runtime": 83.7971,
645
+ "eval_samples_per_second": 59.071,
646
+ "eval_steps_per_second": 0.119,
647
+ "eval_wer": 0.28383838383838383,
648
+ "step": 80000
649
+ },
650
+ {
651
+ "epoch": 5.79,
652
+ "learning_rate": 0.00011464115337434394,
653
+ "loss": 0.0463,
654
+ "step": 81000
655
+ },
656
+ {
657
+ "epoch": 5.86,
658
+ "learning_rate": 0.00011135461514014796,
659
+ "loss": 0.0456,
660
+ "step": 82000
661
+ },
662
+ {
663
+ "epoch": 5.93,
664
+ "learning_rate": 0.00010808783121205837,
665
+ "loss": 0.0456,
666
+ "step": 83000
667
+ },
668
+ {
669
+ "epoch": 6.0,
670
+ "learning_rate": 0.00010484247146734352,
671
+ "loss": 0.0454,
672
+ "step": 84000
673
+ },
674
+ {
675
+ "epoch": 6.07,
676
+ "learning_rate": 0.00010162019483190237,
677
+ "loss": 0.0433,
678
+ "step": 85000
679
+ },
680
+ {
681
+ "epoch": 6.07,
682
+ "eval_cer": 0.10122209833628473,
683
+ "eval_loss": 0.05922077223658562,
684
+ "eval_runtime": 83.6915,
685
+ "eval_samples_per_second": 59.146,
686
+ "eval_steps_per_second": 0.119,
687
+ "eval_wer": 0.2824242424242424,
688
+ "step": 85000
689
+ },
690
+ {
691
+ "epoch": 6.15,
692
+ "learning_rate": 9.842264843227404e-05,
693
+ "loss": 0.0439,
694
+ "step": 86000
695
+ },
696
+ {
697
+ "epoch": 6.22,
698
+ "learning_rate": 9.52514667536784e-05,
699
+ "loss": 0.043,
700
+ "step": 87000
701
+ },
702
+ {
703
+ "epoch": 6.29,
704
+ "learning_rate": 9.210827080451842e-05,
705
+ "loss": 0.0431,
706
+ "step": 88000
707
+ },
708
+ {
709
+ "epoch": 6.36,
710
+ "learning_rate": 8.899466728777203e-05,
711
+ "loss": 0.0434,
712
+ "step": 89000
713
+ },
714
+ {
715
+ "epoch": 6.43,
716
+ "learning_rate": 8.591224777969557e-05,
717
+ "loss": 0.0428,
718
+ "step": 90000
719
+ },
720
+ {
721
+ "epoch": 6.43,
722
+ "eval_cer": 0.0995168448437944,
723
+ "eval_loss": 0.05855976790189743,
724
+ "eval_runtime": 83.8689,
725
+ "eval_samples_per_second": 59.021,
726
+ "eval_steps_per_second": 0.119,
727
+ "eval_wer": 0.2791919191919192,
728
+ "step": 90000
729
+ },
730
+ {
731
+ "epoch": 6.5,
732
+ "learning_rate": 8.286258791626041e-05,
733
+ "loss": 0.0428,
734
+ "step": 91000
735
+ },
736
+ {
737
+ "epoch": 6.57,
738
+ "learning_rate": 7.984724658773716e-05,
739
+ "loss": 0.0427,
740
+ "step": 92000
741
+ },
742
+ {
743
+ "epoch": 6.65,
744
+ "learning_rate": 7.686776514184009e-05,
745
+ "loss": 0.0424,
746
+ "step": 93000
747
+ },
748
+ {
749
+ "epoch": 6.72,
750
+ "learning_rate": 7.392566659583846e-05,
751
+ "loss": 0.0422,
752
+ "step": 94000
753
+ },
754
+ {
755
+ "epoch": 6.79,
756
+ "learning_rate": 7.102245485803813e-05,
757
+ "loss": 0.0421,
758
+ "step": 95000
759
+ },
760
+ {
761
+ "epoch": 6.79,
762
+ "eval_cer": 0.10063181828119193,
763
+ "eval_loss": 0.05791032314300537,
764
+ "eval_runtime": 83.4355,
765
+ "eval_samples_per_second": 59.327,
766
+ "eval_steps_per_second": 0.12,
767
+ "eval_wer": 0.2822222222222222,
768
+ "step": 95000
769
+ },
770
+ {
771
+ "epoch": 6.86,
772
+ "learning_rate": 6.81596139590308e-05,
773
+ "loss": 0.042,
774
+ "step": 96000
775
+ },
776
+ {
777
+ "epoch": 6.93,
778
+ "learning_rate": 6.533860729310434e-05,
779
+ "loss": 0.0416,
780
+ "step": 97000
781
+ },
782
+ {
783
+ "epoch": 7.0,
784
+ "learning_rate": 6.256087687020127e-05,
785
+ "loss": 0.0416,
786
+ "step": 98000
787
+ },
788
+ {
789
+ "epoch": 7.07,
790
+ "learning_rate": 5.98278425788092e-05,
791
+ "loss": 0.0402,
792
+ "step": 99000
793
+ },
794
+ {
795
+ "epoch": 7.15,
796
+ "learning_rate": 5.71409014601578e-05,
797
+ "loss": 0.0399,
798
+ "step": 100000
799
+ },
800
+ {
801
+ "epoch": 7.15,
802
+ "eval_cer": 0.09980105375920946,
803
+ "eval_loss": 0.05731820687651634,
804
+ "eval_runtime": 83.6985,
805
+ "eval_samples_per_second": 59.141,
806
+ "eval_steps_per_second": 0.119,
807
+ "eval_wer": 0.2775757575757576,
808
+ "step": 100000
809
+ },
810
+ {
811
+ "epoch": 7.22,
812
+ "learning_rate": 5.4501426994095876e-05,
813
+ "loss": 0.0397,
814
+ "step": 101000
815
+ },
816
+ {
817
+ "epoch": 7.29,
818
+ "learning_rate": 5.191076839701103e-05,
819
+ "loss": 0.0397,
820
+ "step": 102000
821
+ },
822
+ {
823
+ "epoch": 7.36,
824
+ "learning_rate": 4.9370249932153075e-05,
825
+ "loss": 0.0398,
826
+ "step": 103000
827
+ },
828
+ {
829
+ "epoch": 7.43,
830
+ "learning_rate": 4.6881170232712164e-05,
831
+ "loss": 0.0397,
832
+ "step": 104000
833
+ },
834
+ {
835
+ "epoch": 7.5,
836
+ "learning_rate": 4.444480163799822e-05,
837
+ "loss": 0.0396,
838
+ "step": 105000
839
+ },
840
+ {
841
+ "epoch": 7.5,
842
+ "eval_cer": 0.0988828403401762,
843
+ "eval_loss": 0.05615651234984398,
844
+ "eval_runtime": 84.0673,
845
+ "eval_samples_per_second": 58.881,
846
+ "eval_steps_per_second": 0.119,
847
+ "eval_wer": 0.27656565656565657,
848
+ "step": 105000
849
+ },
850
+ {
851
+ "epoch": 7.57,
852
+ "learning_rate": 4.2062389543061265e-05,
853
+ "loss": 0.0395,
854
+ "step": 106000
855
+ },
856
+ {
857
+ "epoch": 7.65,
858
+ "learning_rate": 3.9735151762084384e-05,
859
+ "loss": 0.0394,
860
+ "step": 107000
861
+ },
862
+ {
863
+ "epoch": 7.72,
864
+ "learning_rate": 3.746427790587557e-05,
865
+ "loss": 0.0389,
866
+ "step": 108000
867
+ },
868
+ {
869
+ "epoch": 7.79,
870
+ "learning_rate": 3.525092877377602e-05,
871
+ "loss": 0.0392,
872
+ "step": 109000
873
+ },
874
+ {
875
+ "epoch": 7.86,
876
+ "learning_rate": 3.309623576029597e-05,
877
+ "loss": 0.0388,
878
+ "step": 110000
879
+ },
880
+ {
881
+ "epoch": 7.86,
882
+ "eval_cer": 0.09962615596510789,
883
+ "eval_loss": 0.05532450973987579,
884
+ "eval_runtime": 83.843,
885
+ "eval_samples_per_second": 59.039,
886
+ "eval_steps_per_second": 0.119,
887
+ "eval_wer": 0.281010101010101,
888
+ "step": 110000
889
+ },
890
+ {
891
+ "epoch": 7.93,
892
+ "learning_rate": 3.1001300276781274e-05,
893
+ "loss": 0.039,
894
+ "step": 111000
895
+ },
896
+ {
897
+ "epoch": 8.0,
898
+ "learning_rate": 2.8967193188406938e-05,
899
+ "loss": 0.0386,
900
+ "step": 112000
901
+ },
902
+ {
903
+ "epoch": 8.07,
904
+ "learning_rate": 2.699495426678389e-05,
905
+ "loss": 0.0379,
906
+ "step": 113000
907
+ },
908
+ {
909
+ "epoch": 8.15,
910
+ "learning_rate": 2.5085591658461056e-05,
911
+ "loss": 0.0376,
912
+ "step": 114000
913
+ },
914
+ {
915
+ "epoch": 8.22,
916
+ "learning_rate": 2.3240081369591984e-05,
917
+ "loss": 0.0375,
918
+ "step": 115000
919
+ },
920
+ {
921
+ "epoch": 8.22,
922
+ "eval_cer": 0.09870794254607464,
923
+ "eval_loss": 0.055045340210199356,
924
+ "eval_runtime": 83.5002,
925
+ "eval_samples_per_second": 59.281,
926
+ "eval_steps_per_second": 0.12,
927
+ "eval_wer": 0.27636363636363637,
928
+ "step": 115000
929
+ },
930
+ {
931
+ "epoch": 8.29,
932
+ "learning_rate": 2.1459366767031522e-05,
933
+ "loss": 0.0377,
934
+ "step": 116000
935
+ },
936
+ {
937
+ "epoch": 8.36,
938
+ "learning_rate": 1.9744358096116225e-05,
939
+ "loss": 0.0375,
940
+ "step": 117000
941
+ },
942
+ {
943
+ "epoch": 8.43,
944
+ "learning_rate": 1.8095932015375496e-05,
945
+ "loss": 0.0374,
946
+ "step": 118000
947
+ },
948
+ {
949
+ "epoch": 8.5,
950
+ "learning_rate": 1.65149311484114e-05,
951
+ "loss": 0.0373,
952
+ "step": 119000
953
+ },
954
+ {
955
+ "epoch": 8.57,
956
+ "learning_rate": 1.500216365317587e-05,
957
+ "loss": 0.0372,
958
+ "step": 120000
959
+ },
960
+ {
961
+ "epoch": 8.57,
962
+ "eval_cer": 0.09803021359393105,
963
+ "eval_loss": 0.05470244958996773,
964
+ "eval_runtime": 83.9735,
965
+ "eval_samples_per_second": 58.947,
966
+ "eval_steps_per_second": 0.119,
967
+ "eval_wer": 0.27454545454545454,
968
+ "step": 120000
969
+ },
970
+ {
971
+ "epoch": 8.65,
972
+ "learning_rate": 1.355840280886582e-05,
973
+ "loss": 0.0374,
974
+ "step": 121000
975
+ },
976
+ {
977
+ "epoch": 8.72,
978
+ "learning_rate": 1.2184386620647097e-05,
979
+ "loss": 0.0375,
980
+ "step": 122000
981
+ },
982
+ {
983
+ "epoch": 8.79,
984
+ "learning_rate": 1.0880817442409478e-05,
985
+ "loss": 0.0374,
986
+ "step": 123000
987
+ },
988
+ {
989
+ "epoch": 8.86,
990
+ "learning_rate": 9.648361617745371e-06,
991
+ "loss": 0.0372,
992
+ "step": 124000
993
+ },
994
+ {
995
+ "epoch": 8.93,
996
+ "learning_rate": 8.487649139335962e-06,
997
+ "loss": 0.0373,
998
+ "step": 125000
999
+ },
1000
+ {
1001
+ "epoch": 8.93,
1002
+ "eval_cer": 0.09853304475197307,
1003
+ "eval_loss": 0.054629139602184296,
1004
+ "eval_runtime": 83.9435,
1005
+ "eval_samples_per_second": 58.968,
1006
+ "eval_steps_per_second": 0.119,
1007
+ "eval_wer": 0.27575757575757576,
1008
+ "step": 125000
1009
+ },
1010
+ {
1011
+ "epoch": 9.0,
1012
+ "learning_rate": 7.399273326918692e-06,
1013
+ "loss": 0.0372,
1014
+ "step": 126000
1015
+ },
1016
+ {
1017
+ "epoch": 9.07,
1018
+ "learning_rate": 6.383790524001009e-06,
1019
+ "loss": 0.0367,
1020
+ "step": 127000
1021
+ },
1022
+ {
1023
+ "epoch": 9.15,
1024
+ "learning_rate": 5.441719813474849e-06,
1025
+ "loss": 0.0367,
1026
+ "step": 128000
1027
+ },
1028
+ {
1029
+ "epoch": 9.22,
1030
+ "learning_rate": 4.57354275227797e-06,
1031
+ "loss": 0.0366,
1032
+ "step": 129000
1033
+ },
1034
+ {
1035
+ "epoch": 9.29,
1036
+ "learning_rate": 3.7797031252369767e-06,
1037
+ "loss": 0.0363,
1038
+ "step": 130000
1039
+ },
1040
+ {
1041
+ "epoch": 9.29,
1042
+ "eval_cer": 0.09787717802409217,
1043
+ "eval_loss": 0.05455449968576431,
1044
+ "eval_runtime": 83.6734,
1045
+ "eval_samples_per_second": 59.159,
1046
+ "eval_steps_per_second": 0.12,
1047
+ "eval_wer": 0.2739393939393939,
1048
+ "step": 130000
1049
+ },
1050
+ {
1051
+ "epoch": 9.36,
1052
+ "learning_rate": 3.0606067182186776e-06,
1053
+ "loss": 0.0362,
1054
+ "step": 131000
1055
+ },
1056
+ {
1057
+ "epoch": 9.43,
1058
+ "learning_rate": 2.4166211107049584e-06,
1059
+ "loss": 0.0365,
1060
+ "step": 132000
1061
+ },
1062
+ {
1063
+ "epoch": 9.5,
1064
+ "learning_rate": 1.8480754878977489e-06,
1065
+ "loss": 0.0365,
1066
+ "step": 133000
1067
+ },
1068
+ {
1069
+ "epoch": 9.57,
1070
+ "learning_rate": 1.3552604724498928e-06,
1071
+ "loss": 0.0366,
1072
+ "step": 134000
1073
+ },
1074
+ {
1075
+ "epoch": 9.65,
1076
+ "learning_rate": 9.384279759080127e-07,
1077
+ "loss": 0.0366,
1078
+ "step": 135000
1079
+ },
1080
+ {
1081
+ "epoch": 9.65,
1082
+ "eval_cer": 0.09768041800572791,
1083
+ "eval_loss": 0.05442119389772415,
1084
+ "eval_runtime": 83.7582,
1085
+ "eval_samples_per_second": 59.099,
1086
+ "eval_steps_per_second": 0.119,
1087
+ "eval_wer": 0.2719191919191919,
1088
+ "step": 135000
1089
+ }
1090
+ ],
1091
+ "max_steps": 139950,
1092
+ "num_train_epochs": 10,
1093
+ "total_flos": 3.424533742064886e+17,
1094
+ "trial_name": null,
1095
+ "trial_params": null
1096
+ }