charsiu commited on
Commit
94b9bb0
1 Parent(s): aeef0df

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +1096 -0
trainer_state.json ADDED
@@ -0,0 +1,1096 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2115170197415885,
3
+ "best_model_checkpoint": "/scratch/lingjzhu_root/lingjzhu1/lingjzhu/g2p/byt5_12_layers_baseline/checkpoint-5000",
4
+ "epoch": 9.646302250803858,
5
+ "global_step": 135000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 0.0003,
13
+ "loss": 2.113,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.14,
18
+ "learning_rate": 0.0002999616623572683,
19
+ "loss": 0.5915,
20
+ "step": 2000
21
+ },
22
+ {
23
+ "epoch": 0.21,
24
+ "learning_rate": 0.00029984666902607135,
25
+ "loss": 0.3074,
26
+ "step": 3000
27
+ },
28
+ {
29
+ "epoch": 0.29,
30
+ "learning_rate": 0.0002996550787873857,
31
+ "loss": 0.2407,
32
+ "step": 4000
33
+ },
34
+ {
35
+ "epoch": 0.36,
36
+ "learning_rate": 0.0002993869895761197,
37
+ "loss": 0.2091,
38
+ "step": 5000
39
+ },
40
+ {
41
+ "epoch": 0.36,
42
+ "eval_cer": 0.2115170197415885,
43
+ "eval_loss": 0.2637424170970917,
44
+ "eval_runtime": 81.6578,
45
+ "eval_samples_per_second": 60.619,
46
+ "eval_steps_per_second": 0.122,
47
+ "eval_wer": 0.5187878787878788,
48
+ "step": 5000
49
+ },
50
+ {
51
+ "epoch": 0.43,
52
+ "learning_rate": 0.000299042538431052,
53
+ "loss": 0.1885,
54
+ "step": 6000
55
+ },
56
+ {
57
+ "epoch": 0.5,
58
+ "learning_rate": 0.00029862190142478177,
59
+ "loss": 0.1748,
60
+ "step": 7000
61
+ },
62
+ {
63
+ "epoch": 0.57,
64
+ "learning_rate": 0.00029812529357372587,
65
+ "loss": 0.1637,
66
+ "step": 8000
67
+ },
68
+ {
69
+ "epoch": 0.64,
70
+ "learning_rate": 0.00029755296872820933,
71
+ "loss": 0.1547,
72
+ "step": 9000
73
+ },
74
+ {
75
+ "epoch": 0.71,
76
+ "learning_rate": 0.0002969052194427048,
77
+ "loss": 0.1471,
78
+ "step": 10000
79
+ },
80
+ {
81
+ "epoch": 0.71,
82
+ "eval_cer": 0.16525655320172275,
83
+ "eval_loss": 0.1778797060251236,
84
+ "eval_runtime": 81.5727,
85
+ "eval_samples_per_second": 60.682,
86
+ "eval_steps_per_second": 0.123,
87
+ "eval_wer": 0.4212121212121212,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 0.79,
92
+ "learning_rate": 0.0002961823768262882,
93
+ "loss": 0.1408,
94
+ "step": 11000
95
+ },
96
+ {
97
+ "epoch": 0.86,
98
+ "learning_rate": 0.0002953848103733858,
99
+ "loss": 0.1341,
100
+ "step": 12000
101
+ },
102
+ {
103
+ "epoch": 0.93,
104
+ "learning_rate": 0.00029451292777490066,
105
+ "loss": 0.1294,
106
+ "step": 13000
107
+ },
108
+ {
109
+ "epoch": 1.0,
110
+ "learning_rate": 0.0002935671747098137,
111
+ "loss": 0.1233,
112
+ "step": 14000
113
+ },
114
+ {
115
+ "epoch": 1.07,
116
+ "learning_rate": 0.00029254803461736643,
117
+ "loss": 0.1179,
118
+ "step": 15000
119
+ },
120
+ {
121
+ "epoch": 1.07,
122
+ "eval_cer": 0.1474607026518878,
123
+ "eval_loss": 0.14176948368549347,
124
+ "eval_runtime": 81.7933,
125
+ "eval_samples_per_second": 60.518,
126
+ "eval_steps_per_second": 0.122,
127
+ "eval_wer": 0.3838383838383838,
128
+ "step": 15000
129
+ },
130
+ {
131
+ "epoch": 1.14,
132
+ "learning_rate": 0.00029145602844994243,
133
+ "loss": 0.1143,
134
+ "step": 16000
135
+ },
136
+ {
137
+ "epoch": 1.21,
138
+ "learning_rate": 0.0002902917144067724,
139
+ "loss": 0.1107,
140
+ "step": 17000
141
+ },
142
+ {
143
+ "epoch": 1.29,
144
+ "learning_rate": 0.00028905568764860047,
145
+ "loss": 0.107,
146
+ "step": 18000
147
+ },
148
+ {
149
+ "epoch": 1.36,
150
+ "learning_rate": 0.00028774857999345685,
151
+ "loss": 0.1051,
152
+ "step": 19000
153
+ },
154
+ {
155
+ "epoch": 1.43,
156
+ "learning_rate": 0.0002863710595936922,
157
+ "loss": 0.1016,
158
+ "step": 20000
159
+ },
160
+ {
161
+ "epoch": 1.43,
162
+ "eval_cer": 0.1344308169913207,
163
+ "eval_loss": 0.1187734305858612,
164
+ "eval_runtime": 81.9499,
165
+ "eval_samples_per_second": 60.403,
166
+ "eval_steps_per_second": 0.122,
167
+ "eval_wer": 0.36404040404040405,
168
+ "step": 20000
169
+ },
170
+ {
171
+ "epoch": 1.5,
172
+ "learning_rate": 0.0002849238305944389,
173
+ "loss": 0.0988,
174
+ "step": 21000
175
+ },
176
+ {
177
+ "epoch": 1.57,
178
+ "learning_rate": 0.00028340763277367477,
179
+ "loss": 0.0972,
180
+ "step": 22000
181
+ },
182
+ {
183
+ "epoch": 1.64,
184
+ "learning_rate": 0.0002818232411640713,
185
+ "loss": 0.0953,
186
+ "step": 23000
187
+ },
188
+ {
189
+ "epoch": 1.71,
190
+ "learning_rate": 0.00028017146565682144,
191
+ "loss": 0.0933,
192
+ "step": 24000
193
+ },
194
+ {
195
+ "epoch": 1.79,
196
+ "learning_rate": 0.00027845315058764886,
197
+ "loss": 0.0904,
198
+ "step": 25000
199
+ },
200
+ {
201
+ "epoch": 1.79,
202
+ "eval_cer": 0.12669158960232615,
203
+ "eval_loss": 0.10707778483629227,
204
+ "eval_runtime": 81.9487,
205
+ "eval_samples_per_second": 60.404,
206
+ "eval_steps_per_second": 0.122,
207
+ "eval_wer": 0.3466666666666667,
208
+ "step": 25000
209
+ },
210
+ {
211
+ "epoch": 1.86,
212
+ "learning_rate": 0.00027666917430520975,
213
+ "loss": 0.089,
214
+ "step": 26000
215
+ },
216
+ {
217
+ "epoch": 1.93,
218
+ "learning_rate": 0.00027482044872210895,
219
+ "loss": 0.0869,
220
+ "step": 27000
221
+ },
222
+ {
223
+ "epoch": 2.0,
224
+ "learning_rate": 0.0002729079188487587,
225
+ "loss": 0.0856,
226
+ "step": 28000
227
+ },
228
+ {
229
+ "epoch": 2.07,
230
+ "learning_rate": 0.00027093256231031885,
231
+ "loss": 0.0827,
232
+ "step": 29000
233
+ },
234
+ {
235
+ "epoch": 2.14,
236
+ "learning_rate": 0.00026889538884696597,
237
+ "loss": 0.082,
238
+ "step": 30000
239
+ },
240
+ {
241
+ "epoch": 2.14,
242
+ "eval_cer": 0.1201110600992545,
243
+ "eval_loss": 0.09880149364471436,
244
+ "eval_runtime": 81.9937,
245
+ "eval_samples_per_second": 60.371,
246
+ "eval_steps_per_second": 0.122,
247
+ "eval_wer": 0.3375757575757576,
248
+ "step": 30000
249
+ },
250
+ {
251
+ "epoch": 2.22,
252
+ "learning_rate": 0.0002667974397977457,
253
+ "loss": 0.0803,
254
+ "step": 31000
255
+ },
256
+ {
257
+ "epoch": 2.29,
258
+ "learning_rate": 0.0002646397875682729,
259
+ "loss": 0.0794,
260
+ "step": 32000
261
+ },
262
+ {
263
+ "epoch": 2.36,
264
+ "learning_rate": 0.00026242353508255185,
265
+ "loss": 0.0783,
266
+ "step": 33000
267
+ },
268
+ {
269
+ "epoch": 2.43,
270
+ "learning_rate": 0.0002601498152191957,
271
+ "loss": 0.078,
272
+ "step": 34000
273
+ },
274
+ {
275
+ "epoch": 2.5,
276
+ "learning_rate": 0.0002578197902323352,
277
+ "loss": 0.0765,
278
+ "step": 35000
279
+ },
280
+ {
281
+ "epoch": 2.5,
282
+ "eval_cer": 0.11654751754443497,
283
+ "eval_loss": 0.09110759943723679,
284
+ "eval_runtime": 82.1822,
285
+ "eval_samples_per_second": 60.232,
286
+ "eval_steps_per_second": 0.122,
287
+ "eval_wer": 0.32545454545454544,
288
+ "step": 35000
289
+ },
290
+ {
291
+ "epoch": 2.57,
292
+ "learning_rate": 0.00025543465115751026,
293
+ "loss": 0.0749,
294
+ "step": 36000
295
+ },
296
+ {
297
+ "epoch": 2.64,
298
+ "learning_rate": 0.0002529956172028505,
299
+ "loss": 0.0743,
300
+ "step": 37000
301
+ },
302
+ {
303
+ "epoch": 2.72,
304
+ "learning_rate": 0.0002505039351258541,
305
+ "loss": 0.0732,
306
+ "step": 38000
307
+ },
308
+ {
309
+ "epoch": 2.79,
310
+ "learning_rate": 0.0002479608785960846,
311
+ "loss": 0.0722,
312
+ "step": 39000
313
+ },
314
+ {
315
+ "epoch": 2.86,
316
+ "learning_rate": 0.0002453677475441111,
317
+ "loss": 0.0716,
318
+ "step": 40000
319
+ },
320
+ {
321
+ "epoch": 2.86,
322
+ "eval_cer": 0.11418639732406376,
323
+ "eval_loss": 0.0829671323299408,
324
+ "eval_runtime": 81.5052,
325
+ "eval_samples_per_second": 60.732,
326
+ "eval_steps_per_second": 0.123,
327
+ "eval_wer": 0.3208080808080808,
328
+ "step": 40000
329
+ },
330
+ {
331
+ "epoch": 2.93,
332
+ "learning_rate": 0.00024272586749702474,
333
+ "loss": 0.0704,
334
+ "step": 41000
335
+ },
336
+ {
337
+ "epoch": 3.0,
338
+ "learning_rate": 0.0002400365889008706,
339
+ "loss": 0.0702,
340
+ "step": 42000
341
+ },
342
+ {
343
+ "epoch": 3.07,
344
+ "learning_rate": 0.00023730128643034235,
345
+ "loss": 0.0673,
346
+ "step": 43000
347
+ },
348
+ {
349
+ "epoch": 3.14,
350
+ "learning_rate": 0.00023452135828609167,
351
+ "loss": 0.0676,
352
+ "step": 44000
353
+ },
354
+ {
355
+ "epoch": 3.22,
356
+ "learning_rate": 0.0002316982254800121,
357
+ "loss": 0.0671,
358
+ "step": 45000
359
+ },
360
+ {
361
+ "epoch": 3.22,
362
+ "eval_cer": 0.11213134824337027,
363
+ "eval_loss": 0.08274847269058228,
364
+ "eval_runtime": 81.7806,
365
+ "eval_samples_per_second": 60.528,
366
+ "eval_steps_per_second": 0.122,
367
+ "eval_wer": 0.317979797979798,
368
+ "step": 45000
369
+ },
370
+ {
371
+ "epoch": 3.29,
372
+ "learning_rate": 0.00022883333110886237,
373
+ "loss": 0.066,
374
+ "step": 46000
375
+ },
376
+ {
377
+ "epoch": 3.36,
378
+ "learning_rate": 0.00022592813961660067,
379
+ "loss": 0.0656,
380
+ "step": 47000
381
+ },
382
+ {
383
+ "epoch": 3.43,
384
+ "learning_rate": 0.00022298413604580696,
385
+ "loss": 0.0646,
386
+ "step": 48000
387
+ },
388
+ {
389
+ "epoch": 3.5,
390
+ "learning_rate": 0.00022000282527857588,
391
+ "loss": 0.0643,
392
+ "step": 49000
393
+ },
394
+ {
395
+ "epoch": 3.57,
396
+ "learning_rate": 0.0002169857312672683,
397
+ "loss": 0.0638,
398
+ "step": 50000
399
+ },
400
+ {
401
+ "epoch": 3.57,
402
+ "eval_cer": 0.11206576157058219,
403
+ "eval_loss": 0.07710296660661697,
404
+ "eval_runtime": 81.5659,
405
+ "eval_samples_per_second": 60.687,
406
+ "eval_steps_per_second": 0.123,
407
+ "eval_wer": 0.3113131313131313,
408
+ "step": 50000
409
+ },
410
+ {
411
+ "epoch": 3.64,
412
+ "learning_rate": 0.00021393439625551483,
413
+ "loss": 0.0633,
414
+ "step": 51000
415
+ },
416
+ {
417
+ "epoch": 3.72,
418
+ "learning_rate": 0.00021085037998986924,
419
+ "loss": 0.0627,
420
+ "step": 52000
421
+ },
422
+ {
423
+ "epoch": 3.79,
424
+ "learning_rate": 0.00020773525892251514,
425
+ "loss": 0.0624,
426
+ "step": 53000
427
+ },
428
+ {
429
+ "epoch": 3.86,
430
+ "learning_rate": 0.00020459062540543316,
431
+ "loss": 0.0618,
432
+ "step": 54000
433
+ },
434
+ {
435
+ "epoch": 3.93,
436
+ "learning_rate": 0.00020141808687644067,
437
+ "loss": 0.0613,
438
+ "step": 55000
439
+ },
440
+ {
441
+ "epoch": 3.93,
442
+ "eval_cer": 0.10915808574364355,
443
+ "eval_loss": 0.07446856051683426,
444
+ "eval_runtime": 81.6128,
445
+ "eval_samples_per_second": 60.652,
446
+ "eval_steps_per_second": 0.123,
447
+ "eval_wer": 0.30626262626262624,
448
+ "step": 55000
449
+ },
450
+ {
451
+ "epoch": 4.0,
452
+ "learning_rate": 0.00019821926503751995,
453
+ "loss": 0.0604,
454
+ "step": 56000
455
+ },
456
+ {
457
+ "epoch": 4.07,
458
+ "learning_rate": 0.00019499579502585537,
459
+ "loss": 0.059,
460
+ "step": 57000
461
+ },
462
+ {
463
+ "epoch": 4.14,
464
+ "learning_rate": 0.00019174932457800242,
465
+ "loss": 0.0584,
466
+ "step": 58000
467
+ },
468
+ {
469
+ "epoch": 4.22,
470
+ "learning_rate": 0.0001884815131876167,
471
+ "loss": 0.0581,
472
+ "step": 59000
473
+ },
474
+ {
475
+ "epoch": 4.29,
476
+ "learning_rate": 0.00018519403125717278,
477
+ "loss": 0.0574,
478
+ "step": 60000
479
+ },
480
+ {
481
+ "epoch": 4.29,
482
+ "eval_cer": 0.1064034454865438,
483
+ "eval_loss": 0.07090297341346741,
484
+ "eval_runtime": 81.8166,
485
+ "eval_samples_per_second": 60.501,
486
+ "eval_steps_per_second": 0.122,
487
+ "eval_wer": 0.2991919191919192,
488
+ "step": 60000
489
+ },
490
+ {
491
+ "epoch": 4.36,
492
+ "learning_rate": 0.00018188855924410722,
493
+ "loss": 0.0576,
494
+ "step": 61000
495
+ },
496
+ {
497
+ "epoch": 4.43,
498
+ "learning_rate": 0.00017856678680182127,
499
+ "loss": 0.0569,
500
+ "step": 62000
501
+ },
502
+ {
503
+ "epoch": 4.5,
504
+ "learning_rate": 0.0001752304119159834,
505
+ "loss": 0.0568,
506
+ "step": 63000
507
+ },
508
+ {
509
+ "epoch": 4.57,
510
+ "learning_rate": 0.00017188114003657205,
511
+ "loss": 0.0566,
512
+ "step": 64000
513
+ },
514
+ {
515
+ "epoch": 4.64,
516
+ "learning_rate": 0.00016852068320610358,
517
+ "loss": 0.0561,
518
+ "step": 65000
519
+ },
520
+ {
521
+ "epoch": 4.64,
522
+ "eval_cer": 0.10653461883211998,
523
+ "eval_loss": 0.06934704631567001,
524
+ "eval_runtime": 81.8952,
525
+ "eval_samples_per_second": 60.443,
526
+ "eval_steps_per_second": 0.122,
527
+ "eval_wer": 0.298989898989899,
528
+ "step": 65000
529
+ },
530
+ {
531
+ "epoch": 4.72,
532
+ "learning_rate": 0.00016515075918448972,
533
+ "loss": 0.0555,
534
+ "step": 66000
535
+ },
536
+ {
537
+ "epoch": 4.79,
538
+ "learning_rate": 0.00016177309057097285,
539
+ "loss": 0.0548,
540
+ "step": 67000
541
+ },
542
+ {
543
+ "epoch": 4.86,
544
+ "learning_rate": 0.00015838940392358722,
545
+ "loss": 0.0548,
546
+ "step": 68000
547
+ },
548
+ {
549
+ "epoch": 4.93,
550
+ "learning_rate": 0.00015500142887659688,
551
+ "loss": 0.0543,
552
+ "step": 69000
553
+ },
554
+ {
555
+ "epoch": 5.0,
556
+ "learning_rate": 0.00015161089725636095,
557
+ "loss": 0.0542,
558
+ "step": 70000
559
+ },
560
+ {
561
+ "epoch": 5.0,
562
+ "eval_cer": 0.1039767385933845,
563
+ "eval_loss": 0.0671503096818924,
564
+ "eval_runtime": 81.8677,
565
+ "eval_samples_per_second": 60.463,
566
+ "eval_steps_per_second": 0.122,
567
+ "eval_wer": 0.2913131313131313,
568
+ "step": 70000
569
+ },
570
+ {
571
+ "epoch": 5.07,
572
+ "learning_rate": 0.00014821954219607845,
573
+ "loss": 0.0521,
574
+ "step": 71000
575
+ },
576
+ {
577
+ "epoch": 5.14,
578
+ "learning_rate": 0.0001448290972498651,
579
+ "loss": 0.0522,
580
+ "step": 72000
581
+ },
582
+ {
583
+ "epoch": 5.22,
584
+ "learning_rate": 0.00014144129550661485,
585
+ "loss": 0.0519,
586
+ "step": 73000
587
+ },
588
+ {
589
+ "epoch": 5.29,
590
+ "learning_rate": 0.0001380578687040995,
591
+ "loss": 0.0516,
592
+ "step": 74000
593
+ },
594
+ {
595
+ "epoch": 5.36,
596
+ "learning_rate": 0.00013468054634375843,
597
+ "loss": 0.0518,
598
+ "step": 75000
599
+ },
600
+ {
601
+ "epoch": 5.36,
602
+ "eval_cer": 0.1052228853763582,
603
+ "eval_loss": 0.06576833128929138,
604
+ "eval_runtime": 81.8774,
605
+ "eval_samples_per_second": 60.456,
606
+ "eval_steps_per_second": 0.122,
607
+ "eval_wer": 0.29555555555555557,
608
+ "step": 75000
609
+ },
610
+ {
611
+ "epoch": 5.43,
612
+ "learning_rate": 0.00013131105480663235,
613
+ "loss": 0.0511,
614
+ "step": 76000
615
+ },
616
+ {
617
+ "epoch": 5.5,
618
+ "learning_rate": 0.000127951116470891,
619
+ "loss": 0.0512,
620
+ "step": 77000
621
+ },
622
+ {
623
+ "epoch": 5.57,
624
+ "learning_rate": 0.00012460244883140783,
625
+ "loss": 0.0506,
626
+ "step": 78000
627
+ },
628
+ {
629
+ "epoch": 5.64,
630
+ "learning_rate": 0.0001212667636218309,
631
+ "loss": 0.0502,
632
+ "step": 79000
633
+ },
634
+ {
635
+ "epoch": 5.72,
636
+ "learning_rate": 0.00011794576593959775,
637
+ "loss": 0.05,
638
+ "step": 80000
639
+ },
640
+ {
641
+ "epoch": 5.72,
642
+ "eval_cer": 0.1032990096412409,
643
+ "eval_loss": 0.06467730551958084,
644
+ "eval_runtime": 81.823,
645
+ "eval_samples_per_second": 60.496,
646
+ "eval_steps_per_second": 0.122,
647
+ "eval_wer": 0.2901010101010101,
648
+ "step": 80000
649
+ },
650
+ {
651
+ "epoch": 5.79,
652
+ "learning_rate": 0.00011464115337434394,
653
+ "loss": 0.0501,
654
+ "step": 81000
655
+ },
656
+ {
657
+ "epoch": 5.86,
658
+ "learning_rate": 0.00011135461514014796,
659
+ "loss": 0.0492,
660
+ "step": 82000
661
+ },
662
+ {
663
+ "epoch": 5.93,
664
+ "learning_rate": 0.00010808783121205837,
665
+ "loss": 0.0494,
666
+ "step": 83000
667
+ },
668
+ {
669
+ "epoch": 6.0,
670
+ "learning_rate": 0.00010484247146734352,
671
+ "loss": 0.0491,
672
+ "step": 84000
673
+ },
674
+ {
675
+ "epoch": 6.07,
676
+ "learning_rate": 0.00010162019483190237,
677
+ "loss": 0.0472,
678
+ "step": 85000
679
+ },
680
+ {
681
+ "epoch": 6.07,
682
+ "eval_cer": 0.10340832076255438,
683
+ "eval_loss": 0.06243439018726349,
684
+ "eval_runtime": 82.0022,
685
+ "eval_samples_per_second": 60.364,
686
+ "eval_steps_per_second": 0.122,
687
+ "eval_wer": 0.2898989898989899,
688
+ "step": 85000
689
+ },
690
+ {
691
+ "epoch": 6.15,
692
+ "learning_rate": 9.842264843227404e-05,
693
+ "loss": 0.0476,
694
+ "step": 86000
695
+ },
696
+ {
697
+ "epoch": 6.22,
698
+ "learning_rate": 9.52514667536784e-05,
699
+ "loss": 0.0469,
700
+ "step": 87000
701
+ },
702
+ {
703
+ "epoch": 6.29,
704
+ "learning_rate": 9.210827080451842e-05,
705
+ "loss": 0.047,
706
+ "step": 88000
707
+ },
708
+ {
709
+ "epoch": 6.36,
710
+ "learning_rate": 8.899466728777203e-05,
711
+ "loss": 0.0474,
712
+ "step": 89000
713
+ },
714
+ {
715
+ "epoch": 6.43,
716
+ "learning_rate": 8.591224777969557e-05,
717
+ "loss": 0.0467,
718
+ "step": 90000
719
+ },
720
+ {
721
+ "epoch": 6.43,
722
+ "eval_cer": 0.10150630725169979,
723
+ "eval_loss": 0.061206888407468796,
724
+ "eval_runtime": 81.7254,
725
+ "eval_samples_per_second": 60.569,
726
+ "eval_steps_per_second": 0.122,
727
+ "eval_wer": 0.28929292929292927,
728
+ "step": 90000
729
+ },
730
+ {
731
+ "epoch": 6.5,
732
+ "learning_rate": 8.286258791626041e-05,
733
+ "loss": 0.0465,
734
+ "step": 91000
735
+ },
736
+ {
737
+ "epoch": 6.57,
738
+ "learning_rate": 7.984724658773716e-05,
739
+ "loss": 0.0464,
740
+ "step": 92000
741
+ },
742
+ {
743
+ "epoch": 6.65,
744
+ "learning_rate": 7.686776514184009e-05,
745
+ "loss": 0.0462,
746
+ "step": 93000
747
+ },
748
+ {
749
+ "epoch": 6.72,
750
+ "learning_rate": 7.392566659583846e-05,
751
+ "loss": 0.046,
752
+ "step": 94000
753
+ },
754
+ {
755
+ "epoch": 6.79,
756
+ "learning_rate": 7.102245485803813e-05,
757
+ "loss": 0.0461,
758
+ "step": 95000
759
+ },
760
+ {
761
+ "epoch": 6.79,
762
+ "eval_cer": 0.10109092499070856,
763
+ "eval_loss": 0.06065311282873154,
764
+ "eval_runtime": 82.0331,
765
+ "eval_samples_per_second": 60.342,
766
+ "eval_steps_per_second": 0.122,
767
+ "eval_wer": 0.28383838383838383,
768
+ "step": 95000
769
+ },
770
+ {
771
+ "epoch": 6.86,
772
+ "learning_rate": 6.81596139590308e-05,
773
+ "loss": 0.0457,
774
+ "step": 96000
775
+ },
776
+ {
777
+ "epoch": 6.93,
778
+ "learning_rate": 6.533860729310434e-05,
779
+ "loss": 0.0455,
780
+ "step": 97000
781
+ },
782
+ {
783
+ "epoch": 7.0,
784
+ "learning_rate": 6.256087687020127e-05,
785
+ "loss": 0.0454,
786
+ "step": 98000
787
+ },
788
+ {
789
+ "epoch": 7.07,
790
+ "learning_rate": 5.98278425788092e-05,
791
+ "loss": 0.044,
792
+ "step": 99000
793
+ },
794
+ {
795
+ "epoch": 7.15,
796
+ "learning_rate": 5.71409014601578e-05,
797
+ "loss": 0.0439,
798
+ "step": 100000
799
+ },
800
+ {
801
+ "epoch": 7.15,
802
+ "eval_cer": 0.10098161386939507,
803
+ "eval_loss": 0.05971471220254898,
804
+ "eval_runtime": 82.6482,
805
+ "eval_samples_per_second": 59.892,
806
+ "eval_steps_per_second": 0.121,
807
+ "eval_wer": 0.2804040404040404,
808
+ "step": 100000
809
+ },
810
+ {
811
+ "epoch": 7.22,
812
+ "learning_rate": 5.4501426994095876e-05,
813
+ "loss": 0.0436,
814
+ "step": 101000
815
+ },
816
+ {
817
+ "epoch": 7.29,
818
+ "learning_rate": 5.191076839701103e-05,
819
+ "loss": 0.0436,
820
+ "step": 102000
821
+ },
822
+ {
823
+ "epoch": 7.36,
824
+ "learning_rate": 4.9370249932153075e-05,
825
+ "loss": 0.0438,
826
+ "step": 103000
827
+ },
828
+ {
829
+ "epoch": 7.43,
830
+ "learning_rate": 4.6881170232712164e-05,
831
+ "loss": 0.0437,
832
+ "step": 104000
833
+ },
834
+ {
835
+ "epoch": 7.5,
836
+ "learning_rate": 4.444480163799822e-05,
837
+ "loss": 0.0435,
838
+ "step": 105000
839
+ },
840
+ {
841
+ "epoch": 7.5,
842
+ "eval_cer": 0.10120023611202204,
843
+ "eval_loss": 0.058374855667352676,
844
+ "eval_runtime": 81.7698,
845
+ "eval_samples_per_second": 60.536,
846
+ "eval_steps_per_second": 0.122,
847
+ "eval_wer": 0.28383838383838383,
848
+ "step": 105000
849
+ },
850
+ {
851
+ "epoch": 7.57,
852
+ "learning_rate": 4.2062389543061265e-05,
853
+ "loss": 0.0433,
854
+ "step": 106000
855
+ },
856
+ {
857
+ "epoch": 7.65,
858
+ "learning_rate": 3.9735151762084384e-05,
859
+ "loss": 0.0434,
860
+ "step": 107000
861
+ },
862
+ {
863
+ "epoch": 7.72,
864
+ "learning_rate": 3.746427790587557e-05,
865
+ "loss": 0.0428,
866
+ "step": 108000
867
+ },
868
+ {
869
+ "epoch": 7.79,
870
+ "learning_rate": 3.525092877377602e-05,
871
+ "loss": 0.0432,
872
+ "step": 109000
873
+ },
874
+ {
875
+ "epoch": 7.86,
876
+ "learning_rate": 3.309623576029597e-05,
877
+ "loss": 0.0427,
878
+ "step": 110000
879
+ },
880
+ {
881
+ "epoch": 7.86,
882
+ "eval_cer": 0.10012898712314991,
883
+ "eval_loss": 0.05760599672794342,
884
+ "eval_runtime": 81.886,
885
+ "eval_samples_per_second": 60.45,
886
+ "eval_steps_per_second": 0.122,
887
+ "eval_wer": 0.2804040404040404,
888
+ "step": 110000
889
+ },
890
+ {
891
+ "epoch": 7.93,
892
+ "learning_rate": 3.1001300276781274e-05,
893
+ "loss": 0.043,
894
+ "step": 111000
895
+ },
896
+ {
897
+ "epoch": 8.0,
898
+ "learning_rate": 2.8967193188406938e-05,
899
+ "loss": 0.0424,
900
+ "step": 112000
901
+ },
902
+ {
903
+ "epoch": 8.07,
904
+ "learning_rate": 2.699495426678389e-05,
905
+ "loss": 0.042,
906
+ "step": 113000
907
+ },
908
+ {
909
+ "epoch": 8.15,
910
+ "learning_rate": 2.5085591658461056e-05,
911
+ "loss": 0.0416,
912
+ "step": 114000
913
+ },
914
+ {
915
+ "epoch": 8.22,
916
+ "learning_rate": 2.3240081369591984e-05,
917
+ "loss": 0.0415,
918
+ "step": 115000
919
+ },
920
+ {
921
+ "epoch": 8.22,
922
+ "eval_cer": 0.10036947159003957,
923
+ "eval_loss": 0.05732354149222374,
924
+ "eval_runtime": 81.9536,
925
+ "eval_samples_per_second": 60.4,
926
+ "eval_steps_per_second": 0.122,
927
+ "eval_wer": 0.2797979797979798,
928
+ "step": 115000
929
+ },
930
+ {
931
+ "epoch": 8.29,
932
+ "learning_rate": 2.1459366767031522e-05,
933
+ "loss": 0.0417,
934
+ "step": 116000
935
+ },
936
+ {
937
+ "epoch": 8.36,
938
+ "learning_rate": 1.9744358096116225e-05,
939
+ "loss": 0.0414,
940
+ "step": 117000
941
+ },
942
+ {
943
+ "epoch": 8.43,
944
+ "learning_rate": 1.8095932015375496e-05,
945
+ "loss": 0.0412,
946
+ "step": 118000
947
+ },
948
+ {
949
+ "epoch": 8.5,
950
+ "learning_rate": 1.65149311484114e-05,
951
+ "loss": 0.0412,
952
+ "step": 119000
953
+ },
954
+ {
955
+ "epoch": 8.57,
956
+ "learning_rate": 1.500216365317587e-05,
957
+ "loss": 0.0412,
958
+ "step": 120000
959
+ },
960
+ {
961
+ "epoch": 8.57,
962
+ "eval_cer": 0.09988850265626024,
963
+ "eval_loss": 0.05689243599772453,
964
+ "eval_runtime": 81.8151,
965
+ "eval_samples_per_second": 60.502,
966
+ "eval_steps_per_second": 0.122,
967
+ "eval_wer": 0.2783838383838384,
968
+ "step": 120000
969
+ },
970
+ {
971
+ "epoch": 8.65,
972
+ "learning_rate": 1.355840280886582e-05,
973
+ "loss": 0.0414,
974
+ "step": 121000
975
+ },
976
+ {
977
+ "epoch": 8.72,
978
+ "learning_rate": 1.2184386620647097e-05,
979
+ "loss": 0.0416,
980
+ "step": 122000
981
+ },
982
+ {
983
+ "epoch": 8.79,
984
+ "learning_rate": 1.0880817442409478e-05,
985
+ "loss": 0.0413,
986
+ "step": 123000
987
+ },
988
+ {
989
+ "epoch": 8.86,
990
+ "learning_rate": 9.648361617745371e-06,
991
+ "loss": 0.0409,
992
+ "step": 124000
993
+ },
994
+ {
995
+ "epoch": 8.93,
996
+ "learning_rate": 8.487649139335962e-06,
997
+ "loss": 0.0412,
998
+ "step": 125000
999
+ },
1000
+ {
1001
+ "epoch": 8.93,
1002
+ "eval_cer": 0.09986664043199756,
1003
+ "eval_loss": 0.05683436617255211,
1004
+ "eval_runtime": 82.2238,
1005
+ "eval_samples_per_second": 60.202,
1006
+ "eval_steps_per_second": 0.122,
1007
+ "eval_wer": 0.2781818181818182,
1008
+ "step": 125000
1009
+ },
1010
+ {
1011
+ "epoch": 9.0,
1012
+ "learning_rate": 7.399273326918692e-06,
1013
+ "loss": 0.0411,
1014
+ "step": 126000
1015
+ },
1016
+ {
1017
+ "epoch": 9.07,
1018
+ "learning_rate": 6.383790524001009e-06,
1019
+ "loss": 0.0407,
1020
+ "step": 127000
1021
+ },
1022
+ {
1023
+ "epoch": 9.15,
1024
+ "learning_rate": 5.441719813474849e-06,
1025
+ "loss": 0.0406,
1026
+ "step": 128000
1027
+ },
1028
+ {
1029
+ "epoch": 9.22,
1030
+ "learning_rate": 4.57354275227797e-06,
1031
+ "loss": 0.0407,
1032
+ "step": 129000
1033
+ },
1034
+ {
1035
+ "epoch": 9.29,
1036
+ "learning_rate": 3.7797031252369767e-06,
1037
+ "loss": 0.0401,
1038
+ "step": 130000
1039
+ },
1040
+ {
1041
+ "epoch": 9.29,
1042
+ "eval_cer": 0.09988850265626024,
1043
+ "eval_loss": 0.05650737136602402,
1044
+ "eval_runtime": 81.8641,
1045
+ "eval_samples_per_second": 60.466,
1046
+ "eval_steps_per_second": 0.122,
1047
+ "eval_wer": 0.2777777777777778,
1048
+ "step": 130000
1049
+ },
1050
+ {
1051
+ "epoch": 9.36,
1052
+ "learning_rate": 3.0606067182186776e-06,
1053
+ "loss": 0.0403,
1054
+ "step": 131000
1055
+ },
1056
+ {
1057
+ "epoch": 9.43,
1058
+ "learning_rate": 2.4166211107049584e-06,
1059
+ "loss": 0.0406,
1060
+ "step": 132000
1061
+ },
1062
+ {
1063
+ "epoch": 9.5,
1064
+ "learning_rate": 1.8480754878977489e-06,
1065
+ "loss": 0.0406,
1066
+ "step": 133000
1067
+ },
1068
+ {
1069
+ "epoch": 9.57,
1070
+ "learning_rate": 1.3552604724498928e-06,
1071
+ "loss": 0.0406,
1072
+ "step": 134000
1073
+ },
1074
+ {
1075
+ "epoch": 9.65,
1076
+ "learning_rate": 9.384279759080127e-07,
1077
+ "loss": 0.0405,
1078
+ "step": 135000
1079
+ },
1080
+ {
1081
+ "epoch": 9.65,
1082
+ "eval_cer": 0.09984477820773485,
1083
+ "eval_loss": 0.056535448879003525,
1084
+ "eval_runtime": 81.6573,
1085
+ "eval_samples_per_second": 60.619,
1086
+ "eval_steps_per_second": 0.122,
1087
+ "eval_wer": 0.2781818181818182,
1088
+ "step": 135000
1089
+ }
1090
+ ],
1091
+ "max_steps": 139950,
1092
+ "num_train_epochs": 10,
1093
+ "total_flos": 2.638176399703204e+17,
1094
+ "trial_name": null,
1095
+ "trial_params": null
1096
+ }