aasem commited on
Commit
aa9da49
1 Parent(s): ca58ca2

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +1486 -0
trainer_state.json ADDED
@@ -0,0 +1,1486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 32.30769230769231,
5
+ "global_step": 2100,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.15,
12
+ "learning_rate": 9e-07,
13
+ "loss": 18.8043,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.31,
18
+ "learning_rate": 1.8e-06,
19
+ "loss": 23.3938,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.46,
24
+ "learning_rate": 2.8000000000000003e-06,
25
+ "loss": 20.8738,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.62,
30
+ "learning_rate": 3.8e-06,
31
+ "loss": 19.9257,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.77,
36
+ "learning_rate": 4.800000000000001e-06,
37
+ "loss": 22.7793,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.92,
42
+ "learning_rate": 5.8e-06,
43
+ "loss": 18.2911,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 1.08,
48
+ "learning_rate": 6.800000000000001e-06,
49
+ "loss": 20.4742,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 1.23,
54
+ "learning_rate": 7.7e-06,
55
+ "loss": 19.7014,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 1.38,
60
+ "learning_rate": 8.7e-06,
61
+ "loss": 15.6078,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 1.54,
66
+ "learning_rate": 9.7e-06,
67
+ "loss": 15.1693,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 1.54,
72
+ "eval_cer": 0.9953039832285115,
73
+ "eval_loss": 12.956756591796875,
74
+ "eval_runtime": 111.4724,
75
+ "eval_samples_per_second": 3.059,
76
+ "eval_steps_per_second": 0.386,
77
+ "eval_wer": 1.0,
78
+ "step": 100
79
+ },
80
+ {
81
+ "epoch": 1.69,
82
+ "learning_rate": 1.0700000000000001e-05,
83
+ "loss": 12.7027,
84
+ "step": 110
85
+ },
86
+ {
87
+ "epoch": 1.85,
88
+ "learning_rate": 1.1700000000000001e-05,
89
+ "loss": 10.9656,
90
+ "step": 120
91
+ },
92
+ {
93
+ "epoch": 2.0,
94
+ "learning_rate": 1.27e-05,
95
+ "loss": 10.6914,
96
+ "step": 130
97
+ },
98
+ {
99
+ "epoch": 2.15,
100
+ "learning_rate": 1.3700000000000001e-05,
101
+ "loss": 7.6051,
102
+ "step": 140
103
+ },
104
+ {
105
+ "epoch": 2.31,
106
+ "learning_rate": 1.47e-05,
107
+ "loss": 8.7814,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 2.46,
112
+ "learning_rate": 1.5700000000000002e-05,
113
+ "loss": 7.5898,
114
+ "step": 160
115
+ },
116
+ {
117
+ "epoch": 2.62,
118
+ "learning_rate": 1.6700000000000003e-05,
119
+ "loss": 7.0232,
120
+ "step": 170
121
+ },
122
+ {
123
+ "epoch": 2.77,
124
+ "learning_rate": 1.77e-05,
125
+ "loss": 6.8873,
126
+ "step": 180
127
+ },
128
+ {
129
+ "epoch": 2.92,
130
+ "learning_rate": 1.87e-05,
131
+ "loss": 5.7658,
132
+ "step": 190
133
+ },
134
+ {
135
+ "epoch": 3.08,
136
+ "learning_rate": 1.97e-05,
137
+ "loss": 6.434,
138
+ "step": 200
139
+ },
140
+ {
141
+ "epoch": 3.08,
142
+ "eval_cer": 0.9953039832285115,
143
+ "eval_loss": 5.401930809020996,
144
+ "eval_runtime": 111.63,
145
+ "eval_samples_per_second": 3.055,
146
+ "eval_steps_per_second": 0.385,
147
+ "eval_wer": 1.0,
148
+ "step": 200
149
+ },
150
+ {
151
+ "epoch": 3.23,
152
+ "learning_rate": 2.07e-05,
153
+ "loss": 5.5544,
154
+ "step": 210
155
+ },
156
+ {
157
+ "epoch": 3.38,
158
+ "learning_rate": 2.1700000000000002e-05,
159
+ "loss": 5.2728,
160
+ "step": 220
161
+ },
162
+ {
163
+ "epoch": 3.54,
164
+ "learning_rate": 2.2700000000000003e-05,
165
+ "loss": 5.3269,
166
+ "step": 230
167
+ },
168
+ {
169
+ "epoch": 3.69,
170
+ "learning_rate": 2.37e-05,
171
+ "loss": 4.8495,
172
+ "step": 240
173
+ },
174
+ {
175
+ "epoch": 3.85,
176
+ "learning_rate": 2.47e-05,
177
+ "loss": 4.7196,
178
+ "step": 250
179
+ },
180
+ {
181
+ "epoch": 4.0,
182
+ "learning_rate": 2.57e-05,
183
+ "loss": 4.7143,
184
+ "step": 260
185
+ },
186
+ {
187
+ "epoch": 4.15,
188
+ "learning_rate": 2.6700000000000002e-05,
189
+ "loss": 4.0896,
190
+ "step": 270
191
+ },
192
+ {
193
+ "epoch": 4.31,
194
+ "learning_rate": 2.7700000000000002e-05,
195
+ "loss": 4.342,
196
+ "step": 280
197
+ },
198
+ {
199
+ "epoch": 4.46,
200
+ "learning_rate": 2.87e-05,
201
+ "loss": 3.9603,
202
+ "step": 290
203
+ },
204
+ {
205
+ "epoch": 4.62,
206
+ "learning_rate": 2.97e-05,
207
+ "loss": 3.9273,
208
+ "step": 300
209
+ },
210
+ {
211
+ "epoch": 4.62,
212
+ "eval_cer": 0.9953039832285115,
213
+ "eval_loss": 3.848849058151245,
214
+ "eval_runtime": 111.5668,
215
+ "eval_samples_per_second": 3.056,
216
+ "eval_steps_per_second": 0.385,
217
+ "eval_wer": 1.0,
218
+ "step": 300
219
+ },
220
+ {
221
+ "epoch": 4.77,
222
+ "learning_rate": 3.07e-05,
223
+ "loss": 3.9211,
224
+ "step": 310
225
+ },
226
+ {
227
+ "epoch": 4.92,
228
+ "learning_rate": 3.1700000000000005e-05,
229
+ "loss": 3.6815,
230
+ "step": 320
231
+ },
232
+ {
233
+ "epoch": 5.08,
234
+ "learning_rate": 3.27e-05,
235
+ "loss": 3.7151,
236
+ "step": 330
237
+ },
238
+ {
239
+ "epoch": 5.23,
240
+ "learning_rate": 3.3700000000000006e-05,
241
+ "loss": 3.6599,
242
+ "step": 340
243
+ },
244
+ {
245
+ "epoch": 5.38,
246
+ "learning_rate": 3.4699999999999996e-05,
247
+ "loss": 3.4484,
248
+ "step": 350
249
+ },
250
+ {
251
+ "epoch": 5.54,
252
+ "learning_rate": 3.57e-05,
253
+ "loss": 3.4587,
254
+ "step": 360
255
+ },
256
+ {
257
+ "epoch": 5.69,
258
+ "learning_rate": 3.6700000000000004e-05,
259
+ "loss": 3.3518,
260
+ "step": 370
261
+ },
262
+ {
263
+ "epoch": 5.85,
264
+ "learning_rate": 3.77e-05,
265
+ "loss": 3.3416,
266
+ "step": 380
267
+ },
268
+ {
269
+ "epoch": 6.0,
270
+ "learning_rate": 3.8700000000000006e-05,
271
+ "loss": 3.3674,
272
+ "step": 390
273
+ },
274
+ {
275
+ "epoch": 6.15,
276
+ "learning_rate": 3.97e-05,
277
+ "loss": 3.1977,
278
+ "step": 400
279
+ },
280
+ {
281
+ "epoch": 6.15,
282
+ "eval_cer": 0.9953039832285115,
283
+ "eval_loss": 3.2796881198883057,
284
+ "eval_runtime": 111.0722,
285
+ "eval_samples_per_second": 3.07,
286
+ "eval_steps_per_second": 0.387,
287
+ "eval_wer": 1.0,
288
+ "step": 400
289
+ },
290
+ {
291
+ "epoch": 6.31,
292
+ "learning_rate": 4.07e-05,
293
+ "loss": 3.3328,
294
+ "step": 410
295
+ },
296
+ {
297
+ "epoch": 6.46,
298
+ "learning_rate": 4.17e-05,
299
+ "loss": 3.2257,
300
+ "step": 420
301
+ },
302
+ {
303
+ "epoch": 6.62,
304
+ "learning_rate": 4.27e-05,
305
+ "loss": 3.2563,
306
+ "step": 430
307
+ },
308
+ {
309
+ "epoch": 6.77,
310
+ "learning_rate": 4.3700000000000005e-05,
311
+ "loss": 3.264,
312
+ "step": 440
313
+ },
314
+ {
315
+ "epoch": 6.92,
316
+ "learning_rate": 4.47e-05,
317
+ "loss": 3.166,
318
+ "step": 450
319
+ },
320
+ {
321
+ "epoch": 7.08,
322
+ "learning_rate": 4.5700000000000006e-05,
323
+ "loss": 3.223,
324
+ "step": 460
325
+ },
326
+ {
327
+ "epoch": 7.23,
328
+ "learning_rate": 4.6700000000000003e-05,
329
+ "loss": 3.1885,
330
+ "step": 470
331
+ },
332
+ {
333
+ "epoch": 7.38,
334
+ "learning_rate": 4.77e-05,
335
+ "loss": 3.182,
336
+ "step": 480
337
+ },
338
+ {
339
+ "epoch": 7.54,
340
+ "learning_rate": 4.87e-05,
341
+ "loss": 3.2332,
342
+ "step": 490
343
+ },
344
+ {
345
+ "epoch": 7.69,
346
+ "learning_rate": 4.97e-05,
347
+ "loss": 3.1592,
348
+ "step": 500
349
+ },
350
+ {
351
+ "epoch": 7.69,
352
+ "eval_cer": 0.9953039832285115,
353
+ "eval_loss": 3.201068162918091,
354
+ "eval_runtime": 111.8171,
355
+ "eval_samples_per_second": 3.05,
356
+ "eval_steps_per_second": 0.385,
357
+ "eval_wer": 1.0,
358
+ "step": 500
359
+ },
360
+ {
361
+ "epoch": 7.85,
362
+ "learning_rate": 5.0700000000000006e-05,
363
+ "loss": 3.1996,
364
+ "step": 510
365
+ },
366
+ {
367
+ "epoch": 8.0,
368
+ "learning_rate": 5.17e-05,
369
+ "loss": 3.2977,
370
+ "step": 520
371
+ },
372
+ {
373
+ "epoch": 8.15,
374
+ "learning_rate": 5.270000000000001e-05,
375
+ "loss": 3.1425,
376
+ "step": 530
377
+ },
378
+ {
379
+ "epoch": 8.31,
380
+ "learning_rate": 5.3700000000000004e-05,
381
+ "loss": 3.2105,
382
+ "step": 540
383
+ },
384
+ {
385
+ "epoch": 8.46,
386
+ "learning_rate": 5.470000000000001e-05,
387
+ "loss": 3.1593,
388
+ "step": 550
389
+ },
390
+ {
391
+ "epoch": 8.62,
392
+ "learning_rate": 5.5700000000000005e-05,
393
+ "loss": 3.1665,
394
+ "step": 560
395
+ },
396
+ {
397
+ "epoch": 8.77,
398
+ "learning_rate": 5.6699999999999996e-05,
399
+ "loss": 3.2244,
400
+ "step": 570
401
+ },
402
+ {
403
+ "epoch": 8.92,
404
+ "learning_rate": 5.77e-05,
405
+ "loss": 3.1578,
406
+ "step": 580
407
+ },
408
+ {
409
+ "epoch": 9.08,
410
+ "learning_rate": 5.87e-05,
411
+ "loss": 3.1924,
412
+ "step": 590
413
+ },
414
+ {
415
+ "epoch": 9.23,
416
+ "learning_rate": 5.97e-05,
417
+ "loss": 3.1667,
418
+ "step": 600
419
+ },
420
+ {
421
+ "epoch": 9.23,
422
+ "eval_cer": 0.9953039832285115,
423
+ "eval_loss": 3.139559030532837,
424
+ "eval_runtime": 109.7588,
425
+ "eval_samples_per_second": 3.107,
426
+ "eval_steps_per_second": 0.392,
427
+ "eval_wer": 1.0,
428
+ "step": 600
429
+ },
430
+ {
431
+ "epoch": 9.38,
432
+ "learning_rate": 6.07e-05,
433
+ "loss": 3.1127,
434
+ "step": 610
435
+ },
436
+ {
437
+ "epoch": 9.54,
438
+ "learning_rate": 6.170000000000001e-05,
439
+ "loss": 3.1775,
440
+ "step": 620
441
+ },
442
+ {
443
+ "epoch": 9.69,
444
+ "learning_rate": 6.27e-05,
445
+ "loss": 3.1132,
446
+ "step": 630
447
+ },
448
+ {
449
+ "epoch": 9.85,
450
+ "learning_rate": 6.37e-05,
451
+ "loss": 3.1704,
452
+ "step": 640
453
+ },
454
+ {
455
+ "epoch": 10.0,
456
+ "learning_rate": 6.47e-05,
457
+ "loss": 3.1306,
458
+ "step": 650
459
+ },
460
+ {
461
+ "epoch": 10.15,
462
+ "learning_rate": 6.570000000000001e-05,
463
+ "loss": 3.0967,
464
+ "step": 660
465
+ },
466
+ {
467
+ "epoch": 10.31,
468
+ "learning_rate": 6.670000000000001e-05,
469
+ "loss": 3.1375,
470
+ "step": 670
471
+ },
472
+ {
473
+ "epoch": 10.46,
474
+ "learning_rate": 6.77e-05,
475
+ "loss": 3.1139,
476
+ "step": 680
477
+ },
478
+ {
479
+ "epoch": 10.62,
480
+ "learning_rate": 6.87e-05,
481
+ "loss": 3.098,
482
+ "step": 690
483
+ },
484
+ {
485
+ "epoch": 10.77,
486
+ "learning_rate": 6.97e-05,
487
+ "loss": 3.1111,
488
+ "step": 700
489
+ },
490
+ {
491
+ "epoch": 10.77,
492
+ "eval_cer": 0.9953039832285115,
493
+ "eval_loss": 3.0903170108795166,
494
+ "eval_runtime": 109.1662,
495
+ "eval_samples_per_second": 3.124,
496
+ "eval_steps_per_second": 0.394,
497
+ "eval_wer": 1.0,
498
+ "step": 700
499
+ },
500
+ {
501
+ "epoch": 10.92,
502
+ "learning_rate": 7.07e-05,
503
+ "loss": 3.0775,
504
+ "step": 710
505
+ },
506
+ {
507
+ "epoch": 11.08,
508
+ "learning_rate": 7.17e-05,
509
+ "loss": 3.1294,
510
+ "step": 720
511
+ },
512
+ {
513
+ "epoch": 11.23,
514
+ "learning_rate": 7.27e-05,
515
+ "loss": 3.0943,
516
+ "step": 730
517
+ },
518
+ {
519
+ "epoch": 11.38,
520
+ "learning_rate": 7.37e-05,
521
+ "loss": 3.0812,
522
+ "step": 740
523
+ },
524
+ {
525
+ "epoch": 11.54,
526
+ "learning_rate": 7.47e-05,
527
+ "loss": 3.0843,
528
+ "step": 750
529
+ },
530
+ {
531
+ "epoch": 11.69,
532
+ "learning_rate": 7.570000000000001e-05,
533
+ "loss": 3.0668,
534
+ "step": 760
535
+ },
536
+ {
537
+ "epoch": 11.85,
538
+ "learning_rate": 7.670000000000001e-05,
539
+ "loss": 3.0617,
540
+ "step": 770
541
+ },
542
+ {
543
+ "epoch": 12.0,
544
+ "learning_rate": 7.77e-05,
545
+ "loss": 3.0469,
546
+ "step": 780
547
+ },
548
+ {
549
+ "epoch": 12.15,
550
+ "learning_rate": 7.87e-05,
551
+ "loss": 2.9867,
552
+ "step": 790
553
+ },
554
+ {
555
+ "epoch": 12.31,
556
+ "learning_rate": 7.970000000000001e-05,
557
+ "loss": 2.9955,
558
+ "step": 800
559
+ },
560
+ {
561
+ "epoch": 12.31,
562
+ "eval_cer": 0.9953039832285115,
563
+ "eval_loss": 2.895956039428711,
564
+ "eval_runtime": 109.2356,
565
+ "eval_samples_per_second": 3.122,
566
+ "eval_steps_per_second": 0.394,
567
+ "eval_wer": 1.0,
568
+ "step": 800
569
+ },
570
+ {
571
+ "epoch": 12.46,
572
+ "learning_rate": 8.070000000000001e-05,
573
+ "loss": 2.9201,
574
+ "step": 810
575
+ },
576
+ {
577
+ "epoch": 12.62,
578
+ "learning_rate": 8.17e-05,
579
+ "loss": 2.8683,
580
+ "step": 820
581
+ },
582
+ {
583
+ "epoch": 12.77,
584
+ "learning_rate": 8.27e-05,
585
+ "loss": 2.8432,
586
+ "step": 830
587
+ },
588
+ {
589
+ "epoch": 12.92,
590
+ "learning_rate": 8.37e-05,
591
+ "loss": 2.6776,
592
+ "step": 840
593
+ },
594
+ {
595
+ "epoch": 13.08,
596
+ "learning_rate": 8.47e-05,
597
+ "loss": 2.5611,
598
+ "step": 850
599
+ },
600
+ {
601
+ "epoch": 13.23,
602
+ "learning_rate": 8.57e-05,
603
+ "loss": 2.4031,
604
+ "step": 860
605
+ },
606
+ {
607
+ "epoch": 13.38,
608
+ "learning_rate": 8.67e-05,
609
+ "loss": 2.2939,
610
+ "step": 870
611
+ },
612
+ {
613
+ "epoch": 13.54,
614
+ "learning_rate": 8.77e-05,
615
+ "loss": 2.0835,
616
+ "step": 880
617
+ },
618
+ {
619
+ "epoch": 13.69,
620
+ "learning_rate": 8.87e-05,
621
+ "loss": 1.8785,
622
+ "step": 890
623
+ },
624
+ {
625
+ "epoch": 13.85,
626
+ "learning_rate": 8.970000000000001e-05,
627
+ "loss": 1.784,
628
+ "step": 900
629
+ },
630
+ {
631
+ "epoch": 13.85,
632
+ "eval_cer": 0.33366876310272536,
633
+ "eval_loss": 1.4985727071762085,
634
+ "eval_runtime": 109.5182,
635
+ "eval_samples_per_second": 3.114,
636
+ "eval_steps_per_second": 0.393,
637
+ "eval_wer": 0.8279052553663953,
638
+ "step": 900
639
+ },
640
+ {
641
+ "epoch": 14.0,
642
+ "learning_rate": 9.070000000000001e-05,
643
+ "loss": 1.6083,
644
+ "step": 910
645
+ },
646
+ {
647
+ "epoch": 14.15,
648
+ "learning_rate": 9.17e-05,
649
+ "loss": 1.5164,
650
+ "step": 920
651
+ },
652
+ {
653
+ "epoch": 14.31,
654
+ "learning_rate": 9.27e-05,
655
+ "loss": 1.4019,
656
+ "step": 930
657
+ },
658
+ {
659
+ "epoch": 14.46,
660
+ "learning_rate": 9.370000000000001e-05,
661
+ "loss": 1.3325,
662
+ "step": 940
663
+ },
664
+ {
665
+ "epoch": 14.62,
666
+ "learning_rate": 9.47e-05,
667
+ "loss": 1.3251,
668
+ "step": 950
669
+ },
670
+ {
671
+ "epoch": 14.77,
672
+ "learning_rate": 9.57e-05,
673
+ "loss": 1.2811,
674
+ "step": 960
675
+ },
676
+ {
677
+ "epoch": 14.92,
678
+ "learning_rate": 9.67e-05,
679
+ "loss": 1.1927,
680
+ "step": 970
681
+ },
682
+ {
683
+ "epoch": 15.08,
684
+ "learning_rate": 9.77e-05,
685
+ "loss": 1.2017,
686
+ "step": 980
687
+ },
688
+ {
689
+ "epoch": 15.23,
690
+ "learning_rate": 9.87e-05,
691
+ "loss": 1.0452,
692
+ "step": 990
693
+ },
694
+ {
695
+ "epoch": 15.38,
696
+ "learning_rate": 9.970000000000001e-05,
697
+ "loss": 1.1511,
698
+ "step": 1000
699
+ },
700
+ {
701
+ "epoch": 15.38,
702
+ "eval_cer": 0.21970649895178196,
703
+ "eval_loss": 0.8053019046783447,
704
+ "eval_runtime": 109.46,
705
+ "eval_samples_per_second": 3.115,
706
+ "eval_steps_per_second": 0.393,
707
+ "eval_wer": 0.6402664692820134,
708
+ "step": 1000
709
+ },
710
+ {
711
+ "epoch": 15.54,
712
+ "learning_rate": 9.994166666666667e-05,
713
+ "loss": 5.2926,
714
+ "step": 1010
715
+ },
716
+ {
717
+ "epoch": 15.69,
718
+ "learning_rate": 9.985833333333334e-05,
719
+ "loss": 3.4895,
720
+ "step": 1020
721
+ },
722
+ {
723
+ "epoch": 15.85,
724
+ "learning_rate": 9.977500000000001e-05,
725
+ "loss": 3.3029,
726
+ "step": 1030
727
+ },
728
+ {
729
+ "epoch": 16.0,
730
+ "learning_rate": 9.969166666666667e-05,
731
+ "loss": 3.2695,
732
+ "step": 1040
733
+ },
734
+ {
735
+ "epoch": 16.15,
736
+ "learning_rate": 9.960833333333333e-05,
737
+ "loss": 3.1054,
738
+ "step": 1050
739
+ },
740
+ {
741
+ "epoch": 16.31,
742
+ "learning_rate": 9.952500000000001e-05,
743
+ "loss": 3.0923,
744
+ "step": 1060
745
+ },
746
+ {
747
+ "epoch": 16.46,
748
+ "learning_rate": 9.944166666666667e-05,
749
+ "loss": 2.9955,
750
+ "step": 1070
751
+ },
752
+ {
753
+ "epoch": 16.62,
754
+ "learning_rate": 9.935833333333334e-05,
755
+ "loss": 2.9343,
756
+ "step": 1080
757
+ },
758
+ {
759
+ "epoch": 16.77,
760
+ "learning_rate": 9.9275e-05,
761
+ "loss": 2.7871,
762
+ "step": 1090
763
+ },
764
+ {
765
+ "epoch": 16.92,
766
+ "learning_rate": 9.919166666666667e-05,
767
+ "loss": 2.3674,
768
+ "step": 1100
769
+ },
770
+ {
771
+ "epoch": 16.92,
772
+ "eval_cer": 0.4807547169811321,
773
+ "eval_loss": 1.9613749980926514,
774
+ "eval_runtime": 26.9012,
775
+ "eval_samples_per_second": 12.676,
776
+ "eval_steps_per_second": 1.598,
777
+ "eval_wer": 0.9925980754996299,
778
+ "step": 1100
779
+ },
780
+ {
781
+ "epoch": 17.08,
782
+ "learning_rate": 9.910833333333333e-05,
783
+ "loss": 2.1319,
784
+ "step": 1110
785
+ },
786
+ {
787
+ "epoch": 17.23,
788
+ "learning_rate": 9.9025e-05,
789
+ "loss": 1.6758,
790
+ "step": 1120
791
+ },
792
+ {
793
+ "epoch": 17.38,
794
+ "learning_rate": 9.894166666666668e-05,
795
+ "loss": 1.5306,
796
+ "step": 1130
797
+ },
798
+ {
799
+ "epoch": 17.54,
800
+ "learning_rate": 9.885833333333334e-05,
801
+ "loss": 1.3654,
802
+ "step": 1140
803
+ },
804
+ {
805
+ "epoch": 17.69,
806
+ "learning_rate": 9.8775e-05,
807
+ "loss": 1.2197,
808
+ "step": 1150
809
+ },
810
+ {
811
+ "epoch": 17.85,
812
+ "learning_rate": 9.869166666666668e-05,
813
+ "loss": 1.1739,
814
+ "step": 1160
815
+ },
816
+ {
817
+ "epoch": 18.0,
818
+ "learning_rate": 9.860833333333334e-05,
819
+ "loss": 1.0703,
820
+ "step": 1170
821
+ },
822
+ {
823
+ "epoch": 18.15,
824
+ "learning_rate": 9.8525e-05,
825
+ "loss": 1.064,
826
+ "step": 1180
827
+ },
828
+ {
829
+ "epoch": 18.31,
830
+ "learning_rate": 9.844166666666667e-05,
831
+ "loss": 1.0071,
832
+ "step": 1190
833
+ },
834
+ {
835
+ "epoch": 18.46,
836
+ "learning_rate": 9.835833333333334e-05,
837
+ "loss": 0.8716,
838
+ "step": 1200
839
+ },
840
+ {
841
+ "epoch": 18.46,
842
+ "eval_cer": 0.20771488469601676,
843
+ "eval_loss": 0.7716531753540039,
844
+ "eval_runtime": 26.7363,
845
+ "eval_samples_per_second": 12.754,
846
+ "eval_steps_per_second": 1.608,
847
+ "eval_wer": 0.609178386380459,
848
+ "step": 1200
849
+ },
850
+ {
851
+ "epoch": 18.62,
852
+ "learning_rate": 9.8275e-05,
853
+ "loss": 0.9567,
854
+ "step": 1210
855
+ },
856
+ {
857
+ "epoch": 18.77,
858
+ "learning_rate": 9.819166666666668e-05,
859
+ "loss": 0.9223,
860
+ "step": 1220
861
+ },
862
+ {
863
+ "epoch": 18.92,
864
+ "learning_rate": 9.810833333333334e-05,
865
+ "loss": 0.8467,
866
+ "step": 1230
867
+ },
868
+ {
869
+ "epoch": 19.08,
870
+ "learning_rate": 9.8025e-05,
871
+ "loss": 0.8997,
872
+ "step": 1240
873
+ },
874
+ {
875
+ "epoch": 19.23,
876
+ "learning_rate": 9.794166666666667e-05,
877
+ "loss": 0.7379,
878
+ "step": 1250
879
+ },
880
+ {
881
+ "epoch": 19.38,
882
+ "learning_rate": 9.785833333333334e-05,
883
+ "loss": 0.7997,
884
+ "step": 1260
885
+ },
886
+ {
887
+ "epoch": 19.54,
888
+ "learning_rate": 9.7775e-05,
889
+ "loss": 0.8364,
890
+ "step": 1270
891
+ },
892
+ {
893
+ "epoch": 19.69,
894
+ "learning_rate": 9.769166666666667e-05,
895
+ "loss": 0.6978,
896
+ "step": 1280
897
+ },
898
+ {
899
+ "epoch": 19.85,
900
+ "learning_rate": 9.760833333333334e-05,
901
+ "loss": 0.7572,
902
+ "step": 1290
903
+ },
904
+ {
905
+ "epoch": 20.0,
906
+ "learning_rate": 9.7525e-05,
907
+ "loss": 0.7108,
908
+ "step": 1300
909
+ },
910
+ {
911
+ "epoch": 20.0,
912
+ "eval_cer": 0.16545073375262054,
913
+ "eval_loss": 0.5405445098876953,
914
+ "eval_runtime": 26.8237,
915
+ "eval_samples_per_second": 12.713,
916
+ "eval_steps_per_second": 1.603,
917
+ "eval_wer": 0.5159141376757957,
918
+ "step": 1300
919
+ },
920
+ {
921
+ "epoch": 20.15,
922
+ "learning_rate": 9.744166666666667e-05,
923
+ "loss": 0.7364,
924
+ "step": 1310
925
+ },
926
+ {
927
+ "epoch": 20.31,
928
+ "learning_rate": 9.735833333333335e-05,
929
+ "loss": 0.6907,
930
+ "step": 1320
931
+ },
932
+ {
933
+ "epoch": 20.46,
934
+ "learning_rate": 9.7275e-05,
935
+ "loss": 0.6201,
936
+ "step": 1330
937
+ },
938
+ {
939
+ "epoch": 20.62,
940
+ "learning_rate": 9.719166666666667e-05,
941
+ "loss": 0.6772,
942
+ "step": 1340
943
+ },
944
+ {
945
+ "epoch": 20.77,
946
+ "learning_rate": 9.710833333333334e-05,
947
+ "loss": 0.6928,
948
+ "step": 1350
949
+ },
950
+ {
951
+ "epoch": 20.92,
952
+ "learning_rate": 9.7025e-05,
953
+ "loss": 0.6312,
954
+ "step": 1360
955
+ },
956
+ {
957
+ "epoch": 21.08,
958
+ "learning_rate": 9.694166666666667e-05,
959
+ "loss": 0.6452,
960
+ "step": 1370
961
+ },
962
+ {
963
+ "epoch": 21.23,
964
+ "learning_rate": 9.685833333333333e-05,
965
+ "loss": 0.551,
966
+ "step": 1380
967
+ },
968
+ {
969
+ "epoch": 21.38,
970
+ "learning_rate": 9.677500000000001e-05,
971
+ "loss": 0.6457,
972
+ "step": 1390
973
+ },
974
+ {
975
+ "epoch": 21.54,
976
+ "learning_rate": 9.669166666666667e-05,
977
+ "loss": 0.612,
978
+ "step": 1400
979
+ },
980
+ {
981
+ "epoch": 21.54,
982
+ "eval_cer": 0.140041928721174,
983
+ "eval_loss": 0.42258089780807495,
984
+ "eval_runtime": 27.0022,
985
+ "eval_samples_per_second": 12.629,
986
+ "eval_steps_per_second": 1.592,
987
+ "eval_wer": 0.463360473723168,
988
+ "step": 1400
989
+ },
990
+ {
991
+ "epoch": 21.69,
992
+ "learning_rate": 9.660833333333333e-05,
993
+ "loss": 0.5377,
994
+ "step": 1410
995
+ },
996
+ {
997
+ "epoch": 21.85,
998
+ "learning_rate": 9.652500000000002e-05,
999
+ "loss": 0.625,
1000
+ "step": 1420
1001
+ },
1002
+ {
1003
+ "epoch": 22.0,
1004
+ "learning_rate": 9.644166666666668e-05,
1005
+ "loss": 0.5206,
1006
+ "step": 1430
1007
+ },
1008
+ {
1009
+ "epoch": 22.15,
1010
+ "learning_rate": 9.635833333333334e-05,
1011
+ "loss": 0.6004,
1012
+ "step": 1440
1013
+ },
1014
+ {
1015
+ "epoch": 22.31,
1016
+ "learning_rate": 9.627500000000001e-05,
1017
+ "loss": 0.5728,
1018
+ "step": 1450
1019
+ },
1020
+ {
1021
+ "epoch": 22.46,
1022
+ "learning_rate": 9.619166666666667e-05,
1023
+ "loss": 0.5696,
1024
+ "step": 1460
1025
+ },
1026
+ {
1027
+ "epoch": 22.62,
1028
+ "learning_rate": 9.610833333333333e-05,
1029
+ "loss": 0.5712,
1030
+ "step": 1470
1031
+ },
1032
+ {
1033
+ "epoch": 22.77,
1034
+ "learning_rate": 9.6025e-05,
1035
+ "loss": 0.5303,
1036
+ "step": 1480
1037
+ },
1038
+ {
1039
+ "epoch": 22.92,
1040
+ "learning_rate": 9.594166666666668e-05,
1041
+ "loss": 0.4954,
1042
+ "step": 1490
1043
+ },
1044
+ {
1045
+ "epoch": 23.08,
1046
+ "learning_rate": 9.585833333333334e-05,
1047
+ "loss": 0.4919,
1048
+ "step": 1500
1049
+ },
1050
+ {
1051
+ "epoch": 23.08,
1052
+ "eval_cer": 0.11941299790356394,
1053
+ "eval_loss": 0.3299271762371063,
1054
+ "eval_runtime": 27.0051,
1055
+ "eval_samples_per_second": 12.627,
1056
+ "eval_steps_per_second": 1.592,
1057
+ "eval_wer": 0.4052553663952628,
1058
+ "step": 1500
1059
+ },
1060
+ {
1061
+ "epoch": 23.23,
1062
+ "learning_rate": 9.5775e-05,
1063
+ "loss": 0.391,
1064
+ "step": 1510
1065
+ },
1066
+ {
1067
+ "epoch": 23.38,
1068
+ "learning_rate": 9.569166666666667e-05,
1069
+ "loss": 0.5205,
1070
+ "step": 1520
1071
+ },
1072
+ {
1073
+ "epoch": 23.54,
1074
+ "learning_rate": 9.560833333333333e-05,
1075
+ "loss": 0.5302,
1076
+ "step": 1530
1077
+ },
1078
+ {
1079
+ "epoch": 23.69,
1080
+ "learning_rate": 9.5525e-05,
1081
+ "loss": 0.4806,
1082
+ "step": 1540
1083
+ },
1084
+ {
1085
+ "epoch": 23.85,
1086
+ "learning_rate": 9.544166666666668e-05,
1087
+ "loss": 0.5882,
1088
+ "step": 1550
1089
+ },
1090
+ {
1091
+ "epoch": 24.0,
1092
+ "learning_rate": 9.535833333333334e-05,
1093
+ "loss": 0.4541,
1094
+ "step": 1560
1095
+ },
1096
+ {
1097
+ "epoch": 24.15,
1098
+ "learning_rate": 9.5275e-05,
1099
+ "loss": 0.457,
1100
+ "step": 1570
1101
+ },
1102
+ {
1103
+ "epoch": 24.31,
1104
+ "learning_rate": 9.519166666666667e-05,
1105
+ "loss": 0.4797,
1106
+ "step": 1580
1107
+ },
1108
+ {
1109
+ "epoch": 24.46,
1110
+ "learning_rate": 9.510833333333333e-05,
1111
+ "loss": 0.3973,
1112
+ "step": 1590
1113
+ },
1114
+ {
1115
+ "epoch": 24.62,
1116
+ "learning_rate": 9.5025e-05,
1117
+ "loss": 0.4999,
1118
+ "step": 1600
1119
+ },
1120
+ {
1121
+ "epoch": 24.62,
1122
+ "eval_cer": 0.1020545073375262,
1123
+ "eval_loss": 0.2569698691368103,
1124
+ "eval_runtime": 30.0585,
1125
+ "eval_samples_per_second": 11.345,
1126
+ "eval_steps_per_second": 1.431,
1127
+ "eval_wer": 0.3604737231680237,
1128
+ "step": 1600
1129
+ },
1130
+ {
1131
+ "epoch": 24.77,
1132
+ "learning_rate": 9.494166666666668e-05,
1133
+ "loss": 0.452,
1134
+ "step": 1610
1135
+ },
1136
+ {
1137
+ "epoch": 24.92,
1138
+ "learning_rate": 9.485833333333334e-05,
1139
+ "loss": 0.4148,
1140
+ "step": 1620
1141
+ },
1142
+ {
1143
+ "epoch": 25.08,
1144
+ "learning_rate": 9.4775e-05,
1145
+ "loss": 0.4316,
1146
+ "step": 1630
1147
+ },
1148
+ {
1149
+ "epoch": 25.23,
1150
+ "learning_rate": 9.469166666666667e-05,
1151
+ "loss": 0.3673,
1152
+ "step": 1640
1153
+ },
1154
+ {
1155
+ "epoch": 25.38,
1156
+ "learning_rate": 9.460833333333335e-05,
1157
+ "loss": 0.4365,
1158
+ "step": 1650
1159
+ },
1160
+ {
1161
+ "epoch": 25.54,
1162
+ "learning_rate": 9.452500000000001e-05,
1163
+ "loss": 0.4191,
1164
+ "step": 1660
1165
+ },
1166
+ {
1167
+ "epoch": 25.69,
1168
+ "learning_rate": 9.444166666666667e-05,
1169
+ "loss": 0.3857,
1170
+ "step": 1670
1171
+ },
1172
+ {
1173
+ "epoch": 25.85,
1174
+ "learning_rate": 9.435833333333334e-05,
1175
+ "loss": 0.4627,
1176
+ "step": 1680
1177
+ },
1178
+ {
1179
+ "epoch": 26.0,
1180
+ "learning_rate": 9.4275e-05,
1181
+ "loss": 0.3845,
1182
+ "step": 1690
1183
+ },
1184
+ {
1185
+ "epoch": 26.15,
1186
+ "learning_rate": 9.419166666666666e-05,
1187
+ "loss": 0.4349,
1188
+ "step": 1700
1189
+ },
1190
+ {
1191
+ "epoch": 26.15,
1192
+ "eval_cer": 0.09207547169811321,
1193
+ "eval_loss": 0.21595901250839233,
1194
+ "eval_runtime": 27.0141,
1195
+ "eval_samples_per_second": 12.623,
1196
+ "eval_steps_per_second": 1.592,
1197
+ "eval_wer": 0.33826794966691337,
1198
+ "step": 1700
1199
+ },
1200
+ {
1201
+ "epoch": 26.31,
1202
+ "learning_rate": 9.410833333333335e-05,
1203
+ "loss": 0.359,
1204
+ "step": 1710
1205
+ },
1206
+ {
1207
+ "epoch": 26.46,
1208
+ "learning_rate": 9.402500000000001e-05,
1209
+ "loss": 0.3897,
1210
+ "step": 1720
1211
+ },
1212
+ {
1213
+ "epoch": 26.62,
1214
+ "learning_rate": 9.394166666666667e-05,
1215
+ "loss": 0.3857,
1216
+ "step": 1730
1217
+ },
1218
+ {
1219
+ "epoch": 26.77,
1220
+ "learning_rate": 9.385833333333334e-05,
1221
+ "loss": 0.361,
1222
+ "step": 1740
1223
+ },
1224
+ {
1225
+ "epoch": 26.92,
1226
+ "learning_rate": 9.3775e-05,
1227
+ "loss": 0.4116,
1228
+ "step": 1750
1229
+ },
1230
+ {
1231
+ "epoch": 27.08,
1232
+ "learning_rate": 9.369166666666666e-05,
1233
+ "loss": 0.427,
1234
+ "step": 1760
1235
+ },
1236
+ {
1237
+ "epoch": 27.23,
1238
+ "learning_rate": 9.360833333333334e-05,
1239
+ "loss": 0.2724,
1240
+ "step": 1770
1241
+ },
1242
+ {
1243
+ "epoch": 27.38,
1244
+ "learning_rate": 9.352500000000001e-05,
1245
+ "loss": 0.416,
1246
+ "step": 1780
1247
+ },
1248
+ {
1249
+ "epoch": 27.54,
1250
+ "learning_rate": 9.344166666666667e-05,
1251
+ "loss": 0.3837,
1252
+ "step": 1790
1253
+ },
1254
+ {
1255
+ "epoch": 27.69,
1256
+ "learning_rate": 9.335833333333333e-05,
1257
+ "loss": 0.3205,
1258
+ "step": 1800
1259
+ },
1260
+ {
1261
+ "epoch": 27.69,
1262
+ "eval_cer": 0.07907756813417191,
1263
+ "eval_loss": 0.17836497724056244,
1264
+ "eval_runtime": 26.9274,
1265
+ "eval_samples_per_second": 12.664,
1266
+ "eval_steps_per_second": 1.597,
1267
+ "eval_wer": 0.29533678756476683,
1268
+ "step": 1800
1269
+ },
1270
+ {
1271
+ "epoch": 27.85,
1272
+ "learning_rate": 9.3275e-05,
1273
+ "loss": 0.3578,
1274
+ "step": 1810
1275
+ },
1276
+ {
1277
+ "epoch": 28.0,
1278
+ "learning_rate": 9.319166666666666e-05,
1279
+ "loss": 0.3527,
1280
+ "step": 1820
1281
+ },
1282
+ {
1283
+ "epoch": 28.15,
1284
+ "learning_rate": 9.310833333333334e-05,
1285
+ "loss": 0.3458,
1286
+ "step": 1830
1287
+ },
1288
+ {
1289
+ "epoch": 28.31,
1290
+ "learning_rate": 9.302500000000001e-05,
1291
+ "loss": 0.3527,
1292
+ "step": 1840
1293
+ },
1294
+ {
1295
+ "epoch": 28.46,
1296
+ "learning_rate": 9.294166666666667e-05,
1297
+ "loss": 0.3244,
1298
+ "step": 1850
1299
+ },
1300
+ {
1301
+ "epoch": 28.62,
1302
+ "learning_rate": 9.285833333333333e-05,
1303
+ "loss": 0.393,
1304
+ "step": 1860
1305
+ },
1306
+ {
1307
+ "epoch": 28.77,
1308
+ "learning_rate": 9.2775e-05,
1309
+ "loss": 0.3255,
1310
+ "step": 1870
1311
+ },
1312
+ {
1313
+ "epoch": 28.92,
1314
+ "learning_rate": 9.269166666666668e-05,
1315
+ "loss": 0.2884,
1316
+ "step": 1880
1317
+ },
1318
+ {
1319
+ "epoch": 29.08,
1320
+ "learning_rate": 9.260833333333334e-05,
1321
+ "loss": 0.3382,
1322
+ "step": 1890
1323
+ },
1324
+ {
1325
+ "epoch": 29.23,
1326
+ "learning_rate": 9.252500000000001e-05,
1327
+ "loss": 0.2717,
1328
+ "step": 1900
1329
+ },
1330
+ {
1331
+ "epoch": 29.23,
1332
+ "eval_cer": 0.07127882599580712,
1333
+ "eval_loss": 0.14557726681232452,
1334
+ "eval_runtime": 26.923,
1335
+ "eval_samples_per_second": 12.666,
1336
+ "eval_steps_per_second": 1.597,
1337
+ "eval_wer": 0.2701702442635085,
1338
+ "step": 1900
1339
+ },
1340
+ {
1341
+ "epoch": 29.38,
1342
+ "learning_rate": 9.244166666666667e-05,
1343
+ "loss": 0.3271,
1344
+ "step": 1910
1345
+ },
1346
+ {
1347
+ "epoch": 29.54,
1348
+ "learning_rate": 9.235833333333333e-05,
1349
+ "loss": 0.3439,
1350
+ "step": 1920
1351
+ },
1352
+ {
1353
+ "epoch": 29.69,
1354
+ "learning_rate": 9.2275e-05,
1355
+ "loss": 0.3184,
1356
+ "step": 1930
1357
+ },
1358
+ {
1359
+ "epoch": 29.85,
1360
+ "learning_rate": 9.219166666666668e-05,
1361
+ "loss": 0.3458,
1362
+ "step": 1940
1363
+ },
1364
+ {
1365
+ "epoch": 30.0,
1366
+ "learning_rate": 9.210833333333334e-05,
1367
+ "loss": 0.2775,
1368
+ "step": 1950
1369
+ },
1370
+ {
1371
+ "epoch": 30.15,
1372
+ "learning_rate": 9.2025e-05,
1373
+ "loss": 0.2959,
1374
+ "step": 1960
1375
+ },
1376
+ {
1377
+ "epoch": 30.31,
1378
+ "learning_rate": 9.194166666666667e-05,
1379
+ "loss": 0.2861,
1380
+ "step": 1970
1381
+ },
1382
+ {
1383
+ "epoch": 30.46,
1384
+ "learning_rate": 9.185833333333333e-05,
1385
+ "loss": 0.3047,
1386
+ "step": 1980
1387
+ },
1388
+ {
1389
+ "epoch": 30.62,
1390
+ "learning_rate": 9.1775e-05,
1391
+ "loss": 0.3244,
1392
+ "step": 1990
1393
+ },
1394
+ {
1395
+ "epoch": 30.77,
1396
+ "learning_rate": 9.169166666666668e-05,
1397
+ "loss": 0.2903,
1398
+ "step": 2000
1399
+ },
1400
+ {
1401
+ "epoch": 30.77,
1402
+ "eval_cer": 0.06532494758909853,
1403
+ "eval_loss": 0.1265391856431961,
1404
+ "eval_runtime": 29.4587,
1405
+ "eval_samples_per_second": 11.576,
1406
+ "eval_steps_per_second": 1.46,
1407
+ "eval_wer": 0.2527757216876388,
1408
+ "step": 2000
1409
+ },
1410
+ {
1411
+ "epoch": 30.92,
1412
+ "learning_rate": 9.160833333333334e-05,
1413
+ "loss": 0.2849,
1414
+ "step": 2010
1415
+ },
1416
+ {
1417
+ "epoch": 31.08,
1418
+ "learning_rate": 9.1525e-05,
1419
+ "loss": 0.3266,
1420
+ "step": 2020
1421
+ },
1422
+ {
1423
+ "epoch": 31.23,
1424
+ "learning_rate": 9.144166666666668e-05,
1425
+ "loss": 0.2915,
1426
+ "step": 2030
1427
+ },
1428
+ {
1429
+ "epoch": 31.38,
1430
+ "learning_rate": 9.135833333333334e-05,
1431
+ "loss": 0.3066,
1432
+ "step": 2040
1433
+ },
1434
+ {
1435
+ "epoch": 31.54,
1436
+ "learning_rate": 9.1275e-05,
1437
+ "loss": 0.3167,
1438
+ "step": 2050
1439
+ },
1440
+ {
1441
+ "epoch": 31.69,
1442
+ "learning_rate": 9.119166666666667e-05,
1443
+ "loss": 0.2572,
1444
+ "step": 2060
1445
+ },
1446
+ {
1447
+ "epoch": 31.85,
1448
+ "learning_rate": 9.110833333333334e-05,
1449
+ "loss": 0.3269,
1450
+ "step": 2070
1451
+ },
1452
+ {
1453
+ "epoch": 32.0,
1454
+ "learning_rate": 9.1025e-05,
1455
+ "loss": 0.2422,
1456
+ "step": 2080
1457
+ },
1458
+ {
1459
+ "epoch": 32.15,
1460
+ "learning_rate": 9.094166666666666e-05,
1461
+ "loss": 0.3152,
1462
+ "step": 2090
1463
+ },
1464
+ {
1465
+ "epoch": 32.31,
1466
+ "learning_rate": 9.085833333333334e-05,
1467
+ "loss": 0.2703,
1468
+ "step": 2100
1469
+ },
1470
+ {
1471
+ "epoch": 32.31,
1472
+ "eval_cer": 0.060964360587002095,
1473
+ "eval_loss": 0.11160185188055038,
1474
+ "eval_runtime": 26.8072,
1475
+ "eval_samples_per_second": 12.72,
1476
+ "eval_steps_per_second": 1.604,
1477
+ "eval_wer": 0.24426350851221318,
1478
+ "step": 2100
1479
+ }
1480
+ ],
1481
+ "max_steps": 13000,
1482
+ "num_train_epochs": 200,
1483
+ "total_flos": 8.493815951541043e+18,
1484
+ "trial_name": null,
1485
+ "trial_params": null
1486
+ }