Sercan commited on
Commit
73932b2
1 Parent(s): de67b7b

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_cer": 0.06809827094571802,
4
+ "eval_loss": 0.31785446405410767,
5
+ "eval_runtime": 348.931,
6
+ "eval_samples": 10143,
7
+ "eval_samples_per_second": 29.069,
8
+ "eval_steps_per_second": 3.634,
9
+ "eval_wer": 0.2862633203284225,
10
+ "train_loss": 0.2775966154790558,
11
+ "train_runtime": 58895.0057,
12
+ "train_samples": 36125,
13
+ "train_samples_per_second": 18.401,
14
+ "train_steps_per_second": 0.288
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_cer": 0.06809827094571802,
4
+ "eval_loss": 0.31785446405410767,
5
+ "eval_runtime": 348.931,
6
+ "eval_samples": 10143,
7
+ "eval_samples_per_second": 29.069,
8
+ "eval_steps_per_second": 3.634,
9
+ "eval_wer": 0.2862633203284225
10
+ }
runs/Jan18_15-30-29_129-159-37-182/events.out.tfevents.1674115180.129-159-37-182.1516213.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ead6f53c4f4049a05afd88778f324bbe8afc30ca568b6ad6f4f541791bd0d5
3
+ size 412
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.2775966154790558,
4
+ "train_runtime": 58895.0057,
5
+ "train_samples": 36125,
6
+ "train_samples_per_second": 18.401,
7
+ "train_steps_per_second": 0.288
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,643 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 30.0,
5
+ "global_step": 16950,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.71,
12
+ "eval_cer": 0.479669124113056,
13
+ "eval_loss": 1.7289525270462036,
14
+ "eval_runtime": 352.9433,
15
+ "eval_samples_per_second": 28.738,
16
+ "eval_steps_per_second": 3.593,
17
+ "eval_wer": 0.9804149925270289,
18
+ "step": 400
19
+ },
20
+ {
21
+ "epoch": 0.88,
22
+ "learning_rate": 0.0002982,
23
+ "loss": 4.5435,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 1.42,
28
+ "eval_cer": 0.1449701156451013,
29
+ "eval_loss": 0.48100030422210693,
30
+ "eval_runtime": 355.0295,
31
+ "eval_samples_per_second": 28.569,
32
+ "eval_steps_per_second": 3.572,
33
+ "eval_wer": 0.5774374502610687,
34
+ "step": 800
35
+ },
36
+ {
37
+ "epoch": 1.77,
38
+ "learning_rate": 0.0002909361702127659,
39
+ "loss": 0.523,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 2.12,
44
+ "eval_cer": 0.11564221810245158,
45
+ "eval_loss": 0.3859069347381592,
46
+ "eval_runtime": 358.2561,
47
+ "eval_samples_per_second": 28.312,
48
+ "eval_steps_per_second": 3.539,
49
+ "eval_wer": 0.48120111026999746,
50
+ "step": 1200
51
+ },
52
+ {
53
+ "epoch": 2.65,
54
+ "learning_rate": 0.0002818176291793313,
55
+ "loss": 0.3449,
56
+ "step": 1500
57
+ },
58
+ {
59
+ "epoch": 2.83,
60
+ "eval_cer": 0.1094981215960235,
61
+ "eval_loss": 0.34923675656318665,
62
+ "eval_runtime": 353.6657,
63
+ "eval_samples_per_second": 28.68,
64
+ "eval_steps_per_second": 3.585,
65
+ "eval_wer": 0.44977581086589413,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 3.54,
70
+ "learning_rate": 0.0002726990881458966,
71
+ "loss": 0.2814,
72
+ "step": 2000
73
+ },
74
+ {
75
+ "epoch": 3.54,
76
+ "eval_cer": 0.10994501740010437,
77
+ "eval_loss": 0.36604171991348267,
78
+ "eval_runtime": 352.2579,
79
+ "eval_samples_per_second": 28.794,
80
+ "eval_steps_per_second": 3.6,
81
+ "eval_wer": 0.44657310895009605,
82
+ "step": 2000
83
+ },
84
+ {
85
+ "epoch": 4.25,
86
+ "eval_cer": 0.10429683107626926,
87
+ "eval_loss": 0.37658488750457764,
88
+ "eval_runtime": 352.034,
89
+ "eval_samples_per_second": 28.813,
90
+ "eval_steps_per_second": 3.602,
91
+ "eval_wer": 0.4235330654709913,
92
+ "step": 2400
93
+ },
94
+ {
95
+ "epoch": 4.42,
96
+ "learning_rate": 0.000263580547112462,
97
+ "loss": 0.2463,
98
+ "step": 2500
99
+ },
100
+ {
101
+ "epoch": 4.96,
102
+ "eval_cer": 0.10097250293365471,
103
+ "eval_loss": 0.34164857864379883,
104
+ "eval_runtime": 355.8322,
105
+ "eval_samples_per_second": 28.505,
106
+ "eval_steps_per_second": 3.563,
107
+ "eval_wer": 0.4119256973155535,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 5.31,
112
+ "learning_rate": 0.0002544620060790273,
113
+ "loss": 0.2296,
114
+ "step": 3000
115
+ },
116
+ {
117
+ "epoch": 5.66,
118
+ "eval_cer": 0.09793072826716873,
119
+ "eval_loss": 0.3322136402130127,
120
+ "eval_runtime": 355.102,
121
+ "eval_samples_per_second": 28.564,
122
+ "eval_steps_per_second": 3.571,
123
+ "eval_wer": 0.4012500242628933,
124
+ "step": 3200
125
+ },
126
+ {
127
+ "epoch": 6.19,
128
+ "learning_rate": 0.00024534346504559266,
129
+ "loss": 0.2143,
130
+ "step": 3500
131
+ },
132
+ {
133
+ "epoch": 6.37,
134
+ "eval_cer": 0.09717821339707125,
135
+ "eval_loss": 0.3369796872138977,
136
+ "eval_runtime": 355.7454,
137
+ "eval_samples_per_second": 28.512,
138
+ "eval_steps_per_second": 3.564,
139
+ "eval_wer": 0.395601622702304,
140
+ "step": 3600
141
+ },
142
+ {
143
+ "epoch": 7.08,
144
+ "learning_rate": 0.00023622492401215801,
145
+ "loss": 0.1955,
146
+ "step": 4000
147
+ },
148
+ {
149
+ "epoch": 7.08,
150
+ "eval_cer": 0.09977020906074034,
151
+ "eval_loss": 0.3401270806789398,
152
+ "eval_runtime": 359.7076,
153
+ "eval_samples_per_second": 28.198,
154
+ "eval_steps_per_second": 3.525,
155
+ "eval_wer": 0.40330751761486056,
156
+ "step": 4000
157
+ },
158
+ {
159
+ "epoch": 7.79,
160
+ "eval_cer": 0.09622099141671736,
161
+ "eval_loss": 0.33754295110702515,
162
+ "eval_runtime": 352.1651,
163
+ "eval_samples_per_second": 28.802,
164
+ "eval_steps_per_second": 3.601,
165
+ "eval_wer": 0.3889244744657311,
166
+ "step": 4400
167
+ },
168
+ {
169
+ "epoch": 7.96,
170
+ "learning_rate": 0.0002271063829787234,
171
+ "loss": 0.1845,
172
+ "step": 4500
173
+ },
174
+ {
175
+ "epoch": 8.5,
176
+ "eval_cer": 0.09233155632184571,
177
+ "eval_loss": 0.34551626443862915,
178
+ "eval_runtime": 357.5651,
179
+ "eval_samples_per_second": 28.367,
180
+ "eval_steps_per_second": 3.546,
181
+ "eval_wer": 0.37524020264368485,
182
+ "step": 4800
183
+ },
184
+ {
185
+ "epoch": 8.85,
186
+ "learning_rate": 0.00021798784194528871,
187
+ "loss": 0.1752,
188
+ "step": 5000
189
+ },
190
+ {
191
+ "epoch": 9.2,
192
+ "eval_cer": 0.09245553386749396,
193
+ "eval_loss": 0.3335849642753601,
194
+ "eval_runtime": 351.3613,
195
+ "eval_samples_per_second": 28.868,
196
+ "eval_steps_per_second": 3.609,
197
+ "eval_wer": 0.37176575632291,
198
+ "step": 5200
199
+ },
200
+ {
201
+ "epoch": 9.73,
202
+ "learning_rate": 0.0002088693009118541,
203
+ "loss": 0.1705,
204
+ "step": 5500
205
+ },
206
+ {
207
+ "epoch": 9.91,
208
+ "eval_cer": 0.08918598650086351,
209
+ "eval_loss": 0.3145359754562378,
210
+ "eval_runtime": 355.7051,
211
+ "eval_samples_per_second": 28.515,
212
+ "eval_steps_per_second": 3.565,
213
+ "eval_wer": 0.3653021215473903,
214
+ "step": 5600
215
+ },
216
+ {
217
+ "epoch": 10.62,
218
+ "learning_rate": 0.00019975075987841941,
219
+ "loss": 0.1585,
220
+ "step": 6000
221
+ },
222
+ {
223
+ "epoch": 10.62,
224
+ "eval_cer": 0.09218451318630942,
225
+ "eval_loss": 0.34097233414649963,
226
+ "eval_runtime": 352.3797,
227
+ "eval_samples_per_second": 28.784,
228
+ "eval_steps_per_second": 3.598,
229
+ "eval_wer": 0.37370678778703004,
230
+ "step": 6000
231
+ },
232
+ {
233
+ "epoch": 11.33,
234
+ "eval_cer": 0.08989237019118491,
235
+ "eval_loss": 0.3296053409576416,
236
+ "eval_runtime": 350.4695,
237
+ "eval_samples_per_second": 28.941,
238
+ "eval_steps_per_second": 3.618,
239
+ "eval_wer": 0.3664279197965799,
240
+ "step": 6400
241
+ },
242
+ {
243
+ "epoch": 11.5,
244
+ "learning_rate": 0.0001906322188449848,
245
+ "loss": 0.1474,
246
+ "step": 6500
247
+ },
248
+ {
249
+ "epoch": 12.04,
250
+ "eval_cer": 0.08988083739624089,
251
+ "eval_loss": 0.34918734431266785,
252
+ "eval_runtime": 352.3033,
253
+ "eval_samples_per_second": 28.791,
254
+ "eval_steps_per_second": 3.599,
255
+ "eval_wer": 0.3589937692890002,
256
+ "step": 6800
257
+ },
258
+ {
259
+ "epoch": 12.39,
260
+ "learning_rate": 0.00018153191489361702,
261
+ "loss": 0.1485,
262
+ "step": 7000
263
+ },
264
+ {
265
+ "epoch": 12.74,
266
+ "eval_cer": 0.08672950117778669,
267
+ "eval_loss": 0.31763964891433716,
268
+ "eval_runtime": 352.8906,
269
+ "eval_samples_per_second": 28.743,
270
+ "eval_steps_per_second": 3.593,
271
+ "eval_wer": 0.3506085133640016,
272
+ "step": 7200
273
+ },
274
+ {
275
+ "epoch": 13.27,
276
+ "learning_rate": 0.00017241337386018235,
277
+ "loss": 0.137,
278
+ "step": 7500
279
+ },
280
+ {
281
+ "epoch": 13.45,
282
+ "eval_cer": 0.08901587777543918,
283
+ "eval_loss": 0.3532153367996216,
284
+ "eval_runtime": 357.5465,
285
+ "eval_samples_per_second": 28.368,
286
+ "eval_steps_per_second": 3.546,
287
+ "eval_wer": 0.360041926279625,
288
+ "step": 7600
289
+ },
290
+ {
291
+ "epoch": 14.16,
292
+ "learning_rate": 0.00016329483282674772,
293
+ "loss": 0.1291,
294
+ "step": 8000
295
+ },
296
+ {
297
+ "epoch": 14.16,
298
+ "eval_cer": 0.08733497291234787,
299
+ "eval_loss": 0.33181944489479065,
300
+ "eval_runtime": 352.5023,
301
+ "eval_samples_per_second": 28.774,
302
+ "eval_steps_per_second": 3.597,
303
+ "eval_wer": 0.3570527378248801,
304
+ "step": 8000
305
+ },
306
+ {
307
+ "epoch": 14.87,
308
+ "eval_cer": 0.08829796129017377,
309
+ "eval_loss": 0.33532437682151794,
310
+ "eval_runtime": 357.1338,
311
+ "eval_samples_per_second": 28.401,
312
+ "eval_steps_per_second": 3.55,
313
+ "eval_wer": 0.3547623206972185,
314
+ "step": 8400
315
+ },
316
+ {
317
+ "epoch": 15.04,
318
+ "learning_rate": 0.00015417629179331305,
319
+ "loss": 0.1274,
320
+ "step": 8500
321
+ },
322
+ {
323
+ "epoch": 15.58,
324
+ "eval_cer": 0.08226342633571389,
325
+ "eval_loss": 0.32346823811531067,
326
+ "eval_runtime": 349.5389,
327
+ "eval_samples_per_second": 29.018,
328
+ "eval_steps_per_second": 3.628,
329
+ "eval_wer": 0.339602864962441,
330
+ "step": 8800
331
+ },
332
+ {
333
+ "epoch": 15.93,
334
+ "learning_rate": 0.00014505775075987842,
335
+ "loss": 0.1198,
336
+ "step": 9000
337
+ },
338
+ {
339
+ "epoch": 16.28,
340
+ "eval_cer": 0.08322353151480379,
341
+ "eval_loss": 0.32590439915657043,
342
+ "eval_runtime": 352.9664,
343
+ "eval_samples_per_second": 28.736,
344
+ "eval_steps_per_second": 3.592,
345
+ "eval_wer": 0.33894291426464024,
346
+ "step": 9200
347
+ },
348
+ {
349
+ "epoch": 16.81,
350
+ "learning_rate": 0.00013595744680851063,
351
+ "loss": 0.1164,
352
+ "step": 9500
353
+ },
354
+ {
355
+ "epoch": 16.99,
356
+ "eval_cer": 0.084353745419318,
357
+ "eval_loss": 0.32632604241371155,
358
+ "eval_runtime": 355.0498,
359
+ "eval_samples_per_second": 28.568,
360
+ "eval_steps_per_second": 3.571,
361
+ "eval_wer": 0.3411362798190959,
362
+ "step": 9600
363
+ },
364
+ {
365
+ "epoch": 17.7,
366
+ "learning_rate": 0.00012683890577507598,
367
+ "loss": 0.1119,
368
+ "step": 10000
369
+ },
370
+ {
371
+ "epoch": 17.7,
372
+ "eval_cer": 0.08243353506113824,
373
+ "eval_loss": 0.32535773515701294,
374
+ "eval_runtime": 352.3077,
375
+ "eval_samples_per_second": 28.79,
376
+ "eval_steps_per_second": 3.599,
377
+ "eval_wer": 0.3377006541276034,
378
+ "step": 10000
379
+ },
380
+ {
381
+ "epoch": 18.41,
382
+ "eval_cer": 0.0811533948223517,
383
+ "eval_loss": 0.3243008255958557,
384
+ "eval_runtime": 356.0414,
385
+ "eval_samples_per_second": 28.488,
386
+ "eval_steps_per_second": 3.561,
387
+ "eval_wer": 0.3330615889283565,
388
+ "step": 10400
389
+ },
390
+ {
391
+ "epoch": 18.58,
392
+ "learning_rate": 0.00011772036474164133,
393
+ "loss": 0.1054,
394
+ "step": 10500
395
+ },
396
+ {
397
+ "epoch": 19.12,
398
+ "eval_cer": 0.07895063098804338,
399
+ "eval_loss": 0.32234683632850647,
400
+ "eval_runtime": 353.14,
401
+ "eval_samples_per_second": 28.722,
402
+ "eval_steps_per_second": 3.591,
403
+ "eval_wer": 0.3239387410469924,
404
+ "step": 10800
405
+ },
406
+ {
407
+ "epoch": 19.47,
408
+ "learning_rate": 0.00010860182370820666,
409
+ "loss": 0.1017,
410
+ "step": 11000
411
+ },
412
+ {
413
+ "epoch": 19.82,
414
+ "eval_cer": 0.07741965245922436,
415
+ "eval_loss": 0.305361270904541,
416
+ "eval_runtime": 348.3343,
417
+ "eval_samples_per_second": 29.119,
418
+ "eval_steps_per_second": 3.64,
419
+ "eval_wer": 0.3189502901842039,
420
+ "step": 11200
421
+ },
422
+ {
423
+ "epoch": 20.35,
424
+ "learning_rate": 9.948328267477204e-05,
425
+ "loss": 0.0964,
426
+ "step": 11500
427
+ },
428
+ {
429
+ "epoch": 20.53,
430
+ "eval_cer": 0.07850373518396249,
431
+ "eval_loss": 0.32777705788612366,
432
+ "eval_runtime": 353.3356,
433
+ "eval_samples_per_second": 28.706,
434
+ "eval_steps_per_second": 3.589,
435
+ "eval_wer": 0.3236669966420156,
436
+ "step": 11600
437
+ },
438
+ {
439
+ "epoch": 21.24,
440
+ "learning_rate": 9.036474164133739e-05,
441
+ "loss": 0.0903,
442
+ "step": 12000
443
+ },
444
+ {
445
+ "epoch": 21.24,
446
+ "eval_cer": 0.07744560124784841,
447
+ "eval_loss": 0.3166551887989044,
448
+ "eval_runtime": 353.0166,
449
+ "eval_samples_per_second": 28.732,
450
+ "eval_steps_per_second": 3.592,
451
+ "eval_wer": 0.3177274403618083,
452
+ "step": 12000
453
+ },
454
+ {
455
+ "epoch": 21.95,
456
+ "eval_cer": 0.07655469283842266,
457
+ "eval_loss": 0.33310163021087646,
458
+ "eval_runtime": 354.4672,
459
+ "eval_samples_per_second": 28.615,
460
+ "eval_steps_per_second": 3.577,
461
+ "eval_wer": 0.3124478347794018,
462
+ "step": 12400
463
+ },
464
+ {
465
+ "epoch": 22.12,
466
+ "learning_rate": 8.124620060790274e-05,
467
+ "loss": 0.0886,
468
+ "step": 12500
469
+ },
470
+ {
471
+ "epoch": 22.65,
472
+ "eval_cer": 0.07452492092827466,
473
+ "eval_loss": 0.3098578155040741,
474
+ "eval_runtime": 354.4398,
475
+ "eval_samples_per_second": 28.617,
476
+ "eval_steps_per_second": 3.577,
477
+ "eval_wer": 0.30889574720006213,
478
+ "step": 12800
479
+ },
480
+ {
481
+ "epoch": 23.01,
482
+ "learning_rate": 7.214589665653494e-05,
483
+ "loss": 0.0836,
484
+ "step": 13000
485
+ },
486
+ {
487
+ "epoch": 23.36,
488
+ "eval_cer": 0.07314963513119996,
489
+ "eval_loss": 0.3170570433139801,
490
+ "eval_runtime": 351.2874,
491
+ "eval_samples_per_second": 28.874,
492
+ "eval_steps_per_second": 3.61,
493
+ "eval_wer": 0.3047613501814864,
494
+ "step": 13200
495
+ },
496
+ {
497
+ "epoch": 23.89,
498
+ "learning_rate": 6.30273556231003e-05,
499
+ "loss": 0.0796,
500
+ "step": 13500
501
+ },
502
+ {
503
+ "epoch": 24.07,
504
+ "eval_cer": 0.07325919668316817,
505
+ "eval_loss": 0.315768837928772,
506
+ "eval_runtime": 354.0965,
507
+ "eval_samples_per_second": 28.645,
508
+ "eval_steps_per_second": 3.581,
509
+ "eval_wer": 0.30410139948368564,
510
+ "step": 13600
511
+ },
512
+ {
513
+ "epoch": 24.78,
514
+ "learning_rate": 5.390881458966565e-05,
515
+ "loss": 0.0739,
516
+ "step": 14000
517
+ },
518
+ {
519
+ "epoch": 24.78,
520
+ "eval_cer": 0.07206266920772582,
521
+ "eval_loss": 0.3202644884586334,
522
+ "eval_runtime": 351.0812,
523
+ "eval_samples_per_second": 28.891,
524
+ "eval_steps_per_second": 3.612,
525
+ "eval_wer": 0.3002775674993692,
526
+ "step": 14000
527
+ },
528
+ {
529
+ "epoch": 25.49,
530
+ "eval_cer": 0.07125249036290822,
531
+ "eval_loss": 0.3138331472873688,
532
+ "eval_runtime": 353.8044,
533
+ "eval_samples_per_second": 28.668,
534
+ "eval_steps_per_second": 3.584,
535
+ "eval_wer": 0.2973660203031891,
536
+ "step": 14400
537
+ },
538
+ {
539
+ "epoch": 25.66,
540
+ "learning_rate": 4.4790273556231e-05,
541
+ "loss": 0.0742,
542
+ "step": 14500
543
+ },
544
+ {
545
+ "epoch": 26.19,
546
+ "eval_cer": 0.07109103123369191,
547
+ "eval_loss": 0.3196839392185211,
548
+ "eval_runtime": 353.996,
549
+ "eval_samples_per_second": 28.653,
550
+ "eval_steps_per_second": 3.582,
551
+ "eval_wer": 0.2958520157611755,
552
+ "step": 14800
553
+ },
554
+ {
555
+ "epoch": 26.55,
556
+ "learning_rate": 3.568996960486322e-05,
557
+ "loss": 0.07,
558
+ "step": 15000
559
+ },
560
+ {
561
+ "epoch": 26.9,
562
+ "eval_cer": 0.07031256757497037,
563
+ "eval_loss": 0.3232352137565613,
564
+ "eval_runtime": 355.2159,
565
+ "eval_samples_per_second": 28.554,
566
+ "eval_steps_per_second": 3.57,
567
+ "eval_wer": 0.2951726547487335,
568
+ "step": 15200
569
+ },
570
+ {
571
+ "epoch": 27.43,
572
+ "learning_rate": 2.6571428571428566e-05,
573
+ "loss": 0.0654,
574
+ "step": 15500
575
+ },
576
+ {
577
+ "epoch": 27.61,
578
+ "eval_cer": 0.07008479487482593,
579
+ "eval_loss": 0.3242589831352234,
580
+ "eval_runtime": 352.5643,
581
+ "eval_samples_per_second": 28.769,
582
+ "eval_steps_per_second": 3.597,
583
+ "eval_wer": 0.2938527533531319,
584
+ "step": 15600
585
+ },
586
+ {
587
+ "epoch": 28.32,
588
+ "learning_rate": 1.745288753799392e-05,
589
+ "loss": 0.0631,
590
+ "step": 16000
591
+ },
592
+ {
593
+ "epoch": 28.32,
594
+ "eval_cer": 0.06881907062971944,
595
+ "eval_loss": 0.3212898373603821,
596
+ "eval_runtime": 352.1058,
597
+ "eval_samples_per_second": 28.807,
598
+ "eval_steps_per_second": 3.601,
599
+ "eval_wer": 0.2875638114093829,
600
+ "step": 16000
601
+ },
602
+ {
603
+ "epoch": 29.03,
604
+ "eval_cer": 0.06853940035232689,
605
+ "eval_loss": 0.3151107728481293,
606
+ "eval_runtime": 353.743,
607
+ "eval_samples_per_second": 28.673,
608
+ "eval_steps_per_second": 3.585,
609
+ "eval_wer": 0.28799083833148936,
610
+ "step": 16400
611
+ },
612
+ {
613
+ "epoch": 29.2,
614
+ "learning_rate": 8.33434650455927e-06,
615
+ "loss": 0.0607,
616
+ "step": 16500
617
+ },
618
+ {
619
+ "epoch": 29.73,
620
+ "eval_cer": 0.06810115414445403,
621
+ "eval_loss": 0.31835824251174927,
622
+ "eval_runtime": 352.8555,
623
+ "eval_samples_per_second": 28.745,
624
+ "eval_steps_per_second": 3.594,
625
+ "eval_wer": 0.28665152662124654,
626
+ "step": 16800
627
+ },
628
+ {
629
+ "epoch": 30.0,
630
+ "step": 16950,
631
+ "total_flos": 1.1955997003691401e+20,
632
+ "train_loss": 0.2775966154790558,
633
+ "train_runtime": 58895.0057,
634
+ "train_samples_per_second": 18.401,
635
+ "train_steps_per_second": 0.288
636
+ }
637
+ ],
638
+ "max_steps": 16950,
639
+ "num_train_epochs": 30,
640
+ "total_flos": 1.1955997003691401e+20,
641
+ "trial_name": null,
642
+ "trial_params": null
643
+ }