hts98 commited on
Commit
5bf9062
1 Parent(s): 3da2b7a

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.7895313501358032,
4
+ "eval_runtime": 101.7007,
5
+ "eval_samples": 842,
6
+ "eval_samples_per_second": 8.279,
7
+ "eval_steps_per_second": 1.042,
8
+ "eval_wer": 0.43980671943617605,
9
+ "train_loss": 0.4734274124600994,
10
+ "train_runtime": 45905.6833,
11
+ "train_samples": 3350,
12
+ "train_samples_per_second": 3.649,
13
+ "train_steps_per_second": 0.365
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.7895313501358032,
4
+ "eval_runtime": 101.7007,
5
+ "eval_samples": 842,
6
+ "eval_samples_per_second": 8.279,
7
+ "eval_steps_per_second": 1.042,
8
+ "eval_wer": 0.43980671943617605
9
+ }
runs/Jun30_10-04-05_64df48378bb2/events.out.tfevents.1688166626.64df48378bb2.1419.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff27a790950cd3b58236c28ec90d046792b81fbb25add0c6019ca4bd97d99bfd
3
+ size 412
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 0.4734274124600994,
4
+ "train_runtime": 45905.6833,
5
+ "train_samples": 3350,
6
+ "train_samples_per_second": 3.649,
7
+ "train_steps_per_second": 0.365
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7895313501358032,
3
+ "best_model_checkpoint": "./wav2vec2-xls-r-300m-paper/checkpoint-3015",
4
+ "epoch": 50.0,
5
+ "global_step": 16750,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 3.7156972885131836,
13
+ "eval_runtime": 90.6265,
14
+ "eval_samples_per_second": 9.291,
15
+ "eval_steps_per_second": 1.17,
16
+ "eval_wer": 1.0,
17
+ "step": 335
18
+ },
19
+ {
20
+ "epoch": 1.49,
21
+ "learning_rate": 4.9770361298224126e-05,
22
+ "loss": 6.2976,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 2.0,
27
+ "eval_loss": 3.364386558532715,
28
+ "eval_runtime": 105.7497,
29
+ "eval_samples_per_second": 7.962,
30
+ "eval_steps_per_second": 1.002,
31
+ "eval_wer": 1.0,
32
+ "step": 670
33
+ },
34
+ {
35
+ "epoch": 2.99,
36
+ "learning_rate": 4.8239436619718316e-05,
37
+ "loss": 3.2342,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 3.0,
42
+ "eval_loss": 2.459735155105591,
43
+ "eval_runtime": 100.2813,
44
+ "eval_samples_per_second": 8.396,
45
+ "eval_steps_per_second": 1.057,
46
+ "eval_wer": 0.9739311504731903,
47
+ "step": 1005
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_loss": 1.4160194396972656,
52
+ "eval_runtime": 105.5456,
53
+ "eval_samples_per_second": 7.978,
54
+ "eval_steps_per_second": 1.004,
55
+ "eval_wer": 0.7444104809322317,
56
+ "step": 1340
57
+ },
58
+ {
59
+ "epoch": 4.48,
60
+ "learning_rate": 4.670851194121249e-05,
61
+ "loss": 1.2813,
62
+ "step": 1500
63
+ },
64
+ {
65
+ "epoch": 5.0,
66
+ "eval_loss": 1.1337928771972656,
67
+ "eval_runtime": 101.0094,
68
+ "eval_samples_per_second": 8.336,
69
+ "eval_steps_per_second": 1.049,
70
+ "eval_wer": 0.6543241186913518,
71
+ "step": 1675
72
+ },
73
+ {
74
+ "epoch": 5.97,
75
+ "learning_rate": 4.5177587262706676e-05,
76
+ "loss": 0.7279,
77
+ "step": 2000
78
+ },
79
+ {
80
+ "epoch": 6.0,
81
+ "eval_loss": 1.002017617225647,
82
+ "eval_runtime": 97.2569,
83
+ "eval_samples_per_second": 8.657,
84
+ "eval_steps_per_second": 1.09,
85
+ "eval_wer": 0.5855680286183377,
86
+ "step": 2010
87
+ },
88
+ {
89
+ "epoch": 7.0,
90
+ "eval_loss": 0.8434591293334961,
91
+ "eval_runtime": 101.7349,
92
+ "eval_samples_per_second": 8.276,
93
+ "eval_steps_per_second": 1.042,
94
+ "eval_wer": 0.48233378271954297,
95
+ "step": 2345
96
+ },
97
+ {
98
+ "epoch": 7.46,
99
+ "learning_rate": 4.364666258420086e-05,
100
+ "loss": 0.5226,
101
+ "step": 2500
102
+ },
103
+ {
104
+ "epoch": 8.0,
105
+ "eval_loss": 0.875725507736206,
106
+ "eval_runtime": 100.6138,
107
+ "eval_samples_per_second": 8.369,
108
+ "eval_steps_per_second": 1.054,
109
+ "eval_wer": 0.5078286637211847,
110
+ "step": 2680
111
+ },
112
+ {
113
+ "epoch": 8.96,
114
+ "learning_rate": 4.211573790569504e-05,
115
+ "loss": 0.4218,
116
+ "step": 3000
117
+ },
118
+ {
119
+ "epoch": 9.0,
120
+ "eval_loss": 0.7895313501358032,
121
+ "eval_runtime": 101.3795,
122
+ "eval_samples_per_second": 8.305,
123
+ "eval_steps_per_second": 1.046,
124
+ "eval_wer": 0.43980671943617605,
125
+ "step": 3015
126
+ },
127
+ {
128
+ "epoch": 10.0,
129
+ "eval_loss": 0.7991793155670166,
130
+ "eval_runtime": 87.9454,
131
+ "eval_samples_per_second": 9.574,
132
+ "eval_steps_per_second": 1.205,
133
+ "eval_wer": 0.42284127768063323,
134
+ "step": 3350
135
+ },
136
+ {
137
+ "epoch": 10.45,
138
+ "learning_rate": 4.058481322718922e-05,
139
+ "loss": 0.3421,
140
+ "step": 3500
141
+ },
142
+ {
143
+ "epoch": 11.0,
144
+ "eval_loss": 0.8118342757225037,
145
+ "eval_runtime": 105.2144,
146
+ "eval_samples_per_second": 8.003,
147
+ "eval_steps_per_second": 1.007,
148
+ "eval_wer": 0.43070331166490916,
149
+ "step": 3685
150
+ },
151
+ {
152
+ "epoch": 11.94,
153
+ "learning_rate": 3.905388854868341e-05,
154
+ "loss": 0.287,
155
+ "step": 4000
156
+ },
157
+ {
158
+ "epoch": 12.0,
159
+ "eval_loss": 0.8214530348777771,
160
+ "eval_runtime": 103.7557,
161
+ "eval_samples_per_second": 8.115,
162
+ "eval_steps_per_second": 1.022,
163
+ "eval_wer": 0.4248434934661025,
164
+ "step": 4020
165
+ },
166
+ {
167
+ "epoch": 13.0,
168
+ "eval_loss": 0.8603241443634033,
169
+ "eval_runtime": 98.5504,
170
+ "eval_samples_per_second": 8.544,
171
+ "eval_steps_per_second": 1.076,
172
+ "eval_wer": 0.40767783013201275,
173
+ "step": 4355
174
+ },
175
+ {
176
+ "epoch": 13.43,
177
+ "learning_rate": 3.752296387017759e-05,
178
+ "loss": 0.2415,
179
+ "step": 4500
180
+ },
181
+ {
182
+ "epoch": 14.0,
183
+ "eval_loss": 0.8329221606254578,
184
+ "eval_runtime": 106.1564,
185
+ "eval_samples_per_second": 7.932,
186
+ "eval_steps_per_second": 0.999,
187
+ "eval_wer": 0.3885766915386361,
188
+ "step": 4690
189
+ },
190
+ {
191
+ "epoch": 14.93,
192
+ "learning_rate": 3.5992039191671776e-05,
193
+ "loss": 0.2132,
194
+ "step": 5000
195
+ },
196
+ {
197
+ "epoch": 15.0,
198
+ "eval_loss": 0.8728479743003845,
199
+ "eval_runtime": 106.4415,
200
+ "eval_samples_per_second": 7.91,
201
+ "eval_steps_per_second": 0.996,
202
+ "eval_wer": 0.3955310543668326,
203
+ "step": 5025
204
+ },
205
+ {
206
+ "epoch": 16.0,
207
+ "eval_loss": 0.8741195797920227,
208
+ "eval_runtime": 105.2581,
209
+ "eval_samples_per_second": 7.999,
210
+ "eval_steps_per_second": 1.007,
211
+ "eval_wer": 0.39178023679538687,
212
+ "step": 5360
213
+ },
214
+ {
215
+ "epoch": 16.42,
216
+ "learning_rate": 3.446111451316595e-05,
217
+ "loss": 0.1857,
218
+ "step": 5500
219
+ },
220
+ {
221
+ "epoch": 17.0,
222
+ "eval_loss": 0.8632614612579346,
223
+ "eval_runtime": 107.8251,
224
+ "eval_samples_per_second": 7.809,
225
+ "eval_steps_per_second": 0.983,
226
+ "eval_wer": 0.3674733371597902,
227
+ "step": 5695
228
+ },
229
+ {
230
+ "epoch": 17.91,
231
+ "learning_rate": 3.2930189834660136e-05,
232
+ "loss": 0.1673,
233
+ "step": 6000
234
+ },
235
+ {
236
+ "epoch": 18.0,
237
+ "eval_loss": 0.8884367346763611,
238
+ "eval_runtime": 96.3592,
239
+ "eval_samples_per_second": 8.738,
240
+ "eval_steps_per_second": 1.1,
241
+ "eval_wer": 0.3803943030286851,
242
+ "step": 6030
243
+ },
244
+ {
245
+ "epoch": 19.0,
246
+ "eval_loss": 0.9140524864196777,
247
+ "eval_runtime": 99.6527,
248
+ "eval_samples_per_second": 8.449,
249
+ "eval_steps_per_second": 1.064,
250
+ "eval_wer": 0.3679271727378299,
251
+ "step": 6365
252
+ },
253
+ {
254
+ "epoch": 19.4,
255
+ "learning_rate": 3.139926515615432e-05,
256
+ "loss": 0.1479,
257
+ "step": 6500
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "eval_loss": 0.9567932486534119,
262
+ "eval_runtime": 99.4352,
263
+ "eval_samples_per_second": 8.468,
264
+ "eval_steps_per_second": 1.066,
265
+ "eval_wer": 0.36047893001588427,
266
+ "step": 6700
267
+ },
268
+ {
269
+ "epoch": 20.9,
270
+ "learning_rate": 2.98683404776485e-05,
271
+ "loss": 0.1386,
272
+ "step": 7000
273
+ },
274
+ {
275
+ "epoch": 21.0,
276
+ "eval_loss": 0.9340795874595642,
277
+ "eval_runtime": 103.1923,
278
+ "eval_samples_per_second": 8.16,
279
+ "eval_steps_per_second": 1.027,
280
+ "eval_wer": 0.36301507001081196,
281
+ "step": 7035
282
+ },
283
+ {
284
+ "epoch": 22.0,
285
+ "eval_loss": 0.9644697308540344,
286
+ "eval_runtime": 98.7834,
287
+ "eval_samples_per_second": 8.524,
288
+ "eval_steps_per_second": 1.073,
289
+ "eval_wer": 0.3536847444505252,
290
+ "step": 7370
291
+ },
292
+ {
293
+ "epoch": 22.39,
294
+ "learning_rate": 2.8337415799142685e-05,
295
+ "loss": 0.1233,
296
+ "step": 7500
297
+ },
298
+ {
299
+ "epoch": 23.0,
300
+ "eval_loss": 0.9728893041610718,
301
+ "eval_runtime": 102.9289,
302
+ "eval_samples_per_second": 8.18,
303
+ "eval_steps_per_second": 1.03,
304
+ "eval_wer": 0.35667472002349265,
305
+ "step": 7705
306
+ },
307
+ {
308
+ "epoch": 23.88,
309
+ "learning_rate": 2.680649112063687e-05,
310
+ "loss": 0.1177,
311
+ "step": 8000
312
+ },
313
+ {
314
+ "epoch": 24.0,
315
+ "eval_loss": 1.001348614692688,
316
+ "eval_runtime": 106.3343,
317
+ "eval_samples_per_second": 7.918,
318
+ "eval_steps_per_second": 0.997,
319
+ "eval_wer": 0.34538222299344606,
320
+ "step": 8040
321
+ },
322
+ {
323
+ "epoch": 25.0,
324
+ "eval_loss": 1.0323426723480225,
325
+ "eval_runtime": 102.2816,
326
+ "eval_samples_per_second": 8.232,
327
+ "eval_steps_per_second": 1.036,
328
+ "eval_wer": 0.35967804370169654,
329
+ "step": 8375
330
+ },
331
+ {
332
+ "epoch": 25.37,
333
+ "learning_rate": 2.527556644213105e-05,
334
+ "loss": 0.1061,
335
+ "step": 8500
336
+ },
337
+ {
338
+ "epoch": 26.0,
339
+ "eval_loss": 1.0269230604171753,
340
+ "eval_runtime": 96.8319,
341
+ "eval_samples_per_second": 8.695,
342
+ "eval_steps_per_second": 1.095,
343
+ "eval_wer": 0.3456491850981753,
344
+ "step": 8710
345
+ },
346
+ {
347
+ "epoch": 26.87,
348
+ "learning_rate": 2.3744641763625232e-05,
349
+ "loss": 0.1028,
350
+ "step": 9000
351
+ },
352
+ {
353
+ "epoch": 27.0,
354
+ "eval_loss": 1.0042426586151123,
355
+ "eval_runtime": 94.8369,
356
+ "eval_samples_per_second": 8.878,
357
+ "eval_steps_per_second": 1.118,
358
+ "eval_wer": 0.3424189436309516,
359
+ "step": 9045
360
+ },
361
+ {
362
+ "epoch": 28.0,
363
+ "eval_loss": 1.0424461364746094,
364
+ "eval_runtime": 93.6728,
365
+ "eval_samples_per_second": 8.989,
366
+ "eval_steps_per_second": 1.132,
367
+ "eval_wer": 0.3394423161632206,
368
+ "step": 9380
369
+ },
370
+ {
371
+ "epoch": 28.36,
372
+ "learning_rate": 2.2213717085119412e-05,
373
+ "loss": 0.0961,
374
+ "step": 9500
375
+ },
376
+ {
377
+ "epoch": 29.0,
378
+ "eval_loss": 1.0599919557571411,
379
+ "eval_runtime": 104.5041,
380
+ "eval_samples_per_second": 8.057,
381
+ "eval_steps_per_second": 1.014,
382
+ "eval_wer": 0.3412309622649065,
383
+ "step": 9715
384
+ },
385
+ {
386
+ "epoch": 29.85,
387
+ "learning_rate": 2.0682792406613595e-05,
388
+ "loss": 0.0949,
389
+ "step": 10000
390
+ },
391
+ {
392
+ "epoch": 30.0,
393
+ "eval_loss": 1.051209568977356,
394
+ "eval_runtime": 104.5071,
395
+ "eval_samples_per_second": 8.057,
396
+ "eval_steps_per_second": 1.014,
397
+ "eval_wer": 0.33890839195376216,
398
+ "step": 10050
399
+ },
400
+ {
401
+ "epoch": 31.0,
402
+ "eval_loss": 1.0956796407699585,
403
+ "eval_runtime": 104.4359,
404
+ "eval_samples_per_second": 8.062,
405
+ "eval_steps_per_second": 1.015,
406
+ "eval_wer": 0.3389217400589986,
407
+ "step": 10385
408
+ },
409
+ {
410
+ "epoch": 31.34,
411
+ "learning_rate": 1.9154929577464788e-05,
412
+ "loss": 0.0878,
413
+ "step": 10500
414
+ },
415
+ {
416
+ "epoch": 32.0,
417
+ "eval_loss": 1.09244704246521,
418
+ "eval_runtime": 106.7914,
419
+ "eval_samples_per_second": 7.885,
420
+ "eval_steps_per_second": 0.993,
421
+ "eval_wer": 0.33107305417995914,
422
+ "step": 10720
423
+ },
424
+ {
425
+ "epoch": 32.84,
426
+ "learning_rate": 1.7627066748315983e-05,
427
+ "loss": 0.0852,
428
+ "step": 11000
429
+ },
430
+ {
431
+ "epoch": 33.0,
432
+ "eval_loss": 1.0858749151229858,
433
+ "eval_runtime": 104.2304,
434
+ "eval_samples_per_second": 8.078,
435
+ "eval_steps_per_second": 1.017,
436
+ "eval_wer": 0.3365991697478543,
437
+ "step": 11055
438
+ },
439
+ {
440
+ "epoch": 34.0,
441
+ "eval_loss": 1.1498078107833862,
442
+ "eval_runtime": 96.0047,
443
+ "eval_samples_per_second": 8.77,
444
+ "eval_steps_per_second": 1.104,
445
+ "eval_wer": 0.3450485203625345,
446
+ "step": 11390
447
+ },
448
+ {
449
+ "epoch": 34.33,
450
+ "learning_rate": 1.6096142069810167e-05,
451
+ "loss": 0.0837,
452
+ "step": 11500
453
+ },
454
+ {
455
+ "epoch": 35.0,
456
+ "eval_loss": 1.0844124555587769,
457
+ "eval_runtime": 95.1094,
458
+ "eval_samples_per_second": 8.853,
459
+ "eval_steps_per_second": 1.115,
460
+ "eval_wer": 0.332861700281645,
461
+ "step": 11725
462
+ },
463
+ {
464
+ "epoch": 35.82,
465
+ "learning_rate": 1.4565217391304348e-05,
466
+ "loss": 0.0814,
467
+ "step": 12000
468
+ },
469
+ {
470
+ "epoch": 36.0,
471
+ "eval_loss": 1.1050550937652588,
472
+ "eval_runtime": 105.0526,
473
+ "eval_samples_per_second": 8.015,
474
+ "eval_steps_per_second": 1.009,
475
+ "eval_wer": 0.3321008582831667,
476
+ "step": 12060
477
+ },
478
+ {
479
+ "epoch": 37.0,
480
+ "eval_loss": 1.0878149271011353,
481
+ "eval_runtime": 97.9103,
482
+ "eval_samples_per_second": 8.6,
483
+ "eval_steps_per_second": 1.083,
484
+ "eval_wer": 0.3310597060747227,
485
+ "step": 12395
486
+ },
487
+ {
488
+ "epoch": 37.31,
489
+ "learning_rate": 1.3034292712798532e-05,
490
+ "loss": 0.0793,
491
+ "step": 12500
492
+ },
493
+ {
494
+ "epoch": 38.0,
495
+ "eval_loss": 1.1377496719360352,
496
+ "eval_runtime": 101.9563,
497
+ "eval_samples_per_second": 8.258,
498
+ "eval_steps_per_second": 1.04,
499
+ "eval_wer": 0.32856361039550436,
500
+ "step": 12730
501
+ },
502
+ {
503
+ "epoch": 38.81,
504
+ "learning_rate": 1.1506429883649724e-05,
505
+ "loss": 0.0759,
506
+ "step": 13000
507
+ },
508
+ {
509
+ "epoch": 39.0,
510
+ "eval_loss": 1.1136152744293213,
511
+ "eval_runtime": 98.218,
512
+ "eval_samples_per_second": 8.573,
513
+ "eval_steps_per_second": 1.079,
514
+ "eval_wer": 0.3246125712455117,
515
+ "step": 13065
516
+ },
517
+ {
518
+ "epoch": 40.0,
519
+ "eval_loss": 1.1215593814849854,
520
+ "eval_runtime": 107.5832,
521
+ "eval_samples_per_second": 7.826,
522
+ "eval_steps_per_second": 0.985,
523
+ "eval_wer": 0.32682835671476435,
524
+ "step": 13400
525
+ },
526
+ {
527
+ "epoch": 40.3,
528
+ "learning_rate": 9.975505205143907e-06,
529
+ "loss": 0.0726,
530
+ "step": 13500
531
+ },
532
+ {
533
+ "epoch": 41.0,
534
+ "eval_loss": 1.1300030946731567,
535
+ "eval_runtime": 106.9995,
536
+ "eval_samples_per_second": 7.869,
537
+ "eval_steps_per_second": 0.991,
538
+ "eval_wer": 0.3252666284020983,
539
+ "step": 13735
540
+ },
541
+ {
542
+ "epoch": 41.79,
543
+ "learning_rate": 8.444580526638089e-06,
544
+ "loss": 0.0715,
545
+ "step": 14000
546
+ },
547
+ {
548
+ "epoch": 42.0,
549
+ "eval_loss": 1.1506843566894531,
550
+ "eval_runtime": 93.8834,
551
+ "eval_samples_per_second": 8.969,
552
+ "eval_steps_per_second": 1.129,
553
+ "eval_wer": 0.32622769197912355,
554
+ "step": 14070
555
+ },
556
+ {
557
+ "epoch": 43.0,
558
+ "eval_loss": 1.1561784744262695,
559
+ "eval_runtime": 104.7418,
560
+ "eval_samples_per_second": 8.039,
561
+ "eval_steps_per_second": 1.012,
562
+ "eval_wer": 0.3275491543975333,
563
+ "step": 14405
564
+ },
565
+ {
566
+ "epoch": 43.28,
567
+ "learning_rate": 6.913655848132272e-06,
568
+ "loss": 0.0711,
569
+ "step": 14500
570
+ },
571
+ {
572
+ "epoch": 44.0,
573
+ "eval_loss": 1.1485936641693115,
574
+ "eval_runtime": 104.0283,
575
+ "eval_samples_per_second": 8.094,
576
+ "eval_steps_per_second": 1.019,
577
+ "eval_wer": 0.3218895577772735,
578
+ "step": 14740
579
+ },
580
+ {
581
+ "epoch": 44.78,
582
+ "learning_rate": 5.382731169626455e-06,
583
+ "loss": 0.0699,
584
+ "step": 15000
585
+ },
586
+ {
587
+ "epoch": 45.0,
588
+ "eval_loss": 1.1580160856246948,
589
+ "eval_runtime": 98.4301,
590
+ "eval_samples_per_second": 8.554,
591
+ "eval_steps_per_second": 1.077,
592
+ "eval_wer": 0.31940681020329165,
593
+ "step": 15075
594
+ },
595
+ {
596
+ "epoch": 46.0,
597
+ "eval_loss": 1.1580368280410767,
598
+ "eval_runtime": 96.4383,
599
+ "eval_samples_per_second": 8.731,
600
+ "eval_steps_per_second": 1.099,
601
+ "eval_wer": 0.3195269431504198,
602
+ "step": 15410
603
+ },
604
+ {
605
+ "epoch": 46.27,
606
+ "learning_rate": 3.851806491120637e-06,
607
+ "loss": 0.0667,
608
+ "step": 15500
609
+ },
610
+ {
611
+ "epoch": 47.0,
612
+ "eval_loss": 1.1504408121109009,
613
+ "eval_runtime": 99.6568,
614
+ "eval_samples_per_second": 8.449,
615
+ "eval_steps_per_second": 1.064,
616
+ "eval_wer": 0.321208804410214,
617
+ "step": 15745
618
+ },
619
+ {
620
+ "epoch": 47.76,
621
+ "learning_rate": 2.3208818126148195e-06,
622
+ "loss": 0.0667,
623
+ "step": 16000
624
+ },
625
+ {
626
+ "epoch": 48.0,
627
+ "eval_loss": 1.1580157279968262,
628
+ "eval_runtime": 110.9917,
629
+ "eval_samples_per_second": 7.586,
630
+ "eval_steps_per_second": 0.955,
631
+ "eval_wer": 0.3202610889384252,
632
+ "step": 16080
633
+ },
634
+ {
635
+ "epoch": 49.0,
636
+ "eval_loss": 1.1697617769241333,
637
+ "eval_runtime": 107.1691,
638
+ "eval_samples_per_second": 7.857,
639
+ "eval_steps_per_second": 0.989,
640
+ "eval_wer": 0.3192065886247447,
641
+ "step": 16415
642
+ },
643
+ {
644
+ "epoch": 49.25,
645
+ "learning_rate": 7.899571341090019e-07,
646
+ "loss": 0.0664,
647
+ "step": 16500
648
+ },
649
+ {
650
+ "epoch": 50.0,
651
+ "eval_loss": 1.17439603805542,
652
+ "eval_runtime": 107.0438,
653
+ "eval_samples_per_second": 7.866,
654
+ "eval_steps_per_second": 0.99,
655
+ "eval_wer": 0.3192332848352176,
656
+ "step": 16750
657
+ },
658
+ {
659
+ "epoch": 50.0,
660
+ "step": 16750,
661
+ "total_flos": 1.4087701795050537e+20,
662
+ "train_loss": 0.4734274124600994,
663
+ "train_runtime": 45905.6833,
664
+ "train_samples_per_second": 3.649,
665
+ "train_steps_per_second": 0.365
666
+ }
667
+ ],
668
+ "max_steps": 16750,
669
+ "num_train_epochs": 50,
670
+ "total_flos": 1.4087701795050537e+20,
671
+ "trial_name": null,
672
+ "trial_params": null
673
+ }