anuragshas commited on
Commit
6366826
1 Parent(s): 04b0cc1

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.5878269672393799,
4
+ "eval_runtime": 127.4128,
5
+ "eval_samples": 2095,
6
+ "eval_samples_per_second": 16.443,
7
+ "eval_steps_per_second": 1.028,
8
+ "eval_wer": 0.3419183497004509,
9
+ "train_loss": 0.9074438333835732,
10
+ "train_runtime": 47510.6321,
11
+ "train_samples": 4711,
12
+ "train_samples_per_second": 9.916,
13
+ "train_steps_per_second": 0.309
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.5878269672393799,
4
+ "eval_runtime": 127.4128,
5
+ "eval_samples": 2095,
6
+ "eval_samples_per_second": 16.443,
7
+ "eval_steps_per_second": 1.028,
8
+ "eval_wer": 0.3419183497004509
9
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d75cb080e5d8989be708b511150007749e45725e517b948fcb5280c3c37d5325
3
  size 3850650929
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca46f16fe2f42e9926176e5fb128a15246d287a6e09e1ec444fc5636a8ecfac
3
  size 3850650929
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "train_loss": 0.9074438333835732,
4
+ "train_runtime": 47510.6321,
5
+ "train_samples": 4711,
6
+ "train_samples_per_second": 9.916,
7
+ "train_steps_per_second": 0.309
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 99.99830220713073,
5
+ "global_step": 14700,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.68,
12
+ "learning_rate": 3.6375e-06,
13
+ "loss": 11.6851,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 1.36,
18
+ "learning_rate": 7.3875e-06,
19
+ "loss": 3.4316,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 2.04,
24
+ "learning_rate": 1.1137499999999998e-05,
25
+ "loss": 2.9224,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 2.72,
30
+ "learning_rate": 1.48875e-05,
31
+ "loss": 1.9859,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.72,
36
+ "eval_loss": 1.1663333177566528,
37
+ "eval_runtime": 129.56,
38
+ "eval_samples_per_second": 16.17,
39
+ "eval_steps_per_second": 1.011,
40
+ "eval_wer": 0.7947625223889815,
41
+ "step": 400
42
+ },
43
+ {
44
+ "epoch": 3.4,
45
+ "learning_rate": 1.86375e-05,
46
+ "loss": 1.625,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 4.08,
51
+ "learning_rate": 2.23875e-05,
52
+ "loss": 1.4408,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 4.76,
57
+ "learning_rate": 2.6137499999999995e-05,
58
+ "loss": 1.3534,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 5.44,
63
+ "learning_rate": 2.9887499999999998e-05,
64
+ "loss": 1.2969,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 5.44,
69
+ "eval_loss": 0.7725260853767395,
70
+ "eval_runtime": 128.7049,
71
+ "eval_samples_per_second": 16.278,
72
+ "eval_steps_per_second": 1.018,
73
+ "eval_wer": 0.656228769069236,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 6.12,
78
+ "learning_rate": 3.36375e-05,
79
+ "loss": 1.2621,
80
+ "step": 900
81
+ },
82
+ {
83
+ "epoch": 6.8,
84
+ "learning_rate": 3.7387499999999994e-05,
85
+ "loss": 1.2048,
86
+ "step": 1000
87
+ },
88
+ {
89
+ "epoch": 7.48,
90
+ "learning_rate": 4.11375e-05,
91
+ "loss": 1.1914,
92
+ "step": 1100
93
+ },
94
+ {
95
+ "epoch": 8.16,
96
+ "learning_rate": 4.48875e-05,
97
+ "loss": 1.1954,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 8.16,
102
+ "eval_loss": 0.5940413475036621,
103
+ "eval_runtime": 129.7001,
104
+ "eval_samples_per_second": 16.153,
105
+ "eval_steps_per_second": 1.01,
106
+ "eval_wer": 0.4903958989562102,
107
+ "step": 1200
108
+ },
109
+ {
110
+ "epoch": 8.84,
111
+ "learning_rate": 4.8637499999999996e-05,
112
+ "loss": 1.1675,
113
+ "step": 1300
114
+ },
115
+ {
116
+ "epoch": 9.52,
117
+ "learning_rate": 5.23875e-05,
118
+ "loss": 1.1484,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 10.2,
123
+ "learning_rate": 5.61375e-05,
124
+ "loss": 1.1564,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 10.88,
129
+ "learning_rate": 5.988749999999999e-05,
130
+ "loss": 1.164,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 10.88,
135
+ "eval_loss": 0.5338293313980103,
136
+ "eval_runtime": 129.1415,
137
+ "eval_samples_per_second": 16.223,
138
+ "eval_steps_per_second": 1.014,
139
+ "eval_wer": 0.4315978012476067,
140
+ "step": 1600
141
+ },
142
+ {
143
+ "epoch": 11.56,
144
+ "learning_rate": 6.36375e-05,
145
+ "loss": 1.1467,
146
+ "step": 1700
147
+ },
148
+ {
149
+ "epoch": 12.24,
150
+ "learning_rate": 6.738749999999999e-05,
151
+ "loss": 1.1664,
152
+ "step": 1800
153
+ },
154
+ {
155
+ "epoch": 12.92,
156
+ "learning_rate": 7.11375e-05,
157
+ "loss": 1.1658,
158
+ "step": 1900
159
+ },
160
+ {
161
+ "epoch": 13.6,
162
+ "learning_rate": 7.48875e-05,
163
+ "loss": 1.1464,
164
+ "step": 2000
165
+ },
166
+ {
167
+ "epoch": 13.6,
168
+ "eval_loss": 0.5432471632957458,
169
+ "eval_runtime": 128.5422,
170
+ "eval_samples_per_second": 16.298,
171
+ "eval_steps_per_second": 1.019,
172
+ "eval_wer": 0.42264220863442653,
173
+ "step": 2000
174
+ },
175
+ {
176
+ "epoch": 14.29,
177
+ "learning_rate": 7.44271653543307e-05,
178
+ "loss": 1.1256,
179
+ "step": 2100
180
+ },
181
+ {
182
+ "epoch": 14.96,
183
+ "learning_rate": 7.383661417322834e-05,
184
+ "loss": 1.1464,
185
+ "step": 2200
186
+ },
187
+ {
188
+ "epoch": 15.65,
189
+ "learning_rate": 7.324606299212597e-05,
190
+ "loss": 1.1545,
191
+ "step": 2300
192
+ },
193
+ {
194
+ "epoch": 16.33,
195
+ "learning_rate": 7.265551181102362e-05,
196
+ "loss": 1.1553,
197
+ "step": 2400
198
+ },
199
+ {
200
+ "epoch": 16.33,
201
+ "eval_loss": 0.5470633506774902,
202
+ "eval_runtime": 128.2784,
203
+ "eval_samples_per_second": 16.332,
204
+ "eval_steps_per_second": 1.021,
205
+ "eval_wer": 0.42603915755666727,
206
+ "step": 2400
207
+ },
208
+ {
209
+ "epoch": 17.01,
210
+ "learning_rate": 7.206496062992125e-05,
211
+ "loss": 1.1332,
212
+ "step": 2500
213
+ },
214
+ {
215
+ "epoch": 17.69,
216
+ "learning_rate": 7.14744094488189e-05,
217
+ "loss": 1.1317,
218
+ "step": 2600
219
+ },
220
+ {
221
+ "epoch": 18.37,
222
+ "learning_rate": 7.088385826771653e-05,
223
+ "loss": 1.1197,
224
+ "step": 2700
225
+ },
226
+ {
227
+ "epoch": 19.05,
228
+ "learning_rate": 7.029330708661417e-05,
229
+ "loss": 1.0985,
230
+ "step": 2800
231
+ },
232
+ {
233
+ "epoch": 19.05,
234
+ "eval_loss": 0.5289891958236694,
235
+ "eval_runtime": 128.1965,
236
+ "eval_samples_per_second": 16.342,
237
+ "eval_steps_per_second": 1.022,
238
+ "eval_wer": 0.407572107961213,
239
+ "step": 2800
240
+ },
241
+ {
242
+ "epoch": 19.73,
243
+ "learning_rate": 6.970275590551181e-05,
244
+ "loss": 1.0637,
245
+ "step": 2900
246
+ },
247
+ {
248
+ "epoch": 20.41,
249
+ "learning_rate": 6.911220472440943e-05,
250
+ "loss": 1.0786,
251
+ "step": 3000
252
+ },
253
+ {
254
+ "epoch": 21.09,
255
+ "learning_rate": 6.852165354330708e-05,
256
+ "loss": 1.0638,
257
+ "step": 3100
258
+ },
259
+ {
260
+ "epoch": 21.77,
261
+ "learning_rate": 6.793110236220472e-05,
262
+ "loss": 1.0421,
263
+ "step": 3200
264
+ },
265
+ {
266
+ "epoch": 21.77,
267
+ "eval_loss": 0.5671822428703308,
268
+ "eval_runtime": 128.6139,
269
+ "eval_samples_per_second": 16.289,
270
+ "eval_steps_per_second": 1.019,
271
+ "eval_wer": 0.4181335309739979,
272
+ "step": 3200
273
+ },
274
+ {
275
+ "epoch": 22.45,
276
+ "learning_rate": 6.734055118110236e-05,
277
+ "loss": 1.0587,
278
+ "step": 3300
279
+ },
280
+ {
281
+ "epoch": 23.13,
282
+ "learning_rate": 6.675e-05,
283
+ "loss": 1.0431,
284
+ "step": 3400
285
+ },
286
+ {
287
+ "epoch": 23.81,
288
+ "learning_rate": 6.615944881889763e-05,
289
+ "loss": 1.0282,
290
+ "step": 3500
291
+ },
292
+ {
293
+ "epoch": 24.49,
294
+ "learning_rate": 6.556889763779528e-05,
295
+ "loss": 0.9831,
296
+ "step": 3600
297
+ },
298
+ {
299
+ "epoch": 24.49,
300
+ "eval_loss": 0.5740545392036438,
301
+ "eval_runtime": 128.3992,
302
+ "eval_samples_per_second": 16.316,
303
+ "eval_steps_per_second": 1.02,
304
+ "eval_wer": 0.41405719226730897,
305
+ "step": 3600
306
+ },
307
+ {
308
+ "epoch": 25.17,
309
+ "learning_rate": 6.497834645669291e-05,
310
+ "loss": 1.0409,
311
+ "step": 3700
312
+ },
313
+ {
314
+ "epoch": 25.85,
315
+ "learning_rate": 6.438779527559054e-05,
316
+ "loss": 1.0023,
317
+ "step": 3800
318
+ },
319
+ {
320
+ "epoch": 26.53,
321
+ "learning_rate": 6.379724409448818e-05,
322
+ "loss": 0.9724,
323
+ "step": 3900
324
+ },
325
+ {
326
+ "epoch": 27.21,
327
+ "learning_rate": 6.320669291338583e-05,
328
+ "loss": 0.9827,
329
+ "step": 4000
330
+ },
331
+ {
332
+ "epoch": 27.21,
333
+ "eval_loss": 0.5753942131996155,
334
+ "eval_runtime": 129.7692,
335
+ "eval_samples_per_second": 16.144,
336
+ "eval_steps_per_second": 1.009,
337
+ "eval_wer": 0.4178864801432895,
338
+ "step": 4000
339
+ },
340
+ {
341
+ "epoch": 27.89,
342
+ "learning_rate": 6.261614173228346e-05,
343
+ "loss": 0.9974,
344
+ "step": 4100
345
+ },
346
+ {
347
+ "epoch": 28.57,
348
+ "learning_rate": 6.203149606299212e-05,
349
+ "loss": 0.9666,
350
+ "step": 4200
351
+ },
352
+ {
353
+ "epoch": 29.25,
354
+ "learning_rate": 6.144685039370079e-05,
355
+ "loss": 0.9571,
356
+ "step": 4300
357
+ },
358
+ {
359
+ "epoch": 29.93,
360
+ "learning_rate": 6.085629921259842e-05,
361
+ "loss": 0.9669,
362
+ "step": 4400
363
+ },
364
+ {
365
+ "epoch": 29.93,
366
+ "eval_loss": 0.5309818387031555,
367
+ "eval_runtime": 127.6098,
368
+ "eval_samples_per_second": 16.417,
369
+ "eval_steps_per_second": 1.027,
370
+ "eval_wer": 0.38885800753505034,
371
+ "step": 4400
372
+ },
373
+ {
374
+ "epoch": 30.61,
375
+ "learning_rate": 6.026574803149606e-05,
376
+ "loss": 0.9642,
377
+ "step": 4500
378
+ },
379
+ {
380
+ "epoch": 31.29,
381
+ "learning_rate": 5.967519685039369e-05,
382
+ "loss": 0.9843,
383
+ "step": 4600
384
+ },
385
+ {
386
+ "epoch": 31.97,
387
+ "learning_rate": 5.908464566929133e-05,
388
+ "loss": 0.9589,
389
+ "step": 4700
390
+ },
391
+ {
392
+ "epoch": 32.65,
393
+ "learning_rate": 5.8494094488188974e-05,
394
+ "loss": 0.9496,
395
+ "step": 4800
396
+ },
397
+ {
398
+ "epoch": 32.65,
399
+ "eval_loss": 0.5648738145828247,
400
+ "eval_runtime": 128.3031,
401
+ "eval_samples_per_second": 16.329,
402
+ "eval_steps_per_second": 1.021,
403
+ "eval_wer": 0.4061515656846396,
404
+ "step": 4800
405
+ },
406
+ {
407
+ "epoch": 33.33,
408
+ "learning_rate": 5.7903543307086614e-05,
409
+ "loss": 0.9264,
410
+ "step": 4900
411
+ },
412
+ {
413
+ "epoch": 34.01,
414
+ "learning_rate": 5.731299212598425e-05,
415
+ "loss": 0.9117,
416
+ "step": 5000
417
+ },
418
+ {
419
+ "epoch": 34.69,
420
+ "learning_rate": 5.672244094488189e-05,
421
+ "loss": 0.9176,
422
+ "step": 5100
423
+ },
424
+ {
425
+ "epoch": 35.37,
426
+ "learning_rate": 5.613188976377952e-05,
427
+ "loss": 0.9112,
428
+ "step": 5200
429
+ },
430
+ {
431
+ "epoch": 35.37,
432
+ "eval_loss": 0.5737842321395874,
433
+ "eval_runtime": 127.713,
434
+ "eval_samples_per_second": 16.404,
435
+ "eval_steps_per_second": 1.026,
436
+ "eval_wer": 0.3925637699956766,
437
+ "step": 5200
438
+ },
439
+ {
440
+ "epoch": 36.05,
441
+ "learning_rate": 5.5541338582677156e-05,
442
+ "loss": 0.903,
443
+ "step": 5300
444
+ },
445
+ {
446
+ "epoch": 36.73,
447
+ "learning_rate": 5.49507874015748e-05,
448
+ "loss": 0.8615,
449
+ "step": 5400
450
+ },
451
+ {
452
+ "epoch": 37.41,
453
+ "learning_rate": 5.436023622047244e-05,
454
+ "loss": 0.904,
455
+ "step": 5500
456
+ },
457
+ {
458
+ "epoch": 38.1,
459
+ "learning_rate": 5.376968503937008e-05,
460
+ "loss": 0.8838,
461
+ "step": 5600
462
+ },
463
+ {
464
+ "epoch": 38.1,
465
+ "eval_loss": 0.5232195258140564,
466
+ "eval_runtime": 127.5056,
467
+ "eval_samples_per_second": 16.431,
468
+ "eval_steps_per_second": 1.027,
469
+ "eval_wer": 0.37681427953801494,
470
+ "step": 5600
471
+ },
472
+ {
473
+ "epoch": 38.77,
474
+ "learning_rate": 5.317913385826771e-05,
475
+ "loss": 0.8693,
476
+ "step": 5700
477
+ },
478
+ {
479
+ "epoch": 39.46,
480
+ "learning_rate": 5.258858267716535e-05,
481
+ "loss": 0.8653,
482
+ "step": 5800
483
+ },
484
+ {
485
+ "epoch": 40.14,
486
+ "learning_rate": 5.1998031496062986e-05,
487
+ "loss": 0.8862,
488
+ "step": 5900
489
+ },
490
+ {
491
+ "epoch": 40.81,
492
+ "learning_rate": 5.140748031496062e-05,
493
+ "loss": 0.8666,
494
+ "step": 6000
495
+ },
496
+ {
497
+ "epoch": 40.81,
498
+ "eval_loss": 0.5510496497154236,
499
+ "eval_runtime": 128.5402,
500
+ "eval_samples_per_second": 16.298,
501
+ "eval_steps_per_second": 1.019,
502
+ "eval_wer": 0.3852140077821012,
503
+ "step": 6000
504
+ },
505
+ {
506
+ "epoch": 41.5,
507
+ "learning_rate": 5.081692913385826e-05,
508
+ "loss": 0.8378,
509
+ "step": 6100
510
+ },
511
+ {
512
+ "epoch": 42.18,
513
+ "learning_rate": 5.02263779527559e-05,
514
+ "loss": 0.8636,
515
+ "step": 6200
516
+ },
517
+ {
518
+ "epoch": 42.86,
519
+ "learning_rate": 4.963582677165354e-05,
520
+ "loss": 0.8464,
521
+ "step": 6300
522
+ },
523
+ {
524
+ "epoch": 43.54,
525
+ "learning_rate": 4.9045275590551175e-05,
526
+ "loss": 0.8366,
527
+ "step": 6400
528
+ },
529
+ {
530
+ "epoch": 43.54,
531
+ "eval_loss": 0.5436074733734131,
532
+ "eval_runtime": 127.7975,
533
+ "eval_samples_per_second": 16.393,
534
+ "eval_steps_per_second": 1.025,
535
+ "eval_wer": 0.3837317027978507,
536
+ "step": 6400
537
+ },
538
+ {
539
+ "epoch": 44.22,
540
+ "learning_rate": 4.846062992125984e-05,
541
+ "loss": 0.8298,
542
+ "step": 6500
543
+ },
544
+ {
545
+ "epoch": 44.9,
546
+ "learning_rate": 4.7870078740157475e-05,
547
+ "loss": 0.8385,
548
+ "step": 6600
549
+ },
550
+ {
551
+ "epoch": 45.58,
552
+ "learning_rate": 4.727952755905511e-05,
553
+ "loss": 0.8347,
554
+ "step": 6700
555
+ },
556
+ {
557
+ "epoch": 46.26,
558
+ "learning_rate": 4.668897637795275e-05,
559
+ "loss": 0.7957,
560
+ "step": 6800
561
+ },
562
+ {
563
+ "epoch": 46.26,
564
+ "eval_loss": 0.5337450504302979,
565
+ "eval_runtime": 128.0855,
566
+ "eval_samples_per_second": 16.356,
567
+ "eval_steps_per_second": 1.023,
568
+ "eval_wer": 0.3774936693224631,
569
+ "step": 6800
570
+ },
571
+ {
572
+ "epoch": 46.94,
573
+ "learning_rate": 4.609842519685039e-05,
574
+ "loss": 0.8226,
575
+ "step": 6900
576
+ },
577
+ {
578
+ "epoch": 47.62,
579
+ "learning_rate": 4.550787401574803e-05,
580
+ "loss": 0.8228,
581
+ "step": 7000
582
+ },
583
+ {
584
+ "epoch": 48.3,
585
+ "learning_rate": 4.4917322834645664e-05,
586
+ "loss": 0.815,
587
+ "step": 7100
588
+ },
589
+ {
590
+ "epoch": 48.98,
591
+ "learning_rate": 4.4326771653543305e-05,
592
+ "loss": 0.7834,
593
+ "step": 7200
594
+ },
595
+ {
596
+ "epoch": 48.98,
597
+ "eval_loss": 0.561106264591217,
598
+ "eval_runtime": 127.7689,
599
+ "eval_samples_per_second": 16.397,
600
+ "eval_steps_per_second": 1.025,
601
+ "eval_wer": 0.38441109258229883,
602
+ "step": 7200
603
+ },
604
+ {
605
+ "epoch": 49.66,
606
+ "learning_rate": 4.3736220472440945e-05,
607
+ "loss": 0.7998,
608
+ "step": 7300
609
+ },
610
+ {
611
+ "epoch": 50.34,
612
+ "learning_rate": 4.314566929133857e-05,
613
+ "loss": 0.7898,
614
+ "step": 7400
615
+ },
616
+ {
617
+ "epoch": 51.02,
618
+ "learning_rate": 4.255511811023621e-05,
619
+ "loss": 0.7567,
620
+ "step": 7500
621
+ },
622
+ {
623
+ "epoch": 51.7,
624
+ "learning_rate": 4.1964566929133854e-05,
625
+ "loss": 0.7685,
626
+ "step": 7600
627
+ },
628
+ {
629
+ "epoch": 51.7,
630
+ "eval_loss": 0.571021556854248,
631
+ "eval_runtime": 126.8927,
632
+ "eval_samples_per_second": 16.51,
633
+ "eval_steps_per_second": 1.032,
634
+ "eval_wer": 0.40077821011673154,
635
+ "step": 7600
636
+ },
637
+ {
638
+ "epoch": 52.38,
639
+ "learning_rate": 4.1374015748031494e-05,
640
+ "loss": 0.7744,
641
+ "step": 7700
642
+ },
643
+ {
644
+ "epoch": 53.06,
645
+ "learning_rate": 4.078346456692913e-05,
646
+ "loss": 0.7483,
647
+ "step": 7800
648
+ },
649
+ {
650
+ "epoch": 53.74,
651
+ "learning_rate": 4.019291338582677e-05,
652
+ "loss": 0.76,
653
+ "step": 7900
654
+ },
655
+ {
656
+ "epoch": 54.42,
657
+ "learning_rate": 3.960236220472441e-05,
658
+ "loss": 0.7431,
659
+ "step": 8000
660
+ },
661
+ {
662
+ "epoch": 54.42,
663
+ "eval_loss": 0.5636317729949951,
664
+ "eval_runtime": 128.5647,
665
+ "eval_samples_per_second": 16.295,
666
+ "eval_steps_per_second": 1.019,
667
+ "eval_wer": 0.37261441541597184,
668
+ "step": 8000
669
+ },
670
+ {
671
+ "epoch": 55.1,
672
+ "learning_rate": 3.901181102362205e-05,
673
+ "loss": 0.746,
674
+ "step": 8100
675
+ },
676
+ {
677
+ "epoch": 55.78,
678
+ "learning_rate": 3.842125984251968e-05,
679
+ "loss": 0.7162,
680
+ "step": 8200
681
+ },
682
+ {
683
+ "epoch": 56.46,
684
+ "learning_rate": 3.783070866141732e-05,
685
+ "loss": 0.7253,
686
+ "step": 8300
687
+ },
688
+ {
689
+ "epoch": 57.14,
690
+ "learning_rate": 3.724015748031496e-05,
691
+ "loss": 0.7353,
692
+ "step": 8400
693
+ },
694
+ {
695
+ "epoch": 57.14,
696
+ "eval_loss": 0.5937429070472717,
697
+ "eval_runtime": 127.7429,
698
+ "eval_samples_per_second": 16.4,
699
+ "eval_steps_per_second": 1.025,
700
+ "eval_wer": 0.3836081773824965,
701
+ "step": 8400
702
+ },
703
+ {
704
+ "epoch": 57.82,
705
+ "learning_rate": 3.664960629921259e-05,
706
+ "loss": 0.6947,
707
+ "step": 8500
708
+ },
709
+ {
710
+ "epoch": 58.5,
711
+ "learning_rate": 3.605905511811023e-05,
712
+ "loss": 0.6965,
713
+ "step": 8600
714
+ },
715
+ {
716
+ "epoch": 59.18,
717
+ "learning_rate": 3.546850393700787e-05,
718
+ "loss": 0.6975,
719
+ "step": 8700
720
+ },
721
+ {
722
+ "epoch": 59.86,
723
+ "learning_rate": 3.4877952755905506e-05,
724
+ "loss": 0.7001,
725
+ "step": 8800
726
+ },
727
+ {
728
+ "epoch": 59.86,
729
+ "eval_loss": 0.5815082788467407,
730
+ "eval_runtime": 127.9156,
731
+ "eval_samples_per_second": 16.378,
732
+ "eval_steps_per_second": 1.024,
733
+ "eval_wer": 0.38583163485887223,
734
+ "step": 8800
735
+ },
736
+ {
737
+ "epoch": 60.54,
738
+ "learning_rate": 3.428740157480315e-05,
739
+ "loss": 0.694,
740
+ "step": 8900
741
+ },
742
+ {
743
+ "epoch": 61.22,
744
+ "learning_rate": 3.369685039370079e-05,
745
+ "loss": 0.6867,
746
+ "step": 9000
747
+ },
748
+ {
749
+ "epoch": 61.9,
750
+ "learning_rate": 3.310629921259842e-05,
751
+ "loss": 0.7046,
752
+ "step": 9100
753
+ },
754
+ {
755
+ "epoch": 62.58,
756
+ "learning_rate": 3.2515748031496055e-05,
757
+ "loss": 0.6799,
758
+ "step": 9200
759
+ },
760
+ {
761
+ "epoch": 62.58,
762
+ "eval_loss": 0.5861709713935852,
763
+ "eval_runtime": 127.4436,
764
+ "eval_samples_per_second": 16.439,
765
+ "eval_steps_per_second": 1.028,
766
+ "eval_wer": 0.36964980544747084,
767
+ "step": 9200
768
+ },
769
+ {
770
+ "epoch": 63.26,
771
+ "learning_rate": 3.1925196850393696e-05,
772
+ "loss": 0.6639,
773
+ "step": 9300
774
+ },
775
+ {
776
+ "epoch": 63.94,
777
+ "learning_rate": 3.1334645669291336e-05,
778
+ "loss": 0.6543,
779
+ "step": 9400
780
+ },
781
+ {
782
+ "epoch": 64.62,
783
+ "learning_rate": 3.074409448818898e-05,
784
+ "loss": 0.6746,
785
+ "step": 9500
786
+ },
787
+ {
788
+ "epoch": 65.31,
789
+ "learning_rate": 3.015354330708661e-05,
790
+ "loss": 0.6459,
791
+ "step": 9600
792
+ },
793
+ {
794
+ "epoch": 65.31,
795
+ "eval_loss": 0.6180748343467712,
796
+ "eval_runtime": 127.2163,
797
+ "eval_samples_per_second": 16.468,
798
+ "eval_steps_per_second": 1.03,
799
+ "eval_wer": 0.3761966524612439,
800
+ "step": 9600
801
+ },
802
+ {
803
+ "epoch": 65.98,
804
+ "learning_rate": 2.9562992125984248e-05,
805
+ "loss": 0.625,
806
+ "step": 9700
807
+ },
808
+ {
809
+ "epoch": 66.67,
810
+ "learning_rate": 2.897244094488189e-05,
811
+ "loss": 0.6256,
812
+ "step": 9800
813
+ },
814
+ {
815
+ "epoch": 67.35,
816
+ "learning_rate": 2.8381889763779526e-05,
817
+ "loss": 0.6284,
818
+ "step": 9900
819
+ },
820
+ {
821
+ "epoch": 68.03,
822
+ "learning_rate": 2.7791338582677163e-05,
823
+ "loss": 0.6121,
824
+ "step": 10000
825
+ },
826
+ {
827
+ "epoch": 68.03,
828
+ "eval_loss": 0.5636932253837585,
829
+ "eval_runtime": 128.1438,
830
+ "eval_samples_per_second": 16.349,
831
+ "eval_steps_per_second": 1.022,
832
+ "eval_wer": 0.35896485701933173,
833
+ "step": 10000
834
+ },
835
+ {
836
+ "epoch": 68.71,
837
+ "learning_rate": 2.7206692913385825e-05,
838
+ "loss": 0.6123,
839
+ "step": 10100
840
+ },
841
+ {
842
+ "epoch": 69.39,
843
+ "learning_rate": 2.6616141732283462e-05,
844
+ "loss": 0.605,
845
+ "step": 10200
846
+ },
847
+ {
848
+ "epoch": 70.07,
849
+ "learning_rate": 2.60255905511811e-05,
850
+ "loss": 0.6029,
851
+ "step": 10300
852
+ },
853
+ {
854
+ "epoch": 70.75,
855
+ "learning_rate": 2.5435039370078737e-05,
856
+ "loss": 0.5942,
857
+ "step": 10400
858
+ },
859
+ {
860
+ "epoch": 70.75,
861
+ "eval_loss": 0.6374208927154541,
862
+ "eval_runtime": 127.5478,
863
+ "eval_samples_per_second": 16.425,
864
+ "eval_steps_per_second": 1.027,
865
+ "eval_wer": 0.3881786177506022,
866
+ "step": 10400
867
+ },
868
+ {
869
+ "epoch": 71.43,
870
+ "learning_rate": 2.4844488188976377e-05,
871
+ "loss": 0.6081,
872
+ "step": 10500
873
+ },
874
+ {
875
+ "epoch": 72.11,
876
+ "learning_rate": 2.4253937007874014e-05,
877
+ "loss": 0.5865,
878
+ "step": 10600
879
+ },
880
+ {
881
+ "epoch": 72.79,
882
+ "learning_rate": 2.3663385826771648e-05,
883
+ "loss": 0.5695,
884
+ "step": 10700
885
+ },
886
+ {
887
+ "epoch": 73.47,
888
+ "learning_rate": 2.307283464566929e-05,
889
+ "loss": 0.5769,
890
+ "step": 10800
891
+ },
892
+ {
893
+ "epoch": 73.47,
894
+ "eval_loss": 0.601510226726532,
895
+ "eval_runtime": 127.2518,
896
+ "eval_samples_per_second": 16.463,
897
+ "eval_steps_per_second": 1.029,
898
+ "eval_wer": 0.3639676363411772,
899
+ "step": 10800
900
+ },
901
+ {
902
+ "epoch": 74.15,
903
+ "learning_rate": 2.2482283464566926e-05,
904
+ "loss": 0.5827,
905
+ "step": 10900
906
+ },
907
+ {
908
+ "epoch": 74.83,
909
+ "learning_rate": 2.1891732283464567e-05,
910
+ "loss": 0.5751,
911
+ "step": 11000
912
+ },
913
+ {
914
+ "epoch": 75.51,
915
+ "learning_rate": 2.1307086614173225e-05,
916
+ "loss": 0.5448,
917
+ "step": 11100
918
+ },
919
+ {
920
+ "epoch": 76.19,
921
+ "learning_rate": 2.0716535433070866e-05,
922
+ "loss": 0.5689,
923
+ "step": 11200
924
+ },
925
+ {
926
+ "epoch": 76.19,
927
+ "eval_loss": 0.5669254064559937,
928
+ "eval_runtime": 127.9372,
929
+ "eval_samples_per_second": 16.375,
930
+ "eval_steps_per_second": 1.024,
931
+ "eval_wer": 0.3507504168982768,
932
+ "step": 11200
933
+ },
934
+ {
935
+ "epoch": 76.87,
936
+ "learning_rate": 2.0125984251968503e-05,
937
+ "loss": 0.5606,
938
+ "step": 11300
939
+ },
940
+ {
941
+ "epoch": 77.55,
942
+ "learning_rate": 1.9535433070866137e-05,
943
+ "loss": 0.544,
944
+ "step": 11400
945
+ },
946
+ {
947
+ "epoch": 78.23,
948
+ "learning_rate": 1.8944881889763778e-05,
949
+ "loss": 0.5373,
950
+ "step": 11500
951
+ },
952
+ {
953
+ "epoch": 78.91,
954
+ "learning_rate": 1.8354330708661415e-05,
955
+ "loss": 0.5461,
956
+ "step": 11600
957
+ },
958
+ {
959
+ "epoch": 78.91,
960
+ "eval_loss": 0.5967420339584351,
961
+ "eval_runtime": 127.7784,
962
+ "eval_samples_per_second": 16.396,
963
+ "eval_steps_per_second": 1.025,
964
+ "eval_wer": 0.36205299240318695,
965
+ "step": 11600
966
+ },
967
+ {
968
+ "epoch": 79.59,
969
+ "learning_rate": 1.7763779527559052e-05,
970
+ "loss": 0.5342,
971
+ "step": 11700
972
+ },
973
+ {
974
+ "epoch": 80.27,
975
+ "learning_rate": 1.7173228346456693e-05,
976
+ "loss": 0.5105,
977
+ "step": 11800
978
+ },
979
+ {
980
+ "epoch": 80.95,
981
+ "learning_rate": 1.658267716535433e-05,
982
+ "loss": 0.5289,
983
+ "step": 11900
984
+ },
985
+ {
986
+ "epoch": 81.63,
987
+ "learning_rate": 1.5992125984251967e-05,
988
+ "loss": 0.5286,
989
+ "step": 12000
990
+ },
991
+ {
992
+ "epoch": 81.63,
993
+ "eval_loss": 0.5839831829071045,
994
+ "eval_runtime": 127.0078,
995
+ "eval_samples_per_second": 16.495,
996
+ "eval_steps_per_second": 1.031,
997
+ "eval_wer": 0.36050892471125934,
998
+ "step": 12000
999
+ },
1000
+ {
1001
+ "epoch": 82.31,
1002
+ "learning_rate": 1.5401574803149604e-05,
1003
+ "loss": 0.5179,
1004
+ "step": 12100
1005
+ },
1006
+ {
1007
+ "epoch": 82.99,
1008
+ "learning_rate": 1.4811023622047243e-05,
1009
+ "loss": 0.502,
1010
+ "step": 12200
1011
+ },
1012
+ {
1013
+ "epoch": 83.67,
1014
+ "learning_rate": 1.422047244094488e-05,
1015
+ "loss": 0.5153,
1016
+ "step": 12300
1017
+ },
1018
+ {
1019
+ "epoch": 84.35,
1020
+ "learning_rate": 1.3629921259842519e-05,
1021
+ "loss": 0.5057,
1022
+ "step": 12400
1023
+ },
1024
+ {
1025
+ "epoch": 84.35,
1026
+ "eval_loss": 0.5848367214202881,
1027
+ "eval_runtime": 128.5953,
1028
+ "eval_samples_per_second": 16.291,
1029
+ "eval_steps_per_second": 1.019,
1030
+ "eval_wer": 0.34889753566796367,
1031
+ "step": 12400
1032
+ },
1033
+ {
1034
+ "epoch": 85.03,
1035
+ "learning_rate": 1.3039370078740156e-05,
1036
+ "loss": 0.5021,
1037
+ "step": 12500
1038
+ },
1039
+ {
1040
+ "epoch": 85.71,
1041
+ "learning_rate": 1.2448818897637795e-05,
1042
+ "loss": 0.5002,
1043
+ "step": 12600
1044
+ },
1045
+ {
1046
+ "epoch": 86.39,
1047
+ "learning_rate": 1.1858267716535432e-05,
1048
+ "loss": 0.495,
1049
+ "step": 12700
1050
+ },
1051
+ {
1052
+ "epoch": 87.07,
1053
+ "learning_rate": 1.1267716535433071e-05,
1054
+ "loss": 0.482,
1055
+ "step": 12800
1056
+ },
1057
+ {
1058
+ "epoch": 87.07,
1059
+ "eval_loss": 0.5860108733177185,
1060
+ "eval_runtime": 129.85,
1061
+ "eval_samples_per_second": 16.134,
1062
+ "eval_steps_per_second": 1.009,
1063
+ "eval_wer": 0.34877401025260946,
1064
+ "step": 12800
1065
+ },
1066
+ {
1067
+ "epoch": 87.75,
1068
+ "learning_rate": 1.0677165354330707e-05,
1069
+ "loss": 0.4651,
1070
+ "step": 12900
1071
+ },
1072
+ {
1073
+ "epoch": 88.43,
1074
+ "learning_rate": 1.0086614173228345e-05,
1075
+ "loss": 0.4894,
1076
+ "step": 13000
1077
+ },
1078
+ {
1079
+ "epoch": 89.12,
1080
+ "learning_rate": 9.496062992125983e-06,
1081
+ "loss": 0.49,
1082
+ "step": 13100
1083
+ },
1084
+ {
1085
+ "epoch": 89.79,
1086
+ "learning_rate": 8.905511811023621e-06,
1087
+ "loss": 0.4655,
1088
+ "step": 13200
1089
+ },
1090
+ {
1091
+ "epoch": 89.79,
1092
+ "eval_loss": 0.5780399441719055,
1093
+ "eval_runtime": 126.603,
1094
+ "eval_samples_per_second": 16.548,
1095
+ "eval_steps_per_second": 1.035,
1096
+ "eval_wer": 0.3453152986226916,
1097
+ "step": 13200
1098
+ },
1099
+ {
1100
+ "epoch": 90.48,
1101
+ "learning_rate": 8.31496062992126e-06,
1102
+ "loss": 0.4661,
1103
+ "step": 13300
1104
+ },
1105
+ {
1106
+ "epoch": 91.16,
1107
+ "learning_rate": 7.724409448818898e-06,
1108
+ "loss": 0.4776,
1109
+ "step": 13400
1110
+ },
1111
+ {
1112
+ "epoch": 91.84,
1113
+ "learning_rate": 7.133858267716534e-06,
1114
+ "loss": 0.4697,
1115
+ "step": 13500
1116
+ },
1117
+ {
1118
+ "epoch": 92.52,
1119
+ "learning_rate": 6.543307086614172e-06,
1120
+ "loss": 0.4523,
1121
+ "step": 13600
1122
+ },
1123
+ {
1124
+ "epoch": 92.52,
1125
+ "eval_loss": 0.6150020956993103,
1126
+ "eval_runtime": 127.4399,
1127
+ "eval_samples_per_second": 16.439,
1128
+ "eval_steps_per_second": 1.028,
1129
+ "eval_wer": 0.353220925205361,
1130
+ "step": 13600
1131
+ },
1132
+ {
1133
+ "epoch": 93.2,
1134
+ "learning_rate": 5.95275590551181e-06,
1135
+ "loss": 0.4522,
1136
+ "step": 13700
1137
+ },
1138
+ {
1139
+ "epoch": 93.88,
1140
+ "learning_rate": 5.362204724409448e-06,
1141
+ "loss": 0.4595,
1142
+ "step": 13800
1143
+ },
1144
+ {
1145
+ "epoch": 94.56,
1146
+ "learning_rate": 4.77755905511811e-06,
1147
+ "loss": 0.4518,
1148
+ "step": 13900
1149
+ },
1150
+ {
1151
+ "epoch": 95.24,
1152
+ "learning_rate": 4.1870078740157475e-06,
1153
+ "loss": 0.4422,
1154
+ "step": 14000
1155
+ },
1156
+ {
1157
+ "epoch": 95.24,
1158
+ "eval_loss": 0.5930356383323669,
1159
+ "eval_runtime": 128.0708,
1160
+ "eval_samples_per_second": 16.358,
1161
+ "eval_steps_per_second": 1.023,
1162
+ "eval_wer": 0.3451917732073374,
1163
+ "step": 14000
1164
+ },
1165
+ {
1166
+ "epoch": 95.92,
1167
+ "learning_rate": 3.5964566929133855e-06,
1168
+ "loss": 0.4558,
1169
+ "step": 14100
1170
+ },
1171
+ {
1172
+ "epoch": 96.6,
1173
+ "learning_rate": 3.0059055118110235e-06,
1174
+ "loss": 0.4427,
1175
+ "step": 14200
1176
+ },
1177
+ {
1178
+ "epoch": 97.28,
1179
+ "learning_rate": 2.415354330708661e-06,
1180
+ "loss": 0.4263,
1181
+ "step": 14300
1182
+ },
1183
+ {
1184
+ "epoch": 97.96,
1185
+ "learning_rate": 1.8248031496062992e-06,
1186
+ "loss": 0.4436,
1187
+ "step": 14400
1188
+ },
1189
+ {
1190
+ "epoch": 97.96,
1191
+ "eval_loss": 0.5867109894752502,
1192
+ "eval_runtime": 127.0812,
1193
+ "eval_samples_per_second": 16.486,
1194
+ "eval_steps_per_second": 1.031,
1195
+ "eval_wer": 0.34278302760793034,
1196
+ "step": 14400
1197
+ },
1198
+ {
1199
+ "epoch": 98.64,
1200
+ "learning_rate": 1.234251968503937e-06,
1201
+ "loss": 0.447,
1202
+ "step": 14500
1203
+ },
1204
+ {
1205
+ "epoch": 99.32,
1206
+ "learning_rate": 6.437007874015748e-07,
1207
+ "loss": 0.4325,
1208
+ "step": 14600
1209
+ },
1210
+ {
1211
+ "epoch": 100.0,
1212
+ "learning_rate": 5.31496062992126e-08,
1213
+ "loss": 0.421,
1214
+ "step": 14700
1215
+ },
1216
+ {
1217
+ "epoch": 100.0,
1218
+ "step": 14700,
1219
+ "total_flos": 1.7835239032674828e+20,
1220
+ "train_loss": 0.9074438333835732,
1221
+ "train_runtime": 47510.6321,
1222
+ "train_samples_per_second": 9.916,
1223
+ "train_steps_per_second": 0.309
1224
+ }
1225
+ ],
1226
+ "max_steps": 14700,
1227
+ "num_train_epochs": 100,
1228
+ "total_flos": 1.7835239032674828e+20,
1229
+ "trial_name": null,
1230
+ "trial_params": null
1231
+ }