infinitejoy commited on
Commit
21747b1
1 Parent(s): e7938d6

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.11674057692289352,
4
+ "eval_runtime": 153.2854,
5
+ "eval_samples": 3172,
6
+ "eval_samples_per_second": 20.693,
7
+ "eval_steps_per_second": 20.693,
8
+ "eval_wer": 0.14212066931713924,
9
+ "train_loss": 1.0303706246270075,
10
+ "train_runtime": 15356.7914,
11
+ "train_samples": 7174,
12
+ "train_samples_per_second": 23.358,
13
+ "train_steps_per_second": 0.733
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.11674057692289352,
4
+ "eval_runtime": 153.2854,
5
+ "eval_samples": 3172,
6
+ "eval_samples_per_second": 20.693,
7
+ "eval_steps_per_second": 20.693,
8
+ "eval_wer": 0.14212066931713924
9
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:024938befaa799b4726488842a60a6623796466079461941d0e57edb3c2fa2ca
3
  size 1262108145
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f443ab6ef2a8ce462c9a5b0263a6ef59a64d1e69d90a60c17ae7e7089b43b22a
3
  size 1262108145
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 1.0303706246270075,
4
+ "train_runtime": 15356.7914,
5
+ "train_samples": 7174,
6
+ "train_samples_per_second": 23.358,
7
+ "train_steps_per_second": 0.733
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "global_step": 11250,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.44,
12
+ "learning_rate": 3.4299999999999998e-06,
13
+ "loss": 12.3928,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.89,
18
+ "learning_rate": 6.93e-06,
19
+ "loss": 7.13,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.33,
24
+ "learning_rate": 1.0429999999999998e-05,
25
+ "loss": 4.5055,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.78,
30
+ "learning_rate": 1.3929999999999999e-05,
31
+ "loss": 3.8494,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.22,
36
+ "learning_rate": 1.7429999999999997e-05,
37
+ "loss": 3.4447,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 2.67,
42
+ "learning_rate": 2.0929999999999998e-05,
43
+ "loss": 3.2021,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 3.11,
48
+ "learning_rate": 2.4429999999999995e-05,
49
+ "loss": 3.0761,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 3.56,
54
+ "learning_rate": 2.793e-05,
55
+ "loss": 3.0165,
56
+ "step": 800
57
+ },
58
+ {
59
+ "epoch": 4.0,
60
+ "learning_rate": 3.1429999999999996e-05,
61
+ "loss": 2.9623,
62
+ "step": 900
63
+ },
64
+ {
65
+ "epoch": 4.44,
66
+ "learning_rate": 3.493e-05,
67
+ "loss": 2.9094,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 4.89,
72
+ "learning_rate": 3.843e-05,
73
+ "loss": 2.8831,
74
+ "step": 1100
75
+ },
76
+ {
77
+ "epoch": 5.33,
78
+ "learning_rate": 4.192999999999999e-05,
79
+ "loss": 2.8667,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 5.78,
84
+ "learning_rate": 4.543e-05,
85
+ "loss": 2.7722,
86
+ "step": 1300
87
+ },
88
+ {
89
+ "epoch": 6.22,
90
+ "learning_rate": 4.8929999999999994e-05,
91
+ "loss": 2.2496,
92
+ "step": 1400
93
+ },
94
+ {
95
+ "epoch": 6.67,
96
+ "learning_rate": 5.243e-05,
97
+ "loss": 1.7213,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 7.11,
102
+ "learning_rate": 5.593e-05,
103
+ "loss": 1.5195,
104
+ "step": 1600
105
+ },
106
+ {
107
+ "epoch": 7.56,
108
+ "learning_rate": 5.942999999999999e-05,
109
+ "loss": 1.3847,
110
+ "step": 1700
111
+ },
112
+ {
113
+ "epoch": 8.0,
114
+ "learning_rate": 6.293e-05,
115
+ "loss": 1.3296,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 8.44,
120
+ "learning_rate": 6.642999999999999e-05,
121
+ "loss": 1.2533,
122
+ "step": 1900
123
+ },
124
+ {
125
+ "epoch": 8.89,
126
+ "learning_rate": 6.992999999999999e-05,
127
+ "loss": 1.1973,
128
+ "step": 2000
129
+ },
130
+ {
131
+ "epoch": 8.89,
132
+ "eval_loss": 0.44813069701194763,
133
+ "eval_runtime": 151.5294,
134
+ "eval_samples_per_second": 20.933,
135
+ "eval_steps_per_second": 20.933,
136
+ "eval_wer": 0.4849491021888243,
137
+ "step": 2000
138
+ },
139
+ {
140
+ "epoch": 9.33,
141
+ "learning_rate": 6.925837837837837e-05,
142
+ "loss": 1.08,
143
+ "step": 2100
144
+ },
145
+ {
146
+ "epoch": 9.78,
147
+ "learning_rate": 6.850162162162162e-05,
148
+ "loss": 0.9484,
149
+ "step": 2200
150
+ },
151
+ {
152
+ "epoch": 10.22,
153
+ "learning_rate": 6.774486486486486e-05,
154
+ "loss": 0.8818,
155
+ "step": 2300
156
+ },
157
+ {
158
+ "epoch": 10.67,
159
+ "learning_rate": 6.698810810810811e-05,
160
+ "loss": 0.8043,
161
+ "step": 2400
162
+ },
163
+ {
164
+ "epoch": 11.11,
165
+ "learning_rate": 6.623135135135134e-05,
166
+ "loss": 0.7752,
167
+ "step": 2500
168
+ },
169
+ {
170
+ "epoch": 11.56,
171
+ "learning_rate": 6.547459459459459e-05,
172
+ "loss": 0.7497,
173
+ "step": 2600
174
+ },
175
+ {
176
+ "epoch": 12.0,
177
+ "learning_rate": 6.471783783783783e-05,
178
+ "loss": 0.7486,
179
+ "step": 2700
180
+ },
181
+ {
182
+ "epoch": 12.44,
183
+ "learning_rate": 6.396108108108108e-05,
184
+ "loss": 0.7016,
185
+ "step": 2800
186
+ },
187
+ {
188
+ "epoch": 12.89,
189
+ "learning_rate": 6.320432432432433e-05,
190
+ "loss": 0.6927,
191
+ "step": 2900
192
+ },
193
+ {
194
+ "epoch": 13.33,
195
+ "learning_rate": 6.244756756756756e-05,
196
+ "loss": 0.6609,
197
+ "step": 3000
198
+ },
199
+ {
200
+ "epoch": 13.78,
201
+ "learning_rate": 6.169081081081081e-05,
202
+ "loss": 0.6633,
203
+ "step": 3100
204
+ },
205
+ {
206
+ "epoch": 14.22,
207
+ "learning_rate": 6.093405405405405e-05,
208
+ "loss": 0.6781,
209
+ "step": 3200
210
+ },
211
+ {
212
+ "epoch": 14.67,
213
+ "learning_rate": 6.017729729729729e-05,
214
+ "loss": 0.6486,
215
+ "step": 3300
216
+ },
217
+ {
218
+ "epoch": 15.11,
219
+ "learning_rate": 5.942054054054054e-05,
220
+ "loss": 0.6217,
221
+ "step": 3400
222
+ },
223
+ {
224
+ "epoch": 15.56,
225
+ "learning_rate": 5.866378378378378e-05,
226
+ "loss": 0.6348,
227
+ "step": 3500
228
+ },
229
+ {
230
+ "epoch": 16.0,
231
+ "learning_rate": 5.7907027027027026e-05,
232
+ "loss": 0.6555,
233
+ "step": 3600
234
+ },
235
+ {
236
+ "epoch": 16.44,
237
+ "learning_rate": 5.715027027027027e-05,
238
+ "loss": 0.6179,
239
+ "step": 3700
240
+ },
241
+ {
242
+ "epoch": 16.89,
243
+ "learning_rate": 5.639351351351351e-05,
244
+ "loss": 0.6116,
245
+ "step": 3800
246
+ },
247
+ {
248
+ "epoch": 17.33,
249
+ "learning_rate": 5.5636756756756754e-05,
250
+ "loss": 0.586,
251
+ "step": 3900
252
+ },
253
+ {
254
+ "epoch": 17.78,
255
+ "learning_rate": 5.4879999999999996e-05,
256
+ "loss": 0.6005,
257
+ "step": 4000
258
+ },
259
+ {
260
+ "epoch": 17.78,
261
+ "eval_loss": 0.1420038342475891,
262
+ "eval_runtime": 156.5701,
263
+ "eval_samples_per_second": 20.259,
264
+ "eval_steps_per_second": 20.259,
265
+ "eval_wer": 0.17772729258595832,
266
+ "step": 4000
267
+ },
268
+ {
269
+ "epoch": 18.22,
270
+ "learning_rate": 5.412324324324324e-05,
271
+ "loss": 0.6199,
272
+ "step": 4100
273
+ },
274
+ {
275
+ "epoch": 18.67,
276
+ "learning_rate": 5.336648648648648e-05,
277
+ "loss": 0.6017,
278
+ "step": 4200
279
+ },
280
+ {
281
+ "epoch": 19.11,
282
+ "learning_rate": 5.2609729729729724e-05,
283
+ "loss": 0.5722,
284
+ "step": 4300
285
+ },
286
+ {
287
+ "epoch": 19.56,
288
+ "learning_rate": 5.1852972972972974e-05,
289
+ "loss": 0.5755,
290
+ "step": 4400
291
+ },
292
+ {
293
+ "epoch": 20.0,
294
+ "learning_rate": 5.1096216216216216e-05,
295
+ "loss": 0.6083,
296
+ "step": 4500
297
+ },
298
+ {
299
+ "epoch": 20.44,
300
+ "learning_rate": 5.033945945945946e-05,
301
+ "loss": 0.5677,
302
+ "step": 4600
303
+ },
304
+ {
305
+ "epoch": 20.89,
306
+ "learning_rate": 4.9590270270270266e-05,
307
+ "loss": 0.5652,
308
+ "step": 4700
309
+ },
310
+ {
311
+ "epoch": 21.33,
312
+ "learning_rate": 4.8833513513513516e-05,
313
+ "loss": 0.5509,
314
+ "step": 4800
315
+ },
316
+ {
317
+ "epoch": 21.78,
318
+ "learning_rate": 4.807675675675676e-05,
319
+ "loss": 0.5526,
320
+ "step": 4900
321
+ },
322
+ {
323
+ "epoch": 22.22,
324
+ "learning_rate": 4.732e-05,
325
+ "loss": 0.5589,
326
+ "step": 5000
327
+ },
328
+ {
329
+ "epoch": 22.67,
330
+ "learning_rate": 4.6563243243243244e-05,
331
+ "loss": 0.5394,
332
+ "step": 5100
333
+ },
334
+ {
335
+ "epoch": 23.11,
336
+ "learning_rate": 4.5806486486486486e-05,
337
+ "loss": 0.5329,
338
+ "step": 5200
339
+ },
340
+ {
341
+ "epoch": 23.56,
342
+ "learning_rate": 4.504972972972973e-05,
343
+ "loss": 0.5353,
344
+ "step": 5300
345
+ },
346
+ {
347
+ "epoch": 24.0,
348
+ "learning_rate": 4.429297297297297e-05,
349
+ "loss": 0.5643,
350
+ "step": 5400
351
+ },
352
+ {
353
+ "epoch": 24.44,
354
+ "learning_rate": 4.3536216216216214e-05,
355
+ "loss": 0.537,
356
+ "step": 5500
357
+ },
358
+ {
359
+ "epoch": 24.89,
360
+ "learning_rate": 4.277945945945946e-05,
361
+ "loss": 0.5502,
362
+ "step": 5600
363
+ },
364
+ {
365
+ "epoch": 25.33,
366
+ "learning_rate": 4.20227027027027e-05,
367
+ "loss": 0.5126,
368
+ "step": 5700
369
+ },
370
+ {
371
+ "epoch": 25.78,
372
+ "learning_rate": 4.126594594594594e-05,
373
+ "loss": 0.5315,
374
+ "step": 5800
375
+ },
376
+ {
377
+ "epoch": 26.22,
378
+ "learning_rate": 4.050918918918919e-05,
379
+ "loss": 0.5424,
380
+ "step": 5900
381
+ },
382
+ {
383
+ "epoch": 26.67,
384
+ "learning_rate": 3.9752432432432434e-05,
385
+ "loss": 0.5248,
386
+ "step": 6000
387
+ },
388
+ {
389
+ "epoch": 26.67,
390
+ "eval_loss": 0.13026614487171173,
391
+ "eval_runtime": 153.4664,
392
+ "eval_samples_per_second": 20.669,
393
+ "eval_steps_per_second": 20.669,
394
+ "eval_wer": 0.16505745117742146,
395
+ "step": 6000
396
+ },
397
+ {
398
+ "epoch": 27.11,
399
+ "learning_rate": 3.8995675675675676e-05,
400
+ "loss": 0.5111,
401
+ "step": 6100
402
+ },
403
+ {
404
+ "epoch": 27.56,
405
+ "learning_rate": 3.823891891891892e-05,
406
+ "loss": 0.5226,
407
+ "step": 6200
408
+ },
409
+ {
410
+ "epoch": 28.0,
411
+ "learning_rate": 3.748216216216216e-05,
412
+ "loss": 0.5335,
413
+ "step": 6300
414
+ },
415
+ {
416
+ "epoch": 28.44,
417
+ "learning_rate": 3.6725405405405404e-05,
418
+ "loss": 0.5031,
419
+ "step": 6400
420
+ },
421
+ {
422
+ "epoch": 28.89,
423
+ "learning_rate": 3.596864864864865e-05,
424
+ "loss": 0.5219,
425
+ "step": 6500
426
+ },
427
+ {
428
+ "epoch": 29.33,
429
+ "learning_rate": 3.521189189189189e-05,
430
+ "loss": 0.4853,
431
+ "step": 6600
432
+ },
433
+ {
434
+ "epoch": 29.78,
435
+ "learning_rate": 3.445513513513513e-05,
436
+ "loss": 0.5062,
437
+ "step": 6700
438
+ },
439
+ {
440
+ "epoch": 30.22,
441
+ "learning_rate": 3.370594594594594e-05,
442
+ "loss": 0.5395,
443
+ "step": 6800
444
+ },
445
+ {
446
+ "epoch": 30.67,
447
+ "learning_rate": 3.294918918918919e-05,
448
+ "loss": 0.4876,
449
+ "step": 6900
450
+ },
451
+ {
452
+ "epoch": 31.11,
453
+ "learning_rate": 3.219243243243243e-05,
454
+ "loss": 0.4981,
455
+ "step": 7000
456
+ },
457
+ {
458
+ "epoch": 31.56,
459
+ "learning_rate": 3.1435675675675674e-05,
460
+ "loss": 0.5011,
461
+ "step": 7100
462
+ },
463
+ {
464
+ "epoch": 32.0,
465
+ "learning_rate": 3.067891891891892e-05,
466
+ "loss": 0.511,
467
+ "step": 7200
468
+ },
469
+ {
470
+ "epoch": 32.44,
471
+ "learning_rate": 2.992216216216216e-05,
472
+ "loss": 0.4935,
473
+ "step": 7300
474
+ },
475
+ {
476
+ "epoch": 32.89,
477
+ "learning_rate": 2.9165405405405402e-05,
478
+ "loss": 0.4951,
479
+ "step": 7400
480
+ },
481
+ {
482
+ "epoch": 33.33,
483
+ "learning_rate": 2.8408648648648645e-05,
484
+ "loss": 0.4655,
485
+ "step": 7500
486
+ },
487
+ {
488
+ "epoch": 33.78,
489
+ "learning_rate": 2.765189189189189e-05,
490
+ "loss": 0.4926,
491
+ "step": 7600
492
+ },
493
+ {
494
+ "epoch": 34.22,
495
+ "learning_rate": 2.6895135135135133e-05,
496
+ "loss": 0.5083,
497
+ "step": 7700
498
+ },
499
+ {
500
+ "epoch": 34.67,
501
+ "learning_rate": 2.6138378378378376e-05,
502
+ "loss": 0.4849,
503
+ "step": 7800
504
+ },
505
+ {
506
+ "epoch": 35.11,
507
+ "learning_rate": 2.538162162162162e-05,
508
+ "loss": 0.4673,
509
+ "step": 7900
510
+ },
511
+ {
512
+ "epoch": 35.56,
513
+ "learning_rate": 2.462486486486486e-05,
514
+ "loss": 0.4871,
515
+ "step": 8000
516
+ },
517
+ {
518
+ "epoch": 35.56,
519
+ "eval_loss": 0.12074683606624603,
520
+ "eval_runtime": 154.3437,
521
+ "eval_samples_per_second": 20.552,
522
+ "eval_steps_per_second": 20.552,
523
+ "eval_wer": 0.1523439206605793,
524
+ "step": 8000
525
+ },
526
+ {
527
+ "epoch": 36.0,
528
+ "learning_rate": 2.3875675675675676e-05,
529
+ "loss": 0.4911,
530
+ "step": 8100
531
+ },
532
+ {
533
+ "epoch": 36.44,
534
+ "learning_rate": 2.3118918918918918e-05,
535
+ "loss": 0.4724,
536
+ "step": 8200
537
+ },
538
+ {
539
+ "epoch": 36.89,
540
+ "learning_rate": 2.236216216216216e-05,
541
+ "loss": 0.4784,
542
+ "step": 8300
543
+ },
544
+ {
545
+ "epoch": 37.33,
546
+ "learning_rate": 2.1605405405405403e-05,
547
+ "loss": 0.466,
548
+ "step": 8400
549
+ },
550
+ {
551
+ "epoch": 37.78,
552
+ "learning_rate": 2.0848648648648646e-05,
553
+ "loss": 0.4761,
554
+ "step": 8500
555
+ },
556
+ {
557
+ "epoch": 38.22,
558
+ "learning_rate": 2.0091891891891892e-05,
559
+ "loss": 0.4772,
560
+ "step": 8600
561
+ },
562
+ {
563
+ "epoch": 38.67,
564
+ "learning_rate": 1.9335135135135135e-05,
565
+ "loss": 0.4524,
566
+ "step": 8700
567
+ },
568
+ {
569
+ "epoch": 39.11,
570
+ "learning_rate": 1.8578378378378377e-05,
571
+ "loss": 0.4436,
572
+ "step": 8800
573
+ },
574
+ {
575
+ "epoch": 39.56,
576
+ "learning_rate": 1.782162162162162e-05,
577
+ "loss": 0.4673,
578
+ "step": 8900
579
+ },
580
+ {
581
+ "epoch": 40.0,
582
+ "learning_rate": 1.7064864864864862e-05,
583
+ "loss": 0.4848,
584
+ "step": 9000
585
+ },
586
+ {
587
+ "epoch": 40.44,
588
+ "learning_rate": 1.630810810810811e-05,
589
+ "loss": 0.461,
590
+ "step": 9100
591
+ },
592
+ {
593
+ "epoch": 40.89,
594
+ "learning_rate": 1.555135135135135e-05,
595
+ "loss": 0.465,
596
+ "step": 9200
597
+ },
598
+ {
599
+ "epoch": 41.33,
600
+ "learning_rate": 1.4794594594594594e-05,
601
+ "loss": 0.4398,
602
+ "step": 9300
603
+ },
604
+ {
605
+ "epoch": 41.78,
606
+ "learning_rate": 1.4045405405405405e-05,
607
+ "loss": 0.4552,
608
+ "step": 9400
609
+ },
610
+ {
611
+ "epoch": 42.22,
612
+ "learning_rate": 1.3288648648648647e-05,
613
+ "loss": 0.47,
614
+ "step": 9500
615
+ },
616
+ {
617
+ "epoch": 42.67,
618
+ "learning_rate": 1.253189189189189e-05,
619
+ "loss": 0.4599,
620
+ "step": 9600
621
+ },
622
+ {
623
+ "epoch": 43.11,
624
+ "learning_rate": 1.1775135135135134e-05,
625
+ "loss": 0.4273,
626
+ "step": 9700
627
+ },
628
+ {
629
+ "epoch": 43.56,
630
+ "learning_rate": 1.1018378378378377e-05,
631
+ "loss": 0.4533,
632
+ "step": 9800
633
+ },
634
+ {
635
+ "epoch": 44.0,
636
+ "learning_rate": 1.0261621621621621e-05,
637
+ "loss": 0.4573,
638
+ "step": 9900
639
+ },
640
+ {
641
+ "epoch": 44.44,
642
+ "learning_rate": 9.504864864864864e-06,
643
+ "loss": 0.4428,
644
+ "step": 10000
645
+ },
646
+ {
647
+ "epoch": 44.44,
648
+ "eval_loss": 0.11431078612804413,
649
+ "eval_runtime": 152.8495,
650
+ "eval_samples_per_second": 20.752,
651
+ "eval_steps_per_second": 20.752,
652
+ "eval_wer": 0.14247018218358162,
653
+ "step": 10000
654
+ },
655
+ {
656
+ "epoch": 44.89,
657
+ "learning_rate": 8.748108108108106e-06,
658
+ "loss": 0.4431,
659
+ "step": 10100
660
+ },
661
+ {
662
+ "epoch": 45.33,
663
+ "learning_rate": 7.99135135135135e-06,
664
+ "loss": 0.4124,
665
+ "step": 10200
666
+ },
667
+ {
668
+ "epoch": 45.78,
669
+ "learning_rate": 7.234594594594593e-06,
670
+ "loss": 0.4437,
671
+ "step": 10300
672
+ },
673
+ {
674
+ "epoch": 46.22,
675
+ "learning_rate": 6.4778378378378375e-06,
676
+ "loss": 0.4694,
677
+ "step": 10400
678
+ },
679
+ {
680
+ "epoch": 46.67,
681
+ "learning_rate": 5.721081081081081e-06,
682
+ "loss": 0.4408,
683
+ "step": 10500
684
+ },
685
+ {
686
+ "epoch": 47.11,
687
+ "learning_rate": 4.9643243243243245e-06,
688
+ "loss": 0.428,
689
+ "step": 10600
690
+ },
691
+ {
692
+ "epoch": 47.56,
693
+ "learning_rate": 4.207567567567567e-06,
694
+ "loss": 0.4418,
695
+ "step": 10700
696
+ },
697
+ {
698
+ "epoch": 48.0,
699
+ "learning_rate": 3.4508108108108105e-06,
700
+ "loss": 0.4527,
701
+ "step": 10800
702
+ },
703
+ {
704
+ "epoch": 48.44,
705
+ "learning_rate": 2.6940540540540536e-06,
706
+ "loss": 0.448,
707
+ "step": 10900
708
+ },
709
+ {
710
+ "epoch": 48.89,
711
+ "learning_rate": 1.937297297297297e-06,
712
+ "loss": 0.4399,
713
+ "step": 11000
714
+ },
715
+ {
716
+ "epoch": 49.33,
717
+ "learning_rate": 1.1805405405405403e-06,
718
+ "loss": 0.4111,
719
+ "step": 11100
720
+ },
721
+ {
722
+ "epoch": 49.78,
723
+ "learning_rate": 4.237837837837838e-07,
724
+ "loss": 0.4214,
725
+ "step": 11200
726
+ },
727
+ {
728
+ "epoch": 50.0,
729
+ "step": 11250,
730
+ "total_flos": 4.148416605366081e+19,
731
+ "train_loss": 1.0303706246270075,
732
+ "train_runtime": 15356.7914,
733
+ "train_samples_per_second": 23.358,
734
+ "train_steps_per_second": 0.733
735
+ }
736
+ ],
737
+ "max_steps": 11250,
738
+ "num_train_epochs": 50,
739
+ "total_flos": 4.148416605366081e+19,
740
+ "trial_name": null,
741
+ "trial_params": null
742
+ }