kapilkd13 commited on
Commit
11b606c
1 Parent(s): bd76bcc

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 16.95,
3
- "eval_loss": 0.9201112985610962,
4
- "eval_runtime": 92.4257,
5
  "eval_samples": 2095,
6
- "eval_samples_per_second": 22.667,
7
- "eval_steps_per_second": 2.835,
8
- "eval_wer": 1.009106276241889,
9
- "train_loss": 0.584431241607666,
10
- "train_runtime": 8680.4794,
11
  "train_samples": 4711,
12
- "train_samples_per_second": 9.216,
13
- "train_steps_per_second": 0.576
14
  }
 
1
  {
2
+ "epoch": 27.12,
3
+ "eval_loss": 0.7346429228782654,
4
+ "eval_runtime": 80.4764,
5
  "eval_samples": 2095,
6
+ "eval_samples_per_second": 26.032,
7
+ "eval_steps_per_second": 3.256,
8
+ "eval_wer": 1.0478761110202301,
9
+ "train_loss": 0.46973063707351687,
10
+ "train_runtime": 10600.4383,
11
  "train_samples": 4711,
12
+ "train_samples_per_second": 12.075,
13
+ "train_steps_per_second": 0.755
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 16.95,
3
- "eval_loss": 0.9201112985610962,
4
- "eval_runtime": 92.4257,
5
  "eval_samples": 2095,
6
- "eval_samples_per_second": 22.667,
7
- "eval_steps_per_second": 2.835,
8
- "eval_wer": 1.009106276241889
9
  }
 
1
  {
2
+ "epoch": 27.12,
3
+ "eval_loss": 0.7346429228782654,
4
+ "eval_runtime": 80.4764,
5
  "eval_samples": 2095,
6
+ "eval_samples_per_second": 26.032,
7
+ "eval_steps_per_second": 3.256,
8
+ "eval_wer": 1.0478761110202301
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9110291754af006c8b9d146178f9c2c3d2fa7f4af892200563a5a47e17b85d38
3
  size 1262321393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7586f941c9408ff704686fb109ad61ca786c2510d0f24b51f08ecab6a57debc5
3
  size 1262321393
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 16.95,
3
- "train_loss": 0.584431241607666,
4
- "train_runtime": 8680.4794,
5
  "train_samples": 4711,
6
- "train_samples_per_second": 9.216,
7
- "train_steps_per_second": 0.576
8
  }
 
1
  {
2
+ "epoch": 27.12,
3
+ "train_loss": 0.46973063707351687,
4
+ "train_runtime": 10600.4383,
5
  "train_samples": 4711,
6
+ "train_samples_per_second": 12.075,
7
+ "train_steps_per_second": 0.755
8
  }
trainer_state.json CHANGED
@@ -1,535 +1,301 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.949152542372882,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.34,
12
- "eval_loss": 4.256950378417969,
13
- "eval_runtime": 93.7021,
14
- "eval_samples_per_second": 22.358,
15
- "eval_steps_per_second": 2.796,
16
- "eval_wer": 0.9999454713997492,
17
- "step": 100
18
- },
19
- {
20
- "epoch": 0.68,
21
- "eval_loss": 3.3221993446350098,
22
- "eval_runtime": 95.7275,
23
- "eval_samples_per_second": 21.885,
24
- "eval_steps_per_second": 2.737,
25
- "eval_wer": 1.0005452860025084,
26
- "step": 200
27
- },
28
- {
29
- "epoch": 1.02,
30
- "eval_loss": 2.375530242919922,
31
- "eval_runtime": 93.1661,
32
- "eval_samples_per_second": 22.487,
33
- "eval_steps_per_second": 2.812,
34
- "eval_wer": 1.0,
35
- "step": 300
36
- },
37
  {
38
  "epoch": 1.36,
39
- "eval_loss": 1.407051682472229,
40
- "eval_runtime": 93.2199,
41
- "eval_samples_per_second": 22.474,
42
- "eval_steps_per_second": 2.811,
43
- "eval_wer": 1.0036534162168058,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 1.69,
48
- "learning_rate": 0.00028143749999999995,
49
- "loss": 3.8379,
50
  "step": 500
51
  },
52
- {
53
- "epoch": 1.69,
54
- "eval_loss": 1.2223190069198608,
55
- "eval_runtime": 92.5816,
56
- "eval_samples_per_second": 22.629,
57
- "eval_steps_per_second": 2.83,
58
- "eval_wer": 1.009106276241889,
59
- "step": 500
60
- },
61
- {
62
- "epoch": 2.03,
63
- "eval_loss": 1.0884724855422974,
64
- "eval_runtime": 94.6436,
65
- "eval_samples_per_second": 22.136,
66
- "eval_steps_per_second": 2.768,
67
- "eval_wer": 1.0272643001254158,
68
- "step": 600
69
- },
70
- {
71
- "epoch": 2.37,
72
- "eval_loss": 0.9818190336227417,
73
- "eval_runtime": 92.6738,
74
- "eval_samples_per_second": 22.606,
75
- "eval_steps_per_second": 2.827,
76
- "eval_wer": 1.009760619444899,
77
- "step": 700
78
- },
79
  {
80
  "epoch": 2.71,
81
- "eval_loss": 0.9154127836227417,
82
- "eval_runtime": 93.1212,
83
- "eval_samples_per_second": 22.498,
84
- "eval_steps_per_second": 2.814,
85
- "eval_wer": 1.0109602486504172,
86
  "step": 800
87
  },
88
- {
89
- "epoch": 3.05,
90
- "eval_loss": 0.8868876695632935,
91
- "eval_runtime": 94.7308,
92
- "eval_samples_per_second": 22.115,
93
- "eval_steps_per_second": 2.766,
94
- "eval_wer": 1.0153770652707346,
95
- "step": 900
96
- },
97
- {
98
- "epoch": 3.39,
99
- "learning_rate": 0.0002501875,
100
- "loss": 0.5888,
101
- "step": 1000
102
- },
103
  {
104
  "epoch": 3.39,
105
- "eval_loss": 0.897603452205658,
106
- "eval_runtime": 92.6891,
107
- "eval_samples_per_second": 22.602,
108
- "eval_steps_per_second": 2.827,
109
- "eval_wer": 1.0148863078684771,
110
  "step": 1000
111
  },
112
- {
113
- "epoch": 3.73,
114
- "eval_loss": 0.8322562575340271,
115
- "eval_runtime": 91.8444,
116
- "eval_samples_per_second": 22.81,
117
- "eval_steps_per_second": 2.853,
118
- "eval_wer": 1.0124325208571896,
119
- "step": 1100
120
- },
121
  {
122
  "epoch": 4.07,
123
- "eval_loss": 0.8487809300422668,
124
- "eval_runtime": 92.7251,
125
- "eval_samples_per_second": 22.594,
126
- "eval_steps_per_second": 2.826,
127
- "eval_wer": 1.0086700474398822,
128
  "step": 1200
129
  },
130
- {
131
- "epoch": 4.41,
132
- "eval_loss": 0.8659328818321228,
133
- "eval_runtime": 91.959,
134
- "eval_samples_per_second": 22.782,
135
- "eval_steps_per_second": 2.849,
136
- "eval_wer": 1.0069251322318555,
137
- "step": 1300
138
- },
139
- {
140
- "epoch": 4.75,
141
- "eval_loss": 0.8611888289451599,
142
- "eval_runtime": 92.7084,
143
- "eval_samples_per_second": 22.598,
144
- "eval_steps_per_second": 2.826,
145
- "eval_wer": 1.0042532308195649,
146
- "step": 1400
147
- },
148
  {
149
  "epoch": 5.08,
150
- "learning_rate": 0.00021893749999999998,
151
- "loss": 0.3706,
152
- "step": 1500
153
- },
154
- {
155
- "epoch": 5.08,
156
- "eval_loss": 0.8300378918647766,
157
- "eval_runtime": 91.9965,
158
- "eval_samples_per_second": 22.773,
159
- "eval_steps_per_second": 2.848,
160
- "eval_wer": 1.0182670810840286,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 5.42,
165
- "eval_loss": 0.8417208194732666,
166
- "eval_runtime": 94.3323,
167
- "eval_samples_per_second": 22.209,
168
- "eval_steps_per_second": 2.777,
169
- "eval_wer": 1.009106276241889,
170
  "step": 1600
171
  },
172
- {
173
- "epoch": 5.76,
174
- "eval_loss": 0.8261289000511169,
175
- "eval_runtime": 91.9157,
176
- "eval_samples_per_second": 22.793,
177
- "eval_steps_per_second": 2.85,
178
- "eval_wer": 1.008724576040133,
179
- "step": 1700
180
- },
181
- {
182
- "epoch": 6.1,
183
- "eval_loss": 0.854759931564331,
184
- "eval_runtime": 92.0581,
185
- "eval_samples_per_second": 22.757,
186
- "eval_steps_per_second": 2.846,
187
- "eval_wer": 1.006761546431103,
188
- "step": 1800
189
- },
190
- {
191
- "epoch": 6.44,
192
- "eval_loss": 0.7983754873275757,
193
- "eval_runtime": 92.3841,
194
- "eval_samples_per_second": 22.677,
195
- "eval_steps_per_second": 2.836,
196
- "eval_wer": 1.011014777250668,
197
- "step": 1900
198
- },
199
  {
200
  "epoch": 6.78,
201
- "learning_rate": 0.00018768749999999997,
202
- "loss": 0.2671,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 6.78,
207
- "eval_loss": 0.8387603163719177,
208
- "eval_runtime": 92.0056,
209
- "eval_samples_per_second": 22.77,
210
- "eval_steps_per_second": 2.848,
211
- "eval_wer": 1.0116691204536779,
212
  "step": 2000
213
  },
214
- {
215
- "epoch": 7.12,
216
- "eval_loss": 0.8498700857162476,
217
- "eval_runtime": 91.7074,
218
- "eval_samples_per_second": 22.844,
219
- "eval_steps_per_second": 2.857,
220
- "eval_wer": 1.0072523038333605,
221
- "step": 2100
222
- },
223
- {
224
- "epoch": 7.46,
225
- "eval_loss": 0.8480040431022644,
226
- "eval_runtime": 92.1173,
227
- "eval_samples_per_second": 22.743,
228
- "eval_steps_per_second": 2.844,
229
- "eval_wer": 1.0112328916516713,
230
- "step": 2200
231
- },
232
- {
233
- "epoch": 7.8,
234
- "eval_loss": 0.7929303646087646,
235
- "eval_runtime": 92.2635,
236
- "eval_samples_per_second": 22.707,
237
- "eval_steps_per_second": 2.84,
238
- "eval_wer": 1.0098696766454005,
239
- "step": 2300
240
- },
241
  {
242
  "epoch": 8.14,
243
- "eval_loss": 0.8658961057662964,
244
- "eval_runtime": 93.603,
245
- "eval_samples_per_second": 22.382,
246
- "eval_steps_per_second": 2.799,
247
- "eval_wer": 1.0088881618408856,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 8.47,
252
- "learning_rate": 0.0001564375,
253
- "loss": 0.2017,
254
- "step": 2500
255
- },
256
- {
257
- "epoch": 8.47,
258
- "eval_loss": 0.8583062291145325,
259
- "eval_runtime": 96.9455,
260
- "eval_samples_per_second": 21.61,
261
- "eval_steps_per_second": 2.703,
262
- "eval_wer": 1.0062707890288456,
263
  "step": 2500
264
  },
265
- {
266
- "epoch": 8.81,
267
- "eval_loss": 0.8326291441917419,
268
- "eval_runtime": 93.0622,
269
- "eval_samples_per_second": 22.512,
270
- "eval_steps_per_second": 2.815,
271
- "eval_wer": 1.0109602486504172,
272
- "step": 2600
273
- },
274
- {
275
- "epoch": 9.15,
276
- "eval_loss": 0.8759247064590454,
277
- "eval_runtime": 92.8699,
278
- "eval_samples_per_second": 22.558,
279
- "eval_steps_per_second": 2.821,
280
- "eval_wer": 1.0036534162168058,
281
- "step": 2700
282
- },
283
  {
284
  "epoch": 9.49,
285
- "eval_loss": 0.85763019323349,
286
- "eval_runtime": 91.9599,
287
- "eval_samples_per_second": 22.782,
288
- "eval_steps_per_second": 2.849,
289
- "eval_wer": 1.009978733845902,
290
  "step": 2800
291
  },
292
- {
293
- "epoch": 9.83,
294
- "eval_loss": 0.8777465224266052,
295
- "eval_runtime": 92.008,
296
- "eval_samples_per_second": 22.77,
297
- "eval_steps_per_second": 2.848,
298
- "eval_wer": 1.0224112547030917,
299
- "step": 2900
300
- },
301
- {
302
- "epoch": 10.17,
303
- "learning_rate": 0.0001251875,
304
- "loss": 0.1682,
305
- "step": 3000
306
- },
307
  {
308
  "epoch": 10.17,
309
- "eval_loss": 0.886458694934845,
310
- "eval_runtime": 92.2385,
311
- "eval_samples_per_second": 22.713,
312
- "eval_steps_per_second": 2.84,
313
- "eval_wer": 1.0280277005289273,
314
  "step": 3000
315
  },
316
- {
317
- "epoch": 10.51,
318
- "eval_loss": 0.9213446378707886,
319
- "eval_runtime": 93.1739,
320
- "eval_samples_per_second": 22.485,
321
- "eval_steps_per_second": 2.812,
322
- "eval_wer": 1.006761546431103,
323
- "step": 3100
324
- },
325
  {
326
  "epoch": 10.85,
327
- "eval_loss": 0.8880829215049744,
328
- "eval_runtime": 93.2784,
329
- "eval_samples_per_second": 22.46,
330
- "eval_steps_per_second": 2.809,
331
- "eval_wer": 1.0151589508697312,
332
  "step": 3200
333
  },
334
  {
335
- "epoch": 11.19,
336
- "eval_loss": 0.9088767170906067,
337
- "eval_runtime": 93.7626,
338
- "eval_samples_per_second": 22.344,
339
- "eval_steps_per_second": 2.794,
340
- "eval_wer": 1.010033262446153,
341
- "step": 3300
342
  },
343
  {
344
- "epoch": 11.53,
345
- "eval_loss": 0.8973812460899353,
346
- "eval_runtime": 92.2784,
347
- "eval_samples_per_second": 22.703,
348
- "eval_steps_per_second": 2.839,
349
- "eval_wer": 1.0127051638584437,
350
- "step": 3400
351
  },
352
  {
353
- "epoch": 11.86,
354
- "learning_rate": 9.393749999999999e-05,
355
- "loss": 0.1347,
356
- "step": 3500
357
  },
358
  {
359
- "epoch": 11.86,
360
- "eval_loss": 0.9128761887550354,
361
- "eval_runtime": 92.834,
362
- "eval_samples_per_second": 22.567,
363
- "eval_steps_per_second": 2.822,
364
- "eval_wer": 1.012323463656688,
365
- "step": 3500
366
  },
367
  {
368
- "epoch": 12.2,
369
- "eval_loss": 0.9939002394676208,
370
- "eval_runtime": 91.782,
371
- "eval_samples_per_second": 22.826,
372
- "eval_steps_per_second": 2.855,
373
- "eval_wer": 1.0169038660777578,
374
- "step": 3600
375
  },
376
  {
377
- "epoch": 12.54,
378
- "eval_loss": 0.913511335849762,
379
- "eval_runtime": 93.2623,
380
- "eval_samples_per_second": 22.464,
381
- "eval_steps_per_second": 2.809,
382
- "eval_wer": 1.0083428758383772,
383
- "step": 3700
384
  },
385
  {
386
- "epoch": 12.88,
387
- "eval_loss": 0.9228624105453491,
388
- "eval_runtime": 92.6142,
389
- "eval_samples_per_second": 22.621,
390
- "eval_steps_per_second": 2.829,
391
- "eval_wer": 1.0118327062544306,
392
- "step": 3800
393
  },
394
  {
395
- "epoch": 13.22,
396
- "eval_loss": 0.9609713554382324,
397
- "eval_runtime": 91.8115,
398
- "eval_samples_per_second": 22.818,
399
- "eval_steps_per_second": 2.854,
400
- "eval_wer": 1.010687605649163,
401
- "step": 3900
402
  },
403
  {
404
- "epoch": 13.56,
405
- "learning_rate": 6.26875e-05,
406
- "loss": 0.1049,
407
- "step": 4000
 
 
 
408
  },
409
  {
410
- "epoch": 13.56,
411
- "eval_loss": 0.9235594868659973,
412
- "eval_runtime": 92.7509,
413
- "eval_samples_per_second": 22.587,
414
- "eval_steps_per_second": 2.825,
415
- "eval_wer": 1.0098696766454005,
416
- "step": 4000
417
  },
418
  {
419
- "epoch": 13.9,
420
- "eval_loss": 0.8966746926307678,
421
- "eval_runtime": 94.847,
422
- "eval_samples_per_second": 22.088,
423
- "eval_steps_per_second": 2.762,
424
- "eval_wer": 1.0084519330388788,
425
- "step": 4100
426
  },
427
  {
428
- "epoch": 14.24,
429
- "eval_loss": 0.8979936242103577,
430
- "eval_runtime": 92.5697,
431
- "eval_samples_per_second": 22.632,
432
- "eval_steps_per_second": 2.83,
433
- "eval_wer": 1.0081247614373738,
434
- "step": 4200
435
  },
436
  {
437
- "epoch": 14.58,
438
- "eval_loss": 0.9023324251174927,
439
- "eval_runtime": 91.8786,
440
- "eval_samples_per_second": 22.802,
441
- "eval_steps_per_second": 2.852,
442
- "eval_wer": 1.0081247614373738,
443
- "step": 4300
444
  },
445
  {
446
- "epoch": 14.92,
447
- "eval_loss": 0.9215817451477051,
448
- "eval_runtime": 92.0777,
449
- "eval_samples_per_second": 22.753,
450
- "eval_steps_per_second": 2.845,
451
- "eval_wer": 1.0078521184361198,
452
- "step": 4400
453
  },
454
  {
455
- "epoch": 15.25,
456
- "learning_rate": 3.14375e-05,
457
- "loss": 0.0917,
458
- "step": 4500
459
  },
460
  {
461
- "epoch": 15.25,
462
- "eval_loss": 0.9442654252052307,
463
- "eval_runtime": 92.8739,
464
- "eval_samples_per_second": 22.557,
465
- "eval_steps_per_second": 2.821,
466
- "eval_wer": 1.0089972190413872,
467
- "step": 4500
468
  },
469
  {
470
- "epoch": 15.59,
471
- "eval_loss": 0.9389934539794922,
472
- "eval_runtime": 91.9159,
473
- "eval_samples_per_second": 22.793,
474
- "eval_steps_per_second": 2.85,
475
- "eval_wer": 1.009051747641638,
476
- "step": 4600
477
  },
478
  {
479
- "epoch": 15.93,
480
- "eval_loss": 0.9153040647506714,
481
- "eval_runtime": 92.1522,
482
- "eval_samples_per_second": 22.734,
483
- "eval_steps_per_second": 2.843,
484
- "eval_wer": 1.0082883472381263,
485
- "step": 4700
486
  },
487
  {
488
- "epoch": 16.27,
489
- "eval_loss": 0.9189886450767517,
490
- "eval_runtime": 92.2125,
491
- "eval_samples_per_second": 22.719,
492
- "eval_steps_per_second": 2.841,
493
- "eval_wer": 1.0091608048421397,
494
- "step": 4800
495
  },
496
  {
497
- "epoch": 16.61,
498
- "eval_loss": 0.9194196462631226,
499
- "eval_runtime": 92.6007,
500
- "eval_samples_per_second": 22.624,
501
- "eval_steps_per_second": 2.829,
502
- "eval_wer": 1.009051747641638,
503
- "step": 4900
504
  },
505
  {
506
- "epoch": 16.95,
507
- "learning_rate": 1.8749999999999998e-07,
508
- "loss": 0.0786,
509
- "step": 5000
510
  },
511
  {
512
- "epoch": 16.95,
513
- "eval_loss": 0.9201112985610962,
514
- "eval_runtime": 91.9659,
515
- "eval_samples_per_second": 22.78,
516
- "eval_steps_per_second": 2.849,
517
- "eval_wer": 1.009106276241889,
518
- "step": 5000
519
  },
520
  {
521
- "epoch": 16.95,
522
- "step": 5000,
523
- "total_flos": 1.0048485919914449e+19,
524
- "train_loss": 0.584431241607666,
525
- "train_runtime": 8680.4794,
526
- "train_samples_per_second": 9.216,
527
- "train_steps_per_second": 0.576
528
  }
529
  ],
530
- "max_steps": 5000,
531
- "num_train_epochs": 17,
532
- "total_flos": 1.0048485919914449e+19,
533
  "trial_name": null,
534
  "trial_params": null
535
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 27.11864406779661,
5
+ "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.36,
12
+ "eval_loss": 1.4595407247543335,
13
+ "eval_runtime": 81.6295,
14
+ "eval_samples_per_second": 25.665,
15
+ "eval_steps_per_second": 3.21,
16
+ "eval_wer": 1.0039260592180599,
17
  "step": 400
18
  },
19
  {
20
  "epoch": 1.69,
21
+ "learning_rate": 0.0002982,
22
+ "loss": 4.7778,
23
  "step": 500
24
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  {
26
  "epoch": 2.71,
27
+ "eval_loss": 0.8082281947135925,
28
+ "eval_runtime": 80.8384,
29
+ "eval_samples_per_second": 25.916,
30
+ "eval_steps_per_second": 3.241,
31
+ "eval_wer": 1.0115055346529254,
32
  "step": 800
33
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  {
35
  "epoch": 3.39,
36
+ "learning_rate": 0.00028011999999999997,
37
+ "loss": 0.6408,
 
 
 
38
  "step": 1000
39
  },
 
 
 
 
 
 
 
 
 
40
  {
41
  "epoch": 4.07,
42
+ "eval_loss": 0.7031667828559875,
43
+ "eval_runtime": 84.117,
44
+ "eval_samples_per_second": 24.906,
45
+ "eval_steps_per_second": 3.115,
46
+ "eval_wer": 1.0078521184361198,
47
  "step": 1200
48
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  {
50
  "epoch": 5.08,
51
+ "learning_rate": 0.00026011999999999997,
52
+ "loss": 0.3937,
 
 
 
 
 
 
 
 
 
53
  "step": 1500
54
  },
55
  {
56
  "epoch": 5.42,
57
+ "eval_loss": 0.6889204382896423,
58
+ "eval_runtime": 80.1903,
59
+ "eval_samples_per_second": 26.125,
60
+ "eval_steps_per_second": 3.267,
61
+ "eval_wer": 1.0432957085991603,
62
  "step": 1600
63
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  {
65
  "epoch": 6.78,
66
+ "learning_rate": 0.00024011999999999997,
67
+ "loss": 0.3,
68
  "step": 2000
69
  },
70
  {
71
  "epoch": 6.78,
72
+ "eval_loss": 0.6820310354232788,
73
+ "eval_runtime": 80.2232,
74
+ "eval_samples_per_second": 26.115,
75
+ "eval_steps_per_second": 3.266,
76
+ "eval_wer": 1.0068706036316049,
77
  "step": 2000
78
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  {
80
  "epoch": 8.14,
81
+ "eval_loss": 0.6669920086860657,
82
+ "eval_runtime": 79.89,
83
+ "eval_samples_per_second": 26.224,
84
+ "eval_steps_per_second": 3.28,
85
+ "eval_wer": 1.0196302960902994,
86
  "step": 2400
87
  },
88
  {
89
  "epoch": 8.47,
90
+ "learning_rate": 0.00022011999999999997,
91
+ "loss": 0.226,
 
 
 
 
 
 
 
 
 
92
  "step": 2500
93
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  {
95
  "epoch": 9.49,
96
+ "eval_loss": 0.7215595841407776,
97
+ "eval_runtime": 80.0002,
98
+ "eval_samples_per_second": 26.187,
99
+ "eval_steps_per_second": 3.275,
100
+ "eval_wer": 1.0422051365941436,
101
  "step": 2800
102
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  {
104
  "epoch": 10.17,
105
+ "learning_rate": 0.00020012,
106
+ "loss": 0.197,
 
 
 
107
  "step": 3000
108
  },
 
 
 
 
 
 
 
 
 
109
  {
110
  "epoch": 10.85,
111
+ "eval_loss": 0.7669464945793152,
112
+ "eval_runtime": 80.1105,
113
+ "eval_samples_per_second": 26.151,
114
+ "eval_steps_per_second": 3.27,
115
+ "eval_wer": 1.053383499645564,
116
  "step": 3200
117
  },
118
  {
119
+ "epoch": 11.86,
120
+ "learning_rate": 0.00018012,
121
+ "loss": 0.165,
122
+ "step": 3500
 
 
 
123
  },
124
  {
125
+ "epoch": 12.2,
126
+ "eval_loss": 0.7517344951629639,
127
+ "eval_runtime": 79.716,
128
+ "eval_samples_per_second": 26.281,
129
+ "eval_steps_per_second": 3.287,
130
+ "eval_wer": 1.0199574676918044,
131
+ "step": 3600
132
  },
133
  {
134
+ "epoch": 13.56,
135
+ "learning_rate": 0.00016011999999999998,
136
+ "loss": 0.1486,
137
+ "step": 4000
138
  },
139
  {
140
+ "epoch": 13.56,
141
+ "eval_loss": 0.7124771475791931,
142
+ "eval_runtime": 79.8981,
143
+ "eval_samples_per_second": 26.221,
144
+ "eval_steps_per_second": 3.279,
145
+ "eval_wer": 1.0357162331642946,
146
+ "step": 4000
147
  },
148
  {
149
+ "epoch": 14.92,
150
+ "eval_loss": 0.7447456121444702,
151
+ "eval_runtime": 82.2103,
152
+ "eval_samples_per_second": 25.483,
153
+ "eval_steps_per_second": 3.187,
154
+ "eval_wer": 1.0347347183597797,
155
+ "step": 4400
156
  },
157
  {
158
+ "epoch": 15.25,
159
+ "learning_rate": 0.00014012,
160
+ "loss": 0.122,
161
+ "step": 4500
 
 
 
162
  },
163
  {
164
+ "epoch": 16.27,
165
+ "eval_loss": 0.6899322271347046,
166
+ "eval_runtime": 79.6624,
167
+ "eval_samples_per_second": 26.298,
168
+ "eval_steps_per_second": 3.289,
169
+ "eval_wer": 1.0440045804024212,
170
+ "step": 4800
171
  },
172
  {
173
+ "epoch": 16.95,
174
+ "learning_rate": 0.00012011999999999998,
175
+ "loss": 0.1069,
176
+ "step": 5000
 
 
 
177
  },
178
  {
179
+ "epoch": 17.63,
180
+ "eval_loss": 0.7212241291999817,
181
+ "eval_runtime": 80.3141,
182
+ "eval_samples_per_second": 26.085,
183
+ "eval_steps_per_second": 3.262,
184
+ "eval_wer": 1.0350073613610338,
185
+ "step": 5200
186
  },
187
  {
188
+ "epoch": 18.64,
189
+ "learning_rate": 0.00010011999999999998,
190
+ "loss": 0.0961,
191
+ "step": 5500
 
 
 
192
  },
193
  {
194
+ "epoch": 18.98,
195
+ "eval_loss": 0.7417359352111816,
196
+ "eval_runtime": 80.0211,
197
+ "eval_samples_per_second": 26.181,
198
+ "eval_steps_per_second": 3.274,
199
+ "eval_wer": 1.0408419215878728,
200
+ "step": 5600
201
  },
202
  {
203
+ "epoch": 20.34,
204
+ "learning_rate": 8.012e-05,
205
+ "loss": 0.086,
206
+ "step": 6000
 
 
 
207
  },
208
  {
209
+ "epoch": 20.34,
210
+ "eval_loss": 0.7402010560035706,
211
+ "eval_runtime": 80.0522,
212
+ "eval_samples_per_second": 26.17,
213
+ "eval_steps_per_second": 3.273,
214
+ "eval_wer": 1.0355526473635421,
215
+ "step": 6000
216
  },
217
  {
218
+ "epoch": 21.69,
219
+ "eval_loss": 0.7760854959487915,
220
+ "eval_runtime": 80.138,
221
+ "eval_samples_per_second": 26.142,
222
+ "eval_steps_per_second": 3.269,
223
+ "eval_wer": 1.0419870221931402,
224
+ "step": 6400
225
  },
226
  {
227
+ "epoch": 22.03,
228
+ "learning_rate": 6.0119999999999994e-05,
229
+ "loss": 0.0756,
230
+ "step": 6500
231
  },
232
  {
233
+ "epoch": 23.05,
234
+ "eval_loss": 0.7345677614212036,
235
+ "eval_runtime": 80.4841,
236
+ "eval_samples_per_second": 26.03,
237
+ "eval_steps_per_second": 3.255,
238
+ "eval_wer": 1.036915862369813,
239
+ "step": 6800
240
  },
241
  {
242
+ "epoch": 23.73,
243
+ "learning_rate": 4.012e-05,
244
+ "loss": 0.0666,
245
+ "step": 7000
 
 
 
246
  },
247
  {
248
+ "epoch": 24.41,
249
+ "eval_loss": 0.7506045699119568,
250
+ "eval_runtime": 82.6434,
251
+ "eval_samples_per_second": 25.35,
252
+ "eval_steps_per_second": 3.17,
253
+ "eval_wer": 1.0449315666066852,
254
+ "step": 7200
255
  },
256
  {
257
+ "epoch": 25.42,
258
+ "learning_rate": 2.0119999999999997e-05,
259
+ "loss": 0.0595,
260
+ "step": 7500
 
 
 
261
  },
262
  {
263
+ "epoch": 25.76,
264
+ "eval_loss": 0.7319227457046509,
265
+ "eval_runtime": 79.8082,
266
+ "eval_samples_per_second": 26.25,
267
+ "eval_steps_per_second": 3.283,
268
+ "eval_wer": 1.0476034680189759,
269
+ "step": 7600
270
  },
271
  {
272
+ "epoch": 27.12,
273
+ "learning_rate": 1.6e-07,
274
+ "loss": 0.054,
275
+ "step": 8000
276
  },
277
  {
278
+ "epoch": 27.12,
279
+ "eval_loss": 0.7346429228782654,
280
+ "eval_runtime": 79.9162,
281
+ "eval_samples_per_second": 26.215,
282
+ "eval_steps_per_second": 3.278,
283
+ "eval_wer": 1.0478761110202301,
284
+ "step": 8000
285
  },
286
  {
287
+ "epoch": 27.12,
288
+ "step": 8000,
289
+ "total_flos": 1.592873144248711e+19,
290
+ "train_loss": 0.46973063707351687,
291
+ "train_runtime": 10600.4383,
292
+ "train_samples_per_second": 12.075,
293
+ "train_steps_per_second": 0.755
294
  }
295
  ],
296
+ "max_steps": 8000,
297
+ "num_train_epochs": 28,
298
+ "total_flos": 1.592873144248711e+19,
299
  "trial_name": null,
300
  "trial_params": null
301
  }