ivanlau commited on
Commit
45d3667
1 Parent(s): 65273a5

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +1020 -12
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 2.67260479927063,
4
- "eval_runtime": 163.5078,
5
  "eval_samples": 3659,
6
- "eval_samples_per_second": 22.378,
7
- "eval_steps_per_second": 1.401,
8
- "eval_wer": 0.9814612868047983,
9
- "train_loss": 14.438921352032104,
10
- "train_runtime": 7245.592,
11
  "train_samples": 11686,
12
- "train_samples_per_second": 16.128,
13
- "train_steps_per_second": 0.253
14
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 1.484755277633667,
4
+ "eval_runtime": 166.9526,
5
  "eval_samples": 3659,
6
+ "eval_samples_per_second": 21.916,
7
+ "eval_steps_per_second": 1.372,
8
+ "eval_wer": 0.8004362050163577,
9
+ "train_loss": 0.34965579027686616,
10
+ "train_runtime": 66008.5183,
11
  "train_samples": 11686,
12
+ "train_samples_per_second": 17.704,
13
+ "train_steps_per_second": 0.277
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 2.67260479927063,
4
- "eval_runtime": 163.5078,
5
  "eval_samples": 3659,
6
- "eval_samples_per_second": 22.378,
7
- "eval_steps_per_second": 1.401,
8
- "eval_wer": 0.9814612868047983
9
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 1.484755277633667,
4
+ "eval_runtime": 166.9526,
5
  "eval_samples": 3659,
6
+ "eval_samples_per_second": 21.916,
7
+ "eval_steps_per_second": 1.372,
8
+ "eval_wer": 0.8004362050163577
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 14.438921352032104,
4
- "train_runtime": 7245.592,
5
  "train_samples": 11686,
6
- "train_samples_per_second": 16.128,
7
- "train_steps_per_second": 0.253
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 0.34965579027686616,
4
+ "train_runtime": 66008.5183,
5
  "train_samples": 11686,
6
+ "train_samples_per_second": 17.704,
7
+ "train_steps_per_second": 0.277
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 1830,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -116,18 +116,1026 @@
116
  "step": 1830
117
  },
118
  {
119
- "epoch": 10.0,
120
- "step": 1830,
121
- "total_flos": 1.3577354977100892e+19,
122
- "train_loss": 14.438921352032104,
123
- "train_runtime": 7245.592,
124
- "train_samples_per_second": 16.128,
125
- "train_steps_per_second": 0.253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  }
127
  ],
128
- "max_steps": 1830,
129
- "num_train_epochs": 10,
130
- "total_flos": 1.3577354977100892e+19,
131
  "trial_name": null,
132
  "trial_params": null
133
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "global_step": 18300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
116
  "step": 1830
117
  },
118
  {
119
+ "epoch": 10.93,
120
+ "learning_rate": 0.00027480337078651684,
121
+ "loss": 2.7771,
122
+ "step": 2000
123
+ },
124
+ {
125
+ "epoch": 11.0,
126
+ "eval_loss": 2.7177391052246094,
127
+ "eval_runtime": 191.8821,
128
+ "eval_samples_per_second": 19.069,
129
+ "eval_steps_per_second": 1.193,
130
+ "eval_wer": 0.9809160305343512,
131
+ "step": 2013
132
+ },
133
+ {
134
+ "epoch": 12.0,
135
+ "eval_loss": 2.3581743240356445,
136
+ "eval_runtime": 168.9095,
137
+ "eval_samples_per_second": 21.662,
138
+ "eval_steps_per_second": 1.356,
139
+ "eval_wer": 0.9691930207197382,
140
+ "step": 2196
141
+ },
142
+ {
143
+ "epoch": 13.0,
144
+ "eval_loss": 2.1708498001098633,
145
+ "eval_runtime": 167.2979,
146
+ "eval_samples_per_second": 21.871,
147
+ "eval_steps_per_second": 1.369,
148
+ "eval_wer": 0.9757360959651036,
149
+ "step": 2379
150
+ },
151
+ {
152
+ "epoch": 13.66,
153
+ "learning_rate": 0.000266376404494382,
154
+ "loss": 2.3488,
155
+ "step": 2500
156
+ },
157
+ {
158
+ "epoch": 14.0,
159
+ "eval_loss": 2.049051284790039,
160
+ "eval_runtime": 169.536,
161
+ "eval_samples_per_second": 21.582,
162
+ "eval_steps_per_second": 1.351,
163
+ "eval_wer": 0.9525627044711014,
164
+ "step": 2562
165
+ },
166
+ {
167
+ "epoch": 15.0,
168
+ "eval_loss": 1.8517801761627197,
169
+ "eval_runtime": 166.9821,
170
+ "eval_samples_per_second": 21.913,
171
+ "eval_steps_per_second": 1.371,
172
+ "eval_wer": 0.9378407851690295,
173
+ "step": 2745
174
+ },
175
+ {
176
+ "epoch": 16.0,
177
+ "eval_loss": 1.6844531297683716,
178
+ "eval_runtime": 166.3633,
179
+ "eval_samples_per_second": 21.994,
180
+ "eval_steps_per_second": 1.377,
181
+ "eval_wer": 0.9285714285714286,
182
+ "step": 2928
183
+ },
184
+ {
185
+ "epoch": 16.39,
186
+ "learning_rate": 0.00025794943820224716,
187
+ "loss": 1.7859,
188
+ "step": 3000
189
+ },
190
+ {
191
+ "epoch": 17.0,
192
+ "eval_loss": 1.641157627105713,
193
+ "eval_runtime": 165.2481,
194
+ "eval_samples_per_second": 22.142,
195
+ "eval_steps_per_second": 1.386,
196
+ "eval_wer": 0.9280261723009815,
197
+ "step": 3111
198
+ },
199
+ {
200
+ "epoch": 18.0,
201
+ "eval_loss": 1.5487942695617676,
202
+ "eval_runtime": 166.3028,
203
+ "eval_samples_per_second": 22.002,
204
+ "eval_steps_per_second": 1.377,
205
+ "eval_wer": 0.9034896401308615,
206
+ "step": 3294
207
+ },
208
+ {
209
+ "epoch": 19.0,
210
+ "eval_loss": 1.4545683860778809,
211
+ "eval_runtime": 169.7236,
212
+ "eval_samples_per_second": 21.559,
213
+ "eval_steps_per_second": 1.349,
214
+ "eval_wer": 0.9010359869138496,
215
+ "step": 3477
216
+ },
217
+ {
218
+ "epoch": 19.13,
219
+ "learning_rate": 0.00024952247191011235,
220
+ "loss": 1.3898,
221
+ "step": 3500
222
+ },
223
+ {
224
+ "epoch": 20.0,
225
+ "eval_loss": 1.5146596431732178,
226
+ "eval_runtime": 170.5824,
227
+ "eval_samples_per_second": 21.45,
228
+ "eval_steps_per_second": 1.342,
229
+ "eval_wer": 0.9201199563794984,
230
+ "step": 3660
231
+ },
232
+ {
233
+ "epoch": 21.0,
234
+ "eval_loss": 1.446706771850586,
235
+ "eval_runtime": 164.7146,
236
+ "eval_samples_per_second": 22.214,
237
+ "eval_steps_per_second": 1.39,
238
+ "eval_wer": 0.895856052344602,
239
+ "step": 3843
240
+ },
241
+ {
242
+ "epoch": 21.86,
243
+ "learning_rate": 0.00024111235955056177,
244
+ "loss": 1.1291,
245
+ "step": 4000
246
+ },
247
+ {
248
+ "epoch": 22.0,
249
+ "eval_loss": 1.474255084991455,
250
+ "eval_runtime": 166.7828,
251
+ "eval_samples_per_second": 21.939,
252
+ "eval_steps_per_second": 1.373,
253
+ "eval_wer": 0.9034896401308615,
254
+ "step": 4026
255
+ },
256
+ {
257
+ "epoch": 23.0,
258
+ "eval_loss": 1.3826948404312134,
259
+ "eval_runtime": 165.2382,
260
+ "eval_samples_per_second": 22.144,
261
+ "eval_steps_per_second": 1.386,
262
+ "eval_wer": 0.876226826608506,
263
+ "step": 4209
264
+ },
265
+ {
266
+ "epoch": 24.0,
267
+ "eval_loss": 1.3436840772628784,
268
+ "eval_runtime": 167.9623,
269
+ "eval_samples_per_second": 21.785,
270
+ "eval_steps_per_second": 1.363,
271
+ "eval_wer": 0.8792257360959651,
272
+ "step": 4392
273
+ },
274
+ {
275
+ "epoch": 24.59,
276
+ "learning_rate": 0.00023268539325842696,
277
+ "loss": 0.8993,
278
+ "step": 4500
279
+ },
280
+ {
281
+ "epoch": 25.0,
282
+ "eval_loss": 1.2894562482833862,
283
+ "eval_runtime": 179.0778,
284
+ "eval_samples_per_second": 20.432,
285
+ "eval_steps_per_second": 1.279,
286
+ "eval_wer": 0.8576881134133042,
287
+ "step": 4575
288
+ },
289
+ {
290
+ "epoch": 26.0,
291
+ "eval_loss": 1.2927522659301758,
292
+ "eval_runtime": 173.2382,
293
+ "eval_samples_per_second": 21.121,
294
+ "eval_steps_per_second": 1.322,
295
+ "eval_wer": 0.8557797164667393,
296
+ "step": 4758
297
+ },
298
+ {
299
+ "epoch": 27.0,
300
+ "eval_loss": 1.294653058052063,
301
+ "eval_runtime": 166.3679,
302
+ "eval_samples_per_second": 21.993,
303
+ "eval_steps_per_second": 1.376,
304
+ "eval_wer": 0.9163031624863686,
305
+ "step": 4941
306
+ },
307
+ {
308
+ "epoch": 27.32,
309
+ "learning_rate": 0.0002242584269662921,
310
+ "loss": 0.6298,
311
+ "step": 5000
312
+ },
313
+ {
314
+ "epoch": 28.0,
315
+ "eval_loss": 1.3150520324707031,
316
+ "eval_runtime": 164.8819,
317
+ "eval_samples_per_second": 22.192,
318
+ "eval_steps_per_second": 1.389,
319
+ "eval_wer": 0.873773173391494,
320
+ "step": 5124
321
+ },
322
+ {
323
+ "epoch": 29.0,
324
+ "eval_loss": 1.2971961498260498,
325
+ "eval_runtime": 163.7658,
326
+ "eval_samples_per_second": 22.343,
327
+ "eval_steps_per_second": 1.398,
328
+ "eval_wer": 0.8514176663031625,
329
+ "step": 5307
330
+ },
331
+ {
332
+ "epoch": 30.0,
333
+ "eval_loss": 1.302983283996582,
334
+ "eval_runtime": 162.8739,
335
+ "eval_samples_per_second": 22.465,
336
+ "eval_steps_per_second": 1.406,
337
+ "eval_wer": 0.8432388222464559,
338
+ "step": 5490
339
+ },
340
+ {
341
+ "epoch": 30.05,
342
+ "learning_rate": 0.00021583146067415728,
343
+ "loss": 0.4757,
344
+ "step": 5500
345
+ },
346
+ {
347
+ "epoch": 31.0,
348
+ "eval_loss": 1.3263603448867798,
349
+ "eval_runtime": 163.1422,
350
+ "eval_samples_per_second": 22.428,
351
+ "eval_steps_per_second": 1.404,
352
+ "eval_wer": 0.836423118865867,
353
+ "step": 5673
354
+ },
355
+ {
356
+ "epoch": 32.0,
357
+ "eval_loss": 1.3130974769592285,
358
+ "eval_runtime": 162.7813,
359
+ "eval_samples_per_second": 22.478,
360
+ "eval_steps_per_second": 1.407,
361
+ "eval_wer": 0.8421483097055616,
362
+ "step": 5856
363
+ },
364
+ {
365
+ "epoch": 32.79,
366
+ "learning_rate": 0.00020740449438202247,
367
+ "loss": 0.3735,
368
+ "step": 6000
369
+ },
370
+ {
371
+ "epoch": 33.0,
372
+ "eval_loss": 1.3457393646240234,
373
+ "eval_runtime": 164.771,
374
+ "eval_samples_per_second": 22.207,
375
+ "eval_steps_per_second": 1.39,
376
+ "eval_wer": 0.8587786259541985,
377
+ "step": 6039
378
+ },
379
+ {
380
+ "epoch": 34.0,
381
+ "eval_loss": 1.3450396060943604,
382
+ "eval_runtime": 163.6185,
383
+ "eval_samples_per_second": 22.363,
384
+ "eval_steps_per_second": 1.4,
385
+ "eval_wer": 0.8473282442748091,
386
+ "step": 6222
387
+ },
388
+ {
389
+ "epoch": 35.0,
390
+ "eval_loss": 1.3451658487319946,
391
+ "eval_runtime": 167.7067,
392
+ "eval_samples_per_second": 21.818,
393
+ "eval_steps_per_second": 1.365,
394
+ "eval_wer": 0.9217557251908397,
395
+ "step": 6405
396
+ },
397
+ {
398
+ "epoch": 35.52,
399
+ "learning_rate": 0.0001989775280898876,
400
+ "loss": 0.3253,
401
+ "step": 6500
402
+ },
403
+ {
404
+ "epoch": 36.0,
405
+ "eval_loss": 1.375409722328186,
406
+ "eval_runtime": 164.3009,
407
+ "eval_samples_per_second": 22.27,
408
+ "eval_steps_per_second": 1.394,
409
+ "eval_wer": 0.8396946564885496,
410
+ "step": 6588
411
+ },
412
+ {
413
+ "epoch": 37.0,
414
+ "eval_loss": 1.3554260730743408,
415
+ "eval_runtime": 163.3464,
416
+ "eval_samples_per_second": 22.4,
417
+ "eval_steps_per_second": 1.402,
418
+ "eval_wer": 0.8353326063249727,
419
+ "step": 6771
420
+ },
421
+ {
422
+ "epoch": 38.0,
423
+ "eval_loss": 1.353210687637329,
424
+ "eval_runtime": 166.4062,
425
+ "eval_samples_per_second": 21.988,
426
+ "eval_steps_per_second": 1.376,
427
+ "eval_wer": 0.8312431842966194,
428
+ "step": 6954
429
+ },
430
+ {
431
+ "epoch": 38.25,
432
+ "learning_rate": 0.0001905505617977528,
433
+ "loss": 0.2816,
434
+ "step": 7000
435
+ },
436
+ {
437
+ "epoch": 39.0,
438
+ "eval_loss": 1.369396686553955,
439
+ "eval_runtime": 165.9264,
440
+ "eval_samples_per_second": 22.052,
441
+ "eval_steps_per_second": 1.38,
442
+ "eval_wer": 0.8345147219193021,
443
+ "step": 7137
444
+ },
445
+ {
446
+ "epoch": 40.0,
447
+ "eval_loss": 1.395269751548767,
448
+ "eval_runtime": 163.891,
449
+ "eval_samples_per_second": 22.326,
450
+ "eval_steps_per_second": 1.397,
451
+ "eval_wer": 0.829607415485278,
452
+ "step": 7320
453
+ },
454
+ {
455
+ "epoch": 40.98,
456
+ "learning_rate": 0.00018212359550561795,
457
+ "loss": 0.2397,
458
+ "step": 7500
459
+ },
460
+ {
461
+ "epoch": 41.0,
462
+ "eval_loss": 1.385826826095581,
463
+ "eval_runtime": 165.4232,
464
+ "eval_samples_per_second": 22.119,
465
+ "eval_steps_per_second": 1.384,
466
+ "eval_wer": 0.8293347873500545,
467
+ "step": 7503
468
+ },
469
+ {
470
+ "epoch": 42.0,
471
+ "eval_loss": 1.3958967924118042,
472
+ "eval_runtime": 163.1457,
473
+ "eval_samples_per_second": 22.428,
474
+ "eval_steps_per_second": 1.404,
475
+ "eval_wer": 0.8402399127589967,
476
+ "step": 7686
477
+ },
478
+ {
479
+ "epoch": 43.0,
480
+ "eval_loss": 1.435033917427063,
481
+ "eval_runtime": 163.4766,
482
+ "eval_samples_per_second": 22.382,
483
+ "eval_steps_per_second": 1.401,
484
+ "eval_wer": 0.9318429661941112,
485
+ "step": 7869
486
+ },
487
+ {
488
+ "epoch": 43.72,
489
+ "learning_rate": 0.0001737134831460674,
490
+ "loss": 0.2084,
491
+ "step": 8000
492
+ },
493
+ {
494
+ "epoch": 44.0,
495
+ "eval_loss": 1.4003560543060303,
496
+ "eval_runtime": 164.4371,
497
+ "eval_samples_per_second": 22.252,
498
+ "eval_steps_per_second": 1.393,
499
+ "eval_wer": 0.8805888767720829,
500
+ "step": 8052
501
+ },
502
+ {
503
+ "epoch": 45.0,
504
+ "eval_loss": 1.387134075164795,
505
+ "eval_runtime": 164.8129,
506
+ "eval_samples_per_second": 22.201,
507
+ "eval_steps_per_second": 1.389,
508
+ "eval_wer": 0.8255179934569248,
509
+ "step": 8235
510
+ },
511
+ {
512
+ "epoch": 46.0,
513
+ "eval_loss": 1.4059827327728271,
514
+ "eval_runtime": 166.2017,
515
+ "eval_samples_per_second": 22.015,
516
+ "eval_steps_per_second": 1.378,
517
+ "eval_wer": 0.8252453653217012,
518
+ "step": 8418
519
+ },
520
+ {
521
+ "epoch": 46.45,
522
+ "learning_rate": 0.00016528651685393257,
523
+ "loss": 0.1853,
524
+ "step": 8500
525
+ },
526
+ {
527
+ "epoch": 47.0,
528
+ "eval_loss": 1.3992133140563965,
529
+ "eval_runtime": 164.2693,
530
+ "eval_samples_per_second": 22.274,
531
+ "eval_steps_per_second": 1.394,
532
+ "eval_wer": 0.8500545256270448,
533
+ "step": 8601
534
+ },
535
+ {
536
+ "epoch": 48.0,
537
+ "eval_loss": 1.4186208248138428,
538
+ "eval_runtime": 162.8365,
539
+ "eval_samples_per_second": 22.47,
540
+ "eval_steps_per_second": 1.406,
541
+ "eval_wer": 0.8252453653217012,
542
+ "step": 8784
543
+ },
544
+ {
545
+ "epoch": 49.0,
546
+ "eval_loss": 1.4120242595672607,
547
+ "eval_runtime": 164.9525,
548
+ "eval_samples_per_second": 22.182,
549
+ "eval_steps_per_second": 1.388,
550
+ "eval_wer": 0.8165212649945475,
551
+ "step": 8967
552
+ },
553
+ {
554
+ "epoch": 49.18,
555
+ "learning_rate": 0.00015685955056179775,
556
+ "loss": 0.1671,
557
+ "step": 9000
558
+ },
559
+ {
560
+ "epoch": 50.0,
561
+ "eval_loss": 1.4165652990341187,
562
+ "eval_runtime": 173.8761,
563
+ "eval_samples_per_second": 21.044,
564
+ "eval_steps_per_second": 1.317,
565
+ "eval_wer": 0.8214285714285714,
566
+ "step": 9150
567
+ },
568
+ {
569
+ "epoch": 51.0,
570
+ "eval_loss": 1.4411484003067017,
571
+ "eval_runtime": 165.9975,
572
+ "eval_samples_per_second": 22.043,
573
+ "eval_steps_per_second": 1.38,
574
+ "eval_wer": 0.8500545256270448,
575
+ "step": 9333
576
+ },
577
+ {
578
+ "epoch": 51.91,
579
+ "learning_rate": 0.00014843258426966292,
580
+ "loss": 0.1513,
581
+ "step": 9500
582
+ },
583
+ {
584
+ "epoch": 52.0,
585
+ "eval_loss": 1.4692339897155762,
586
+ "eval_runtime": 163.251,
587
+ "eval_samples_per_second": 22.413,
588
+ "eval_steps_per_second": 1.403,
589
+ "eval_wer": 0.839422028353326,
590
+ "step": 9516
591
+ },
592
+ {
593
+ "epoch": 53.0,
594
+ "eval_loss": 1.4640177488327026,
595
+ "eval_runtime": 162.0066,
596
+ "eval_samples_per_second": 22.586,
597
+ "eval_steps_per_second": 1.414,
598
+ "eval_wer": 0.8391494002181025,
599
+ "step": 9699
600
+ },
601
+ {
602
+ "epoch": 54.0,
603
+ "eval_loss": 1.450060486793518,
604
+ "eval_runtime": 165.9207,
605
+ "eval_samples_per_second": 22.053,
606
+ "eval_steps_per_second": 1.38,
607
+ "eval_wer": 0.8418756815703381,
608
+ "step": 9882
609
+ },
610
+ {
611
+ "epoch": 54.64,
612
+ "learning_rate": 0.00014002247191011234,
613
+ "loss": 0.133,
614
+ "step": 10000
615
+ },
616
+ {
617
+ "epoch": 55.0,
618
+ "eval_loss": 1.4133520126342773,
619
+ "eval_runtime": 163.1361,
620
+ "eval_samples_per_second": 22.429,
621
+ "eval_steps_per_second": 1.404,
622
+ "eval_wer": 0.8350599781897492,
623
+ "step": 10065
624
+ },
625
+ {
626
+ "epoch": 56.0,
627
+ "eval_loss": 1.4592992067337036,
628
+ "eval_runtime": 161.8579,
629
+ "eval_samples_per_second": 22.606,
630
+ "eval_steps_per_second": 1.415,
631
+ "eval_wer": 0.8405125408942202,
632
+ "step": 10248
633
+ },
634
+ {
635
+ "epoch": 57.0,
636
+ "eval_loss": 1.45597505569458,
637
+ "eval_runtime": 161.821,
638
+ "eval_samples_per_second": 22.611,
639
+ "eval_steps_per_second": 1.415,
640
+ "eval_wer": 0.8388767720828789,
641
+ "step": 10431
642
+ },
643
+ {
644
+ "epoch": 57.38,
645
+ "learning_rate": 0.00013159550561797753,
646
+ "loss": 0.1198,
647
+ "step": 10500
648
+ },
649
+ {
650
+ "epoch": 58.0,
651
+ "eval_loss": 1.4733554124832153,
652
+ "eval_runtime": 162.1949,
653
+ "eval_samples_per_second": 22.559,
654
+ "eval_steps_per_second": 1.412,
655
+ "eval_wer": 0.8334242093784079,
656
+ "step": 10614
657
+ },
658
+ {
659
+ "epoch": 59.0,
660
+ "eval_loss": 1.4649208784103394,
661
+ "eval_runtime": 162.1709,
662
+ "eval_samples_per_second": 22.563,
663
+ "eval_steps_per_second": 1.412,
664
+ "eval_wer": 0.8317884405670665,
665
+ "step": 10797
666
+ },
667
+ {
668
+ "epoch": 60.0,
669
+ "eval_loss": 1.4659123420715332,
670
+ "eval_runtime": 163.9506,
671
+ "eval_samples_per_second": 22.318,
672
+ "eval_steps_per_second": 1.397,
673
+ "eval_wer": 0.8099781897491821,
674
+ "step": 10980
675
+ },
676
+ {
677
+ "epoch": 60.11,
678
+ "learning_rate": 0.0001231685393258427,
679
+ "loss": 0.1109,
680
+ "step": 11000
681
+ },
682
+ {
683
+ "epoch": 61.0,
684
+ "eval_loss": 1.4783909320831299,
685
+ "eval_runtime": 165.1461,
686
+ "eval_samples_per_second": 22.156,
687
+ "eval_steps_per_second": 1.387,
688
+ "eval_wer": 0.811886586695747,
689
+ "step": 11163
690
+ },
691
+ {
692
+ "epoch": 62.0,
693
+ "eval_loss": 1.493830680847168,
694
+ "eval_runtime": 164.5944,
695
+ "eval_samples_per_second": 22.23,
696
+ "eval_steps_per_second": 1.391,
697
+ "eval_wer": 0.8148854961832062,
698
+ "step": 11346
699
+ },
700
+ {
701
+ "epoch": 62.84,
702
+ "learning_rate": 0.00011474157303370785,
703
+ "loss": 0.1063,
704
+ "step": 11500
705
+ },
706
+ {
707
+ "epoch": 63.0,
708
+ "eval_loss": 1.5050164461135864,
709
+ "eval_runtime": 163.3536,
710
+ "eval_samples_per_second": 22.399,
711
+ "eval_steps_per_second": 1.402,
712
+ "eval_wer": 0.8151581243184297,
713
+ "step": 11529
714
+ },
715
+ {
716
+ "epoch": 64.0,
717
+ "eval_loss": 1.4773460626602173,
718
+ "eval_runtime": 168.4977,
719
+ "eval_samples_per_second": 21.715,
720
+ "eval_steps_per_second": 1.359,
721
+ "eval_wer": 0.8176117775354417,
722
+ "step": 11712
723
+ },
724
+ {
725
+ "epoch": 65.0,
726
+ "eval_loss": 1.4835622310638428,
727
+ "eval_runtime": 166.8711,
728
+ "eval_samples_per_second": 21.927,
729
+ "eval_steps_per_second": 1.372,
730
+ "eval_wer": 0.8260632497273719,
731
+ "step": 11895
732
+ },
733
+ {
734
+ "epoch": 65.57,
735
+ "learning_rate": 0.00010631460674157301,
736
+ "loss": 0.0966,
737
+ "step": 12000
738
+ },
739
+ {
740
+ "epoch": 66.0,
741
+ "eval_loss": 1.4978560209274292,
742
+ "eval_runtime": 162.3032,
743
+ "eval_samples_per_second": 22.544,
744
+ "eval_steps_per_second": 1.411,
745
+ "eval_wer": 0.8157033805888768,
746
+ "step": 12078
747
+ },
748
+ {
749
+ "epoch": 67.0,
750
+ "eval_loss": 1.4603493213653564,
751
+ "eval_runtime": 162.8062,
752
+ "eval_samples_per_second": 22.475,
753
+ "eval_steps_per_second": 1.407,
754
+ "eval_wer": 0.8047982551799345,
755
+ "step": 12261
756
+ },
757
+ {
758
+ "epoch": 68.0,
759
+ "eval_loss": 1.4802541732788086,
760
+ "eval_runtime": 169.4775,
761
+ "eval_samples_per_second": 21.59,
762
+ "eval_steps_per_second": 1.351,
763
+ "eval_wer": 0.8127044711014176,
764
+ "step": 12444
765
+ },
766
+ {
767
+ "epoch": 68.31,
768
+ "learning_rate": 9.790449438202247e-05,
769
+ "loss": 0.0867,
770
+ "step": 12500
771
+ },
772
+ {
773
+ "epoch": 69.0,
774
+ "eval_loss": 1.4973595142364502,
775
+ "eval_runtime": 164.1372,
776
+ "eval_samples_per_second": 22.292,
777
+ "eval_steps_per_second": 1.395,
778
+ "eval_wer": 0.8129770992366412,
779
+ "step": 12627
780
+ },
781
+ {
782
+ "epoch": 70.0,
783
+ "eval_loss": 1.4721262454986572,
784
+ "eval_runtime": 163.536,
785
+ "eval_samples_per_second": 22.374,
786
+ "eval_steps_per_second": 1.4,
787
+ "eval_wer": 0.8077971646673937,
788
+ "step": 12810
789
+ },
790
+ {
791
+ "epoch": 71.0,
792
+ "eval_loss": 1.4643745422363281,
793
+ "eval_runtime": 166.8819,
794
+ "eval_samples_per_second": 21.926,
795
+ "eval_steps_per_second": 1.372,
796
+ "eval_wer": 0.819247546346783,
797
+ "step": 12993
798
+ },
799
+ {
800
+ "epoch": 71.04,
801
+ "learning_rate": 8.947752808988763e-05,
802
+ "loss": 0.0827,
803
+ "step": 13000
804
+ },
805
+ {
806
+ "epoch": 72.0,
807
+ "eval_loss": 1.4834694862365723,
808
+ "eval_runtime": 167.6508,
809
+ "eval_samples_per_second": 21.825,
810
+ "eval_steps_per_second": 1.366,
811
+ "eval_wer": 0.8137949836423118,
812
+ "step": 13176
813
+ },
814
+ {
815
+ "epoch": 73.0,
816
+ "eval_loss": 1.4933878183364868,
817
+ "eval_runtime": 170.3063,
818
+ "eval_samples_per_second": 21.485,
819
+ "eval_steps_per_second": 1.345,
820
+ "eval_wer": 0.8121592148309705,
821
+ "step": 13359
822
+ },
823
+ {
824
+ "epoch": 73.77,
825
+ "learning_rate": 8.10505617977528e-05,
826
+ "loss": 0.0734,
827
+ "step": 13500
828
+ },
829
+ {
830
+ "epoch": 74.0,
831
+ "eval_loss": 1.4950696229934692,
832
+ "eval_runtime": 175.7645,
833
+ "eval_samples_per_second": 20.818,
834
+ "eval_steps_per_second": 1.303,
835
+ "eval_wer": 0.8061613958560524,
836
+ "step": 13542
837
+ },
838
+ {
839
+ "epoch": 75.0,
840
+ "eval_loss": 1.490771770477295,
841
+ "eval_runtime": 168.0913,
842
+ "eval_samples_per_second": 21.768,
843
+ "eval_steps_per_second": 1.362,
844
+ "eval_wer": 0.806979280261723,
845
+ "step": 13725
846
+ },
847
+ {
848
+ "epoch": 76.0,
849
+ "eval_loss": 1.4876092672348022,
850
+ "eval_runtime": 166.128,
851
+ "eval_samples_per_second": 22.025,
852
+ "eval_steps_per_second": 1.378,
853
+ "eval_wer": 0.8124318429661941,
854
+ "step": 13908
855
+ },
856
+ {
857
+ "epoch": 76.5,
858
+ "learning_rate": 7.262359550561797e-05,
859
+ "loss": 0.0664,
860
+ "step": 14000
861
+ },
862
+ {
863
+ "epoch": 77.0,
864
+ "eval_loss": 1.493386149406433,
865
+ "eval_runtime": 166.8817,
866
+ "eval_samples_per_second": 21.926,
867
+ "eval_steps_per_second": 1.372,
868
+ "eval_wer": 0.8053435114503816,
869
+ "step": 14091
870
+ },
871
+ {
872
+ "epoch": 78.0,
873
+ "eval_loss": 1.4603490829467773,
874
+ "eval_runtime": 169.3203,
875
+ "eval_samples_per_second": 21.61,
876
+ "eval_steps_per_second": 1.352,
877
+ "eval_wer": 0.8047982551799345,
878
+ "step": 14274
879
+ },
880
+ {
881
+ "epoch": 79.0,
882
+ "eval_loss": 1.4732308387756348,
883
+ "eval_runtime": 165.2553,
884
+ "eval_samples_per_second": 22.142,
885
+ "eval_steps_per_second": 1.386,
886
+ "eval_wer": 0.8072519083969466,
887
+ "step": 14457
888
+ },
889
+ {
890
+ "epoch": 79.23,
891
+ "learning_rate": 6.42134831460674e-05,
892
+ "loss": 0.0602,
893
+ "step": 14500
894
+ },
895
+ {
896
+ "epoch": 80.0,
897
+ "eval_loss": 1.4924767017364502,
898
+ "eval_runtime": 166.2646,
899
+ "eval_samples_per_second": 22.007,
900
+ "eval_steps_per_second": 1.377,
901
+ "eval_wer": 0.8077971646673937,
902
+ "step": 14640
903
+ },
904
+ {
905
+ "epoch": 81.0,
906
+ "eval_loss": 1.4812291860580444,
907
+ "eval_runtime": 166.8878,
908
+ "eval_samples_per_second": 21.925,
909
+ "eval_steps_per_second": 1.372,
910
+ "eval_wer": 0.806434023991276,
911
+ "step": 14823
912
+ },
913
+ {
914
+ "epoch": 81.97,
915
+ "learning_rate": 5.578651685393258e-05,
916
+ "loss": 0.057,
917
+ "step": 15000
918
+ },
919
+ {
920
+ "epoch": 82.0,
921
+ "eval_loss": 1.4949839115142822,
922
+ "eval_runtime": 166.616,
923
+ "eval_samples_per_second": 21.961,
924
+ "eval_steps_per_second": 1.374,
925
+ "eval_wer": 0.8012540894220284,
926
+ "step": 15006
927
+ },
928
+ {
929
+ "epoch": 83.0,
930
+ "eval_loss": 1.4784878492355347,
931
+ "eval_runtime": 172.4918,
932
+ "eval_samples_per_second": 21.213,
933
+ "eval_steps_per_second": 1.328,
934
+ "eval_wer": 0.8056161395856052,
935
+ "step": 15189
936
+ },
937
+ {
938
+ "epoch": 84.0,
939
+ "eval_loss": 1.485625982284546,
940
+ "eval_runtime": 165.5579,
941
+ "eval_samples_per_second": 22.101,
942
+ "eval_steps_per_second": 1.383,
943
+ "eval_wer": 0.7993456924754635,
944
+ "step": 15372
945
+ },
946
+ {
947
+ "epoch": 84.7,
948
+ "learning_rate": 4.735955056179775e-05,
949
+ "loss": 0.0517,
950
+ "step": 15500
951
+ },
952
+ {
953
+ "epoch": 85.0,
954
+ "eval_loss": 1.4754849672317505,
955
+ "eval_runtime": 168.7909,
956
+ "eval_samples_per_second": 21.678,
957
+ "eval_steps_per_second": 1.357,
958
+ "eval_wer": 0.8034351145038168,
959
+ "step": 15555
960
+ },
961
+ {
962
+ "epoch": 86.0,
963
+ "eval_loss": 1.4813350439071655,
964
+ "eval_runtime": 168.272,
965
+ "eval_samples_per_second": 21.745,
966
+ "eval_steps_per_second": 1.361,
967
+ "eval_wer": 0.8034351145038168,
968
+ "step": 15738
969
+ },
970
+ {
971
+ "epoch": 87.0,
972
+ "eval_loss": 1.4965763092041016,
973
+ "eval_runtime": 166.9884,
974
+ "eval_samples_per_second": 21.912,
975
+ "eval_steps_per_second": 1.371,
976
+ "eval_wer": 0.8047982551799345,
977
+ "step": 15921
978
+ },
979
+ {
980
+ "epoch": 87.43,
981
+ "learning_rate": 3.893258426966292e-05,
982
+ "loss": 0.0468,
983
+ "step": 16000
984
+ },
985
+ {
986
+ "epoch": 88.0,
987
+ "eval_loss": 1.4883418083190918,
988
+ "eval_runtime": 166.1387,
989
+ "eval_samples_per_second": 22.024,
990
+ "eval_steps_per_second": 1.378,
991
+ "eval_wer": 0.8001635768811342,
992
+ "step": 16104
993
+ },
994
+ {
995
+ "epoch": 89.0,
996
+ "eval_loss": 1.4746148586273193,
997
+ "eval_runtime": 165.9654,
998
+ "eval_samples_per_second": 22.047,
999
+ "eval_steps_per_second": 1.38,
1000
+ "eval_wer": 0.8023446019629226,
1001
+ "step": 16287
1002
+ },
1003
+ {
1004
+ "epoch": 90.0,
1005
+ "eval_loss": 1.4697260856628418,
1006
+ "eval_runtime": 166.9567,
1007
+ "eval_samples_per_second": 21.916,
1008
+ "eval_steps_per_second": 1.372,
1009
+ "eval_wer": 0.7974372955288986,
1010
+ "step": 16470
1011
+ },
1012
+ {
1013
+ "epoch": 90.16,
1014
+ "learning_rate": 3.0505617977528088e-05,
1015
+ "loss": 0.0426,
1016
+ "step": 16500
1017
+ },
1018
+ {
1019
+ "epoch": 91.0,
1020
+ "eval_loss": 1.4775140285491943,
1021
+ "eval_runtime": 165.779,
1022
+ "eval_samples_per_second": 22.072,
1023
+ "eval_steps_per_second": 1.381,
1024
+ "eval_wer": 0.8004362050163577,
1025
+ "step": 16653
1026
+ },
1027
+ {
1028
+ "epoch": 92.0,
1029
+ "eval_loss": 1.4852207899093628,
1030
+ "eval_runtime": 173.415,
1031
+ "eval_samples_per_second": 21.1,
1032
+ "eval_steps_per_second": 1.321,
1033
+ "eval_wer": 0.8023446019629226,
1034
+ "step": 16836
1035
+ },
1036
+ {
1037
+ "epoch": 92.9,
1038
+ "learning_rate": 2.2078651685393255e-05,
1039
+ "loss": 0.0387,
1040
+ "step": 17000
1041
+ },
1042
+ {
1043
+ "epoch": 93.0,
1044
+ "eval_loss": 1.4868098497390747,
1045
+ "eval_runtime": 166.0768,
1046
+ "eval_samples_per_second": 22.032,
1047
+ "eval_steps_per_second": 1.379,
1048
+ "eval_wer": 0.8004362050163577,
1049
+ "step": 17019
1050
+ },
1051
+ {
1052
+ "epoch": 94.0,
1053
+ "eval_loss": 1.47845458984375,
1054
+ "eval_runtime": 171.2193,
1055
+ "eval_samples_per_second": 21.37,
1056
+ "eval_steps_per_second": 1.337,
1057
+ "eval_wer": 0.802071973827699,
1058
+ "step": 17202
1059
+ },
1060
+ {
1061
+ "epoch": 95.0,
1062
+ "eval_loss": 1.4892385005950928,
1063
+ "eval_runtime": 165.5064,
1064
+ "eval_samples_per_second": 22.108,
1065
+ "eval_steps_per_second": 1.384,
1066
+ "eval_wer": 0.8015267175572519,
1067
+ "step": 17385
1068
+ },
1069
+ {
1070
+ "epoch": 95.63,
1071
+ "learning_rate": 1.3668539325842695e-05,
1072
+ "loss": 0.0359,
1073
+ "step": 17500
1074
+ },
1075
+ {
1076
+ "epoch": 96.0,
1077
+ "eval_loss": 1.486182451248169,
1078
+ "eval_runtime": 174.5056,
1079
+ "eval_samples_per_second": 20.968,
1080
+ "eval_steps_per_second": 1.312,
1081
+ "eval_wer": 0.8017993456924755,
1082
+ "step": 17568
1083
+ },
1084
+ {
1085
+ "epoch": 97.0,
1086
+ "eval_loss": 1.4851171970367432,
1087
+ "eval_runtime": 165.0543,
1088
+ "eval_samples_per_second": 22.168,
1089
+ "eval_steps_per_second": 1.387,
1090
+ "eval_wer": 0.8007088331515813,
1091
+ "step": 17751
1092
+ },
1093
+ {
1094
+ "epoch": 98.0,
1095
+ "eval_loss": 1.48457932472229,
1096
+ "eval_runtime": 169.1905,
1097
+ "eval_samples_per_second": 21.627,
1098
+ "eval_steps_per_second": 1.354,
1099
+ "eval_wer": 0.7998909487459106,
1100
+ "step": 17934
1101
+ },
1102
+ {
1103
+ "epoch": 98.36,
1104
+ "learning_rate": 5.241573033707864e-06,
1105
+ "loss": 0.0347,
1106
+ "step": 18000
1107
+ },
1108
+ {
1109
+ "epoch": 99.0,
1110
+ "eval_loss": 1.4852174520492554,
1111
+ "eval_runtime": 168.792,
1112
+ "eval_samples_per_second": 21.678,
1113
+ "eval_steps_per_second": 1.357,
1114
+ "eval_wer": 0.7993456924754635,
1115
+ "step": 18117
1116
+ },
1117
+ {
1118
+ "epoch": 100.0,
1119
+ "eval_loss": 1.484755277633667,
1120
+ "eval_runtime": 179.7891,
1121
+ "eval_samples_per_second": 20.352,
1122
+ "eval_steps_per_second": 1.274,
1123
+ "eval_wer": 0.8004362050163577,
1124
+ "step": 18300
1125
+ },
1126
+ {
1127
+ "epoch": 100.0,
1128
+ "step": 18300,
1129
+ "total_flos": 1.3576650149787481e+20,
1130
+ "train_loss": 0.34965579027686616,
1131
+ "train_runtime": 66008.5183,
1132
+ "train_samples_per_second": 17.704,
1133
+ "train_steps_per_second": 0.277
1134
  }
1135
  ],
1136
+ "max_steps": 18300,
1137
+ "num_train_epochs": 100,
1138
+ "total_flos": 1.3576650149787481e+20,
1139
  "trial_name": null,
1140
  "trial_params": null
1141
  }