DewiBrynJones committed on
Commit
c8a54e7
1 Parent(s): 53f7c3a

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.3847
21
  - Wer: 0.2967
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-normalized
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-NORMALIZED - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.3847
23
  - Wer: 0.2967
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_loss": 0.9668737053871155,
4
- "eval_runtime": 156.2848,
5
  "eval_samples": 5656,
6
- "eval_samples_per_second": 36.19,
7
- "eval_steps_per_second": 4.524,
8
- "eval_wer": 0.6124682055958152,
9
- "total_flos": 4.036432448909298e+19,
10
- "train_loss": 1.1022147617942597,
11
- "train_runtime": 28048.236,
12
  "train_samples": 22621,
13
- "train_samples_per_second": 12.098,
14
- "train_steps_per_second": 0.378
15
  }
 
1
  {
2
+ "epoch": 3.536067892503536,
3
+ "eval_loss": 0.3846580684185028,
4
+ "eval_runtime": 154.4496,
5
  "eval_samples": 5656,
6
+ "eval_samples_per_second": 36.62,
7
+ "eval_steps_per_second": 4.578,
8
+ "eval_wer": 0.29666778646958136,
9
+ "total_flos": 9.55129266706546e+18,
10
+ "train_loss": 1.1546670959472656,
11
+ "train_runtime": 6557.446,
12
  "train_samples": 22621,
13
+ "train_samples_per_second": 12.2,
14
+ "train_steps_per_second": 0.381
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_loss": 0.9668737053871155,
4
- "eval_runtime": 156.2848,
5
  "eval_samples": 5656,
6
- "eval_samples_per_second": 36.19,
7
- "eval_steps_per_second": 4.524,
8
- "eval_wer": 0.6124682055958152
9
  }
 
1
  {
2
+ "epoch": 3.536067892503536,
3
+ "eval_loss": 0.3846580684185028,
4
+ "eval_runtime": 154.4496,
5
  "eval_samples": 5656,
6
+ "eval_samples_per_second": 36.62,
7
+ "eval_steps_per_second": 4.578,
8
+ "eval_wer": 0.29666778646958136
9
  }
runs/May10_18-59-42_09e070d6a7b1/events.out.tfevents.1715370937.09e070d6a7b1.452.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee0599976dbd99cbdb80f8b30b81cf8b4d0feaf90c1cb5c5e34bf537275fa6a
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 4.036432448909298e+19,
4
- "train_loss": 1.1022147617942597,
5
- "train_runtime": 28048.236,
6
  "train_samples": 22621,
7
- "train_samples_per_second": 12.098,
8
- "train_steps_per_second": 0.378
9
  }
 
1
  {
2
+ "epoch": 3.536067892503536,
3
+ "total_flos": 9.55129266706546e+18,
4
+ "train_loss": 1.1546670959472656,
5
+ "train_runtime": 6557.446,
6
  "train_samples": 22621,
7
+ "train_samples_per_second": 12.2,
8
+ "train_steps_per_second": 0.381
9
  }
trainer_state.json CHANGED
@@ -1,1130 +1,289 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
  "eval_steps": 100,
6
- "global_step": 10605,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.14144271570014144,
13
- "eval_loss": 3.7426819801330566,
14
- "eval_runtime": 154.1912,
15
- "eval_samples_per_second": 36.682,
16
- "eval_steps_per_second": 4.585,
17
  "eval_wer": 1.0,
18
  "step": 100
19
  },
20
  {
21
  "epoch": 0.2828854314002829,
22
- "eval_loss": 2.9179046154022217,
23
- "eval_runtime": 149.8513,
24
- "eval_samples_per_second": 37.744,
25
- "eval_steps_per_second": 4.718,
26
  "eval_wer": 1.0,
27
  "step": 200
28
  },
29
  {
30
  "epoch": 0.4243281471004243,
31
- "eval_loss": 2.8035507202148438,
32
- "eval_runtime": 151.9434,
33
- "eval_samples_per_second": 37.224,
34
- "eval_steps_per_second": 4.653,
35
- "eval_wer": 1.0,
36
  "step": 300
37
  },
38
  {
39
  "epoch": 0.5657708628005658,
40
- "eval_loss": 1.2195814847946167,
41
- "eval_runtime": 152.8075,
42
- "eval_samples_per_second": 37.014,
43
- "eval_steps_per_second": 4.627,
44
- "eval_wer": 0.8933627681528051,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.7072135785007072,
49
- "grad_norm": 2.533352851867676,
50
  "learning_rate": 0.00029699999999999996,
51
- "loss": 3.574,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.7072135785007072,
56
- "eval_loss": 0.9860211610794067,
57
- "eval_runtime": 151.7482,
58
- "eval_samples_per_second": 37.272,
59
- "eval_steps_per_second": 4.659,
60
- "eval_wer": 0.7276159395946313,
61
  "step": 500
62
  },
63
  {
64
  "epoch": 0.8486562942008486,
65
- "eval_loss": 0.8391533493995667,
66
- "eval_runtime": 153.0126,
67
- "eval_samples_per_second": 36.964,
68
- "eval_steps_per_second": 4.621,
69
- "eval_wer": 0.6504295244037049,
70
  "step": 600
71
  },
72
  {
73
  "epoch": 0.9900990099009901,
74
- "eval_loss": 0.7803803086280823,
75
- "eval_runtime": 152.7414,
76
- "eval_samples_per_second": 37.03,
77
- "eval_steps_per_second": 4.629,
78
- "eval_wer": 0.6029178864519844,
79
  "step": 700
80
  },
81
  {
82
  "epoch": 1.1315417256011315,
83
- "eval_loss": 0.6122242212295532,
84
- "eval_runtime": 153.2031,
85
- "eval_samples_per_second": 36.918,
86
- "eval_steps_per_second": 4.615,
87
- "eval_wer": 0.4909056006142919,
88
  "step": 800
89
  },
90
  {
91
  "epoch": 1.272984441301273,
92
- "eval_loss": 0.5901117920875549,
93
- "eval_runtime": 152.5961,
94
- "eval_samples_per_second": 37.065,
95
- "eval_steps_per_second": 4.633,
96
- "eval_wer": 0.48828206235702515,
97
  "step": 900
98
  },
99
  {
100
  "epoch": 1.4144271570014144,
101
- "grad_norm": 0.9860969185829163,
102
- "learning_rate": 0.0002853043047996041,
103
- "loss": 0.811,
104
  "step": 1000
105
  },
106
  {
107
  "epoch": 1.4144271570014144,
108
- "eval_loss": 0.5500049591064453,
109
- "eval_runtime": 153.3918,
110
- "eval_samples_per_second": 36.873,
111
- "eval_steps_per_second": 4.609,
112
- "eval_wer": 0.45078466189950567,
113
  "step": 1000
114
  },
115
  {
116
  "epoch": 1.5558698727015559,
117
- "eval_loss": 0.5231846570968628,
118
- "eval_runtime": 151.7862,
119
- "eval_samples_per_second": 37.263,
120
- "eval_steps_per_second": 4.658,
121
- "eval_wer": 0.41418310377373585,
122
  "step": 1100
123
  },
124
  {
125
  "epoch": 1.6973125884016973,
126
- "eval_loss": 0.518621027469635,
127
- "eval_runtime": 152.9089,
128
- "eval_samples_per_second": 36.989,
129
- "eval_steps_per_second": 4.624,
130
- "eval_wer": 0.4064724608468909,
131
  "step": 1200
132
  },
133
  {
134
  "epoch": 1.8387553041018387,
135
- "eval_loss": 0.49534252285957336,
136
- "eval_runtime": 153.7928,
137
- "eval_samples_per_second": 36.777,
138
- "eval_steps_per_second": 4.597,
139
- "eval_wer": 0.3929388427636736,
140
  "step": 1300
141
  },
142
  {
143
  "epoch": 1.9801980198019802,
144
- "eval_loss": 0.48800522089004517,
145
- "eval_runtime": 152.9541,
146
- "eval_samples_per_second": 36.978,
147
- "eval_steps_per_second": 4.622,
148
- "eval_wer": 0.39279486810321385,
149
  "step": 1400
150
  },
151
  {
152
  "epoch": 2.1216407355021216,
153
- "grad_norm": 0.8855465650558472,
154
- "learning_rate": 0.00027054923305294405,
155
- "loss": 0.6459,
156
  "step": 1500
157
  },
158
  {
159
  "epoch": 2.1216407355021216,
160
- "eval_loss": 0.46446114778518677,
161
- "eval_runtime": 152.609,
162
- "eval_samples_per_second": 37.062,
163
- "eval_steps_per_second": 4.633,
164
- "eval_wer": 0.3691990209723089,
165
  "step": 1500
166
  },
167
  {
168
  "epoch": 2.263083451202263,
169
- "eval_loss": 0.4666256904602051,
170
- "eval_runtime": 153.127,
171
- "eval_samples_per_second": 36.937,
172
- "eval_steps_per_second": 4.617,
173
- "eval_wer": 0.3585928876517733,
174
  "step": 1600
175
  },
176
  {
177
  "epoch": 2.4045261669024045,
178
- "eval_loss": 0.45017901062965393,
179
- "eval_runtime": 154.5149,
180
- "eval_samples_per_second": 36.605,
181
- "eval_steps_per_second": 4.576,
182
- "eval_wer": 0.35934475532306315,
183
  "step": 1700
184
  },
185
  {
186
  "epoch": 2.545968882602546,
187
- "eval_loss": 0.45280084013938904,
188
- "eval_runtime": 152.9066,
189
- "eval_samples_per_second": 36.99,
190
- "eval_steps_per_second": 4.624,
191
- "eval_wer": 0.3637759754283246,
192
  "step": 1800
193
  },
194
  {
195
  "epoch": 2.6874115983026874,
196
- "eval_loss": 0.46647289395332336,
197
- "eval_runtime": 153.103,
198
- "eval_samples_per_second": 36.942,
199
- "eval_steps_per_second": 4.618,
200
- "eval_wer": 0.392618899073763,
201
  "step": 1900
202
  },
203
  {
204
  "epoch": 2.828854314002829,
205
- "grad_norm": 0.6227492690086365,
206
- "learning_rate": 0.00025579416130628403,
207
- "loss": 0.5306,
208
  "step": 2000
209
  },
210
  {
211
  "epoch": 2.828854314002829,
212
- "eval_loss": 0.4328605532646179,
213
- "eval_runtime": 153.5492,
214
- "eval_samples_per_second": 36.835,
215
- "eval_steps_per_second": 4.604,
216
- "eval_wer": 0.3505143094815312,
217
  "step": 2000
218
  },
219
  {
220
  "epoch": 2.9702970297029703,
221
- "eval_loss": 0.4245360791683197,
222
- "eval_runtime": 153.9467,
223
- "eval_samples_per_second": 36.74,
224
- "eval_steps_per_second": 4.592,
225
- "eval_wer": 0.3373806210107021,
226
  "step": 2100
227
  },
228
  {
229
  "epoch": 3.1117397454031117,
230
- "eval_loss": 0.4376748204231262,
231
- "eval_runtime": 152.9335,
232
- "eval_samples_per_second": 36.983,
233
- "eval_steps_per_second": 4.623,
234
- "eval_wer": 0.3340372094511366,
235
  "step": 2200
236
  },
237
  {
238
  "epoch": 3.253182461103253,
239
- "eval_loss": 0.4271674156188965,
240
- "eval_runtime": 153.1738,
241
- "eval_samples_per_second": 36.925,
242
- "eval_steps_per_second": 4.616,
243
- "eval_wer": 0.33373326294572153,
244
  "step": 2300
245
  },
246
  {
247
  "epoch": 3.3946251768033946,
248
- "eval_loss": 0.43350949883461,
249
- "eval_runtime": 153.2683,
250
- "eval_samples_per_second": 36.903,
251
- "eval_steps_per_second": 4.613,
252
- "eval_wer": 0.332597462846539,
253
  "step": 2400
254
  },
255
  {
256
  "epoch": 3.536067892503536,
257
- "grad_norm": 0.6211841106414795,
258
- "learning_rate": 0.0002410094012864918,
259
- "loss": 0.4628,
260
  "step": 2500
261
  },
262
  {
263
  "epoch": 3.536067892503536,
264
- "eval_loss": 0.42679545283317566,
265
- "eval_runtime": 154.4777,
266
- "eval_samples_per_second": 36.614,
267
- "eval_steps_per_second": 4.577,
268
- "eval_wer": 0.3274783638079698,
269
- "step": 2500
270
- },
271
- {
272
- "epoch": 3.6775106082036775,
273
- "eval_loss": 0.4502430558204651,
274
- "eval_runtime": 154.6864,
275
- "eval_samples_per_second": 36.564,
276
- "eval_steps_per_second": 4.571,
277
- "eval_wer": 0.34091599878421397,
278
- "step": 2600
279
- },
280
- {
281
- "epoch": 3.818953323903819,
282
- "eval_loss": 0.6344878673553467,
283
- "eval_runtime": 153.1131,
284
- "eval_samples_per_second": 36.94,
285
- "eval_steps_per_second": 4.618,
286
- "eval_wer": 0.43904272847978754,
287
- "step": 2700
288
- },
289
- {
290
- "epoch": 3.9603960396039604,
291
- "eval_loss": 1.0202795267105103,
292
- "eval_runtime": 153.5324,
293
- "eval_samples_per_second": 36.839,
294
- "eval_steps_per_second": 4.605,
295
- "eval_wer": 0.6403193038025308,
296
- "step": 2800
297
- },
298
- {
299
- "epoch": 4.101838755304102,
300
- "eval_loss": 1.2207801342010498,
301
- "eval_runtime": 154.3697,
302
- "eval_samples_per_second": 36.639,
303
- "eval_steps_per_second": 4.58,
304
- "eval_wer": 0.7921805762185855,
305
- "step": 2900
306
- },
307
- {
308
- "epoch": 4.243281471004243,
309
- "grad_norm": 2.4671809673309326,
310
- "learning_rate": 0.00022619495299356753,
311
- "loss": 0.8685,
312
- "step": 3000
313
- },
314
- {
315
- "epoch": 4.243281471004243,
316
- "eval_loss": 1.101838231086731,
317
- "eval_runtime": 155.2474,
318
- "eval_samples_per_second": 36.432,
319
- "eval_steps_per_second": 4.554,
320
- "eval_wer": 0.7387019884500328,
321
- "step": 3000
322
- },
323
- {
324
- "epoch": 4.384724186704385,
325
- "eval_loss": 1.24972665309906,
326
- "eval_runtime": 154.4564,
327
- "eval_samples_per_second": 36.619,
328
- "eval_steps_per_second": 4.577,
329
- "eval_wer": 0.8061941098366687,
330
- "step": 3100
331
- },
332
- {
333
- "epoch": 4.526166902404526,
334
- "eval_loss": 1.6164859533309937,
335
- "eval_runtime": 153.9159,
336
- "eval_samples_per_second": 36.747,
337
- "eval_steps_per_second": 4.593,
338
- "eval_wer": 0.9616227543952265,
339
- "step": 3200
340
- },
341
- {
342
- "epoch": 4.667609618104668,
343
- "eval_loss": 1.4655081033706665,
344
- "eval_runtime": 153.645,
345
- "eval_samples_per_second": 36.812,
346
- "eval_steps_per_second": 4.602,
347
- "eval_wer": 0.9216617875253955,
348
- "step": 3300
349
- },
350
- {
351
- "epoch": 4.809052333804809,
352
- "eval_loss": 1.0288450717926025,
353
- "eval_runtime": 152.6496,
354
- "eval_samples_per_second": 37.052,
355
- "eval_steps_per_second": 4.632,
356
- "eval_wer": 0.7464606229303643,
357
- "step": 3400
358
- },
359
- {
360
- "epoch": 4.9504950495049505,
361
- "grad_norm": 2.009023666381836,
362
- "learning_rate": 0.00021143988124690746,
363
- "loss": 1.3918,
364
- "step": 3500
365
- },
366
- {
367
- "epoch": 4.9504950495049505,
368
- "eval_loss": 0.9067263603210449,
369
- "eval_runtime": 156.1428,
370
- "eval_samples_per_second": 36.223,
371
- "eval_steps_per_second": 4.528,
372
- "eval_wer": 0.5948393082817424,
373
- "step": 3500
374
- },
375
- {
376
- "epoch": 5.091937765205092,
377
- "eval_loss": 0.9486163258552551,
378
- "eval_runtime": 154.3538,
379
- "eval_samples_per_second": 36.643,
380
- "eval_steps_per_second": 4.58,
381
- "eval_wer": 0.6352801906864392,
382
- "step": 3600
383
- },
384
- {
385
- "epoch": 5.233380480905233,
386
- "eval_loss": 0.8674383163452148,
387
- "eval_runtime": 155.8543,
388
- "eval_samples_per_second": 36.29,
389
- "eval_steps_per_second": 4.536,
390
- "eval_wer": 0.5428324614867783,
391
- "step": 3700
392
- },
393
- {
394
- "epoch": 5.374823196605375,
395
- "eval_loss": 0.9402504563331604,
396
- "eval_runtime": 154.1915,
397
- "eval_samples_per_second": 36.682,
398
- "eval_steps_per_second": 4.585,
399
- "eval_wer": 0.5792900449520885,
400
- "step": 3800
401
- },
402
- {
403
- "epoch": 5.516265912305516,
404
- "eval_loss": 0.948098361492157,
405
- "eval_runtime": 159.4305,
406
- "eval_samples_per_second": 35.476,
407
- "eval_steps_per_second": 4.435,
408
- "eval_wer": 0.5763625601894067,
409
- "step": 3900
410
- },
411
- {
412
- "epoch": 5.657708628005658,
413
- "grad_norm": 1.2370288372039795,
414
- "learning_rate": 0.0001966848095002474,
415
- "loss": 1.0402,
416
- "step": 4000
417
- },
418
- {
419
- "epoch": 5.657708628005658,
420
- "eval_loss": 1.0175639390945435,
421
- "eval_runtime": 154.2179,
422
- "eval_samples_per_second": 36.675,
423
- "eval_steps_per_second": 4.584,
424
- "eval_wer": 0.8256946777367183,
425
- "step": 4000
426
- },
427
- {
428
- "epoch": 5.799151343705799,
429
- "eval_loss": 0.9857003092765808,
430
- "eval_runtime": 154.4099,
431
- "eval_samples_per_second": 36.63,
432
- "eval_steps_per_second": 4.579,
433
- "eval_wer": 0.6342883652477164,
434
- "step": 4100
435
- },
436
- {
437
- "epoch": 5.9405940594059405,
438
- "eval_loss": 1.3289028406143188,
439
- "eval_runtime": 155.7394,
440
- "eval_samples_per_second": 36.317,
441
- "eval_steps_per_second": 4.54,
442
- "eval_wer": 0.9014093519540561,
443
- "step": 4200
444
- },
445
- {
446
- "epoch": 6.082036775106082,
447
- "eval_loss": 2.0890820026397705,
448
- "eval_runtime": 154.2749,
449
- "eval_samples_per_second": 36.662,
450
- "eval_steps_per_second": 4.583,
451
- "eval_wer": 0.7125305946153477,
452
- "step": 4300
453
- },
454
- {
455
- "epoch": 6.223479490806223,
456
- "eval_loss": 1.256324291229248,
457
- "eval_runtime": 154.5375,
458
- "eval_samples_per_second": 36.6,
459
- "eval_steps_per_second": 4.575,
460
- "eval_wer": 0.7696085488953944,
461
- "step": 4400
462
- },
463
- {
464
- "epoch": 6.364922206506365,
465
- "grad_norm": 0.9906980395317078,
466
- "learning_rate": 0.0001819000494804552,
467
- "loss": 1.2886,
468
- "step": 4500
469
- },
470
- {
471
- "epoch": 6.364922206506365,
472
- "eval_loss": 1.1441457271575928,
473
- "eval_runtime": 155.0449,
474
- "eval_samples_per_second": 36.48,
475
- "eval_steps_per_second": 4.56,
476
- "eval_wer": 0.692726080209883,
477
- "step": 4500
478
- },
479
- {
480
- "epoch": 6.506364922206506,
481
- "eval_loss": 1.0626095533370972,
482
- "eval_runtime": 155.5445,
483
- "eval_samples_per_second": 36.363,
484
- "eval_steps_per_second": 4.545,
485
- "eval_wer": 0.6573083137367823,
486
- "step": 4600
487
- },
488
- {
489
- "epoch": 6.647807637906648,
490
- "eval_loss": 0.9997339248657227,
491
- "eval_runtime": 155.8362,
492
- "eval_samples_per_second": 36.295,
493
- "eval_steps_per_second": 4.537,
494
- "eval_wer": 0.6422869574954808,
495
- "step": 4700
496
- },
497
- {
498
- "epoch": 6.789250353606789,
499
- "eval_loss": 0.9813728928565979,
500
- "eval_runtime": 155.2999,
501
- "eval_samples_per_second": 36.42,
502
- "eval_steps_per_second": 4.552,
503
- "eval_wer": 0.6380317064196701,
504
- "step": 4800
505
- },
506
- {
507
- "epoch": 6.930693069306931,
508
- "eval_loss": 1.0955251455307007,
509
- "eval_runtime": 153.746,
510
- "eval_samples_per_second": 36.788,
511
- "eval_steps_per_second": 4.598,
512
- "eval_wer": 0.7651133400521508,
513
- "step": 4900
514
- },
515
- {
516
- "epoch": 7.072135785007072,
517
- "grad_norm": 1.6002442836761475,
518
- "learning_rate": 0.00016708560118753091,
519
- "loss": 1.0984,
520
- "step": 5000
521
- },
522
- {
523
- "epoch": 7.072135785007072,
524
- "eval_loss": 0.9212619066238403,
525
- "eval_runtime": 155.7654,
526
- "eval_samples_per_second": 36.311,
527
- "eval_steps_per_second": 4.539,
528
- "eval_wer": 0.5882964598230711,
529
- "step": 5000
530
- },
531
- {
532
- "epoch": 7.2135785007072135,
533
- "eval_loss": 0.8884870409965515,
534
- "eval_runtime": 153.6804,
535
- "eval_samples_per_second": 36.804,
536
- "eval_steps_per_second": 4.6,
537
- "eval_wer": 0.5932715842011806,
538
- "step": 5100
539
- },
540
- {
541
- "epoch": 7.355021216407355,
542
- "eval_loss": 0.900116503238678,
543
- "eval_runtime": 155.054,
544
- "eval_samples_per_second": 36.478,
545
- "eval_steps_per_second": 4.56,
546
- "eval_wer": 0.5898641839036329,
547
- "step": 5200
548
- },
549
- {
550
- "epoch": 7.496463932107496,
551
- "eval_loss": 0.8783684372901917,
552
- "eval_runtime": 156.2294,
553
- "eval_samples_per_second": 36.203,
554
- "eval_steps_per_second": 4.525,
555
- "eval_wer": 0.5858968821487418,
556
- "step": 5300
557
- },
558
- {
559
- "epoch": 7.637906647807638,
560
- "eval_loss": 0.9072028398513794,
561
- "eval_runtime": 154.0671,
562
- "eval_samples_per_second": 36.711,
563
- "eval_steps_per_second": 4.589,
564
- "eval_wer": 0.5897522036121643,
565
- "step": 5400
566
- },
567
- {
568
- "epoch": 7.779349363507779,
569
- "grad_norm": 1.3490877151489258,
570
- "learning_rate": 0.00015230084116773872,
571
- "loss": 0.9659,
572
- "step": 5500
573
- },
574
- {
575
- "epoch": 7.779349363507779,
576
- "eval_loss": 0.8811922669410706,
577
- "eval_runtime": 153.8934,
578
- "eval_samples_per_second": 36.753,
579
- "eval_steps_per_second": 4.594,
580
- "eval_wer": 0.5841051974852426,
581
- "step": 5500
582
- },
583
- {
584
- "epoch": 7.920792079207921,
585
- "eval_loss": 0.891165018081665,
586
- "eval_runtime": 153.7325,
587
- "eval_samples_per_second": 36.791,
588
- "eval_steps_per_second": 4.599,
589
- "eval_wer": 0.5855129497208491,
590
- "step": 5600
591
- },
592
- {
593
- "epoch": 8.062234794908063,
594
- "eval_loss": 0.8815582990646362,
595
- "eval_runtime": 154.6799,
596
- "eval_samples_per_second": 36.566,
597
- "eval_steps_per_second": 4.571,
598
- "eval_wer": 0.5807137943721905,
599
- "step": 5700
600
- },
601
- {
602
- "epoch": 8.203677510608204,
603
- "eval_loss": 0.891440749168396,
604
- "eval_runtime": 155.4972,
605
- "eval_samples_per_second": 36.374,
606
- "eval_steps_per_second": 4.547,
607
- "eval_wer": 0.5803138647598023,
608
- "step": 5800
609
- },
610
- {
611
- "epoch": 8.345120226308346,
612
- "eval_loss": 0.8956438899040222,
613
- "eval_runtime": 154.6865,
614
- "eval_samples_per_second": 36.564,
615
- "eval_steps_per_second": 4.571,
616
- "eval_wer": 0.5810337380621011,
617
- "step": 5900
618
- },
619
- {
620
- "epoch": 8.486562942008486,
621
- "grad_norm": 1.257432460784912,
622
- "learning_rate": 0.00013754576942107867,
623
- "loss": 0.9679,
624
- "step": 6000
625
- },
626
- {
627
- "epoch": 8.486562942008486,
628
- "eval_loss": 0.9162164330482483,
629
- "eval_runtime": 155.1949,
630
- "eval_samples_per_second": 36.444,
631
- "eval_steps_per_second": 4.556,
632
- "eval_wer": 0.5780262673769416,
633
- "step": 6000
634
- },
635
- {
636
- "epoch": 8.628005657708629,
637
- "eval_loss": 0.9409377574920654,
638
- "eval_runtime": 154.6732,
639
- "eval_samples_per_second": 36.567,
640
- "eval_steps_per_second": 4.571,
641
- "eval_wer": 0.5810177408776055,
642
- "step": 6100
643
- },
644
- {
645
- "epoch": 8.76944837340877,
646
- "eval_loss": 0.9370973706245422,
647
- "eval_runtime": 155.5326,
648
- "eval_samples_per_second": 36.365,
649
- "eval_steps_per_second": 4.546,
650
- "eval_wer": 0.5780742589304283,
651
- "step": 6200
652
- },
653
- {
654
- "epoch": 8.910891089108912,
655
- "eval_loss": 0.941677987575531,
656
- "eval_runtime": 155.4822,
657
- "eval_samples_per_second": 36.377,
658
- "eval_steps_per_second": 4.547,
659
- "eval_wer": 0.5790020956311689,
660
- "step": 6300
661
- },
662
- {
663
- "epoch": 9.052333804809052,
664
- "eval_loss": 0.9663541913032532,
665
- "eval_runtime": 155.844,
666
- "eval_samples_per_second": 36.293,
667
- "eval_steps_per_second": 4.537,
668
- "eval_wer": 0.5783782054358433,
669
- "step": 6400
670
- },
671
- {
672
- "epoch": 9.193776520509195,
673
- "grad_norm": 1.5867938995361328,
674
- "learning_rate": 0.00012273132112815437,
675
- "loss": 1.0241,
676
- "step": 6500
677
- },
678
- {
679
- "epoch": 9.193776520509195,
680
- "eval_loss": 0.9720383286476135,
681
- "eval_runtime": 155.604,
682
- "eval_samples_per_second": 36.349,
683
- "eval_steps_per_second": 4.544,
684
- "eval_wer": 0.5775143574730848,
685
- "step": 6500
686
- },
687
- {
688
- "epoch": 9.335219236209335,
689
- "eval_loss": 0.9840742349624634,
690
- "eval_runtime": 155.8175,
691
- "eval_samples_per_second": 36.299,
692
- "eval_steps_per_second": 4.537,
693
- "eval_wer": 0.5783942026203388,
694
- "step": 6600
695
- },
696
- {
697
- "epoch": 9.476661951909477,
698
- "eval_loss": 0.9574136137962341,
699
- "eval_runtime": 155.2609,
700
- "eval_samples_per_second": 36.429,
701
- "eval_steps_per_second": 4.554,
702
- "eval_wer": 0.5886803922509638,
703
- "step": 6700
704
- },
705
- {
706
- "epoch": 9.618104667609618,
707
- "eval_loss": 1.0725222826004028,
708
- "eval_runtime": 154.2708,
709
- "eval_samples_per_second": 36.663,
710
- "eval_steps_per_second": 4.583,
711
- "eval_wer": 0.606837196653389,
712
- "step": 6800
713
- },
714
- {
715
- "epoch": 9.75954738330976,
716
- "eval_loss": 1.0362112522125244,
717
- "eval_runtime": 155.3381,
718
- "eval_samples_per_second": 36.411,
719
- "eval_steps_per_second": 4.551,
720
- "eval_wer": 0.5999584073203116,
721
- "step": 6900
722
- },
723
- {
724
- "epoch": 9.900990099009901,
725
- "grad_norm": 0.6058325171470642,
726
- "learning_rate": 0.00010794656110836219,
727
- "loss": 1.0797,
728
- "step": 7000
729
- },
730
- {
731
- "epoch": 9.900990099009901,
732
- "eval_loss": 1.0116764307022095,
733
- "eval_runtime": 155.5442,
734
- "eval_samples_per_second": 36.363,
735
- "eval_steps_per_second": 4.545,
736
- "eval_wer": 0.5914319079841948,
737
- "step": 7000
738
- },
739
- {
740
- "epoch": 10.042432814710043,
741
- "eval_loss": 0.9563263058662415,
742
- "eval_runtime": 155.5047,
743
- "eval_samples_per_second": 36.372,
744
- "eval_steps_per_second": 4.546,
745
- "eval_wer": 0.6058293740301707,
746
- "step": 7100
747
- },
748
- {
749
- "epoch": 10.183875530410184,
750
- "eval_loss": 0.9663692116737366,
751
- "eval_runtime": 155.3578,
752
- "eval_samples_per_second": 36.406,
753
- "eval_steps_per_second": 4.551,
754
- "eval_wer": 0.5978307817824063,
755
- "step": 7200
756
- },
757
- {
758
- "epoch": 10.325318246110326,
759
- "eval_loss": 1.0209406614303589,
760
- "eval_runtime": 155.4685,
761
- "eval_samples_per_second": 36.38,
762
- "eval_steps_per_second": 4.548,
763
- "eval_wer": 0.6022140103341812,
764
- "step": 7300
765
- },
766
- {
767
- "epoch": 10.466760961810467,
768
- "eval_loss": 0.9848981499671936,
769
- "eval_runtime": 156.2008,
770
- "eval_samples_per_second": 36.21,
771
- "eval_steps_per_second": 4.526,
772
- "eval_wer": 0.5974788437235047,
773
- "step": 7400
774
- },
775
- {
776
- "epoch": 10.608203677510609,
777
- "grad_norm": 0.0,
778
- "learning_rate": 9.319148936170212e-05,
779
- "loss": 1.0701,
780
- "step": 7500
781
- },
782
- {
783
- "epoch": 10.608203677510609,
784
- "eval_loss": 0.9718888401985168,
785
- "eval_runtime": 156.5494,
786
- "eval_samples_per_second": 36.129,
787
- "eval_steps_per_second": 4.516,
788
- "eval_wer": 0.6057013965542064,
789
- "step": 7500
790
- },
791
- {
792
- "epoch": 10.74964639321075,
793
- "eval_loss": 0.966968834400177,
794
- "eval_runtime": 155.045,
795
- "eval_samples_per_second": 36.48,
796
- "eval_steps_per_second": 4.56,
797
- "eval_wer": 0.6122602421973733,
798
- "step": 7600
799
- },
800
- {
801
- "epoch": 10.891089108910892,
802
- "eval_loss": 0.9668737053871155,
803
- "eval_runtime": 156.2212,
804
- "eval_samples_per_second": 36.205,
805
- "eval_steps_per_second": 4.526,
806
- "eval_wer": 0.6124682055958152,
807
- "step": 7700
808
- },
809
- {
810
- "epoch": 11.032531824611032,
811
- "eval_loss": 0.9668737053871155,
812
- "eval_runtime": 156.7045,
813
- "eval_samples_per_second": 36.093,
814
- "eval_steps_per_second": 4.512,
815
- "eval_wer": 0.6124682055958152,
816
- "step": 7800
817
- },
818
- {
819
- "epoch": 11.173974540311175,
820
- "eval_loss": 0.9668737053871155,
821
- "eval_runtime": 156.3156,
822
- "eval_samples_per_second": 36.183,
823
- "eval_steps_per_second": 4.523,
824
- "eval_wer": 0.6124682055958152,
825
- "step": 7900
826
- },
827
- {
828
- "epoch": 11.315417256011315,
829
- "grad_norm": 0.0,
830
- "learning_rate": 7.843641761504205e-05,
831
- "loss": 1.0518,
832
- "step": 8000
833
- },
834
- {
835
- "epoch": 11.315417256011315,
836
- "eval_loss": 0.9668737053871155,
837
- "eval_runtime": 156.4393,
838
- "eval_samples_per_second": 36.155,
839
- "eval_steps_per_second": 4.519,
840
- "eval_wer": 0.6124682055958152,
841
- "step": 8000
842
- },
843
- {
844
- "epoch": 11.456859971711458,
845
- "eval_loss": 0.9668737053871155,
846
- "eval_runtime": 155.12,
847
- "eval_samples_per_second": 36.462,
848
- "eval_steps_per_second": 4.558,
849
- "eval_wer": 0.6124682055958152,
850
- "step": 8100
851
- },
852
- {
853
- "epoch": 11.598302687411598,
854
- "eval_loss": 0.9668737053871155,
855
- "eval_runtime": 155.4065,
856
- "eval_samples_per_second": 36.395,
857
- "eval_steps_per_second": 4.549,
858
- "eval_wer": 0.6124682055958152,
859
- "step": 8200
860
- },
861
- {
862
- "epoch": 11.73974540311174,
863
- "eval_loss": 0.9668737053871155,
864
- "eval_runtime": 154.2056,
865
- "eval_samples_per_second": 36.678,
866
- "eval_steps_per_second": 4.585,
867
- "eval_wer": 0.6124682055958152,
868
- "step": 8300
869
- },
870
- {
871
- "epoch": 11.881188118811881,
872
- "eval_loss": 0.9668737053871155,
873
- "eval_runtime": 155.9204,
874
- "eval_samples_per_second": 36.275,
875
- "eval_steps_per_second": 4.534,
876
- "eval_wer": 0.6124682055958152,
877
- "step": 8400
878
- },
879
- {
880
- "epoch": 12.022630834512023,
881
- "grad_norm": 0.0,
882
- "learning_rate": 6.362196932211776e-05,
883
- "loss": 1.0594,
884
- "step": 8500
885
- },
886
- {
887
- "epoch": 12.022630834512023,
888
- "eval_loss": 0.9668737053871155,
889
- "eval_runtime": 154.4966,
890
- "eval_samples_per_second": 36.609,
891
- "eval_steps_per_second": 4.576,
892
- "eval_wer": 0.6124682055958152,
893
- "step": 8500
894
- },
895
- {
896
- "epoch": 12.164073550212164,
897
- "eval_loss": 0.9668737053871155,
898
- "eval_runtime": 154.0012,
899
- "eval_samples_per_second": 36.727,
900
- "eval_steps_per_second": 4.591,
901
- "eval_wer": 0.6124682055958152,
902
- "step": 8600
903
- },
904
- {
905
- "epoch": 12.305516265912306,
906
- "eval_loss": 0.9668737053871155,
907
- "eval_runtime": 155.297,
908
- "eval_samples_per_second": 36.421,
909
- "eval_steps_per_second": 4.553,
910
- "eval_wer": 0.6124682055958152,
911
- "step": 8700
912
- },
913
- {
914
- "epoch": 12.446958981612447,
915
- "eval_loss": 0.9668737053871155,
916
- "eval_runtime": 156.7077,
917
- "eval_samples_per_second": 36.093,
918
- "eval_steps_per_second": 4.512,
919
- "eval_wer": 0.6124682055958152,
920
- "step": 8800
921
- },
922
- {
923
- "epoch": 12.58840169731259,
924
- "eval_loss": 0.9668737053871155,
925
- "eval_runtime": 155.8279,
926
- "eval_samples_per_second": 36.296,
927
- "eval_steps_per_second": 4.537,
928
- "eval_wer": 0.6124682055958152,
929
- "step": 8900
930
- },
931
- {
932
- "epoch": 12.72984441301273,
933
- "grad_norm": 0.0,
934
- "learning_rate": 4.8866897575457694e-05,
935
- "loss": 1.0584,
936
- "step": 9000
937
- },
938
- {
939
- "epoch": 12.72984441301273,
940
- "eval_loss": 0.9668737053871155,
941
- "eval_runtime": 155.3599,
942
- "eval_samples_per_second": 36.406,
943
- "eval_steps_per_second": 4.551,
944
- "eval_wer": 0.6124682055958152,
945
- "step": 9000
946
- },
947
- {
948
- "epoch": 12.871287128712872,
949
- "eval_loss": 0.9668737053871155,
950
- "eval_runtime": 155.3076,
951
- "eval_samples_per_second": 36.418,
952
- "eval_steps_per_second": 4.552,
953
- "eval_wer": 0.6124682055958152,
954
- "step": 9100
955
- },
956
- {
957
- "epoch": 13.012729844413013,
958
- "eval_loss": 0.9668737053871155,
959
- "eval_runtime": 155.448,
960
- "eval_samples_per_second": 36.385,
961
- "eval_steps_per_second": 4.548,
962
- "eval_wer": 0.6124682055958152,
963
- "step": 9200
964
- },
965
- {
966
- "epoch": 13.154172560113155,
967
- "eval_loss": 0.9668737053871155,
968
- "eval_runtime": 155.6022,
969
- "eval_samples_per_second": 36.349,
970
- "eval_steps_per_second": 4.544,
971
- "eval_wer": 0.6124682055958152,
972
- "step": 9300
973
- },
974
- {
975
- "epoch": 13.295615275813295,
976
- "eval_loss": 0.9668737053871155,
977
- "eval_runtime": 154.7567,
978
- "eval_samples_per_second": 36.548,
979
- "eval_steps_per_second": 4.568,
980
- "eval_wer": 0.6124682055958152,
981
- "step": 9400
982
- },
983
- {
984
- "epoch": 13.437057991513438,
985
- "grad_norm": 0.0,
986
- "learning_rate": 3.408213755566551e-05,
987
- "loss": 1.0556,
988
- "step": 9500
989
- },
990
- {
991
- "epoch": 13.437057991513438,
992
- "eval_loss": 0.9668737053871155,
993
- "eval_runtime": 154.3187,
994
- "eval_samples_per_second": 36.651,
995
- "eval_steps_per_second": 4.581,
996
- "eval_wer": 0.6124682055958152,
997
- "step": 9500
998
- },
999
- {
1000
- "epoch": 13.578500707213578,
1001
- "eval_loss": 0.9668737053871155,
1002
- "eval_runtime": 155.5249,
1003
- "eval_samples_per_second": 36.367,
1004
- "eval_steps_per_second": 4.546,
1005
- "eval_wer": 0.6124682055958152,
1006
- "step": 9600
1007
- },
1008
- {
1009
- "epoch": 13.71994342291372,
1010
- "eval_loss": 0.9668737053871155,
1011
- "eval_runtime": 155.212,
1012
- "eval_samples_per_second": 36.44,
1013
- "eval_steps_per_second": 4.555,
1014
- "eval_wer": 0.6124682055958152,
1015
- "step": 9700
1016
- },
1017
- {
1018
- "epoch": 13.861386138613861,
1019
- "eval_loss": 0.9668737053871155,
1020
- "eval_runtime": 154.6321,
1021
  "eval_samples_per_second": 36.577,
1022
  "eval_steps_per_second": 4.572,
1023
- "eval_wer": 0.6124682055958152,
1024
- "step": 9800
1025
- },
1026
- {
1027
- "epoch": 14.002828854314004,
1028
- "eval_loss": 0.9668737053871155,
1029
- "eval_runtime": 155.2988,
1030
- "eval_samples_per_second": 36.42,
1031
- "eval_steps_per_second": 4.553,
1032
- "eval_wer": 0.6124682055958152,
1033
- "step": 9900
1034
- },
1035
- {
1036
- "epoch": 14.144271570014144,
1037
- "grad_norm": 0.0,
1038
- "learning_rate": 1.9297377535873327e-05,
1039
- "loss": 1.0511,
1040
- "step": 10000
1041
- },
1042
- {
1043
- "epoch": 14.144271570014144,
1044
- "eval_loss": 0.9668737053871155,
1045
- "eval_runtime": 155.9222,
1046
- "eval_samples_per_second": 36.274,
1047
- "eval_steps_per_second": 4.534,
1048
- "eval_wer": 0.6124682055958152,
1049
- "step": 10000
1050
- },
1051
- {
1052
- "epoch": 14.285714285714286,
1053
- "eval_loss": 0.9668737053871155,
1054
- "eval_runtime": 156.6698,
1055
- "eval_samples_per_second": 36.101,
1056
- "eval_steps_per_second": 4.513,
1057
- "eval_wer": 0.6124682055958152,
1058
- "step": 10100
1059
- },
1060
- {
1061
- "epoch": 14.427157001414427,
1062
- "eval_loss": 0.9668737053871155,
1063
- "eval_runtime": 155.8474,
1064
- "eval_samples_per_second": 36.292,
1065
- "eval_steps_per_second": 4.536,
1066
- "eval_wer": 0.6124682055958152,
1067
- "step": 10200
1068
- },
1069
- {
1070
- "epoch": 14.56859971711457,
1071
- "eval_loss": 0.9668737053871155,
1072
- "eval_runtime": 154.9914,
1073
- "eval_samples_per_second": 36.492,
1074
- "eval_steps_per_second": 4.562,
1075
- "eval_wer": 0.6124682055958152,
1076
- "step": 10300
1077
- },
1078
- {
1079
- "epoch": 14.71004243281471,
1080
- "eval_loss": 0.9668737053871155,
1081
- "eval_runtime": 155.7638,
1082
- "eval_samples_per_second": 36.311,
1083
- "eval_steps_per_second": 4.539,
1084
- "eval_wer": 0.6124682055958152,
1085
- "step": 10400
1086
- },
1087
- {
1088
- "epoch": 14.851485148514852,
1089
- "grad_norm": 0.0,
1090
- "learning_rate": 4.512617516081148e-06,
1091
- "loss": 1.0585,
1092
- "step": 10500
1093
- },
1094
- {
1095
- "epoch": 14.851485148514852,
1096
- "eval_loss": 0.9668737053871155,
1097
- "eval_runtime": 154.954,
1098
- "eval_samples_per_second": 36.501,
1099
- "eval_steps_per_second": 4.563,
1100
- "eval_wer": 0.6124682055958152,
1101
- "step": 10500
1102
- },
1103
- {
1104
- "epoch": 14.992927864214993,
1105
- "eval_loss": 0.9668737053871155,
1106
- "eval_runtime": 154.7299,
1107
- "eval_samples_per_second": 36.554,
1108
- "eval_steps_per_second": 4.569,
1109
- "eval_wer": 0.6124682055958152,
1110
- "step": 10600
1111
  },
1112
  {
1113
- "epoch": 15.0,
1114
- "step": 10605,
1115
- "total_flos": 4.036432448909298e+19,
1116
- "train_loss": 1.1022147617942597,
1117
- "train_runtime": 28048.236,
1118
- "train_samples_per_second": 12.098,
1119
- "train_steps_per_second": 0.378
1120
  }
1121
  ],
1122
  "logging_steps": 500,
1123
- "max_steps": 10605,
1124
  "num_input_tokens_seen": 0,
1125
- "num_train_epochs": 15,
1126
  "save_steps": 400,
1127
- "total_flos": 4.036432448909298e+19,
1128
  "train_batch_size": 16,
1129
  "trial_name": null,
1130
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.536067892503536,
5
  "eval_steps": 100,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.14144271570014144,
13
+ "eval_loss": 3.7463672161102295,
14
+ "eval_runtime": 153.5435,
15
+ "eval_samples_per_second": 36.836,
16
+ "eval_steps_per_second": 4.605,
17
  "eval_wer": 1.0,
18
  "step": 100
19
  },
20
  {
21
  "epoch": 0.2828854314002829,
22
+ "eval_loss": 2.9399216175079346,
23
+ "eval_runtime": 150.7703,
24
+ "eval_samples_per_second": 37.514,
25
+ "eval_steps_per_second": 4.689,
26
  "eval_wer": 1.0,
27
  "step": 200
28
  },
29
  {
30
  "epoch": 0.4243281471004243,
31
+ "eval_loss": 2.5961458683013916,
32
+ "eval_runtime": 151.4236,
33
+ "eval_samples_per_second": 37.352,
34
+ "eval_steps_per_second": 4.669,
35
+ "eval_wer": 0.9991041576682503,
36
  "step": 300
37
  },
38
  {
39
  "epoch": 0.5657708628005658,
40
+ "eval_loss": 1.1618728637695312,
41
+ "eval_runtime": 152.5862,
42
+ "eval_samples_per_second": 37.068,
43
+ "eval_steps_per_second": 4.633,
44
+ "eval_wer": 0.7905328662155461,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.7072135785007072,
49
+ "grad_norm": 1.7047498226165771,
50
  "learning_rate": 0.00029699999999999996,
51
+ "loss": 3.5448,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.7072135785007072,
56
+ "eval_loss": 0.946560800075531,
57
+ "eval_runtime": 153.1427,
58
+ "eval_samples_per_second": 36.933,
59
+ "eval_steps_per_second": 4.617,
60
+ "eval_wer": 0.6897506038937147,
61
  "step": 500
62
  },
63
  {
64
  "epoch": 0.8486562942008486,
65
+ "eval_loss": 0.7894724607467651,
66
+ "eval_runtime": 151.7162,
67
+ "eval_samples_per_second": 37.28,
68
+ "eval_steps_per_second": 4.66,
69
+ "eval_wer": 0.6110604533602086,
70
  "step": 600
71
  },
72
  {
73
  "epoch": 0.9900990099009901,
74
+ "eval_loss": 0.6820164918899536,
75
+ "eval_runtime": 152.0086,
76
+ "eval_samples_per_second": 37.208,
77
+ "eval_steps_per_second": 4.651,
78
+ "eval_wer": 0.5378893314776599,
79
  "step": 700
80
  },
81
  {
82
  "epoch": 1.1315417256011315,
83
+ "eval_loss": 0.6039016842842102,
84
+ "eval_runtime": 152.002,
85
+ "eval_samples_per_second": 37.21,
86
+ "eval_steps_per_second": 4.651,
87
+ "eval_wer": 0.47239685815296506,
88
  "step": 800
89
  },
90
  {
91
  "epoch": 1.272984441301273,
92
+ "eval_loss": 0.5631398558616638,
93
+ "eval_runtime": 153.0321,
94
+ "eval_samples_per_second": 36.96,
95
+ "eval_steps_per_second": 4.62,
96
+ "eval_wer": 0.46745372814384667,
97
  "step": 900
98
  },
99
  {
100
  "epoch": 1.4144271570014144,
101
+ "grad_norm": 0.8790757656097412,
102
+ "learning_rate": 0.00022574999999999996,
103
+ "loss": 0.7808,
104
  "step": 1000
105
  },
106
  {
107
  "epoch": 1.4144271570014144,
108
+ "eval_loss": 0.5279428958892822,
109
+ "eval_runtime": 152.9768,
110
+ "eval_samples_per_second": 36.973,
111
+ "eval_steps_per_second": 4.622,
112
+ "eval_wer": 0.4291084769080642,
113
  "step": 1000
114
  },
115
  {
116
  "epoch": 1.5558698727015559,
117
+ "eval_loss": 0.5024306178092957,
118
+ "eval_runtime": 152.8927,
119
+ "eval_samples_per_second": 36.993,
120
+ "eval_steps_per_second": 4.624,
121
+ "eval_wer": 0.39940170529986724,
122
  "step": 1100
123
  },
124
  {
125
  "epoch": 1.6973125884016973,
126
+ "eval_loss": 0.4894837439060211,
127
+ "eval_runtime": 153.3103,
128
+ "eval_samples_per_second": 36.892,
129
+ "eval_steps_per_second": 4.612,
130
+ "eval_wer": 0.3894514565436483,
131
  "step": 1200
132
  },
133
  {
134
  "epoch": 1.8387553041018387,
135
+ "eval_loss": 0.4595918357372284,
136
+ "eval_runtime": 153.0651,
137
+ "eval_samples_per_second": 36.952,
138
+ "eval_steps_per_second": 4.619,
139
+ "eval_wer": 0.3695829534002016,
140
  "step": 1300
141
  },
142
  {
143
  "epoch": 1.9801980198019802,
144
+ "eval_loss": 0.44729524850845337,
145
+ "eval_runtime": 154.1998,
146
+ "eval_samples_per_second": 36.68,
147
+ "eval_steps_per_second": 4.585,
148
+ "eval_wer": 0.3610884484330758,
149
  "step": 1400
150
  },
151
  {
152
  "epoch": 2.1216407355021216,
153
+ "grad_norm": 0.8205087184906006,
154
+ "learning_rate": 0.0001512,
155
+ "loss": 0.6005,
156
  "step": 1500
157
  },
158
  {
159
  "epoch": 2.1216407355021216,
160
+ "eval_loss": 0.43324384093284607,
161
+ "eval_runtime": 150.9169,
162
+ "eval_samples_per_second": 37.478,
163
+ "eval_steps_per_second": 4.685,
164
+ "eval_wer": 0.3474268528738942,
165
  "step": 1500
166
  },
167
  {
168
  "epoch": 2.263083451202263,
169
+ "eval_loss": 0.4268616735935211,
170
+ "eval_runtime": 152.3911,
171
+ "eval_samples_per_second": 37.115,
172
+ "eval_steps_per_second": 4.639,
173
+ "eval_wer": 0.3418118411159636,
174
  "step": 1600
175
  },
176
  {
177
  "epoch": 2.4045261669024045,
178
+ "eval_loss": 0.4155045449733734,
179
+ "eval_runtime": 153.4832,
180
+ "eval_samples_per_second": 36.851,
181
+ "eval_steps_per_second": 4.606,
182
+ "eval_wer": 0.33606885188206875,
183
  "step": 1700
184
  },
185
  {
186
  "epoch": 2.545968882602546,
187
+ "eval_loss": 0.4121190905570984,
188
+ "eval_runtime": 153.7529,
189
+ "eval_samples_per_second": 36.786,
190
+ "eval_steps_per_second": 4.598,
191
+ "eval_wer": 0.32143142806865993,
192
  "step": 1800
193
  },
194
  {
195
  "epoch": 2.6874115983026874,
196
+ "eval_loss": 0.4145391285419464,
197
+ "eval_runtime": 159.3319,
198
+ "eval_samples_per_second": 35.498,
199
+ "eval_steps_per_second": 4.437,
200
+ "eval_wer": 0.3366447505239078,
201
  "step": 1900
202
  },
203
  {
204
  "epoch": 2.828854314002829,
205
+ "grad_norm": 0.8615767359733582,
206
+ "learning_rate": 7.664999999999999e-05,
207
+ "loss": 0.4666,
208
  "step": 2000
209
  },
210
  {
211
  "epoch": 2.828854314002829,
212
+ "eval_loss": 0.39387884736061096,
213
+ "eval_runtime": 153.1343,
214
+ "eval_samples_per_second": 36.935,
215
+ "eval_steps_per_second": 4.617,
216
+ "eval_wer": 0.3114171905744589,
217
  "step": 2000
218
  },
219
  {
220
  "epoch": 2.9702970297029703,
221
+ "eval_loss": 0.38894009590148926,
222
+ "eval_runtime": 152.8789,
223
+ "eval_samples_per_second": 36.997,
224
+ "eval_steps_per_second": 4.625,
225
+ "eval_wer": 0.30807377901489336,
226
  "step": 2100
227
  },
228
  {
229
  "epoch": 3.1117397454031117,
230
+ "eval_loss": 0.3909347653388977,
231
+ "eval_runtime": 154.4919,
232
+ "eval_samples_per_second": 36.61,
233
+ "eval_steps_per_second": 4.576,
234
+ "eval_wer": 0.30644206619634945,
235
  "step": 2200
236
  },
237
  {
238
  "epoch": 3.253182461103253,
239
+ "eval_loss": 0.3874327838420868,
240
+ "eval_runtime": 153.3025,
241
+ "eval_samples_per_second": 36.894,
242
+ "eval_steps_per_second": 4.612,
243
+ "eval_wer": 0.3015469277407176,
244
  "step": 2300
245
  },
246
  {
247
  "epoch": 3.3946251768033946,
248
+ "eval_loss": 0.386868953704834,
249
+ "eval_runtime": 153.0443,
250
+ "eval_samples_per_second": 36.957,
251
+ "eval_steps_per_second": 4.62,
252
+ "eval_wer": 0.29833149365711636,
253
  "step": 2400
254
  },
255
  {
256
  "epoch": 3.536067892503536,
257
+ "grad_norm": 0.6678842902183533,
258
+ "learning_rate": 1.9499999999999995e-06,
259
+ "loss": 0.3805,
260
  "step": 2500
261
  },
262
  {
263
  "epoch": 3.536067892503536,
264
+ "eval_loss": 0.3846580684185028,
265
+ "eval_runtime": 154.6331,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  "eval_samples_per_second": 36.577,
267
  "eval_steps_per_second": 4.572,
268
+ "eval_wer": 0.29666778646958136,
269
+ "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  },
271
  {
272
+ "epoch": 3.536067892503536,
273
+ "step": 2500,
274
+ "total_flos": 9.55129266706546e+18,
275
+ "train_loss": 1.1546670959472656,
276
+ "train_runtime": 6557.446,
277
+ "train_samples_per_second": 12.2,
278
+ "train_steps_per_second": 0.381
279
  }
280
  ],
281
  "logging_steps": 500,
282
+ "max_steps": 2500,
283
  "num_input_tokens_seen": 0,
284
+ "num_train_epochs": 4,
285
  "save_steps": 400,
286
+ "total_flos": 9.55129266706546e+18,
287
  "train_batch_size": 16,
288
  "trial_name": null,
289
  "trial_params": null