Mofe committed on
Commit
d9e36c1
1 Parent(s): 7de5cd0

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +144 -144
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 79.99,
3
- "eval_loss": 0.49252477288246155,
4
- "eval_runtime": 31.5756,
5
  "eval_samples": 892,
6
- "eval_samples_per_second": 28.25,
7
- "eval_steps_per_second": 3.547,
8
- "eval_wer": 0.5714079630698211,
9
- "train_loss": 1.795632479985555,
10
- "train_runtime": 8730.4816,
11
  "train_samples": 1941,
12
- "train_samples_per_second": 17.786,
13
- "train_steps_per_second": 0.55
14
  }
 
1
  {
2
  "epoch": 79.99,
3
+ "eval_loss": 0.4997510015964508,
4
+ "eval_runtime": 32.8146,
5
  "eval_samples": 892,
6
+ "eval_samples_per_second": 27.183,
7
+ "eval_steps_per_second": 3.413,
8
+ "eval_wer": 0.5152914021927294,
9
+ "train_loss": 1.6836539268493653,
10
+ "train_runtime": 8656.8344,
11
  "train_samples": 1941,
12
+ "train_samples_per_second": 17.937,
13
+ "train_steps_per_second": 0.554
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 79.99,
3
- "eval_loss": 0.49252477288246155,
4
- "eval_runtime": 31.5756,
5
  "eval_samples": 892,
6
- "eval_samples_per_second": 28.25,
7
- "eval_steps_per_second": 3.547,
8
- "eval_wer": 0.5714079630698211
9
  }
 
1
  {
2
  "epoch": 79.99,
3
+ "eval_loss": 0.4997510015964508,
4
+ "eval_runtime": 32.8146,
5
  "eval_samples": 892,
6
+ "eval_samples_per_second": 27.183,
7
+ "eval_steps_per_second": 3.413,
8
+ "eval_wer": 0.5152914021927294
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 79.99,
3
- "train_loss": 1.795632479985555,
4
- "train_runtime": 8730.4816,
5
  "train_samples": 1941,
6
- "train_samples_per_second": 17.786,
7
- "train_steps_per_second": 0.55
8
  }
 
1
  {
2
  "epoch": 79.99,
3
+ "train_loss": 1.6836539268493653,
4
+ "train_runtime": 8656.8344,
5
  "train_samples": 1941,
6
+ "train_samples_per_second": 17.937,
7
+ "train_steps_per_second": 0.554
8
  }
trainer_state.json CHANGED
@@ -9,381 +9,381 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.66,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 14.5868,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 3.33,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 6.8756,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 4.99,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.2978,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 6.66,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 3.6126,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 8.33,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.1674,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 8.33,
42
- "eval_loss": 3.0295047760009766,
43
- "eval_runtime": 32.2697,
44
- "eval_samples_per_second": 27.642,
45
- "eval_steps_per_second": 3.471,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 9.99,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 2.8989,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 11.66,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 2.8318,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 13.33,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 2.7744,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 14.99,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 2.7043,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 16.66,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 2.6987,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 16.66,
81
- "eval_loss": 2.687849760055542,
82
- "eval_runtime": 32.1535,
83
- "eval_samples_per_second": 27.742,
84
- "eval_steps_per_second": 3.483,
85
- "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 18.33,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 2.6565,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 19.99,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.4086,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 21.66,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 1.7625,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 23.33,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 1.4648,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 24.99,
114
- "learning_rate": 5.62125e-05,
115
- "loss": 1.3454,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 24.99,
120
- "eval_loss": 0.6813644766807556,
121
- "eval_runtime": 32.0646,
122
- "eval_samples_per_second": 27.819,
123
- "eval_steps_per_second": 3.493,
124
- "eval_wer": 0.698066935949221,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 26.66,
129
- "learning_rate": 5.9962499999999994e-05,
130
- "loss": 1.2913,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 28.33,
135
- "learning_rate": 6.37125e-05,
136
- "loss": 1.2416,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 29.99,
141
- "learning_rate": 6.746249999999999e-05,
142
- "loss": 1.1899,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 31.66,
147
- "learning_rate": 7.121249999999999e-05,
148
- "loss": 1.1745,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 33.33,
153
- "learning_rate": 7.492499999999999e-05,
154
- "loss": 1.1227,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 33.33,
159
- "eval_loss": 0.5790585875511169,
160
- "eval_runtime": 32.0814,
161
- "eval_samples_per_second": 27.804,
162
- "eval_steps_per_second": 3.491,
163
- "eval_wer": 0.6513271783035199,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 34.99,
168
- "learning_rate": 7.2375e-05,
169
- "loss": 1.0795,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 36.66,
174
- "learning_rate": 6.972321428571428e-05,
175
- "loss": 1.0646,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 38.33,
180
- "learning_rate": 6.704464285714285e-05,
181
- "loss": 1.05,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 39.99,
186
- "learning_rate": 6.436607142857142e-05,
187
- "loss": 1.0149,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 41.66,
192
- "learning_rate": 6.16875e-05,
193
- "loss": 0.9972,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 41.66,
198
- "eval_loss": 0.5235142111778259,
199
- "eval_runtime": 31.8053,
200
- "eval_samples_per_second": 28.046,
201
- "eval_steps_per_second": 3.521,
202
- "eval_wer": 0.5718407386035776,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 43.33,
207
- "learning_rate": 5.9008928571428565e-05,
208
- "loss": 0.9722,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 44.99,
213
- "learning_rate": 5.633035714285714e-05,
214
- "loss": 0.9401,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 46.66,
219
- "learning_rate": 5.3651785714285706e-05,
220
- "loss": 0.9439,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 48.33,
225
- "learning_rate": 5.0973214285714276e-05,
226
- "loss": 0.923,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 49.99,
231
- "learning_rate": 4.829464285714285e-05,
232
- "loss": 0.9123,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 49.99,
237
- "eval_loss": 0.5104396939277649,
238
- "eval_runtime": 31.7859,
239
- "eval_samples_per_second": 28.063,
240
- "eval_steps_per_second": 3.524,
241
- "eval_wer": 0.5633294864396999,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 51.66,
246
- "learning_rate": 4.561607142857142e-05,
247
- "loss": 0.8979,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 53.33,
252
- "learning_rate": 4.29375e-05,
253
- "loss": 0.8814,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 54.99,
258
- "learning_rate": 4.025892857142857e-05,
259
- "loss": 0.8548,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 56.66,
264
- "learning_rate": 3.760714285714286e-05,
265
- "loss": 0.8515,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 58.33,
270
- "learning_rate": 3.492857142857142e-05,
271
- "loss": 0.836,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 58.33,
276
- "eval_loss": 0.49266090989112854,
277
- "eval_runtime": 31.7464,
278
- "eval_samples_per_second": 28.098,
279
- "eval_steps_per_second": 3.528,
280
- "eval_wer": 0.5579919215233698,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 59.99,
285
- "learning_rate": 3.225e-05,
286
- "loss": 0.8045,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 61.66,
291
- "learning_rate": 2.9571428571428568e-05,
292
- "loss": 0.8036,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 63.33,
297
- "learning_rate": 2.689285714285714e-05,
298
- "loss": 0.7924,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 64.99,
303
- "learning_rate": 2.4214285714285712e-05,
304
- "loss": 0.7837,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 66.66,
309
- "learning_rate": 2.1535714285714285e-05,
310
- "loss": 0.7725,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 66.66,
315
- "eval_loss": 0.5077508687973022,
316
- "eval_runtime": 31.8976,
317
- "eval_samples_per_second": 27.964,
318
- "eval_steps_per_second": 3.511,
319
- "eval_wer": 0.5778995960761685,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 68.33,
324
- "learning_rate": 1.888392857142857e-05,
325
- "loss": 0.7655,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 69.99,
330
- "learning_rate": 1.6205357142857143e-05,
331
- "loss": 0.741,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 71.66,
336
- "learning_rate": 1.3526785714285713e-05,
337
- "loss": 0.7499,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 73.33,
342
- "learning_rate": 1.0848214285714287e-05,
343
- "loss": 0.7479,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 74.99,
348
- "learning_rate": 8.169642857142857e-06,
349
- "loss": 0.7297,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 74.99,
354
- "eval_loss": 0.4939458966255188,
355
- "eval_runtime": 31.9903,
356
- "eval_samples_per_second": 27.883,
357
- "eval_steps_per_second": 3.501,
358
- "eval_wer": 0.5737160992498558,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 76.66,
363
- "learning_rate": 5.491071428571429e-06,
364
- "loss": 0.7318,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 78.33,
369
- "learning_rate": 2.8124999999999998e-06,
370
- "loss": 0.7232,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 79.99,
375
- "learning_rate": 1.3392857142857142e-07,
376
- "loss": 0.7195,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 79.99,
381
  "step": 4800,
382
  "total_flos": 2.017023736432276e+19,
383
- "train_loss": 1.795632479985555,
384
- "train_runtime": 8730.4816,
385
- "train_samples_per_second": 17.786,
386
- "train_steps_per_second": 0.55
387
  }
388
  ],
389
  "max_steps": 4800,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.66,
12
+ "learning_rate": 4.752e-06,
13
+ "loss": 14.1408,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 3.33,
18
+ "learning_rate": 9.552000000000001e-06,
19
+ "loss": 6.1022,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 4.99,
24
+ "learning_rate": 1.4351999999999999e-05,
25
+ "loss": 4.037,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 6.66,
30
+ "learning_rate": 1.9152000000000002e-05,
31
+ "loss": 3.3885,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 8.33,
36
+ "learning_rate": 2.3952e-05,
37
+ "loss": 3.0021,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 8.33,
42
+ "eval_loss": 2.90588116645813,
43
+ "eval_runtime": 32.387,
44
+ "eval_samples_per_second": 27.542,
45
+ "eval_steps_per_second": 3.458,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 9.99,
51
+ "learning_rate": 2.8752e-05,
52
+ "loss": 2.8318,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 11.66,
57
+ "learning_rate": 3.3552e-05,
58
+ "loss": 2.7909,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 13.33,
63
+ "learning_rate": 3.8352e-05,
64
+ "loss": 2.7355,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 14.99,
69
+ "learning_rate": 4.3152e-05,
70
+ "loss": 2.6796,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 16.66,
75
+ "learning_rate": 4.7952000000000004e-05,
76
+ "loss": 2.6604,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 16.66,
81
+ "eval_loss": 2.640192985534668,
82
+ "eval_runtime": 31.7049,
83
+ "eval_samples_per_second": 28.134,
84
+ "eval_steps_per_second": 3.533,
85
+ "eval_wer": 0.9891806116560877,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 18.33,
90
+ "learning_rate": 5.2752e-05,
91
+ "loss": 2.4114,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 19.99,
96
+ "learning_rate": 5.755200000000001e-05,
97
+ "loss": 1.675,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 21.66,
102
+ "learning_rate": 6.2352e-05,
103
+ "loss": 1.4118,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 23.33,
108
+ "learning_rate": 6.7152e-05,
109
+ "loss": 1.291,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 24.99,
114
+ "learning_rate": 7.1952e-05,
115
+ "loss": 1.2216,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 24.99,
120
+ "eval_loss": 0.6051220893859863,
121
+ "eval_runtime": 31.8239,
122
+ "eval_samples_per_second": 28.029,
123
+ "eval_steps_per_second": 3.519,
124
+ "eval_wer": 0.6850836699365263,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 26.66,
129
+ "learning_rate": 7.6752e-05,
130
+ "loss": 1.2115,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 28.33,
135
+ "learning_rate": 8.1504e-05,
136
+ "loss": 1.1555,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 29.99,
141
+ "learning_rate": 8.6304e-05,
142
+ "loss": 1.1142,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 31.66,
147
+ "learning_rate": 9.1104e-05,
148
+ "loss": 1.1029,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 33.33,
153
+ "learning_rate": 9.590400000000001e-05,
154
+ "loss": 1.0754,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 33.33,
159
+ "eval_loss": 0.5407921075820923,
160
+ "eval_runtime": 31.9875,
161
+ "eval_samples_per_second": 27.886,
162
+ "eval_steps_per_second": 3.501,
163
+ "eval_wer": 0.6464223889209464,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 34.99,
168
+ "learning_rate": 9.264e-05,
169
+ "loss": 1.05,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 36.66,
174
+ "learning_rate": 8.924571428571428e-05,
175
+ "loss": 1.0239,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 38.33,
180
+ "learning_rate": 8.581714285714286e-05,
181
+ "loss": 1.0023,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 39.99,
186
+ "learning_rate": 8.238857142857142e-05,
187
+ "loss": 0.9695,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 41.66,
192
+ "learning_rate": 7.896e-05,
193
+ "loss": 0.9582,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 41.66,
198
+ "eval_loss": 0.5521320700645447,
199
+ "eval_runtime": 31.6386,
200
+ "eval_samples_per_second": 28.193,
201
+ "eval_steps_per_second": 3.54,
202
+ "eval_wer": 0.5934795152914022,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 43.33,
207
+ "learning_rate": 7.553142857142858e-05,
208
+ "loss": 0.9312,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 44.99,
213
+ "learning_rate": 7.210285714285715e-05,
214
+ "loss": 0.9136,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 46.66,
219
+ "learning_rate": 6.867428571428571e-05,
220
+ "loss": 0.9065,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 48.33,
225
+ "learning_rate": 6.524571428571428e-05,
226
+ "loss": 0.8843,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 49.99,
231
+ "learning_rate": 6.181714285714286e-05,
232
+ "loss": 0.8653,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 49.99,
237
+ "eval_loss": 0.5156339406967163,
238
+ "eval_runtime": 31.9186,
239
+ "eval_samples_per_second": 27.946,
240
+ "eval_steps_per_second": 3.509,
241
+ "eval_wer": 0.5549624927870744,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 51.66,
246
+ "learning_rate": 5.842285714285714e-05,
247
+ "loss": 0.8526,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 53.33,
252
+ "learning_rate": 5.4994285714285715e-05,
253
+ "loss": 0.8323,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 54.99,
258
+ "learning_rate": 5.156571428571429e-05,
259
+ "loss": 0.8089,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 56.66,
264
+ "learning_rate": 4.813714285714286e-05,
265
+ "loss": 0.8016,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 58.33,
270
+ "learning_rate": 4.4708571428571425e-05,
271
+ "loss": 0.7867,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 58.33,
276
+ "eval_loss": 0.543922483921051,
277
+ "eval_runtime": 31.6944,
278
+ "eval_samples_per_second": 28.144,
279
+ "eval_steps_per_second": 3.534,
280
+ "eval_wer": 0.5605885747259088,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 59.99,
285
+ "learning_rate": 4.128e-05,
286
+ "loss": 0.7598,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 61.66,
291
+ "learning_rate": 3.785142857142857e-05,
292
+ "loss": 0.7582,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 63.33,
297
+ "learning_rate": 3.442285714285715e-05,
298
+ "loss": 0.7423,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 64.99,
303
+ "learning_rate": 3.0994285714285715e-05,
304
+ "loss": 0.7313,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 66.66,
309
+ "learning_rate": 2.7565714285714287e-05,
310
+ "loss": 0.7265,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 66.66,
315
+ "eval_loss": 0.48627379536628723,
316
+ "eval_runtime": 31.7498,
317
+ "eval_samples_per_second": 28.095,
318
+ "eval_steps_per_second": 3.528,
319
+ "eval_wer": 0.525533756491633,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 68.33,
324
+ "learning_rate": 2.413714285714286e-05,
325
+ "loss": 0.7134,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 69.99,
330
+ "learning_rate": 2.0708571428571428e-05,
331
+ "loss": 0.6867,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 71.66,
336
+ "learning_rate": 1.728e-05,
337
+ "loss": 0.6987,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 73.33,
342
+ "learning_rate": 1.3851428571428573e-05,
343
+ "loss": 0.6976,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 74.99,
348
+ "learning_rate": 1.0422857142857143e-05,
349
+ "loss": 0.6699,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 74.99,
354
+ "eval_loss": 0.5050373673439026,
355
+ "eval_runtime": 31.7366,
356
+ "eval_samples_per_second": 28.106,
357
+ "eval_steps_per_second": 3.529,
358
+ "eval_wer": 0.5168782458165032,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 76.66,
363
+ "learning_rate": 6.994285714285714e-06,
364
+ "loss": 0.6777,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 78.33,
369
+ "learning_rate": 3.565714285714286e-06,
370
+ "loss": 0.6669,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 79.99,
375
+ "learning_rate": 1.3714285714285715e-07,
376
+ "loss": 0.6602,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 79.99,
381
  "step": 4800,
382
  "total_flos": 2.017023736432276e+19,
383
+ "train_loss": 1.6836539268493653,
384
+ "train_runtime": 8656.8344,
385
+ "train_samples_per_second": 17.937,
386
+ "train_steps_per_second": 0.554
387
  }
388
  ],
389
  "max_steps": 4800,