chmanoj commited on
Commit
d03e6f1
1 Parent(s): ac36a33

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.5928943157196045,
4
- "eval_runtime": 66.288,
5
  "eval_samples": 1112,
6
- "eval_samples_per_second": 16.775,
7
- "eval_steps_per_second": 4.194,
8
- "eval_wer": 0.7180269058295964,
9
- "train_loss": 1.2273276989276594,
10
- "train_runtime": 32753.7935,
11
  "train_samples": 3336,
12
- "train_samples_per_second": 10.185,
13
- "train_steps_per_second": 0.159
14
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 3.384493350982666,
4
+ "eval_runtime": 65.2378,
5
  "eval_samples": 1112,
6
+ "eval_samples_per_second": 17.045,
7
+ "eval_steps_per_second": 4.261,
8
+ "eval_wer": 0.9869058295964126,
9
+ "train_loss": 2.424026096050556,
10
+ "train_runtime": 30676.4279,
11
  "train_samples": 3336,
12
+ "train_samples_per_second": 10.875,
13
+ "train_steps_per_second": 0.17
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.5928943157196045,
4
- "eval_runtime": 66.288,
5
  "eval_samples": 1112,
6
- "eval_samples_per_second": 16.775,
7
- "eval_steps_per_second": 4.194,
8
- "eval_wer": 0.7180269058295964
9
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 3.384493350982666,
4
+ "eval_runtime": 65.2378,
5
  "eval_samples": 1112,
6
+ "eval_samples_per_second": 17.045,
7
+ "eval_steps_per_second": 4.261,
8
+ "eval_wer": 0.9869058295964126
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:810b36f805c9412b3235dab58050d680cb8084ef2efd1ac272ab78267cd6d389
3
  size 3850717489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fe919eab98bc12ffc7b9ac5f1bb776f71d1502f7036df84461f078d7f4357d
3
  size 3850717489
runs/Jan28_06-54-16_job-e3562c4a-f6a3-47dc-8131-7e578f1139a3/events.out.tfevents.1643352878.job-e3562c4a-f6a3-47dc-8131-7e578f1139a3.778557.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd59225c940f726389353f59ffa22210a77d66451ca8236a21b03dddaaf4477
3
- size 5779
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79086fce91a69cadecf971dbba0baaa955ef5be4371bf2a9647257c72a27483b
3
+ size 16374
runs/Jan28_06-54-16_job-e3562c4a-f6a3-47dc-8131-7e578f1139a3/events.out.tfevents.1643383627.job-e3562c4a-f6a3-47dc-8131-7e578f1139a3.778557.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33e0fa954a0844e51d914b007036c4780bb75a9cc5b86a1de1b2859a91eafb04
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 1.2273276989276594,
4
- "train_runtime": 32753.7935,
5
  "train_samples": 3336,
6
- "train_samples_per_second": 10.185,
7
- "train_steps_per_second": 0.159
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 2.424026096050556,
4
+ "train_runtime": 30676.4279,
5
  "train_samples": 3336,
6
+ "train_samples_per_second": 10.875,
7
+ "train_steps_per_second": 0.17
8
  }
trainer_state.json CHANGED
@@ -9,414 +9,414 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.92,
12
- "learning_rate": 3.2333333333333334e-06,
13
- "loss": 13.5304,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 3.84,
18
- "learning_rate": 6.566666666666667e-06,
19
- "loss": 3.5412,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 5.77,
24
- "learning_rate": 9.9e-06,
25
- "loss": 3.1425,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 7.69,
30
- "learning_rate": 9.80204081632653e-06,
31
- "loss": 2.0204,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 9.61,
36
- "learning_rate": 9.59795918367347e-06,
37
- "loss": 1.5794,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 9.61,
42
- "eval_loss": 0.9859886169433594,
43
- "eval_runtime": 65.6063,
44
- "eval_samples_per_second": 16.95,
45
- "eval_steps_per_second": 4.237,
46
- "eval_wer": 0.9582062780269058,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 11.54,
51
- "learning_rate": 9.39387755102041e-06,
52
- "loss": 1.4015,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 13.46,
57
- "learning_rate": 9.189795918367347e-06,
58
- "loss": 1.2803,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 15.38,
63
- "learning_rate": 8.985714285714287e-06,
64
- "loss": 1.1969,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 17.31,
69
- "learning_rate": 8.781632653061225e-06,
70
- "loss": 1.1458,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 19.23,
75
- "learning_rate": 8.577551020408163e-06,
76
- "loss": 1.0985,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 19.23,
81
- "eval_loss": 0.5948446989059448,
82
- "eval_runtime": 63.7089,
83
- "eval_samples_per_second": 17.454,
84
- "eval_steps_per_second": 4.364,
85
- "eval_wer": 0.7533632286995515,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 21.15,
90
- "learning_rate": 8.373469387755104e-06,
91
- "loss": 1.0639,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 23.08,
96
- "learning_rate": 8.169387755102042e-06,
97
- "loss": 1.0214,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 25.0,
102
- "learning_rate": 7.96530612244898e-06,
103
- "loss": 1.0047,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 26.92,
108
- "learning_rate": 7.76122448979592e-06,
109
- "loss": 0.971,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 28.84,
114
- "learning_rate": 7.557142857142857e-06,
115
- "loss": 0.9551,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 28.84,
120
- "eval_loss": 0.5892378091812134,
121
- "eval_runtime": 64.7699,
122
- "eval_samples_per_second": 17.168,
123
- "eval_steps_per_second": 4.292,
124
- "eval_wer": 0.7391928251121076,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 30.77,
129
- "learning_rate": 7.353061224489797e-06,
130
- "loss": 0.9387,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 32.69,
135
- "learning_rate": 7.148979591836735e-06,
136
- "loss": 0.9272,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 34.61,
141
- "learning_rate": 6.944897959183674e-06,
142
- "loss": 0.9192,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 36.54,
147
- "learning_rate": 6.740816326530613e-06,
148
- "loss": 0.892,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 38.46,
153
- "learning_rate": 6.5367346938775515e-06,
154
- "loss": 0.8699,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 38.46,
159
- "eval_loss": 0.5926958322525024,
160
- "eval_runtime": 64.9144,
161
- "eval_samples_per_second": 17.13,
162
- "eval_steps_per_second": 4.283,
163
- "eval_wer": 0.72,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 40.38,
168
- "learning_rate": 6.332653061224491e-06,
169
- "loss": 0.8779,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 42.31,
174
- "learning_rate": 6.128571428571429e-06,
175
- "loss": 0.8511,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 44.23,
180
- "learning_rate": 5.924489795918368e-06,
181
- "loss": 0.8532,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 46.15,
186
- "learning_rate": 5.720408163265306e-06,
187
- "loss": 0.8442,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 48.08,
192
- "learning_rate": 5.516326530612245e-06,
193
- "loss": 0.8372,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 48.08,
198
- "eval_loss": 0.5938696265220642,
199
- "eval_runtime": 64.5641,
200
- "eval_samples_per_second": 17.223,
201
- "eval_steps_per_second": 4.306,
202
- "eval_wer": 0.73847533632287,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 50.0,
207
- "learning_rate": 5.312244897959185e-06,
208
- "loss": 0.8112,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 51.92,
213
- "learning_rate": 5.108163265306123e-06,
214
- "loss": 0.8165,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 53.84,
219
- "learning_rate": 4.904081632653061e-06,
220
- "loss": 0.8045,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 55.77,
225
- "learning_rate": 4.7e-06,
226
- "loss": 0.7974,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 57.69,
231
- "learning_rate": 4.4959183673469394e-06,
232
- "loss": 0.7794,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 57.69,
237
- "eval_loss": 0.5800846815109253,
238
- "eval_runtime": 63.4747,
239
- "eval_samples_per_second": 17.519,
240
- "eval_steps_per_second": 4.38,
241
- "eval_wer": 0.7388340807174888,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 59.61,
246
- "learning_rate": 4.291836734693878e-06,
247
- "loss": 0.7752,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 61.54,
252
- "learning_rate": 4.087755102040817e-06,
253
- "loss": 0.7765,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 63.46,
258
- "learning_rate": 3.8836734693877556e-06,
259
- "loss": 0.7584,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 65.38,
264
- "learning_rate": 3.6795918367346943e-06,
265
- "loss": 0.7589,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 67.31,
270
- "learning_rate": 3.475510204081633e-06,
271
- "loss": 0.7565,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 67.31,
276
- "eval_loss": 0.582656979560852,
277
- "eval_runtime": 71.1025,
278
- "eval_samples_per_second": 15.639,
279
- "eval_steps_per_second": 3.91,
280
- "eval_wer": 0.7183856502242153,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 69.23,
285
- "learning_rate": 3.2714285714285717e-06,
286
- "loss": 0.7595,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 71.15,
291
- "learning_rate": 3.0673469387755104e-06,
292
- "loss": 0.747,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 73.08,
297
- "learning_rate": 2.863265306122449e-06,
298
- "loss": 0.7373,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 75.0,
303
- "learning_rate": 2.6591836734693882e-06,
304
- "loss": 0.7268,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 76.92,
309
- "learning_rate": 2.455102040816327e-06,
310
- "loss": 0.7303,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 76.92,
315
- "eval_loss": 0.580017626285553,
316
- "eval_runtime": 63.495,
317
- "eval_samples_per_second": 17.513,
318
- "eval_steps_per_second": 4.378,
319
- "eval_wer": 0.7296860986547086,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 78.84,
324
- "learning_rate": 2.2510204081632656e-06,
325
- "loss": 0.7239,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 80.77,
330
- "learning_rate": 2.0469387755102044e-06,
331
- "loss": 0.7201,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 82.69,
336
- "learning_rate": 1.8428571428571428e-06,
337
- "loss": 0.7185,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 84.61,
342
- "learning_rate": 1.6387755102040818e-06,
343
- "loss": 0.7149,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 86.54,
348
- "learning_rate": 1.4346938775510205e-06,
349
- "loss": 0.7128,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 86.54,
354
- "eval_loss": 0.5977014899253845,
355
- "eval_runtime": 64.1516,
356
- "eval_samples_per_second": 17.334,
357
- "eval_steps_per_second": 4.333,
358
- "eval_wer": 0.7149775784753363,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 88.46,
363
- "learning_rate": 1.2306122448979594e-06,
364
- "loss": 0.705,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 90.38,
369
- "learning_rate": 1.026530612244898e-06,
370
- "loss": 0.7129,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 92.31,
375
- "learning_rate": 8.224489795918368e-07,
376
- "loss": 0.7076,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 94.23,
381
- "learning_rate": 6.183673469387755e-07,
382
- "loss": 0.7126,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 96.15,
387
- "learning_rate": 4.142857142857143e-07,
388
- "loss": 0.6972,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 96.15,
393
- "eval_loss": 0.5900700092315674,
394
- "eval_runtime": 63.2743,
395
- "eval_samples_per_second": 17.574,
396
- "eval_steps_per_second": 4.394,
397
- "eval_wer": 0.7176681614349776,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 98.08,
402
- "learning_rate": 2.1020408163265306e-07,
403
- "loss": 0.7001,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 100.0,
408
- "learning_rate": 6.1224489795918365e-09,
409
- "loss": 0.6965,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 100.0,
414
  "step": 5200,
415
  "total_flos": 1.4653197993726655e+20,
416
- "train_loss": 1.2273276989276594,
417
- "train_runtime": 32753.7935,
418
- "train_samples_per_second": 10.185,
419
- "train_steps_per_second": 0.159
420
  }
421
  ],
422
  "max_steps": 5200,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.92,
12
+ "learning_rate": 1.6166666666666665e-07,
13
+ "loss": 24.0656,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 3.84,
18
+ "learning_rate": 3.283333333333333e-07,
19
+ "loss": 19.0268,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 5.77,
24
+ "learning_rate": 4.95e-07,
25
+ "loss": 6.1995,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 7.69,
30
+ "learning_rate": 4.901020408163265e-07,
31
+ "loss": 3.6484,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 9.61,
36
+ "learning_rate": 4.798979591836734e-07,
37
+ "loss": 3.5131,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 9.61,
42
+ "eval_loss": 3.5294487476348877,
43
+ "eval_runtime": 64.5425,
44
+ "eval_samples_per_second": 17.229,
45
+ "eval_steps_per_second": 4.307,
46
+ "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 11.54,
51
+ "learning_rate": 4.696938775510204e-07,
52
+ "loss": 3.4699,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 13.46,
57
+ "learning_rate": 4.5948979591836735e-07,
58
+ "loss": 3.3973,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 15.38,
63
+ "learning_rate": 4.4928571428571426e-07,
64
+ "loss": 3.2876,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 17.31,
69
+ "learning_rate": 4.390816326530612e-07,
70
+ "loss": 3.0902,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 19.23,
75
+ "learning_rate": 4.288775510204081e-07,
76
+ "loss": 2.8596,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 19.23,
81
+ "eval_loss": 3.5708465576171875,
82
+ "eval_runtime": 66.2433,
83
+ "eval_samples_per_second": 16.787,
84
+ "eval_steps_per_second": 4.197,
85
+ "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 21.15,
90
+ "learning_rate": 4.1867346938775513e-07,
91
+ "loss": 2.6397,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 23.08,
96
+ "learning_rate": 4.0846938775510203e-07,
97
+ "loss": 2.4302,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 25.0,
102
+ "learning_rate": 3.98265306122449e-07,
103
+ "loss": 2.2341,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 26.92,
108
+ "learning_rate": 3.880612244897959e-07,
109
+ "loss": 2.071,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 28.84,
114
+ "learning_rate": 3.778571428571428e-07,
115
+ "loss": 1.9055,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 28.84,
120
+ "eval_loss": 3.6432793140411377,
121
+ "eval_runtime": 65.1639,
122
+ "eval_samples_per_second": 17.065,
123
+ "eval_steps_per_second": 4.266,
124
+ "eval_wer": 1.0007174887892376,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 30.77,
129
+ "learning_rate": 3.676530612244898e-07,
130
+ "loss": 1.7787,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 32.69,
135
+ "learning_rate": 3.574489795918367e-07,
136
+ "loss": 1.6655,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 34.61,
141
+ "learning_rate": 3.4724489795918366e-07,
142
+ "loss": 1.5696,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 36.54,
147
+ "learning_rate": 3.3704081632653057e-07,
148
+ "loss": 1.4902,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 38.46,
153
+ "learning_rate": 3.268367346938775e-07,
154
+ "loss": 1.4239,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 38.46,
159
+ "eval_loss": 3.6568963527679443,
160
+ "eval_runtime": 66.0028,
161
+ "eval_samples_per_second": 16.848,
162
+ "eval_steps_per_second": 4.212,
163
+ "eval_wer": 0.9994618834080717,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 40.38,
168
+ "learning_rate": 3.166326530612245e-07,
169
+ "loss": 1.3735,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 42.31,
174
+ "learning_rate": 3.0642857142857144e-07,
175
+ "loss": 1.3228,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 44.23,
180
+ "learning_rate": 2.9622448979591834e-07,
181
+ "loss": 1.2834,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 46.15,
186
+ "learning_rate": 2.860204081632653e-07,
187
+ "loss": 1.2438,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 48.08,
192
+ "learning_rate": 2.758163265306122e-07,
193
+ "loss": 1.2168,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 48.08,
198
+ "eval_loss": 3.6079351902008057,
199
+ "eval_runtime": 65.3166,
200
+ "eval_samples_per_second": 17.025,
201
+ "eval_steps_per_second": 4.256,
202
+ "eval_wer": 0.995695067264574,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 50.0,
207
+ "learning_rate": 2.656122448979592e-07,
208
+ "loss": 1.1792,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 51.92,
213
+ "learning_rate": 2.554081632653061e-07,
214
+ "loss": 1.1706,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 53.84,
219
+ "learning_rate": 2.4520408163265307e-07,
220
+ "loss": 1.1429,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 55.77,
225
+ "learning_rate": 2.3499999999999997e-07,
226
+ "loss": 1.1318,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 57.69,
231
+ "learning_rate": 2.2479591836734693e-07,
232
+ "loss": 1.1063,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 57.69,
237
+ "eval_loss": 3.5737504959106445,
238
+ "eval_runtime": 64.9336,
239
+ "eval_samples_per_second": 17.125,
240
+ "eval_steps_per_second": 4.281,
241
+ "eval_wer": 0.9924663677130044,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 59.61,
246
+ "learning_rate": 2.1459183673469386e-07,
247
+ "loss": 1.0893,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 61.54,
252
+ "learning_rate": 2.0438775510204082e-07,
253
+ "loss": 1.0744,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 63.46,
258
+ "learning_rate": 1.9418367346938775e-07,
259
+ "loss": 1.0591,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 65.38,
264
+ "learning_rate": 1.839795918367347e-07,
265
+ "loss": 1.05,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 67.31,
270
+ "learning_rate": 1.7377551020408163e-07,
271
+ "loss": 1.0404,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 67.31,
276
+ "eval_loss": 3.4857470989227295,
277
+ "eval_runtime": 64.9602,
278
+ "eval_samples_per_second": 17.118,
279
+ "eval_steps_per_second": 4.28,
280
+ "eval_wer": 0.9888789237668162,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 69.23,
285
+ "learning_rate": 1.6357142857142856e-07,
286
+ "loss": 1.0325,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 71.15,
291
+ "learning_rate": 1.5336734693877552e-07,
292
+ "loss": 1.0266,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 73.08,
297
+ "learning_rate": 1.4316326530612245e-07,
298
+ "loss": 1.0164,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 75.0,
303
+ "learning_rate": 1.329591836734694e-07,
304
+ "loss": 1.0002,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 76.92,
309
+ "learning_rate": 1.227551020408163e-07,
310
+ "loss": 1.001,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 76.92,
315
+ "eval_loss": 3.4881510734558105,
316
+ "eval_runtime": 64.7811,
317
+ "eval_samples_per_second": 17.165,
318
+ "eval_steps_per_second": 4.291,
319
+ "eval_wer": 0.9858295964125561,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 78.84,
324
+ "learning_rate": 1.1255102040816327e-07,
325
+ "loss": 0.997,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 80.77,
330
+ "learning_rate": 1.0234693877551021e-07,
331
+ "loss": 0.9896,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 82.69,
336
+ "learning_rate": 9.214285714285714e-08,
337
+ "loss": 0.986,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 84.61,
342
+ "learning_rate": 8.193877551020407e-08,
343
+ "loss": 0.9823,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 86.54,
348
+ "learning_rate": 7.173469387755101e-08,
349
+ "loss": 0.982,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 86.54,
354
+ "eval_loss": 3.3850555419921875,
355
+ "eval_runtime": 65.2436,
356
+ "eval_samples_per_second": 17.044,
357
+ "eval_steps_per_second": 4.261,
358
+ "eval_wer": 0.987085201793722,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 88.46,
363
+ "learning_rate": 6.153061224489796e-08,
364
+ "loss": 0.9675,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 90.38,
369
+ "learning_rate": 5.132653061224489e-08,
370
+ "loss": 0.9774,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 92.31,
375
+ "learning_rate": 4.1122448979591836e-08,
376
+ "loss": 0.9687,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 94.23,
381
+ "learning_rate": 3.091836734693877e-08,
382
+ "loss": 0.9771,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 96.15,
387
+ "learning_rate": 2.0714285714285713e-08,
388
+ "loss": 0.9612,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 96.15,
393
+ "eval_loss": 3.386908531188965,
394
+ "eval_runtime": 65.5128,
395
+ "eval_samples_per_second": 16.974,
396
+ "eval_steps_per_second": 4.243,
397
+ "eval_wer": 0.9872645739910314,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 98.08,
402
+ "learning_rate": 1.0510204081632651e-08,
403
+ "loss": 0.9667,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 100.0,
408
+ "learning_rate": 3.061224489795918e-10,
409
+ "loss": 0.9664,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 100.0,
414
  "step": 5200,
415
  "total_flos": 1.4653197993726655e+20,
416
+ "train_loss": 2.424026096050556,
417
+ "train_runtime": 30676.4279,
418
+ "train_samples_per_second": 10.875,
419
+ "train_steps_per_second": 0.17
420
  }
421
  ],
422
  "max_steps": 5200,