pierreguillou commited on
Commit
5724c63
1 Parent(s): 060ea69

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.0,
3
+ "eval_loss": 0.2628188133239746,
4
+ "eval_runtime": 4484.4888,
5
+ "eval_samples_per_second": 1.938,
6
+ "eval_steps_per_second": 0.121,
7
+ "eval_wer": 6.598745817992301,
8
+ "train_loss": 0.04298678698204458,
9
+ "train_runtime": 52040.1074,
10
+ "train_samples_per_second": 3.689,
11
+ "train_steps_per_second": 0.115
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.0,
3
+ "eval_loss": 0.2628188133239746,
4
+ "eval_runtime": 4484.4888,
5
+ "eval_samples_per_second": 1.938,
6
+ "eval_steps_per_second": 0.121,
7
+ "eval_wer": 6.598745817992301
8
+ }
runs/Dec15_10-00-45_132-145-143-6/events.out.tfevents.1671155169.132-145-143-6.128248.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665c3b7bc009f7e881b359e377efa0c0904d41998e75bda06130fa19e6ca9653
3
+ size 358
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.0,
3
+ "train_loss": 0.04298678698204458,
4
+ "train_runtime": 52040.1074,
5
+ "train_samples_per_second": 3.689,
6
+ "train_steps_per_second": 0.115
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.598745817992301,
3
+ "best_model_checkpoint": "./checkpoint-3000",
4
+ "epoch": 11.0045,
5
+ "global_step": 6000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 3.96e-07,
13
+ "loss": 1.249,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "learning_rate": 8.46e-07,
19
+ "loss": 1.0081,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.01,
24
+ "learning_rate": 1.296e-06,
25
+ "loss": 0.4711,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.02,
30
+ "learning_rate": 1.746e-06,
31
+ "loss": 0.281,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.02,
36
+ "learning_rate": 2.196e-06,
37
+ "loss": 0.219,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.03,
42
+ "learning_rate": 2.6459999999999997e-06,
43
+ "loss": 0.2109,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.03,
48
+ "learning_rate": 3.0959999999999997e-06,
49
+ "loss": 0.2049,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 0.03,
54
+ "learning_rate": 3.5460000000000004e-06,
55
+ "loss": 0.1752,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.04,
60
+ "learning_rate": 3.996e-06,
61
+ "loss": 0.1854,
62
+ "step": 225
63
+ },
64
+ {
65
+ "epoch": 0.04,
66
+ "learning_rate": 4.446e-06,
67
+ "loss": 0.172,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.05,
72
+ "learning_rate": 4.896e-06,
73
+ "loss": 0.1769,
74
+ "step": 275
75
+ },
76
+ {
77
+ "epoch": 0.05,
78
+ "learning_rate": 5.346e-06,
79
+ "loss": 0.18,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 0.05,
84
+ "learning_rate": 5.796e-06,
85
+ "loss": 0.1905,
86
+ "step": 325
87
+ },
88
+ {
89
+ "epoch": 0.06,
90
+ "learning_rate": 6.246e-06,
91
+ "loss": 0.1491,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 0.06,
96
+ "learning_rate": 6.696e-06,
97
+ "loss": 0.1631,
98
+ "step": 375
99
+ },
100
+ {
101
+ "epoch": 0.07,
102
+ "learning_rate": 7.146000000000001e-06,
103
+ "loss": 0.1645,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 0.07,
108
+ "learning_rate": 7.596e-06,
109
+ "loss": 0.1531,
110
+ "step": 425
111
+ },
112
+ {
113
+ "epoch": 0.07,
114
+ "learning_rate": 8.046e-06,
115
+ "loss": 0.1653,
116
+ "step": 450
117
+ },
118
+ {
119
+ "epoch": 0.08,
120
+ "learning_rate": 8.496e-06,
121
+ "loss": 0.1695,
122
+ "step": 475
123
+ },
124
+ {
125
+ "epoch": 0.08,
126
+ "learning_rate": 8.946e-06,
127
+ "loss": 0.1693,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 0.09,
132
+ "learning_rate": 8.964000000000001e-06,
133
+ "loss": 0.1649,
134
+ "step": 525
135
+ },
136
+ {
137
+ "epoch": 1.0,
138
+ "learning_rate": 8.92309090909091e-06,
139
+ "loss": 0.1554,
140
+ "step": 550
141
+ },
142
+ {
143
+ "epoch": 1.01,
144
+ "learning_rate": 8.882181818181818e-06,
145
+ "loss": 0.1702,
146
+ "step": 575
147
+ },
148
+ {
149
+ "epoch": 1.01,
150
+ "learning_rate": 8.841272727272727e-06,
151
+ "loss": 0.1502,
152
+ "step": 600
153
+ },
154
+ {
155
+ "epoch": 1.01,
156
+ "learning_rate": 8.800363636363636e-06,
157
+ "loss": 0.1362,
158
+ "step": 625
159
+ },
160
+ {
161
+ "epoch": 1.02,
162
+ "learning_rate": 8.759454545454546e-06,
163
+ "loss": 0.1332,
164
+ "step": 650
165
+ },
166
+ {
167
+ "epoch": 1.02,
168
+ "learning_rate": 8.718545454545455e-06,
169
+ "loss": 0.1116,
170
+ "step": 675
171
+ },
172
+ {
173
+ "epoch": 1.03,
174
+ "learning_rate": 8.677636363636364e-06,
175
+ "loss": 0.1231,
176
+ "step": 700
177
+ },
178
+ {
179
+ "epoch": 1.03,
180
+ "learning_rate": 8.636727272727272e-06,
181
+ "loss": 0.1102,
182
+ "step": 725
183
+ },
184
+ {
185
+ "epoch": 1.03,
186
+ "learning_rate": 8.595818181818183e-06,
187
+ "loss": 0.0975,
188
+ "step": 750
189
+ },
190
+ {
191
+ "epoch": 1.04,
192
+ "learning_rate": 8.554909090909091e-06,
193
+ "loss": 0.1098,
194
+ "step": 775
195
+ },
196
+ {
197
+ "epoch": 1.04,
198
+ "learning_rate": 8.514e-06,
199
+ "loss": 0.0917,
200
+ "step": 800
201
+ },
202
+ {
203
+ "epoch": 1.05,
204
+ "learning_rate": 8.473090909090909e-06,
205
+ "loss": 0.0914,
206
+ "step": 825
207
+ },
208
+ {
209
+ "epoch": 1.05,
210
+ "learning_rate": 8.43218181818182e-06,
211
+ "loss": 0.0903,
212
+ "step": 850
213
+ },
214
+ {
215
+ "epoch": 1.06,
216
+ "learning_rate": 8.391272727272728e-06,
217
+ "loss": 0.088,
218
+ "step": 875
219
+ },
220
+ {
221
+ "epoch": 1.06,
222
+ "learning_rate": 8.350363636363637e-06,
223
+ "loss": 0.0816,
224
+ "step": 900
225
+ },
226
+ {
227
+ "epoch": 1.06,
228
+ "learning_rate": 8.309454545454545e-06,
229
+ "loss": 0.071,
230
+ "step": 925
231
+ },
232
+ {
233
+ "epoch": 1.07,
234
+ "learning_rate": 8.268545454545454e-06,
235
+ "loss": 0.0717,
236
+ "step": 950
237
+ },
238
+ {
239
+ "epoch": 1.07,
240
+ "learning_rate": 8.227636363636365e-06,
241
+ "loss": 0.0709,
242
+ "step": 975
243
+ },
244
+ {
245
+ "epoch": 1.08,
246
+ "learning_rate": 8.186727272727273e-06,
247
+ "loss": 0.0704,
248
+ "step": 1000
249
+ },
250
+ {
251
+ "epoch": 1.08,
252
+ "learning_rate": 8.145818181818182e-06,
253
+ "loss": 0.0695,
254
+ "step": 1025
255
+ },
256
+ {
257
+ "epoch": 1.08,
258
+ "learning_rate": 8.10490909090909e-06,
259
+ "loss": 0.0739,
260
+ "step": 1050
261
+ },
262
+ {
263
+ "epoch": 1.09,
264
+ "learning_rate": 8.064000000000001e-06,
265
+ "loss": 0.0689,
266
+ "step": 1075
267
+ },
268
+ {
269
+ "epoch": 2.0,
270
+ "learning_rate": 8.02309090909091e-06,
271
+ "loss": 0.0679,
272
+ "step": 1100
273
+ },
274
+ {
275
+ "epoch": 2.01,
276
+ "learning_rate": 7.982181818181818e-06,
277
+ "loss": 0.0706,
278
+ "step": 1125
279
+ },
280
+ {
281
+ "epoch": 2.01,
282
+ "learning_rate": 7.941272727272727e-06,
283
+ "loss": 0.0617,
284
+ "step": 1150
285
+ },
286
+ {
287
+ "epoch": 2.01,
288
+ "learning_rate": 7.900363636363636e-06,
289
+ "loss": 0.0568,
290
+ "step": 1175
291
+ },
292
+ {
293
+ "epoch": 2.02,
294
+ "learning_rate": 7.859454545454546e-06,
295
+ "loss": 0.0661,
296
+ "step": 1200
297
+ },
298
+ {
299
+ "epoch": 2.02,
300
+ "learning_rate": 7.818545454545455e-06,
301
+ "loss": 0.0474,
302
+ "step": 1225
303
+ },
304
+ {
305
+ "epoch": 2.03,
306
+ "learning_rate": 7.777636363636364e-06,
307
+ "loss": 0.0568,
308
+ "step": 1250
309
+ },
310
+ {
311
+ "epoch": 2.03,
312
+ "learning_rate": 7.736727272727272e-06,
313
+ "loss": 0.0495,
314
+ "step": 1275
315
+ },
316
+ {
317
+ "epoch": 2.04,
318
+ "learning_rate": 7.695818181818183e-06,
319
+ "loss": 0.0458,
320
+ "step": 1300
321
+ },
322
+ {
323
+ "epoch": 2.04,
324
+ "learning_rate": 7.65490909090909e-06,
325
+ "loss": 0.0484,
326
+ "step": 1325
327
+ },
328
+ {
329
+ "epoch": 2.04,
330
+ "learning_rate": 7.614e-06,
331
+ "loss": 0.0433,
332
+ "step": 1350
333
+ },
334
+ {
335
+ "epoch": 2.05,
336
+ "learning_rate": 7.573090909090909e-06,
337
+ "loss": 0.0494,
338
+ "step": 1375
339
+ },
340
+ {
341
+ "epoch": 2.05,
342
+ "learning_rate": 7.5321818181818186e-06,
343
+ "loss": 0.0397,
344
+ "step": 1400
345
+ },
346
+ {
347
+ "epoch": 2.06,
348
+ "learning_rate": 7.491272727272728e-06,
349
+ "loss": 0.0372,
350
+ "step": 1425
351
+ },
352
+ {
353
+ "epoch": 2.06,
354
+ "learning_rate": 7.450363636363636e-06,
355
+ "loss": 0.0425,
356
+ "step": 1450
357
+ },
358
+ {
359
+ "epoch": 2.06,
360
+ "learning_rate": 7.4094545454545455e-06,
361
+ "loss": 0.0348,
362
+ "step": 1475
363
+ },
364
+ {
365
+ "epoch": 2.07,
366
+ "learning_rate": 7.368545454545455e-06,
367
+ "loss": 0.0333,
368
+ "step": 1500
369
+ },
370
+ {
371
+ "epoch": 2.07,
372
+ "eval_loss": 0.20725364983081818,
373
+ "eval_runtime": 4481.6137,
374
+ "eval_samples_per_second": 1.94,
375
+ "eval_steps_per_second": 0.121,
376
+ "eval_wer": 6.977017871253005,
377
+ "step": 1500
378
+ },
379
+ {
380
+ "epoch": 2.07,
381
+ "learning_rate": 7.327636363636365e-06,
382
+ "loss": 0.038,
383
+ "step": 1525
384
+ },
385
+ {
386
+ "epoch": 2.08,
387
+ "learning_rate": 7.2867272727272725e-06,
388
+ "loss": 0.0362,
389
+ "step": 1550
390
+ },
391
+ {
392
+ "epoch": 2.08,
393
+ "learning_rate": 7.245818181818182e-06,
394
+ "loss": 0.0398,
395
+ "step": 1575
396
+ },
397
+ {
398
+ "epoch": 2.09,
399
+ "learning_rate": 7.204909090909092e-06,
400
+ "loss": 0.038,
401
+ "step": 1600
402
+ },
403
+ {
404
+ "epoch": 2.09,
405
+ "learning_rate": 7.164e-06,
406
+ "loss": 0.0362,
407
+ "step": 1625
408
+ },
409
+ {
410
+ "epoch": 3.0,
411
+ "learning_rate": 7.123090909090909e-06,
412
+ "loss": 0.037,
413
+ "step": 1650
414
+ },
415
+ {
416
+ "epoch": 3.01,
417
+ "learning_rate": 7.082181818181818e-06,
418
+ "loss": 0.0303,
419
+ "step": 1675
420
+ },
421
+ {
422
+ "epoch": 3.01,
423
+ "learning_rate": 7.041272727272727e-06,
424
+ "loss": 0.0281,
425
+ "step": 1700
426
+ },
427
+ {
428
+ "epoch": 3.02,
429
+ "learning_rate": 7.000363636363637e-06,
430
+ "loss": 0.0278,
431
+ "step": 1725
432
+ },
433
+ {
434
+ "epoch": 3.02,
435
+ "learning_rate": 6.959454545454545e-06,
436
+ "loss": 0.0296,
437
+ "step": 1750
438
+ },
439
+ {
440
+ "epoch": 3.02,
441
+ "learning_rate": 6.918545454545454e-06,
442
+ "loss": 0.0279,
443
+ "step": 1775
444
+ },
445
+ {
446
+ "epoch": 3.03,
447
+ "learning_rate": 6.877636363636364e-06,
448
+ "loss": 0.025,
449
+ "step": 1800
450
+ },
451
+ {
452
+ "epoch": 3.03,
453
+ "learning_rate": 6.8367272727272734e-06,
454
+ "loss": 0.0265,
455
+ "step": 1825
456
+ },
457
+ {
458
+ "epoch": 3.04,
459
+ "learning_rate": 6.795818181818182e-06,
460
+ "loss": 0.0223,
461
+ "step": 1850
462
+ },
463
+ {
464
+ "epoch": 3.04,
465
+ "learning_rate": 6.754909090909091e-06,
466
+ "loss": 0.0247,
467
+ "step": 1875
468
+ },
469
+ {
470
+ "epoch": 3.05,
471
+ "learning_rate": 6.7140000000000004e-06,
472
+ "loss": 0.0189,
473
+ "step": 1900
474
+ },
475
+ {
476
+ "epoch": 3.05,
477
+ "learning_rate": 6.673090909090909e-06,
478
+ "loss": 0.0186,
479
+ "step": 1925
480
+ },
481
+ {
482
+ "epoch": 3.05,
483
+ "learning_rate": 6.632181818181819e-06,
484
+ "loss": 0.0228,
485
+ "step": 1950
486
+ },
487
+ {
488
+ "epoch": 3.06,
489
+ "learning_rate": 6.591272727272727e-06,
490
+ "loss": 0.0181,
491
+ "step": 1975
492
+ },
493
+ {
494
+ "epoch": 3.06,
495
+ "learning_rate": 6.550363636363636e-06,
496
+ "loss": 0.0181,
497
+ "step": 2000
498
+ },
499
+ {
500
+ "epoch": 3.07,
501
+ "learning_rate": 6.509454545454546e-06,
502
+ "loss": 0.0164,
503
+ "step": 2025
504
+ },
505
+ {
506
+ "epoch": 3.07,
507
+ "learning_rate": 6.468545454545455e-06,
508
+ "loss": 0.0189,
509
+ "step": 2050
510
+ },
511
+ {
512
+ "epoch": 3.07,
513
+ "learning_rate": 6.427636363636363e-06,
514
+ "loss": 0.0168,
515
+ "step": 2075
516
+ },
517
+ {
518
+ "epoch": 3.08,
519
+ "learning_rate": 6.386727272727273e-06,
520
+ "loss": 0.0209,
521
+ "step": 2100
522
+ },
523
+ {
524
+ "epoch": 3.08,
525
+ "learning_rate": 6.345818181818182e-06,
526
+ "loss": 0.0157,
527
+ "step": 2125
528
+ },
529
+ {
530
+ "epoch": 3.09,
531
+ "learning_rate": 6.304909090909092e-06,
532
+ "loss": 0.0175,
533
+ "step": 2150
534
+ },
535
+ {
536
+ "epoch": 4.0,
537
+ "learning_rate": 6.264e-06,
538
+ "loss": 0.0164,
539
+ "step": 2175
540
+ },
541
+ {
542
+ "epoch": 4.0,
543
+ "learning_rate": 6.223090909090909e-06,
544
+ "loss": 0.0149,
545
+ "step": 2200
546
+ },
547
+ {
548
+ "epoch": 4.01,
549
+ "learning_rate": 6.182181818181819e-06,
550
+ "loss": 0.0199,
551
+ "step": 2225
552
+ },
553
+ {
554
+ "epoch": 4.01,
555
+ "learning_rate": 6.1412727272727275e-06,
556
+ "loss": 0.0127,
557
+ "step": 2250
558
+ },
559
+ {
560
+ "epoch": 4.02,
561
+ "learning_rate": 6.100363636363636e-06,
562
+ "loss": 0.0114,
563
+ "step": 2275
564
+ },
565
+ {
566
+ "epoch": 4.02,
567
+ "learning_rate": 6.059454545454546e-06,
568
+ "loss": 0.0129,
569
+ "step": 2300
570
+ },
571
+ {
572
+ "epoch": 4.03,
573
+ "learning_rate": 6.0185454545454545e-06,
574
+ "loss": 0.012,
575
+ "step": 2325
576
+ },
577
+ {
578
+ "epoch": 4.03,
579
+ "learning_rate": 5.977636363636364e-06,
580
+ "loss": 0.0125,
581
+ "step": 2350
582
+ },
583
+ {
584
+ "epoch": 4.03,
585
+ "learning_rate": 5.936727272727274e-06,
586
+ "loss": 0.0136,
587
+ "step": 2375
588
+ },
589
+ {
590
+ "epoch": 4.04,
591
+ "learning_rate": 5.8958181818181815e-06,
592
+ "loss": 0.0148,
593
+ "step": 2400
594
+ },
595
+ {
596
+ "epoch": 4.04,
597
+ "learning_rate": 5.854909090909091e-06,
598
+ "loss": 0.0076,
599
+ "step": 2425
600
+ },
601
+ {
602
+ "epoch": 4.05,
603
+ "learning_rate": 5.814000000000001e-06,
604
+ "loss": 0.0111,
605
+ "step": 2450
606
+ },
607
+ {
608
+ "epoch": 4.05,
609
+ "learning_rate": 5.773090909090909e-06,
610
+ "loss": 0.0139,
611
+ "step": 2475
612
+ },
613
+ {
614
+ "epoch": 4.05,
615
+ "learning_rate": 5.732181818181818e-06,
616
+ "loss": 0.0117,
617
+ "step": 2500
618
+ },
619
+ {
620
+ "epoch": 4.06,
621
+ "learning_rate": 5.6912727272727276e-06,
622
+ "loss": 0.0144,
623
+ "step": 2525
624
+ },
625
+ {
626
+ "epoch": 4.06,
627
+ "learning_rate": 5.650363636363636e-06,
628
+ "loss": 0.0086,
629
+ "step": 2550
630
+ },
631
+ {
632
+ "epoch": 4.07,
633
+ "learning_rate": 5.609454545454546e-06,
634
+ "loss": 0.0119,
635
+ "step": 2575
636
+ },
637
+ {
638
+ "epoch": 4.07,
639
+ "learning_rate": 5.5685454545454545e-06,
640
+ "loss": 0.0075,
641
+ "step": 2600
642
+ },
643
+ {
644
+ "epoch": 4.08,
645
+ "learning_rate": 5.527636363636363e-06,
646
+ "loss": 0.0091,
647
+ "step": 2625
648
+ },
649
+ {
650
+ "epoch": 4.08,
651
+ "learning_rate": 5.486727272727273e-06,
652
+ "loss": 0.0091,
653
+ "step": 2650
654
+ },
655
+ {
656
+ "epoch": 4.08,
657
+ "learning_rate": 5.445818181818182e-06,
658
+ "loss": 0.0076,
659
+ "step": 2675
660
+ },
661
+ {
662
+ "epoch": 4.09,
663
+ "learning_rate": 5.40490909090909e-06,
664
+ "loss": 0.0085,
665
+ "step": 2700
666
+ },
667
+ {
668
+ "epoch": 5.0,
669
+ "learning_rate": 5.364e-06,
670
+ "loss": 0.0083,
671
+ "step": 2725
672
+ },
673
+ {
674
+ "epoch": 5.01,
675
+ "learning_rate": 5.323090909090909e-06,
676
+ "loss": 0.0077,
677
+ "step": 2750
678
+ },
679
+ {
680
+ "epoch": 5.01,
681
+ "learning_rate": 5.282181818181819e-06,
682
+ "loss": 0.0071,
683
+ "step": 2775
684
+ },
685
+ {
686
+ "epoch": 5.01,
687
+ "learning_rate": 5.241272727272728e-06,
688
+ "loss": 0.0068,
689
+ "step": 2800
690
+ },
691
+ {
692
+ "epoch": 5.02,
693
+ "learning_rate": 5.200363636363636e-06,
694
+ "loss": 0.0076,
695
+ "step": 2825
696
+ },
697
+ {
698
+ "epoch": 5.02,
699
+ "learning_rate": 5.159454545454546e-06,
700
+ "loss": 0.0062,
701
+ "step": 2850
702
+ },
703
+ {
704
+ "epoch": 5.03,
705
+ "learning_rate": 5.118545454545455e-06,
706
+ "loss": 0.0078,
707
+ "step": 2875
708
+ },
709
+ {
710
+ "epoch": 5.03,
711
+ "learning_rate": 5.077636363636364e-06,
712
+ "loss": 0.0057,
713
+ "step": 2900
714
+ },
715
+ {
716
+ "epoch": 5.04,
717
+ "learning_rate": 5.036727272727273e-06,
718
+ "loss": 0.0085,
719
+ "step": 2925
720
+ },
721
+ {
722
+ "epoch": 5.04,
723
+ "learning_rate": 4.995818181818182e-06,
724
+ "loss": 0.0065,
725
+ "step": 2950
726
+ },
727
+ {
728
+ "epoch": 5.04,
729
+ "learning_rate": 4.954909090909091e-06,
730
+ "loss": 0.0059,
731
+ "step": 2975
732
+ },
733
+ {
734
+ "epoch": 5.05,
735
+ "learning_rate": 4.914000000000001e-06,
736
+ "loss": 0.0061,
737
+ "step": 3000
738
+ },
739
+ {
740
+ "epoch": 5.05,
741
+ "eval_loss": 0.2628188133239746,
742
+ "eval_runtime": 4477.0926,
743
+ "eval_samples_per_second": 1.942,
744
+ "eval_steps_per_second": 0.122,
745
+ "eval_wer": 6.598745817992301,
746
+ "step": 3000
747
+ },
748
+ {
749
+ "epoch": 5.05,
750
+ "learning_rate": 4.873090909090909e-06,
751
+ "loss": 0.0034,
752
+ "step": 3025
753
+ },
754
+ {
755
+ "epoch": 5.06,
756
+ "learning_rate": 4.832181818181818e-06,
757
+ "loss": 0.0071,
758
+ "step": 3050
759
+ },
760
+ {
761
+ "epoch": 5.06,
762
+ "learning_rate": 4.791272727272728e-06,
763
+ "loss": 0.0051,
764
+ "step": 3075
765
+ },
766
+ {
767
+ "epoch": 5.06,
768
+ "learning_rate": 4.750363636363636e-06,
769
+ "loss": 0.0042,
770
+ "step": 3100
771
+ },
772
+ {
773
+ "epoch": 5.07,
774
+ "learning_rate": 4.709454545454545e-06,
775
+ "loss": 0.0057,
776
+ "step": 3125
777
+ },
778
+ {
779
+ "epoch": 5.07,
780
+ "learning_rate": 4.668545454545455e-06,
781
+ "loss": 0.0039,
782
+ "step": 3150
783
+ },
784
+ {
785
+ "epoch": 5.08,
786
+ "learning_rate": 4.627636363636363e-06,
787
+ "loss": 0.0043,
788
+ "step": 3175
789
+ },
790
+ {
791
+ "epoch": 5.08,
792
+ "learning_rate": 4.586727272727273e-06,
793
+ "loss": 0.0053,
794
+ "step": 3200
795
+ },
796
+ {
797
+ "epoch": 5.08,
798
+ "learning_rate": 4.5458181818181825e-06,
799
+ "loss": 0.0044,
800
+ "step": 3225
801
+ },
802
+ {
803
+ "epoch": 5.09,
804
+ "learning_rate": 4.50490909090909e-06,
805
+ "loss": 0.0039,
806
+ "step": 3250
807
+ },
808
+ {
809
+ "epoch": 6.0,
810
+ "learning_rate": 4.464e-06,
811
+ "loss": 0.0038,
812
+ "step": 3275
813
+ },
814
+ {
815
+ "epoch": 6.01,
816
+ "learning_rate": 4.4230909090909095e-06,
817
+ "loss": 0.004,
818
+ "step": 3300
819
+ },
820
+ {
821
+ "epoch": 6.01,
822
+ "learning_rate": 4.382181818181818e-06,
823
+ "loss": 0.0031,
824
+ "step": 3325
825
+ },
826
+ {
827
+ "epoch": 6.02,
828
+ "learning_rate": 4.341272727272728e-06,
829
+ "loss": 0.0036,
830
+ "step": 3350
831
+ },
832
+ {
833
+ "epoch": 6.02,
834
+ "learning_rate": 4.3003636363636365e-06,
835
+ "loss": 0.0049,
836
+ "step": 3375
837
+ },
838
+ {
839
+ "epoch": 6.02,
840
+ "learning_rate": 4.259454545454546e-06,
841
+ "loss": 0.0024,
842
+ "step": 3400
843
+ },
844
+ {
845
+ "epoch": 6.03,
846
+ "learning_rate": 4.218545454545455e-06,
847
+ "loss": 0.0038,
848
+ "step": 3425
849
+ },
850
+ {
851
+ "epoch": 6.03,
852
+ "learning_rate": 4.1776363636363635e-06,
853
+ "loss": 0.0038,
854
+ "step": 3450
855
+ },
856
+ {
857
+ "epoch": 6.04,
858
+ "learning_rate": 4.136727272727273e-06,
859
+ "loss": 0.0023,
860
+ "step": 3475
861
+ },
862
+ {
863
+ "epoch": 6.04,
864
+ "learning_rate": 4.095818181818182e-06,
865
+ "loss": 0.0027,
866
+ "step": 3500
867
+ },
868
+ {
869
+ "epoch": 6.04,
870
+ "learning_rate": 4.0549090909090905e-06,
871
+ "loss": 0.0036,
872
+ "step": 3525
873
+ },
874
+ {
875
+ "epoch": 6.05,
876
+ "learning_rate": 4.014e-06,
877
+ "loss": 0.0041,
878
+ "step": 3550
879
+ },
880
+ {
881
+ "epoch": 6.05,
882
+ "learning_rate": 3.973090909090909e-06,
883
+ "loss": 0.0028,
884
+ "step": 3575
885
+ },
886
+ {
887
+ "epoch": 6.06,
888
+ "learning_rate": 3.932181818181818e-06,
889
+ "loss": 0.002,
890
+ "step": 3600
891
+ },
892
+ {
893
+ "epoch": 6.06,
894
+ "learning_rate": 3.891272727272727e-06,
895
+ "loss": 0.0027,
896
+ "step": 3625
897
+ },
898
+ {
899
+ "epoch": 6.07,
900
+ "learning_rate": 3.8503636363636366e-06,
901
+ "loss": 0.0022,
902
+ "step": 3650
903
+ },
904
+ {
905
+ "epoch": 6.07,
906
+ "learning_rate": 3.8094545454545457e-06,
907
+ "loss": 0.0028,
908
+ "step": 3675
909
+ },
910
+ {
911
+ "epoch": 6.07,
912
+ "learning_rate": 3.7685454545454544e-06,
913
+ "loss": 0.0039,
914
+ "step": 3700
915
+ },
916
+ {
917
+ "epoch": 6.08,
918
+ "learning_rate": 3.727636363636364e-06,
919
+ "loss": 0.0021,
920
+ "step": 3725
921
+ },
922
+ {
923
+ "epoch": 6.08,
924
+ "learning_rate": 3.6867272727272727e-06,
925
+ "loss": 0.0024,
926
+ "step": 3750
927
+ },
928
+ {
929
+ "epoch": 6.09,
930
+ "learning_rate": 3.6458181818181822e-06,
931
+ "loss": 0.0019,
932
+ "step": 3775
933
+ },
934
+ {
935
+ "epoch": 6.09,
936
+ "learning_rate": 3.604909090909091e-06,
937
+ "loss": 0.0017,
938
+ "step": 3800
939
+ },
940
+ {
941
+ "epoch": 7.0,
942
+ "learning_rate": 3.564e-06,
943
+ "loss": 0.0024,
944
+ "step": 3825
945
+ },
946
+ {
947
+ "epoch": 7.01,
948
+ "learning_rate": 3.5230909090909092e-06,
949
+ "loss": 0.0024,
950
+ "step": 3850
951
+ },
952
+ {
953
+ "epoch": 7.01,
954
+ "learning_rate": 3.4821818181818184e-06,
955
+ "loss": 0.002,
956
+ "step": 3875
957
+ },
958
+ {
959
+ "epoch": 7.02,
960
+ "learning_rate": 3.441272727272727e-06,
961
+ "loss": 0.0013,
962
+ "step": 3900
963
+ },
964
+ {
965
+ "epoch": 7.02,
966
+ "learning_rate": 3.4003636363636366e-06,
967
+ "loss": 0.0021,
968
+ "step": 3925
969
+ },
970
+ {
971
+ "epoch": 7.02,
972
+ "learning_rate": 3.3594545454545454e-06,
973
+ "loss": 0.0015,
974
+ "step": 3950
975
+ },
976
+ {
977
+ "epoch": 7.03,
978
+ "learning_rate": 3.318545454545455e-06,
979
+ "loss": 0.0017,
980
+ "step": 3975
981
+ },
982
+ {
983
+ "epoch": 7.03,
984
+ "learning_rate": 3.2776363636363636e-06,
985
+ "loss": 0.0023,
986
+ "step": 4000
987
+ },
988
+ {
989
+ "epoch": 7.04,
990
+ "learning_rate": 3.2367272727272728e-06,
991
+ "loss": 0.0022,
992
+ "step": 4025
993
+ },
994
+ {
995
+ "epoch": 7.04,
996
+ "learning_rate": 3.195818181818182e-06,
997
+ "loss": 0.0026,
998
+ "step": 4050
999
+ },
1000
+ {
1001
+ "epoch": 7.05,
1002
+ "learning_rate": 3.154909090909091e-06,
1003
+ "loss": 0.0014,
1004
+ "step": 4075
1005
+ },
1006
+ {
1007
+ "epoch": 7.05,
1008
+ "learning_rate": 3.1139999999999997e-06,
1009
+ "loss": 0.0021,
1010
+ "step": 4100
1011
+ },
1012
+ {
1013
+ "epoch": 7.05,
1014
+ "learning_rate": 3.0730909090909093e-06,
1015
+ "loss": 0.0018,
1016
+ "step": 4125
1017
+ },
1018
+ {
1019
+ "epoch": 7.06,
1020
+ "learning_rate": 3.0321818181818184e-06,
1021
+ "loss": 0.0009,
1022
+ "step": 4150
1023
+ },
1024
+ {
1025
+ "epoch": 7.06,
1026
+ "learning_rate": 2.991272727272727e-06,
1027
+ "loss": 0.0018,
1028
+ "step": 4175
1029
+ },
1030
+ {
1031
+ "epoch": 7.07,
1032
+ "learning_rate": 2.9503636363636367e-06,
1033
+ "loss": 0.0014,
1034
+ "step": 4200
1035
+ },
1036
+ {
1037
+ "epoch": 7.07,
1038
+ "learning_rate": 2.9094545454545454e-06,
1039
+ "loss": 0.0013,
1040
+ "step": 4225
1041
+ },
1042
+ {
1043
+ "epoch": 7.07,
1044
+ "learning_rate": 2.868545454545455e-06,
1045
+ "loss": 0.0011,
1046
+ "step": 4250
1047
+ },
1048
+ {
1049
+ "epoch": 7.08,
1050
+ "learning_rate": 2.8276363636363637e-06,
1051
+ "loss": 0.0023,
1052
+ "step": 4275
1053
+ },
1054
+ {
1055
+ "epoch": 7.08,
1056
+ "learning_rate": 2.786727272727273e-06,
1057
+ "loss": 0.002,
1058
+ "step": 4300
1059
+ },
1060
+ {
1061
+ "epoch": 7.09,
1062
+ "learning_rate": 2.745818181818182e-06,
1063
+ "loss": 0.0012,
1064
+ "step": 4325
1065
+ },
1066
+ {
1067
+ "epoch": 8.0,
1068
+ "learning_rate": 2.704909090909091e-06,
1069
+ "loss": 0.0009,
1070
+ "step": 4350
1071
+ },
1072
+ {
1073
+ "epoch": 8.01,
1074
+ "learning_rate": 2.664e-06,
1075
+ "loss": 0.0007,
1076
+ "step": 4375
1077
+ },
1078
+ {
1079
+ "epoch": 8.01,
1080
+ "learning_rate": 2.6230909090909094e-06,
1081
+ "loss": 0.001,
1082
+ "step": 4400
1083
+ },
1084
+ {
1085
+ "epoch": 8.01,
1086
+ "learning_rate": 2.582181818181818e-06,
1087
+ "loss": 0.0006,
1088
+ "step": 4425
1089
+ },
1090
+ {
1091
+ "epoch": 8.02,
1092
+ "learning_rate": 2.5412727272727272e-06,
1093
+ "loss": 0.0008,
1094
+ "step": 4450
1095
+ },
1096
+ {
1097
+ "epoch": 8.02,
1098
+ "learning_rate": 2.5003636363636364e-06,
1099
+ "loss": 0.0016,
1100
+ "step": 4475
1101
+ },
1102
+ {
1103
+ "epoch": 8.03,
1104
+ "learning_rate": 2.4594545454545455e-06,
1105
+ "loss": 0.0007,
1106
+ "step": 4500
1107
+ },
1108
+ {
1109
+ "epoch": 8.03,
1110
+ "eval_loss": 0.29601243138313293,
1111
+ "eval_runtime": 4468.4473,
1112
+ "eval_samples_per_second": 1.945,
1113
+ "eval_steps_per_second": 0.122,
1114
+ "eval_wer": 6.697937156402885,
1115
+ "step": 4500
1116
+ },
1117
+ {
1118
+ "epoch": 8.03,
1119
+ "learning_rate": 2.4185454545454542e-06,
1120
+ "loss": 0.0008,
1121
+ "step": 4525
1122
+ },
1123
+ {
1124
+ "epoch": 8.03,
1125
+ "learning_rate": 2.3776363636363638e-06,
1126
+ "loss": 0.0013,
1127
+ "step": 4550
1128
+ },
1129
+ {
1130
+ "epoch": 8.04,
1131
+ "learning_rate": 2.3367272727272725e-06,
1132
+ "loss": 0.0017,
1133
+ "step": 4575
1134
+ },
1135
+ {
1136
+ "epoch": 8.04,
1137
+ "learning_rate": 2.295818181818182e-06,
1138
+ "loss": 0.0006,
1139
+ "step": 4600
1140
+ },
1141
+ {
1142
+ "epoch": 8.05,
1143
+ "learning_rate": 2.254909090909091e-06,
1144
+ "loss": 0.0006,
1145
+ "step": 4625
1146
+ },
1147
+ {
1148
+ "epoch": 8.05,
1149
+ "learning_rate": 2.214e-06,
1150
+ "loss": 0.0006,
1151
+ "step": 4650
1152
+ },
1153
+ {
1154
+ "epoch": 8.06,
1155
+ "learning_rate": 2.173090909090909e-06,
1156
+ "loss": 0.0008,
1157
+ "step": 4675
1158
+ },
1159
+ {
1160
+ "epoch": 8.06,
1161
+ "learning_rate": 2.132181818181818e-06,
1162
+ "loss": 0.0007,
1163
+ "step": 4700
1164
+ },
1165
+ {
1166
+ "epoch": 8.06,
1167
+ "learning_rate": 2.0912727272727273e-06,
1168
+ "loss": 0.001,
1169
+ "step": 4725
1170
+ },
1171
+ {
1172
+ "epoch": 8.07,
1173
+ "learning_rate": 2.0503636363636364e-06,
1174
+ "loss": 0.0005,
1175
+ "step": 4750
1176
+ },
1177
+ {
1178
+ "epoch": 8.07,
1179
+ "learning_rate": 2.0094545454545456e-06,
1180
+ "loss": 0.0005,
1181
+ "step": 4775
1182
+ },
1183
+ {
1184
+ "epoch": 8.08,
1185
+ "learning_rate": 1.9685454545454543e-06,
1186
+ "loss": 0.0005,
1187
+ "step": 4800
1188
+ },
1189
+ {
1190
+ "epoch": 8.08,
1191
+ "learning_rate": 1.927636363636364e-06,
1192
+ "loss": 0.0006,
1193
+ "step": 4825
1194
+ },
1195
+ {
1196
+ "epoch": 8.08,
1197
+ "learning_rate": 1.886727272727273e-06,
1198
+ "loss": 0.0005,
1199
+ "step": 4850
1200
+ },
1201
+ {
1202
+ "epoch": 8.09,
1203
+ "learning_rate": 1.845818181818182e-06,
1204
+ "loss": 0.0005,
1205
+ "step": 4875
1206
+ },
1207
+ {
1208
+ "epoch": 9.0,
1209
+ "learning_rate": 1.804909090909091e-06,
1210
+ "loss": 0.0006,
1211
+ "step": 4900
1212
+ },
1213
+ {
1214
+ "epoch": 9.01,
1215
+ "learning_rate": 1.7640000000000002e-06,
1216
+ "loss": 0.0007,
1217
+ "step": 4925
1218
+ },
1219
+ {
1220
+ "epoch": 9.01,
1221
+ "learning_rate": 1.723090909090909e-06,
1222
+ "loss": 0.0005,
1223
+ "step": 4950
1224
+ },
1225
+ {
1226
+ "epoch": 9.01,
1227
+ "learning_rate": 1.6821818181818182e-06,
1228
+ "loss": 0.0014,
1229
+ "step": 4975
1230
+ },
1231
+ {
1232
+ "epoch": 9.02,
1233
+ "learning_rate": 1.6412727272727274e-06,
1234
+ "loss": 0.0005,
1235
+ "step": 5000
1236
+ },
1237
+ {
1238
+ "epoch": 9.02,
1239
+ "learning_rate": 1.6003636363636365e-06,
1240
+ "loss": 0.0004,
1241
+ "step": 5025
1242
+ },
1243
+ {
1244
+ "epoch": 9.03,
1245
+ "learning_rate": 1.5594545454545454e-06,
1246
+ "loss": 0.0005,
1247
+ "step": 5050
1248
+ },
1249
+ {
1250
+ "epoch": 9.03,
1251
+ "learning_rate": 1.5185454545454546e-06,
1252
+ "loss": 0.0004,
1253
+ "step": 5075
1254
+ },
1255
+ {
1256
+ "epoch": 9.04,
1257
+ "learning_rate": 1.4776363636363637e-06,
1258
+ "loss": 0.0009,
1259
+ "step": 5100
1260
+ },
1261
+ {
1262
+ "epoch": 9.04,
1263
+ "learning_rate": 1.4367272727272726e-06,
1264
+ "loss": 0.0004,
1265
+ "step": 5125
1266
+ },
1267
+ {
1268
+ "epoch": 9.04,
1269
+ "learning_rate": 1.3958181818181818e-06,
1270
+ "loss": 0.0004,
1271
+ "step": 5150
1272
+ },
1273
+ {
1274
+ "epoch": 9.05,
1275
+ "learning_rate": 1.354909090909091e-06,
1276
+ "loss": 0.0004,
1277
+ "step": 5175
1278
+ },
1279
+ {
1280
+ "epoch": 9.05,
1281
+ "learning_rate": 1.314e-06,
1282
+ "loss": 0.0006,
1283
+ "step": 5200
1284
+ },
1285
+ {
1286
+ "epoch": 9.06,
1287
+ "learning_rate": 1.273090909090909e-06,
1288
+ "loss": 0.0004,
1289
+ "step": 5225
1290
+ },
1291
+ {
1292
+ "epoch": 9.06,
1293
+ "learning_rate": 1.232181818181818e-06,
1294
+ "loss": 0.0009,
1295
+ "step": 5250
1296
+ },
1297
+ {
1298
+ "epoch": 9.06,
1299
+ "learning_rate": 1.1912727272727272e-06,
1300
+ "loss": 0.0004,
1301
+ "step": 5275
1302
+ },
1303
+ {
1304
+ "epoch": 9.07,
1305
+ "learning_rate": 1.1503636363636366e-06,
1306
+ "loss": 0.0004,
1307
+ "step": 5300
1308
+ },
1309
+ {
1310
+ "epoch": 9.07,
1311
+ "learning_rate": 1.1094545454545455e-06,
1312
+ "loss": 0.0004,
1313
+ "step": 5325
1314
+ },
1315
+ {
1316
+ "epoch": 9.08,
1317
+ "learning_rate": 1.0685454545454544e-06,
1318
+ "loss": 0.0004,
1319
+ "step": 5350
1320
+ },
1321
+ {
1322
+ "epoch": 9.08,
1323
+ "learning_rate": 1.0276363636363636e-06,
1324
+ "loss": 0.0004,
1325
+ "step": 5375
1326
+ },
1327
+ {
1328
+ "epoch": 9.09,
1329
+ "learning_rate": 9.86727272727273e-07,
1330
+ "loss": 0.0004,
1331
+ "step": 5400
1332
+ },
1333
+ {
1334
+ "epoch": 9.09,
1335
+ "learning_rate": 9.458181818181818e-07,
1336
+ "loss": 0.0004,
1337
+ "step": 5425
1338
+ },
1339
+ {
1340
+ "epoch": 10.0,
1341
+ "learning_rate": 9.04909090909091e-07,
1342
+ "loss": 0.0004,
1343
+ "step": 5450
1344
+ },
1345
+ {
1346
+ "epoch": 10.01,
1347
+ "learning_rate": 8.64e-07,
1348
+ "loss": 0.0004,
1349
+ "step": 5475
1350
+ },
1351
+ {
1352
+ "epoch": 10.01,
1353
+ "learning_rate": 8.230909090909091e-07,
1354
+ "loss": 0.0004,
1355
+ "step": 5500
1356
+ },
1357
+ {
1358
+ "epoch": 10.02,
1359
+ "learning_rate": 7.821818181818182e-07,
1360
+ "loss": 0.0004,
1361
+ "step": 5525
1362
+ },
1363
+ {
1364
+ "epoch": 10.02,
1365
+ "learning_rate": 7.412727272727273e-07,
1366
+ "loss": 0.0004,
1367
+ "step": 5550
1368
+ },
1369
+ {
1370
+ "epoch": 10.02,
1371
+ "learning_rate": 7.003636363636363e-07,
1372
+ "loss": 0.0004,
1373
+ "step": 5575
1374
+ },
1375
+ {
1376
+ "epoch": 10.03,
1377
+ "learning_rate": 6.594545454545454e-07,
1378
+ "loss": 0.0004,
1379
+ "step": 5600
1380
+ },
1381
+ {
1382
+ "epoch": 10.03,
1383
+ "learning_rate": 6.185454545454546e-07,
1384
+ "loss": 0.0005,
1385
+ "step": 5625
1386
+ },
1387
+ {
1388
+ "epoch": 10.04,
1389
+ "learning_rate": 5.776363636363637e-07,
1390
+ "loss": 0.0004,
1391
+ "step": 5650
1392
+ },
1393
+ {
1394
+ "epoch": 10.04,
1395
+ "learning_rate": 5.367272727272727e-07,
1396
+ "loss": 0.0004,
1397
+ "step": 5675
1398
+ },
1399
+ {
1400
+ "epoch": 10.04,
1401
+ "learning_rate": 4.958181818181818e-07,
1402
+ "loss": 0.0004,
1403
+ "step": 5700
1404
+ },
1405
+ {
1406
+ "epoch": 10.05,
1407
+ "learning_rate": 4.5490909090909094e-07,
1408
+ "loss": 0.0004,
1409
+ "step": 5725
1410
+ },
1411
+ {
1412
+ "epoch": 10.05,
1413
+ "learning_rate": 4.1400000000000003e-07,
1414
+ "loss": 0.0004,
1415
+ "step": 5750
1416
+ },
1417
+ {
1418
+ "epoch": 10.06,
1419
+ "learning_rate": 3.7309090909090906e-07,
1420
+ "loss": 0.0004,
1421
+ "step": 5775
1422
+ },
1423
+ {
1424
+ "epoch": 10.06,
1425
+ "learning_rate": 3.321818181818182e-07,
1426
+ "loss": 0.0004,
1427
+ "step": 5800
1428
+ },
1429
+ {
1430
+ "epoch": 10.07,
1431
+ "learning_rate": 2.912727272727273e-07,
1432
+ "loss": 0.0003,
1433
+ "step": 5825
1434
+ },
1435
+ {
1436
+ "epoch": 10.07,
1437
+ "learning_rate": 2.5036363636363636e-07,
1438
+ "loss": 0.0004,
1439
+ "step": 5850
1440
+ },
1441
+ {
1442
+ "epoch": 10.07,
1443
+ "learning_rate": 2.0945454545454544e-07,
1444
+ "loss": 0.0004,
1445
+ "step": 5875
1446
+ },
1447
+ {
1448
+ "epoch": 10.08,
1449
+ "learning_rate": 1.6854545454545455e-07,
1450
+ "loss": 0.0004,
1451
+ "step": 5900
1452
+ },
1453
+ {
1454
+ "epoch": 10.08,
1455
+ "learning_rate": 1.2763636363636364e-07,
1456
+ "loss": 0.0004,
1457
+ "step": 5925
1458
+ },
1459
+ {
1460
+ "epoch": 10.09,
1461
+ "learning_rate": 8.672727272727272e-08,
1462
+ "loss": 0.0004,
1463
+ "step": 5950
1464
+ },
1465
+ {
1466
+ "epoch": 11.0,
1467
+ "learning_rate": 4.581818181818182e-08,
1468
+ "loss": 0.0004,
1469
+ "step": 5975
1470
+ },
1471
+ {
1472
+ "epoch": 11.0,
1473
+ "learning_rate": 4.90909090909091e-09,
1474
+ "loss": 0.0004,
1475
+ "step": 6000
1476
+ },
1477
+ {
1478
+ "epoch": 11.0,
1479
+ "eval_loss": 0.32118794322013855,
1480
+ "eval_runtime": 4478.9685,
1481
+ "eval_samples_per_second": 1.941,
1482
+ "eval_steps_per_second": 0.121,
1483
+ "eval_wer": 6.6794438560212495,
1484
+ "step": 6000
1485
+ },
1486
+ {
1487
+ "epoch": 11.0,
1488
+ "step": 6000,
1489
+ "total_flos": 1.9595614224384e+20,
1490
+ "train_loss": 0.04298678698204458,
1491
+ "train_runtime": 52040.1074,
1492
+ "train_samples_per_second": 3.689,
1493
+ "train_steps_per_second": 0.115
1494
+ }
1495
+ ],
1496
+ "max_steps": 6000,
1497
+ "num_train_epochs": 9223372036854775807,
1498
+ "total_flos": 1.9595614224384e+20,
1499
+ "trial_name": null,
1500
+ "trial_params": null
1501
+ }