drmeeseeks commited on
Commit
c6a3800
1 Parent(s): 745dd79

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3000.0,
3
+ "eval_loss": 7.86700439453125,
4
+ "eval_runtime": 212.8072,
5
+ "eval_samples": 50,
6
+ "eval_samples_per_second": 0.235,
7
+ "eval_steps_per_second": 0.019,
8
+ "eval_wer": 154.41176470588235,
9
+ "train_loss": 0.047578999360693465,
10
+ "train_runtime": 7446.2425,
11
+ "train_samples_per_second": 12.892,
12
+ "train_steps_per_second": 0.403
13
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3000.0,
3
+ "eval_loss": 7.86700439453125,
4
+ "eval_runtime": 212.8072,
5
+ "eval_samples": 50,
6
+ "eval_samples_per_second": 0.235,
7
+ "eval_steps_per_second": 0.019,
8
+ "eval_wer": 154.41176470588235
9
+ }
runs/Dec25_16-43-39_129-213-85-198/events.out.tfevents.1671994503.129-213-85-198.84469.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5728c3353817f63bf6bdc85f28a2aa5204a502e85c347316e98eca96463393ad
3
+ size 358
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3000.0,
3
+ "train_loss": 0.047578999360693465,
4
+ "train_runtime": 7446.2425,
5
+ "train_samples_per_second": 12.892,
6
+ "train_steps_per_second": 0.403
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1015 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 141.1764705882353,
3
+ "best_model_checkpoint": "./whisper-medium-v2-amet/checkpoint-1000",
4
+ "epoch": 3000.0,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 25.0,
12
+ "learning_rate": 4.2000000000000006e-07,
13
+ "loss": 2.8559,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 50.0,
18
+ "learning_rate": 9.200000000000001e-07,
19
+ "loss": 1.9972,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 75.0,
24
+ "learning_rate": 1.42e-06,
25
+ "loss": 0.8333,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 100.0,
30
+ "learning_rate": 1.9200000000000003e-06,
31
+ "loss": 0.0194,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 100.0,
36
+ "eval_loss": 3.8540303707122803,
37
+ "eval_runtime": 172.9892,
38
+ "eval_samples_per_second": 0.289,
39
+ "eval_steps_per_second": 0.023,
40
+ "eval_wer": 147.99465240641712,
41
+ "step": 100
42
+ },
43
+ {
44
+ "epoch": 125.0,
45
+ "learning_rate": 2.42e-06,
46
+ "loss": 0.0002,
47
+ "step": 125
48
+ },
49
+ {
50
+ "epoch": 150.0,
51
+ "learning_rate": 2.92e-06,
52
+ "loss": 0.0001,
53
+ "step": 150
54
+ },
55
+ {
56
+ "epoch": 175.0,
57
+ "learning_rate": 3.4200000000000007e-06,
58
+ "loss": 0.0001,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 200.0,
63
+ "learning_rate": 3.920000000000001e-06,
64
+ "loss": 0.0001,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 200.0,
69
+ "eval_loss": 4.147902011871338,
70
+ "eval_runtime": 173.0433,
71
+ "eval_samples_per_second": 0.289,
72
+ "eval_steps_per_second": 0.023,
73
+ "eval_wer": 148.1283422459893,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 225.0,
78
+ "learning_rate": 4.42e-06,
79
+ "loss": 0.0001,
80
+ "step": 225
81
+ },
82
+ {
83
+ "epoch": 250.0,
84
+ "learning_rate": 4.92e-06,
85
+ "loss": 0.0001,
86
+ "step": 250
87
+ },
88
+ {
89
+ "epoch": 275.0,
90
+ "learning_rate": 5.420000000000001e-06,
91
+ "loss": 0.0001,
92
+ "step": 275
93
+ },
94
+ {
95
+ "epoch": 300.0,
96
+ "learning_rate": 5.92e-06,
97
+ "loss": 0.0001,
98
+ "step": 300
99
+ },
100
+ {
101
+ "epoch": 300.0,
102
+ "eval_loss": 4.183959007263184,
103
+ "eval_runtime": 212.6643,
104
+ "eval_samples_per_second": 0.235,
105
+ "eval_steps_per_second": 0.019,
106
+ "eval_wer": 150.53475935828877,
107
+ "step": 300
108
+ },
109
+ {
110
+ "epoch": 325.0,
111
+ "learning_rate": 6.42e-06,
112
+ "loss": 0.0001,
113
+ "step": 325
114
+ },
115
+ {
116
+ "epoch": 350.0,
117
+ "learning_rate": 6.92e-06,
118
+ "loss": 0.0001,
119
+ "step": 350
120
+ },
121
+ {
122
+ "epoch": 375.0,
123
+ "learning_rate": 7.420000000000001e-06,
124
+ "loss": 0.0001,
125
+ "step": 375
126
+ },
127
+ {
128
+ "epoch": 400.0,
129
+ "learning_rate": 7.92e-06,
130
+ "loss": 0.0001,
131
+ "step": 400
132
+ },
133
+ {
134
+ "epoch": 400.0,
135
+ "eval_loss": 4.333920001983643,
136
+ "eval_runtime": 205.1394,
137
+ "eval_samples_per_second": 0.244,
138
+ "eval_steps_per_second": 0.019,
139
+ "eval_wer": 177.94117647058823,
140
+ "step": 400
141
+ },
142
+ {
143
+ "epoch": 425.0,
144
+ "learning_rate": 8.42e-06,
145
+ "loss": 0.0001,
146
+ "step": 425
147
+ },
148
+ {
149
+ "epoch": 450.0,
150
+ "learning_rate": 8.920000000000001e-06,
151
+ "loss": 0.0001,
152
+ "step": 450
153
+ },
154
+ {
155
+ "epoch": 475.0,
156
+ "learning_rate": 9.42e-06,
157
+ "loss": 0.0001,
158
+ "step": 475
159
+ },
160
+ {
161
+ "epoch": 500.0,
162
+ "learning_rate": 9.920000000000002e-06,
163
+ "loss": 0.0,
164
+ "step": 500
165
+ },
166
+ {
167
+ "epoch": 500.0,
168
+ "eval_loss": 4.583085536956787,
169
+ "eval_runtime": 212.5309,
170
+ "eval_samples_per_second": 0.235,
171
+ "eval_steps_per_second": 0.019,
172
+ "eval_wer": 151.06951871657753,
173
+ "step": 500
174
+ },
175
+ {
176
+ "epoch": 525.0,
177
+ "learning_rate": 9.916000000000001e-06,
178
+ "loss": 0.0,
179
+ "step": 525
180
+ },
181
+ {
182
+ "epoch": 550.0,
183
+ "learning_rate": 9.816e-06,
184
+ "loss": 0.0,
185
+ "step": 550
186
+ },
187
+ {
188
+ "epoch": 575.0,
189
+ "learning_rate": 9.716000000000002e-06,
190
+ "loss": 0.0,
191
+ "step": 575
192
+ },
193
+ {
194
+ "epoch": 600.0,
195
+ "learning_rate": 9.616e-06,
196
+ "loss": 0.0,
197
+ "step": 600
198
+ },
199
+ {
200
+ "epoch": 600.0,
201
+ "eval_loss": 4.931729793548584,
202
+ "eval_runtime": 212.8362,
203
+ "eval_samples_per_second": 0.235,
204
+ "eval_steps_per_second": 0.019,
205
+ "eval_wer": 164.0374331550802,
206
+ "step": 600
207
+ },
208
+ {
209
+ "epoch": 625.0,
210
+ "learning_rate": 9.516e-06,
211
+ "loss": 0.0,
212
+ "step": 625
213
+ },
214
+ {
215
+ "epoch": 650.0,
216
+ "learning_rate": 9.416000000000001e-06,
217
+ "loss": 0.0,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 675.0,
222
+ "learning_rate": 9.316e-06,
223
+ "loss": 0.0,
224
+ "step": 675
225
+ },
226
+ {
227
+ "epoch": 700.0,
228
+ "learning_rate": 9.216000000000001e-06,
229
+ "loss": 0.0,
230
+ "step": 700
231
+ },
232
+ {
233
+ "epoch": 700.0,
234
+ "eval_loss": 5.303104877471924,
235
+ "eval_runtime": 204.8283,
236
+ "eval_samples_per_second": 0.244,
237
+ "eval_steps_per_second": 0.02,
238
+ "eval_wer": 141.0427807486631,
239
+ "step": 700
240
+ },
241
+ {
242
+ "epoch": 725.0,
243
+ "learning_rate": 9.116e-06,
244
+ "loss": 0.0,
245
+ "step": 725
246
+ },
247
+ {
248
+ "epoch": 750.0,
249
+ "learning_rate": 9.016e-06,
250
+ "loss": 0.0,
251
+ "step": 750
252
+ },
253
+ {
254
+ "epoch": 775.0,
255
+ "learning_rate": 8.916e-06,
256
+ "loss": 0.0,
257
+ "step": 775
258
+ },
259
+ {
260
+ "epoch": 800.0,
261
+ "learning_rate": 8.816000000000002e-06,
262
+ "loss": 0.0,
263
+ "step": 800
264
+ },
265
+ {
266
+ "epoch": 800.0,
267
+ "eval_loss": 5.658377170562744,
268
+ "eval_runtime": 204.6203,
269
+ "eval_samples_per_second": 0.244,
270
+ "eval_steps_per_second": 0.02,
271
+ "eval_wer": 122.32620320855614,
272
+ "step": 800
273
+ },
274
+ {
275
+ "epoch": 825.0,
276
+ "learning_rate": 8.716000000000001e-06,
277
+ "loss": 0.0,
278
+ "step": 825
279
+ },
280
+ {
281
+ "epoch": 850.0,
282
+ "learning_rate": 8.616000000000002e-06,
283
+ "loss": 0.0,
284
+ "step": 850
285
+ },
286
+ {
287
+ "epoch": 875.0,
288
+ "learning_rate": 8.516000000000001e-06,
289
+ "loss": 0.0,
290
+ "step": 875
291
+ },
292
+ {
293
+ "epoch": 900.0,
294
+ "learning_rate": 8.416e-06,
295
+ "loss": 0.0,
296
+ "step": 900
297
+ },
298
+ {
299
+ "epoch": 900.0,
300
+ "eval_loss": 5.971085071563721,
301
+ "eval_runtime": 213.3134,
302
+ "eval_samples_per_second": 0.234,
303
+ "eval_steps_per_second": 0.019,
304
+ "eval_wer": 157.4866310160428,
305
+ "step": 900
306
+ },
307
+ {
308
+ "epoch": 925.0,
309
+ "learning_rate": 8.316000000000001e-06,
310
+ "loss": 0.0,
311
+ "step": 925
312
+ },
313
+ {
314
+ "epoch": 950.0,
315
+ "learning_rate": 8.216e-06,
316
+ "loss": 0.0,
317
+ "step": 950
318
+ },
319
+ {
320
+ "epoch": 975.0,
321
+ "learning_rate": 8.116e-06,
322
+ "loss": 0.0,
323
+ "step": 975
324
+ },
325
+ {
326
+ "epoch": 1000.0,
327
+ "learning_rate": 8.016e-06,
328
+ "loss": 0.0,
329
+ "step": 1000
330
+ },
331
+ {
332
+ "epoch": 1000.0,
333
+ "eval_loss": 6.246512413024902,
334
+ "eval_runtime": 213.4328,
335
+ "eval_samples_per_second": 0.234,
336
+ "eval_steps_per_second": 0.019,
337
+ "eval_wer": 141.1764705882353,
338
+ "step": 1000
339
+ },
340
+ {
341
+ "epoch": 1025.0,
342
+ "learning_rate": 7.916e-06,
343
+ "loss": 0.0,
344
+ "step": 1025
345
+ },
346
+ {
347
+ "epoch": 1050.0,
348
+ "learning_rate": 7.816000000000001e-06,
349
+ "loss": 0.0,
350
+ "step": 1050
351
+ },
352
+ {
353
+ "epoch": 1075.0,
354
+ "learning_rate": 7.716e-06,
355
+ "loss": 0.0,
356
+ "step": 1075
357
+ },
358
+ {
359
+ "epoch": 1100.0,
360
+ "learning_rate": 7.616000000000001e-06,
361
+ "loss": 0.0,
362
+ "step": 1100
363
+ },
364
+ {
365
+ "epoch": 1100.0,
366
+ "eval_loss": 6.483173847198486,
367
+ "eval_runtime": 213.8254,
368
+ "eval_samples_per_second": 0.234,
369
+ "eval_steps_per_second": 0.019,
370
+ "eval_wer": 169.6524064171123,
371
+ "step": 1100
372
+ },
373
+ {
374
+ "epoch": 1125.0,
375
+ "learning_rate": 7.516000000000001e-06,
376
+ "loss": 0.0,
377
+ "step": 1125
378
+ },
379
+ {
380
+ "epoch": 1150.0,
381
+ "learning_rate": 7.416000000000001e-06,
382
+ "loss": 0.0,
383
+ "step": 1150
384
+ },
385
+ {
386
+ "epoch": 1175.0,
387
+ "learning_rate": 7.316000000000001e-06,
388
+ "loss": 0.0,
389
+ "step": 1175
390
+ },
391
+ {
392
+ "epoch": 1200.0,
393
+ "learning_rate": 7.216000000000001e-06,
394
+ "loss": 0.0,
395
+ "step": 1200
396
+ },
397
+ {
398
+ "epoch": 1200.0,
399
+ "eval_loss": 6.6890459060668945,
400
+ "eval_runtime": 209.6765,
401
+ "eval_samples_per_second": 0.238,
402
+ "eval_steps_per_second": 0.019,
403
+ "eval_wer": 155.0802139037433,
404
+ "step": 1200
405
+ },
406
+ {
407
+ "epoch": 1225.0,
408
+ "learning_rate": 7.116000000000001e-06,
409
+ "loss": 0.0,
410
+ "step": 1225
411
+ },
412
+ {
413
+ "epoch": 1250.0,
414
+ "learning_rate": 7.016e-06,
415
+ "loss": 0.0,
416
+ "step": 1250
417
+ },
418
+ {
419
+ "epoch": 1275.0,
420
+ "learning_rate": 6.916e-06,
421
+ "loss": 0.0,
422
+ "step": 1275
423
+ },
424
+ {
425
+ "epoch": 1300.0,
426
+ "learning_rate": 6.8160000000000005e-06,
427
+ "loss": 0.0,
428
+ "step": 1300
429
+ },
430
+ {
431
+ "epoch": 1300.0,
432
+ "eval_loss": 6.867943286895752,
433
+ "eval_runtime": 209.9316,
434
+ "eval_samples_per_second": 0.238,
435
+ "eval_steps_per_second": 0.019,
436
+ "eval_wer": 159.75935828877004,
437
+ "step": 1300
438
+ },
439
+ {
440
+ "epoch": 1325.0,
441
+ "learning_rate": 6.716000000000001e-06,
442
+ "loss": 0.0,
443
+ "step": 1325
444
+ },
445
+ {
446
+ "epoch": 1350.0,
447
+ "learning_rate": 6.616e-06,
448
+ "loss": 0.0,
449
+ "step": 1350
450
+ },
451
+ {
452
+ "epoch": 1375.0,
453
+ "learning_rate": 6.516e-06,
454
+ "loss": 0.0,
455
+ "step": 1375
456
+ },
457
+ {
458
+ "epoch": 1400.0,
459
+ "learning_rate": 6.416e-06,
460
+ "loss": 0.0,
461
+ "step": 1400
462
+ },
463
+ {
464
+ "epoch": 1400.0,
465
+ "eval_loss": 7.025048732757568,
466
+ "eval_runtime": 212.7993,
467
+ "eval_samples_per_second": 0.235,
468
+ "eval_steps_per_second": 0.019,
469
+ "eval_wer": 155.0802139037433,
470
+ "step": 1400
471
+ },
472
+ {
473
+ "epoch": 1425.0,
474
+ "learning_rate": 6.316000000000001e-06,
475
+ "loss": 0.0,
476
+ "step": 1425
477
+ },
478
+ {
479
+ "epoch": 1450.0,
480
+ "learning_rate": 6.216000000000001e-06,
481
+ "loss": 0.0,
482
+ "step": 1450
483
+ },
484
+ {
485
+ "epoch": 1475.0,
486
+ "learning_rate": 6.116000000000001e-06,
487
+ "loss": 0.0,
488
+ "step": 1475
489
+ },
490
+ {
491
+ "epoch": 1500.0,
492
+ "learning_rate": 6.0160000000000005e-06,
493
+ "loss": 0.0,
494
+ "step": 1500
495
+ },
496
+ {
497
+ "epoch": 1500.0,
498
+ "eval_loss": 7.161492347717285,
499
+ "eval_runtime": 212.8178,
500
+ "eval_samples_per_second": 0.235,
501
+ "eval_steps_per_second": 0.019,
502
+ "eval_wer": 146.2566844919786,
503
+ "step": 1500
504
+ },
505
+ {
506
+ "epoch": 1525.0,
507
+ "learning_rate": 5.916000000000001e-06,
508
+ "loss": 0.0,
509
+ "step": 1525
510
+ },
511
+ {
512
+ "epoch": 1550.0,
513
+ "learning_rate": 5.816000000000001e-06,
514
+ "loss": 0.0,
515
+ "step": 1550
516
+ },
517
+ {
518
+ "epoch": 1575.0,
519
+ "learning_rate": 5.716000000000001e-06,
520
+ "loss": 0.0,
521
+ "step": 1575
522
+ },
523
+ {
524
+ "epoch": 1600.0,
525
+ "learning_rate": 5.616e-06,
526
+ "loss": 0.0,
527
+ "step": 1600
528
+ },
529
+ {
530
+ "epoch": 1600.0,
531
+ "eval_loss": 7.2876811027526855,
532
+ "eval_runtime": 212.9353,
533
+ "eval_samples_per_second": 0.235,
534
+ "eval_steps_per_second": 0.019,
535
+ "eval_wer": 143.04812834224597,
536
+ "step": 1600
537
+ },
538
+ {
539
+ "epoch": 1625.0,
540
+ "learning_rate": 5.516e-06,
541
+ "loss": 0.0,
542
+ "step": 1625
543
+ },
544
+ {
545
+ "epoch": 1650.0,
546
+ "learning_rate": 5.416e-06,
547
+ "loss": 0.0,
548
+ "step": 1650
549
+ },
550
+ {
551
+ "epoch": 1675.0,
552
+ "learning_rate": 5.3160000000000004e-06,
553
+ "loss": 0.0,
554
+ "step": 1675
555
+ },
556
+ {
557
+ "epoch": 1700.0,
558
+ "learning_rate": 5.216e-06,
559
+ "loss": 0.0,
560
+ "step": 1700
561
+ },
562
+ {
563
+ "epoch": 1700.0,
564
+ "eval_loss": 7.3986992835998535,
565
+ "eval_runtime": 212.9015,
566
+ "eval_samples_per_second": 0.235,
567
+ "eval_steps_per_second": 0.019,
568
+ "eval_wer": 148.52941176470588,
569
+ "step": 1700
570
+ },
571
+ {
572
+ "epoch": 1725.0,
573
+ "learning_rate": 5.116000000000001e-06,
574
+ "loss": 0.0,
575
+ "step": 1725
576
+ },
577
+ {
578
+ "epoch": 1750.0,
579
+ "learning_rate": 5.016000000000001e-06,
580
+ "loss": 0.0,
581
+ "step": 1750
582
+ },
583
+ {
584
+ "epoch": 1775.0,
585
+ "learning_rate": 4.916e-06,
586
+ "loss": 0.0,
587
+ "step": 1775
588
+ },
589
+ {
590
+ "epoch": 1800.0,
591
+ "learning_rate": 4.816e-06,
592
+ "loss": 0.0,
593
+ "step": 1800
594
+ },
595
+ {
596
+ "epoch": 1800.0,
597
+ "eval_loss": 7.501037120819092,
598
+ "eval_runtime": 213.1608,
599
+ "eval_samples_per_second": 0.235,
600
+ "eval_steps_per_second": 0.019,
601
+ "eval_wer": 142.5133689839572,
602
+ "step": 1800
603
+ },
604
+ {
605
+ "epoch": 1825.0,
606
+ "learning_rate": 4.716e-06,
607
+ "loss": 0.0,
608
+ "step": 1825
609
+ },
610
+ {
611
+ "epoch": 1850.0,
612
+ "learning_rate": 4.616e-06,
613
+ "loss": 0.0,
614
+ "step": 1850
615
+ },
616
+ {
617
+ "epoch": 1875.0,
618
+ "learning_rate": 4.5160000000000005e-06,
619
+ "loss": 0.0,
620
+ "step": 1875
621
+ },
622
+ {
623
+ "epoch": 1900.0,
624
+ "learning_rate": 4.416000000000001e-06,
625
+ "loss": 0.0,
626
+ "step": 1900
627
+ },
628
+ {
629
+ "epoch": 1900.0,
630
+ "eval_loss": 7.584920883178711,
631
+ "eval_runtime": 213.8853,
632
+ "eval_samples_per_second": 0.234,
633
+ "eval_steps_per_second": 0.019,
634
+ "eval_wer": 136.76470588235296,
635
+ "step": 1900
636
+ },
637
+ {
638
+ "epoch": 1925.0,
639
+ "learning_rate": 4.316e-06,
640
+ "loss": 0.0,
641
+ "step": 1925
642
+ },
643
+ {
644
+ "epoch": 1950.0,
645
+ "learning_rate": 4.216e-06,
646
+ "loss": 0.0,
647
+ "step": 1950
648
+ },
649
+ {
650
+ "epoch": 1975.0,
651
+ "learning_rate": 4.116000000000001e-06,
652
+ "loss": 0.0,
653
+ "step": 1975
654
+ },
655
+ {
656
+ "epoch": 2000.0,
657
+ "learning_rate": 4.016e-06,
658
+ "loss": 0.0,
659
+ "step": 2000
660
+ },
661
+ {
662
+ "epoch": 2000.0,
663
+ "eval_loss": 7.668895244598389,
664
+ "eval_runtime": 212.6893,
665
+ "eval_samples_per_second": 0.235,
666
+ "eval_steps_per_second": 0.019,
667
+ "eval_wer": 148.26203208556151,
668
+ "step": 2000
669
+ },
670
+ {
671
+ "epoch": 2025.0,
672
+ "learning_rate": 3.916e-06,
673
+ "loss": 0.0,
674
+ "step": 2025
675
+ },
676
+ {
677
+ "epoch": 2050.0,
678
+ "learning_rate": 3.816e-06,
679
+ "loss": 0.0,
680
+ "step": 2050
681
+ },
682
+ {
683
+ "epoch": 2075.0,
684
+ "learning_rate": 3.716e-06,
685
+ "loss": 0.0,
686
+ "step": 2075
687
+ },
688
+ {
689
+ "epoch": 2100.0,
690
+ "learning_rate": 3.616e-06,
691
+ "loss": 0.0,
692
+ "step": 2100
693
+ },
694
+ {
695
+ "epoch": 2100.0,
696
+ "eval_loss": 7.695452690124512,
697
+ "eval_runtime": 213.8269,
698
+ "eval_samples_per_second": 0.234,
699
+ "eval_steps_per_second": 0.019,
700
+ "eval_wer": 165.37433155080214,
701
+ "step": 2100
702
+ },
703
+ {
704
+ "epoch": 2125.0,
705
+ "learning_rate": 3.5160000000000007e-06,
706
+ "loss": 0.0,
707
+ "step": 2125
708
+ },
709
+ {
710
+ "epoch": 2150.0,
711
+ "learning_rate": 3.4160000000000004e-06,
712
+ "loss": 0.0,
713
+ "step": 2150
714
+ },
715
+ {
716
+ "epoch": 2175.0,
717
+ "learning_rate": 3.3160000000000005e-06,
718
+ "loss": 0.0,
719
+ "step": 2175
720
+ },
721
+ {
722
+ "epoch": 2200.0,
723
+ "learning_rate": 3.216e-06,
724
+ "loss": 0.0,
725
+ "step": 2200
726
+ },
727
+ {
728
+ "epoch": 2200.0,
729
+ "eval_loss": 7.724720001220703,
730
+ "eval_runtime": 213.0213,
731
+ "eval_samples_per_second": 0.235,
732
+ "eval_steps_per_second": 0.019,
733
+ "eval_wer": 162.96791443850267,
734
+ "step": 2200
735
+ },
736
+ {
737
+ "epoch": 2225.0,
738
+ "learning_rate": 3.1160000000000003e-06,
739
+ "loss": 0.0,
740
+ "step": 2225
741
+ },
742
+ {
743
+ "epoch": 2250.0,
744
+ "learning_rate": 3.016e-06,
745
+ "loss": 0.0,
746
+ "step": 2250
747
+ },
748
+ {
749
+ "epoch": 2275.0,
750
+ "learning_rate": 2.9160000000000005e-06,
751
+ "loss": 0.0,
752
+ "step": 2275
753
+ },
754
+ {
755
+ "epoch": 2300.0,
756
+ "learning_rate": 2.8160000000000002e-06,
757
+ "loss": 0.0,
758
+ "step": 2300
759
+ },
760
+ {
761
+ "epoch": 2300.0,
762
+ "eval_loss": 7.755704402923584,
763
+ "eval_runtime": 212.9136,
764
+ "eval_samples_per_second": 0.235,
765
+ "eval_steps_per_second": 0.019,
766
+ "eval_wer": 161.63101604278074,
767
+ "step": 2300
768
+ },
769
+ {
770
+ "epoch": 2325.0,
771
+ "learning_rate": 2.7160000000000003e-06,
772
+ "loss": 0.0,
773
+ "step": 2325
774
+ },
775
+ {
776
+ "epoch": 2350.0,
777
+ "learning_rate": 2.616e-06,
778
+ "loss": 0.0,
779
+ "step": 2350
780
+ },
781
+ {
782
+ "epoch": 2375.0,
783
+ "learning_rate": 2.516e-06,
784
+ "loss": 0.0,
785
+ "step": 2375
786
+ },
787
+ {
788
+ "epoch": 2400.0,
789
+ "learning_rate": 2.4160000000000002e-06,
790
+ "loss": 0.0,
791
+ "step": 2400
792
+ },
793
+ {
794
+ "epoch": 2400.0,
795
+ "eval_loss": 7.784246444702148,
796
+ "eval_runtime": 213.1347,
797
+ "eval_samples_per_second": 0.235,
798
+ "eval_steps_per_second": 0.019,
799
+ "eval_wer": 162.29946524064172,
800
+ "step": 2400
801
+ },
802
+ {
803
+ "epoch": 2425.0,
804
+ "learning_rate": 2.3160000000000004e-06,
805
+ "loss": 0.0,
806
+ "step": 2425
807
+ },
808
+ {
809
+ "epoch": 2450.0,
810
+ "learning_rate": 2.216e-06,
811
+ "loss": 0.0,
812
+ "step": 2450
813
+ },
814
+ {
815
+ "epoch": 2475.0,
816
+ "learning_rate": 2.116e-06,
817
+ "loss": 0.0,
818
+ "step": 2475
819
+ },
820
+ {
821
+ "epoch": 2500.0,
822
+ "learning_rate": 2.0160000000000003e-06,
823
+ "loss": 0.0,
824
+ "step": 2500
825
+ },
826
+ {
827
+ "epoch": 2500.0,
828
+ "eval_loss": 7.807443141937256,
829
+ "eval_runtime": 212.982,
830
+ "eval_samples_per_second": 0.235,
831
+ "eval_steps_per_second": 0.019,
832
+ "eval_wer": 150.93582887700535,
833
+ "step": 2500
834
+ },
835
+ {
836
+ "epoch": 2525.0,
837
+ "learning_rate": 1.916e-06,
838
+ "loss": 0.0,
839
+ "step": 2525
840
+ },
841
+ {
842
+ "epoch": 2550.0,
843
+ "learning_rate": 1.8160000000000003e-06,
844
+ "loss": 0.0,
845
+ "step": 2550
846
+ },
847
+ {
848
+ "epoch": 2575.0,
849
+ "learning_rate": 1.7160000000000002e-06,
850
+ "loss": 0.0,
851
+ "step": 2575
852
+ },
853
+ {
854
+ "epoch": 2600.0,
855
+ "learning_rate": 1.616e-06,
856
+ "loss": 0.0,
857
+ "step": 2600
858
+ },
859
+ {
860
+ "epoch": 2600.0,
861
+ "eval_loss": 7.8286895751953125,
862
+ "eval_runtime": 213.1069,
863
+ "eval_samples_per_second": 0.235,
864
+ "eval_steps_per_second": 0.019,
865
+ "eval_wer": 154.81283422459893,
866
+ "step": 2600
867
+ },
868
+ {
869
+ "epoch": 2625.0,
870
+ "learning_rate": 1.5160000000000002e-06,
871
+ "loss": 0.0,
872
+ "step": 2625
873
+ },
874
+ {
875
+ "epoch": 2650.0,
876
+ "learning_rate": 1.416e-06,
877
+ "loss": 0.0,
878
+ "step": 2650
879
+ },
880
+ {
881
+ "epoch": 2675.0,
882
+ "learning_rate": 1.316e-06,
883
+ "loss": 0.0,
884
+ "step": 2675
885
+ },
886
+ {
887
+ "epoch": 2700.0,
888
+ "learning_rate": 1.216e-06,
889
+ "loss": 0.0,
890
+ "step": 2700
891
+ },
892
+ {
893
+ "epoch": 2700.0,
894
+ "eval_loss": 7.843414783477783,
895
+ "eval_runtime": 212.8223,
896
+ "eval_samples_per_second": 0.235,
897
+ "eval_steps_per_second": 0.019,
898
+ "eval_wer": 155.4812834224599,
899
+ "step": 2700
900
+ },
901
+ {
902
+ "epoch": 2725.0,
903
+ "learning_rate": 1.1160000000000002e-06,
904
+ "loss": 0.0,
905
+ "step": 2725
906
+ },
907
+ {
908
+ "epoch": 2750.0,
909
+ "learning_rate": 1.016e-06,
910
+ "loss": 0.0,
911
+ "step": 2750
912
+ },
913
+ {
914
+ "epoch": 2775.0,
915
+ "learning_rate": 9.160000000000001e-07,
916
+ "loss": 0.0,
917
+ "step": 2775
918
+ },
919
+ {
920
+ "epoch": 2800.0,
921
+ "learning_rate": 8.160000000000001e-07,
922
+ "loss": 0.0,
923
+ "step": 2800
924
+ },
925
+ {
926
+ "epoch": 2800.0,
927
+ "eval_loss": 7.856691837310791,
928
+ "eval_runtime": 212.6712,
929
+ "eval_samples_per_second": 0.235,
930
+ "eval_steps_per_second": 0.019,
931
+ "eval_wer": 154.41176470588235,
932
+ "step": 2800
933
+ },
934
+ {
935
+ "epoch": 2825.0,
936
+ "learning_rate": 7.16e-07,
937
+ "loss": 0.0,
938
+ "step": 2825
939
+ },
940
+ {
941
+ "epoch": 2850.0,
942
+ "learning_rate": 6.160000000000001e-07,
943
+ "loss": 0.0,
944
+ "step": 2850
945
+ },
946
+ {
947
+ "epoch": 2875.0,
948
+ "learning_rate": 5.16e-07,
949
+ "loss": 0.0,
950
+ "step": 2875
951
+ },
952
+ {
953
+ "epoch": 2900.0,
954
+ "learning_rate": 4.16e-07,
955
+ "loss": 0.0,
956
+ "step": 2900
957
+ },
958
+ {
959
+ "epoch": 2900.0,
960
+ "eval_loss": 7.863497257232666,
961
+ "eval_runtime": 212.9291,
962
+ "eval_samples_per_second": 0.235,
963
+ "eval_steps_per_second": 0.019,
964
+ "eval_wer": 154.41176470588235,
965
+ "step": 2900
966
+ },
967
+ {
968
+ "epoch": 2925.0,
969
+ "learning_rate": 3.160000000000001e-07,
970
+ "loss": 0.0,
971
+ "step": 2925
972
+ },
973
+ {
974
+ "epoch": 2950.0,
975
+ "learning_rate": 2.1600000000000003e-07,
976
+ "loss": 0.0,
977
+ "step": 2950
978
+ },
979
+ {
980
+ "epoch": 2975.0,
981
+ "learning_rate": 1.16e-07,
982
+ "loss": 0.0,
983
+ "step": 2975
984
+ },
985
+ {
986
+ "epoch": 3000.0,
987
+ "learning_rate": 1.6e-08,
988
+ "loss": 0.0,
989
+ "step": 3000
990
+ },
991
+ {
992
+ "epoch": 3000.0,
993
+ "eval_loss": 7.86700439453125,
994
+ "eval_runtime": 212.9038,
995
+ "eval_samples_per_second": 0.235,
996
+ "eval_steps_per_second": 0.019,
997
+ "eval_wer": 154.41176470588235,
998
+ "step": 3000
999
+ },
1000
+ {
1001
+ "epoch": 3000.0,
1002
+ "step": 3000,
1003
+ "total_flos": 3.06181472256e+18,
1004
+ "train_loss": 0.047578999360693465,
1005
+ "train_runtime": 7446.2425,
1006
+ "train_samples_per_second": 12.892,
1007
+ "train_steps_per_second": 0.403
1008
+ }
1009
+ ],
1010
+ "max_steps": 3000,
1011
+ "num_train_epochs": 3000,
1012
+ "total_flos": 3.06181472256e+18,
1013
+ "trial_name": null,
1014
+ "trial_params": null
1015
+ }