shpotes commited on
Commit
4ce8f8a
1 Parent(s): 4bbc5ca

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.22779151797294617,
4
+ "eval_runtime": 337.5199,
5
+ "eval_samples": 6463,
6
+ "eval_samples_per_second": 19.149,
7
+ "eval_steps_per_second": 0.267,
8
+ "eval_wer": 0.17871523648578164,
9
+ "train_loss": 0.14547648023750823,
10
+ "train_runtime": 96097.4367,
11
+ "train_samples": 17030,
12
+ "train_samples_per_second": 17.722,
13
+ "train_steps_per_second": 0.123
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.22779151797294617,
4
+ "eval_runtime": 337.5199,
5
+ "eval_samples": 6463,
6
+ "eval_samples_per_second": 19.149,
7
+ "eval_steps_per_second": 0.267,
8
+ "eval_wer": 0.17871523648578164
9
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f33aae51115a1b6822722fbf80b33b760bcffd0e559d759985589d6cefef951e
3
  size 1262058993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a606acfb7fb479ef93ee010de02a0920109fad8b3b972f6ea1ff6eda4a1e68d6
3
  size 1262058993
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "train_loss": 0.14547648023750823,
4
+ "train_runtime": 96097.4367,
5
+ "train_samples": 17030,
6
+ "train_samples_per_second": 17.722,
7
+ "train_steps_per_second": 0.123
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 99.9957805907173,
5
+ "global_step": 11800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.84,
12
+ "learning_rate": 0.00019800000000000002,
13
+ "loss": 5.6106,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 1.69,
18
+ "learning_rate": 0.000398,
19
+ "loss": 2.7044,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 2.54,
24
+ "learning_rate": 0.000598,
25
+ "loss": 0.8324,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 3.39,
30
+ "learning_rate": 0.0007980000000000001,
31
+ "loss": 0.345,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 4.24,
36
+ "learning_rate": 0.000998,
37
+ "loss": 0.2548,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 4.24,
42
+ "eval_loss": 0.2469930201768875,
43
+ "eval_runtime": 294.3209,
44
+ "eval_samples_per_second": 21.959,
45
+ "eval_steps_per_second": 0.306,
46
+ "eval_wer": 0.36629738582545746,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 5.08,
51
+ "learning_rate": 0.0009998106236074637,
52
+ "loss": 0.2203,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 5.93,
57
+ "learning_rate": 0.0009992349703990264,
58
+ "loss": 0.1774,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 6.78,
63
+ "learning_rate": 0.0009982734659650546,
64
+ "loss": 0.1643,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 7.62,
69
+ "learning_rate": 0.0009969268534379132,
70
+ "loss": 0.1516,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 8.47,
75
+ "learning_rate": 0.00099519617359424,
76
+ "loss": 0.1435,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 8.47,
81
+ "eval_loss": 0.20002000033855438,
82
+ "eval_runtime": 295.0975,
83
+ "eval_samples_per_second": 21.901,
84
+ "eval_steps_per_second": 0.305,
85
+ "eval_wer": 0.2791095533162254,
86
+ "step": 1000
87
+ },
88
+ {
89
+ "epoch": 9.32,
90
+ "learning_rate": 0.0009930827640505444,
91
+ "loss": 0.1393,
92
+ "step": 1100
93
+ },
94
+ {
95
+ "epoch": 10.17,
96
+ "learning_rate": 0.0009905882582293853,
97
+ "loss": 0.1323,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 11.02,
102
+ "learning_rate": 0.0009877145840969208,
103
+ "loss": 0.1317,
104
+ "step": 1300
105
+ },
106
+ {
107
+ "epoch": 11.86,
108
+ "learning_rate": 0.0009844639626728146,
109
+ "loss": 0.1156,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 12.71,
114
+ "learning_rate": 0.000980838906313641,
115
+ "loss": 0.1158,
116
+ "step": 1500
117
+ },
118
+ {
119
+ "epoch": 12.71,
120
+ "eval_loss": 0.20303700864315033,
121
+ "eval_runtime": 301.1246,
122
+ "eval_samples_per_second": 21.463,
123
+ "eval_steps_per_second": 0.299,
124
+ "eval_wer": 0.26521457929106423,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 13.56,
129
+ "learning_rate": 0.0009768422167711244,
130
+ "loss": 0.1174,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 14.41,
135
+ "learning_rate": 0.0009724769830267044,
136
+ "loss": 0.1118,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 15.25,
141
+ "learning_rate": 0.0009677465789041117,
142
+ "loss": 0.1115,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 16.1,
147
+ "learning_rate": 0.0009626546604617881,
148
+ "loss": 0.1149,
149
+ "step": 1900
150
+ },
151
+ {
152
+ "epoch": 16.95,
153
+ "learning_rate": 0.0009572051631671743,
154
+ "loss": 0.1094,
155
+ "step": 2000
156
+ },
157
+ {
158
+ "epoch": 16.95,
159
+ "eval_loss": 0.20957913994789124,
160
+ "eval_runtime": 294.9247,
161
+ "eval_samples_per_second": 21.914,
162
+ "eval_steps_per_second": 0.305,
163
+ "eval_wer": 0.26045774133650457,
164
+ "step": 2000
165
+ },
166
+ {
167
+ "epoch": 17.79,
168
+ "learning_rate": 0.0009514022988550459,
169
+ "loss": 0.1049,
170
+ "step": 2100
171
+ },
172
+ {
173
+ "epoch": 18.64,
174
+ "learning_rate": 0.0009452505524722506,
175
+ "loss": 0.1031,
176
+ "step": 2200
177
+ },
178
+ {
179
+ "epoch": 19.49,
180
+ "learning_rate": 0.0009387546786113578,
181
+ "loss": 0.0991,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 20.34,
186
+ "learning_rate": 0.0009319196978359077,
187
+ "loss": 0.1015,
188
+ "step": 2400
189
+ },
190
+ {
191
+ "epoch": 21.19,
192
+ "learning_rate": 0.0009247508928000911,
193
+ "loss": 0.1004,
194
+ "step": 2500
195
+ },
196
+ {
197
+ "epoch": 21.19,
198
+ "eval_loss": 0.21501973271369934,
199
+ "eval_runtime": 295.0309,
200
+ "eval_samples_per_second": 21.906,
201
+ "eval_steps_per_second": 0.305,
202
+ "eval_wer": 0.2477311134756212,
203
+ "step": 2500
204
+ },
205
+ {
206
+ "epoch": 22.03,
207
+ "learning_rate": 0.0009172538041658665,
208
+ "loss": 0.0949,
209
+ "step": 2600
210
+ },
211
+ {
212
+ "epoch": 22.88,
213
+ "learning_rate": 0.0009094342263206646,
214
+ "loss": 0.0869,
215
+ "step": 2700
216
+ },
217
+ {
218
+ "epoch": 23.73,
219
+ "learning_rate": 0.0009012982028989929,
220
+ "loss": 0.0971,
221
+ "step": 2800
222
+ },
223
+ {
224
+ "epoch": 24.57,
225
+ "learning_rate": 0.000892852022111403,
226
+ "loss": 0.0907,
227
+ "step": 2900
228
+ },
229
+ {
230
+ "epoch": 25.42,
231
+ "learning_rate": 0.0008841022118844243,
232
+ "loss": 0.0945,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 25.42,
237
+ "eval_loss": 0.20718325674533844,
238
+ "eval_runtime": 295.2378,
239
+ "eval_samples_per_second": 21.891,
240
+ "eval_steps_per_second": 0.305,
241
+ "eval_wer": 0.2369030481316893,
242
+ "step": 3000
243
+ },
244
+ {
245
+ "epoch": 26.27,
246
+ "learning_rate": 0.0008750555348152298,
247
+ "loss": 0.0884,
248
+ "step": 3100
249
+ },
250
+ {
251
+ "epoch": 27.12,
252
+ "learning_rate": 0.0008657189829449246,
253
+ "loss": 0.0894,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 27.96,
258
+ "learning_rate": 0.0008560997723545024,
259
+ "loss": 0.0851,
260
+ "step": 3300
261
+ },
262
+ {
263
+ "epoch": 28.81,
264
+ "learning_rate": 0.0008462053375876455,
265
+ "loss": 0.09,
266
+ "step": 3400
267
+ },
268
+ {
269
+ "epoch": 29.66,
270
+ "learning_rate": 0.0008360433259046764,
271
+ "loss": 0.0844,
272
+ "step": 3500
273
+ },
274
+ {
275
+ "epoch": 29.66,
276
+ "eval_loss": 0.1981162428855896,
277
+ "eval_runtime": 296.5443,
278
+ "eval_samples_per_second": 21.794,
279
+ "eval_steps_per_second": 0.303,
280
+ "eval_wer": 0.23275124658363064,
281
+ "step": 3500
282
+ },
283
+ {
284
+ "epoch": 30.51,
285
+ "learning_rate": 0.0008256215913721052,
286
+ "loss": 0.084,
287
+ "step": 3600
288
+ },
289
+ {
290
+ "epoch": 31.35,
291
+ "learning_rate": 0.0008149481887923395,
292
+ "loss": 0.081,
293
+ "step": 3700
294
+ },
295
+ {
296
+ "epoch": 32.2,
297
+ "learning_rate": 0.0008040313674782489,
298
+ "loss": 0.0812,
299
+ "step": 3800
300
+ },
301
+ {
302
+ "epoch": 33.05,
303
+ "learning_rate": 0.0007928795648773933,
304
+ "loss": 0.0836,
305
+ "step": 3900
306
+ },
307
+ {
308
+ "epoch": 33.89,
309
+ "learning_rate": 0.0007815014000508474,
310
+ "loss": 0.0877,
311
+ "step": 4000
312
+ },
313
+ {
314
+ "epoch": 33.89,
315
+ "eval_loss": 0.20411191880702972,
316
+ "eval_runtime": 294.5281,
317
+ "eval_samples_per_second": 21.944,
318
+ "eval_steps_per_second": 0.306,
319
+ "eval_wer": 0.24249441906073313,
320
+ "step": 4000
321
+ },
322
+ {
323
+ "epoch": 34.74,
324
+ "learning_rate": 0.0007699056670116568,
325
+ "loss": 0.0842,
326
+ "step": 4100
327
+ },
328
+ {
329
+ "epoch": 35.59,
330
+ "learning_rate": 0.0007581013279280784,
331
+ "loss": 0.0802,
332
+ "step": 4200
333
+ },
334
+ {
335
+ "epoch": 36.44,
336
+ "learning_rate": 0.0007460975061968531,
337
+ "loss": 0.0812,
338
+ "step": 4300
339
+ },
340
+ {
341
+ "epoch": 37.29,
342
+ "learning_rate": 0.0007339034793918703,
343
+ "loss": 0.0754,
344
+ "step": 4400
345
+ },
346
+ {
347
+ "epoch": 38.14,
348
+ "learning_rate": 0.00072152867209367,
349
+ "loss": 0.0741,
350
+ "step": 4500
351
+ },
352
+ {
353
+ "epoch": 38.14,
354
+ "eval_loss": 0.235269233584404,
355
+ "eval_runtime": 301.8823,
356
+ "eval_samples_per_second": 21.409,
357
+ "eval_steps_per_second": 0.298,
358
+ "eval_wer": 0.24213974254657736,
359
+ "step": 4500
360
+ },
361
+ {
362
+ "epoch": 38.98,
363
+ "learning_rate": 0.0007089826486053256,
364
+ "loss": 0.0752,
365
+ "step": 4600
366
+ },
367
+ {
368
+ "epoch": 39.83,
369
+ "learning_rate": 0.0006962751055603378,
370
+ "loss": 0.0725,
371
+ "step": 4700
372
+ },
373
+ {
374
+ "epoch": 40.68,
375
+ "learning_rate": 0.0006834158644282511,
376
+ "loss": 0.0712,
377
+ "step": 4800
378
+ },
379
+ {
380
+ "epoch": 41.52,
381
+ "learning_rate": 0.000670414863923788,
382
+ "loss": 0.0673,
383
+ "step": 4900
384
+ },
385
+ {
386
+ "epoch": 42.37,
387
+ "learning_rate": 0.0006572821523253649,
388
+ "loss": 0.0676,
389
+ "step": 5000
390
+ },
391
+ {
392
+ "epoch": 42.37,
393
+ "eval_loss": 0.20924021303653717,
394
+ "eval_runtime": 294.7332,
395
+ "eval_samples_per_second": 21.928,
396
+ "eval_steps_per_second": 0.305,
397
+ "eval_wer": 0.22127641818447352,
398
+ "step": 5000
399
+ },
400
+ {
401
+ "epoch": 43.22,
402
+ "learning_rate": 0.000644027879708929,
403
+ "loss": 0.0691,
404
+ "step": 5100
405
+ },
406
+ {
407
+ "epoch": 44.07,
408
+ "learning_rate": 0.0006306622901031152,
409
+ "loss": 0.0621,
410
+ "step": 5200
411
+ },
412
+ {
413
+ "epoch": 44.91,
414
+ "learning_rate": 0.000617195713571792,
415
+ "loss": 0.0648,
416
+ "step": 5300
417
+ },
418
+ {
419
+ "epoch": 45.76,
420
+ "learning_rate": 0.0006036385582301083,
421
+ "loss": 0.0682,
422
+ "step": 5400
423
+ },
424
+ {
425
+ "epoch": 46.61,
426
+ "learning_rate": 0.0005900013022002183,
427
+ "loss": 0.0623,
428
+ "step": 5500
429
+ },
430
+ {
431
+ "epoch": 46.61,
432
+ "eval_loss": 0.22174060344696045,
433
+ "eval_runtime": 298.483,
434
+ "eval_samples_per_second": 21.653,
435
+ "eval_steps_per_second": 0.302,
436
+ "eval_wer": 0.2250318165696522,
437
+ "step": 5500
438
+ },
439
+ {
440
+ "epoch": 47.46,
441
+ "learning_rate": 0.0005762944855128968,
442
+ "loss": 0.0702,
443
+ "step": 5600
444
+ },
445
+ {
446
+ "epoch": 48.3,
447
+ "learning_rate": 0.0005625287019613087,
448
+ "loss": 0.0597,
449
+ "step": 5700
450
+ },
451
+ {
452
+ "epoch": 49.15,
453
+ "learning_rate": 0.0005487145909132243,
454
+ "loss": 0.0557,
455
+ "step": 5800
456
+ },
457
+ {
458
+ "epoch": 50.0,
459
+ "learning_rate": 0.0005348628290880112,
460
+ "loss": 0.0547,
461
+ "step": 5900
462
+ },
463
+ {
464
+ "epoch": 50.84,
465
+ "learning_rate": 0.0005209841223047587,
466
+ "loss": 0.0574,
467
+ "step": 6000
468
+ },
469
+ {
470
+ "epoch": 50.84,
471
+ "eval_loss": 0.21524770557880402,
472
+ "eval_runtime": 297.0196,
473
+ "eval_samples_per_second": 21.76,
474
+ "eval_steps_per_second": 0.303,
475
+ "eval_wer": 0.2179382862865369,
476
+ "step": 6000
477
+ },
478
+ {
479
+ "epoch": 51.69,
480
+ "learning_rate": 0.00050708919720791,
481
+ "loss": 0.0571,
482
+ "step": 6100
483
+ },
484
+ {
485
+ "epoch": 52.54,
486
+ "learning_rate": 0.0004931887929768004,
487
+ "loss": 0.0557,
488
+ "step": 6200
489
+ },
490
+ {
491
+ "epoch": 53.39,
492
+ "learning_rate": 0.0004792936530255048,
493
+ "loss": 0.0513,
494
+ "step": 6300
495
+ },
496
+ {
497
+ "epoch": 54.24,
498
+ "learning_rate": 0.00046541451669941626,
499
+ "loss": 0.0542,
500
+ "step": 6400
501
+ },
502
+ {
503
+ "epoch": 55.08,
504
+ "learning_rate": 0.00045156211097496503,
505
+ "loss": 0.0583,
506
+ "step": 6500
507
+ },
508
+ {
509
+ "epoch": 55.08,
510
+ "eval_loss": 0.22067895531654358,
511
+ "eval_runtime": 297.4094,
512
+ "eval_samples_per_second": 21.731,
513
+ "eval_steps_per_second": 0.303,
514
+ "eval_wer": 0.21860591266612422,
515
+ "step": 6500
516
+ },
517
+ {
518
+ "epoch": 55.93,
519
+ "learning_rate": 0.00043774714216890067,
520
+ "loss": 0.0516,
521
+ "step": 6600
522
+ },
523
+ {
524
+ "epoch": 56.78,
525
+ "learning_rate": 0.000423980287663539,
526
+ "loss": 0.0552,
527
+ "step": 6700
528
+ },
529
+ {
530
+ "epoch": 57.62,
531
+ "learning_rate": 0.00041027218765437467,
532
+ "loss": 0.0545,
533
+ "step": 6800
534
+ },
535
+ {
536
+ "epoch": 58.47,
537
+ "learning_rate": 0.0003966334369264325,
538
+ "loss": 0.0497,
539
+ "step": 6900
540
+ },
541
+ {
542
+ "epoch": 59.32,
543
+ "learning_rate": 0.00038307457666571714,
544
+ "loss": 0.0488,
545
+ "step": 7000
546
+ },
547
+ {
548
+ "epoch": 59.32,
549
+ "eval_loss": 0.22246450185775757,
550
+ "eval_runtime": 291.806,
551
+ "eval_samples_per_second": 22.148,
552
+ "eval_steps_per_second": 0.308,
553
+ "eval_wer": 0.21593540714777493,
554
+ "step": 7000
555
+ },
556
+ {
557
+ "epoch": 60.17,
558
+ "learning_rate": 0.000369606086312089,
559
+ "loss": 0.0467,
560
+ "step": 7100
561
+ },
562
+ {
563
+ "epoch": 61.02,
564
+ "learning_rate": 0.00035623837545986186,
565
+ "loss": 0.0496,
566
+ "step": 7200
567
+ },
568
+ {
569
+ "epoch": 61.86,
570
+ "learning_rate": 0.0003429817758123833,
571
+ "loss": 0.0477,
572
+ "step": 7300
573
+ },
574
+ {
575
+ "epoch": 62.71,
576
+ "learning_rate": 0.00032984653319681614,
577
+ "loss": 0.0491,
578
+ "step": 7400
579
+ },
580
+ {
581
+ "epoch": 63.56,
582
+ "learning_rate": 0.0003168427996452925,
583
+ "loss": 0.0456,
584
+ "step": 7500
585
+ },
586
+ {
587
+ "epoch": 63.56,
588
+ "eval_loss": 0.22932520508766174,
589
+ "eval_runtime": 293.0052,
590
+ "eval_samples_per_second": 22.058,
591
+ "eval_steps_per_second": 0.307,
592
+ "eval_wer": 0.20310446266508106,
593
+ "step": 7500
594
+ },
595
+ {
596
+ "epoch": 64.41,
597
+ "learning_rate": 0.0003039806255485599,
598
+ "loss": 0.042,
599
+ "step": 7600
600
+ },
601
+ {
602
+ "epoch": 65.25,
603
+ "learning_rate": 0.0002912699518881855,
604
+ "loss": 0.0431,
605
+ "step": 7700
606
+ },
607
+ {
608
+ "epoch": 66.1,
609
+ "learning_rate": 0.00027872060255331935,
610
+ "loss": 0.042,
611
+ "step": 7800
612
+ },
613
+ {
614
+ "epoch": 66.95,
615
+ "learning_rate": 0.0002663422767479589,
616
+ "loss": 0.0422,
617
+ "step": 7900
618
+ },
619
+ {
620
+ "epoch": 67.79,
621
+ "learning_rate": 0.0002541445414945791,
622
+ "loss": 0.041,
623
+ "step": 8000
624
+ },
625
+ {
626
+ "epoch": 67.79,
627
+ "eval_loss": 0.22772204875946045,
628
+ "eval_runtime": 292.1439,
629
+ "eval_samples_per_second": 22.123,
630
+ "eval_steps_per_second": 0.308,
631
+ "eval_wer": 0.20128935344557802,
632
+ "step": 8000
633
+ },
634
+ {
635
+ "epoch": 68.64,
636
+ "learning_rate": 0.000242136824239924,
637
+ "loss": 0.042,
638
+ "step": 8100
639
+ },
640
+ {
641
+ "epoch": 69.49,
642
+ "learning_rate": 0.00023032840556867502,
643
+ "loss": 0.0404,
644
+ "step": 8200
645
+ },
646
+ {
647
+ "epoch": 70.34,
648
+ "learning_rate": 0.00021872841203062537,
649
+ "loss": 0.0413,
650
+ "step": 8300
651
+ },
652
+ {
653
+ "epoch": 71.19,
654
+ "learning_rate": 0.0002074585300038035,
655
+ "loss": 0.0385,
656
+ "step": 8400
657
+ },
658
+ {
659
+ "epoch": 72.03,
660
+ "learning_rate": 0.00019629981023833903,
661
+ "loss": 0.0379,
662
+ "step": 8500
663
+ },
664
+ {
665
+ "epoch": 72.03,
666
+ "eval_loss": 0.22867247462272644,
667
+ "eval_runtime": 297.2969,
668
+ "eval_samples_per_second": 21.739,
669
+ "eval_steps_per_second": 0.303,
670
+ "eval_wer": 0.199077841063195,
671
+ "step": 8500
672
+ },
673
+ {
674
+ "epoch": 72.88,
675
+ "learning_rate": 0.00018537581579734625,
676
+ "loss": 0.037,
677
+ "step": 8600
678
+ },
679
+ {
680
+ "epoch": 73.73,
681
+ "learning_rate": 0.00017469498967238802,
682
+ "loss": 0.0362,
683
+ "step": 8700
684
+ },
685
+ {
686
+ "epoch": 74.57,
687
+ "learning_rate": 0.00016426558691388015,
688
+ "loss": 0.0348,
689
+ "step": 8800
690
+ },
691
+ {
692
+ "epoch": 75.42,
693
+ "learning_rate": 0.0001540956682508869,
694
+ "loss": 0.0367,
695
+ "step": 8900
696
+ },
697
+ {
698
+ "epoch": 76.27,
699
+ "learning_rate": 0.00014419309386110528,
700
+ "loss": 0.0381,
701
+ "step": 9000
702
+ },
703
+ {
704
+ "epoch": 76.27,
705
+ "eval_loss": 0.22330239415168762,
706
+ "eval_runtime": 290.8488,
707
+ "eval_samples_per_second": 22.221,
708
+ "eval_steps_per_second": 0.309,
709
+ "eval_wer": 0.19536416932674053,
710
+ "step": 9000
711
+ },
712
+ {
713
+ "epoch": 77.12,
714
+ "learning_rate": 0.00013456551729585032,
715
+ "loss": 0.0343,
716
+ "step": 9100
717
+ },
718
+ {
719
+ "epoch": 77.96,
720
+ "learning_rate": 0.00012522037956474042,
721
+ "loss": 0.033,
722
+ "step": 9200
723
+ },
724
+ {
725
+ "epoch": 78.81,
726
+ "learning_rate": 0.00011616490338465268,
727
+ "loss": 0.032,
728
+ "step": 9300
729
+ },
730
+ {
731
+ "epoch": 79.66,
732
+ "learning_rate": 0.00010740608759739317,
733
+ "loss": 0.031,
734
+ "step": 9400
735
+ },
736
+ {
737
+ "epoch": 80.51,
738
+ "learning_rate": 9.895070176039717e-05,
739
+ "loss": 0.0308,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 80.51,
744
+ "eval_loss": 0.21947245299816132,
745
+ "eval_runtime": 295.835,
746
+ "eval_samples_per_second": 21.847,
747
+ "eval_steps_per_second": 0.304,
748
+ "eval_wer": 0.18349293776470343,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 81.35,
753
+ "learning_rate": 9.080528091463946e-05,
754
+ "loss": 0.0309,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 82.2,
759
+ "learning_rate": 8.297612053379883e-05,
760
+ "loss": 0.0302,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 83.05,
765
+ "learning_rate": 7.546927165858153e-05,
766
+ "loss": 0.0296,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 83.89,
771
+ "learning_rate": 6.82905362199625e-05,
772
+ "loss": 0.0289,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 84.74,
777
+ "learning_rate": 6.14454625549593e-05,
778
+ "loss": 0.0291,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 84.74,
783
+ "eval_loss": 0.22659379243850708,
784
+ "eval_runtime": 301.2419,
785
+ "eval_samples_per_second": 21.455,
786
+ "eval_steps_per_second": 0.299,
787
+ "eval_wer": 0.18251236151968456,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 85.59,
792
+ "learning_rate": 5.493934111840604e-05,
793
+ "loss": 0.029,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 86.44,
798
+ "learning_rate": 4.87772003940401e-05,
799
+ "loss": 0.0276,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 87.29,
804
+ "learning_rate": 4.296380300806219e-05,
805
+ "loss": 0.0285,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 88.14,
810
+ "learning_rate": 3.750364204817463e-05,
811
+ "loss": 0.0287,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 88.98,
816
+ "learning_rate": 3.245018204574429e-05,
817
+ "loss": 0.0266,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 88.98,
822
+ "eval_loss": 0.22852544486522675,
823
+ "eval_runtime": 296.9937,
824
+ "eval_samples_per_second": 21.761,
825
+ "eval_steps_per_second": 0.303,
826
+ "eval_wer": 0.1800713525693184,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 89.83,
831
+ "learning_rate": 2.7705245284789292e-05,
832
+ "loss": 0.0269,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 90.68,
837
+ "learning_rate": 2.3325338060476542e-05,
838
+ "loss": 0.0262,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 91.52,
843
+ "learning_rate": 1.9313845537448972e-05,
844
+ "loss": 0.0257,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 92.37,
849
+ "learning_rate": 1.567386813814875e-05,
850
+ "loss": 0.0271,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 93.22,
855
+ "learning_rate": 1.2408219146547828e-05,
856
+ "loss": 0.0266,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 93.22,
861
+ "eval_loss": 0.229219451546669,
862
+ "eval_runtime": 299.2804,
863
+ "eval_samples_per_second": 21.595,
864
+ "eval_steps_per_second": 0.301,
865
+ "eval_wer": 0.1801130792180426,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 94.07,
870
+ "learning_rate": 9.519422533802668e-06,
871
+ "loss": 0.027,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 94.91,
876
+ "learning_rate": 7.009711007514796e-06,
877
+ "loss": 0.0266,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 95.76,
882
+ "learning_rate": 4.8810242861045405e-06,
883
+ "loss": 0.0258,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 96.61,
888
+ "learning_rate": 3.1350075996317563e-06,
889
+ "loss": 0.0256,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 97.46,
894
+ "learning_rate": 1.773010418222143e-06,
895
+ "loss": 0.0262,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 97.46,
900
+ "eval_loss": 0.22784815728664398,
901
+ "eval_runtime": 323.2034,
902
+ "eval_samples_per_second": 19.997,
903
+ "eval_steps_per_second": 0.278,
904
+ "eval_wer": 0.17881955310759218,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 98.3,
909
+ "learning_rate": 7.96085409082159e-07,
910
+ "loss": 0.0267,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 99.15,
915
+ "learning_rate": 2.049876229081038e-07,
916
+ "loss": 0.026,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 100.0,
921
+ "learning_rate": 1.7391031886804242e-10,
922
+ "loss": 0.0257,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 100.0,
927
+ "step": 11800,
928
+ "total_flos": 2.7805901871717807e+20,
929
+ "train_loss": 0.14547648023750823,
930
+ "train_runtime": 96097.4367,
931
+ "train_samples_per_second": 17.722,
932
+ "train_steps_per_second": 0.123
933
+ }
934
+ ],
935
+ "max_steps": 11800,
936
+ "num_train_epochs": 100,
937
+ "total_flos": 2.7805901871717807e+20,
938
+ "trial_name": null,
939
+ "trial_params": null
940
+ }
wandb/run-20220205_233515-2f29fa6z/files/config.yaml CHANGED
@@ -4802,6 +4802,26 @@ _wandb:
4802
  5: 1
4803
  6:
4804
  - 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4805
  python_version: 3.9.6
4806
  start_time: 1644122115
4807
  t:
 
4802
  5: 1
4803
  6:
4804
  - 1
4805
+ - 1: train/train_runtime
4806
+ 5: 1
4807
+ 6:
4808
+ - 1
4809
+ - 1: train/train_samples_per_second
4810
+ 5: 1
4811
+ 6:
4812
+ - 1
4813
+ - 1: train/train_steps_per_second
4814
+ 5: 1
4815
+ 6:
4816
+ - 1
4817
+ - 1: train/total_flos
4818
+ 5: 1
4819
+ 6:
4820
+ - 1
4821
+ - 1: train/train_loss
4822
+ 5: 1
4823
+ 6:
4824
+ - 1
4825
  python_version: 3.9.6
4826
  start_time: 1644122115
4827
  t:
wandb/run-20220205_233515-2f29fa6z/files/output.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220205_233515-2f29fa6z/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220205_233515-2f29fa6z/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220205_233515-2f29fa6z/run-2f29fa6z.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef44feb48d03b9712d5ec4678c620e98051ea9771e815f41eb8289cec085f077
3
- size 83045857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:742e2748dedaf1a3dcb8997028a19e4bee59a2093eded406359adc3c8e7fc8ec
3
+ size 85282495