p4b committed on
Commit
1a57c25
1 Parent(s): 33bc9c7

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 24.04,
3
+ "eval_loss": 0.31787109375,
4
+ "eval_runtime": 743.378,
5
+ "eval_samples_per_second": 2.984,
6
+ "eval_steps_per_second": 0.094,
7
+ "eval_wer": 27.47628083491461,
8
+ "train_loss": 0.31971333821614584,
9
+ "train_runtime": 22038.1709,
10
+ "train_samples_per_second": 4.356,
11
+ "train_steps_per_second": 0.068
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 24.04,
3
+ "eval_loss": 0.31787109375,
4
+ "eval_runtime": 743.378,
5
+ "eval_samples_per_second": 2.984,
6
+ "eval_steps_per_second": 0.094,
7
+ "eval_wer": 27.47628083491461
8
+ }
runs/Dec19_02-39-54_nipa2022-65403/events.out.tfevents.1671408912.nipa2022-65403.23021.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75cf4838254b4bab95d88e5fd84ba745ef0358ebee268187f8606d0f174ac0b4
3
+ size 358
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 24.04,
3
+ "train_loss": 0.31971333821614584,
4
+ "train_runtime": 22038.1709,
5
+ "train_samples_per_second": 4.356,
6
+ "train_steps_per_second": 0.068
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 27.47628083491461,
3
+ "best_model_checkpoint": "./checkpoint-1000",
4
+ "epoch": 24.04,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 3.45e-08,
13
+ "loss": 1.3486,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 7.2e-08,
19
+ "loss": 1.1313,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 1.01,
24
+ "learning_rate": 1.095e-07,
25
+ "loss": 1.2018,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 1.03,
30
+ "learning_rate": 1.4699999999999998e-07,
31
+ "loss": 1.0324,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "learning_rate": 1.845e-07,
37
+ "loss": 0.7448,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 2.02,
42
+ "learning_rate": 2.2199999999999998e-07,
43
+ "loss": 0.6496,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 2.04,
48
+ "learning_rate": 2.5949999999999996e-07,
49
+ "loss": 0.4192,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 3.01,
54
+ "learning_rate": 2.9699999999999997e-07,
55
+ "loss": 0.5148,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 3.01,
60
+ "eval_loss": 0.4189453125,
61
+ "eval_runtime": 732.3243,
62
+ "eval_samples_per_second": 3.029,
63
+ "eval_steps_per_second": 0.096,
64
+ "eval_wer": 39.34535104364326,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 3.03,
69
+ "learning_rate": 2.997683575414969e-07,
70
+ "loss": 0.3818,
71
+ "step": 225
72
+ },
73
+ {
74
+ "epoch": 4.01,
75
+ "learning_rate": 2.9899197858556995e-07,
76
+ "loss": 0.3758,
77
+ "step": 250
78
+ },
79
+ {
80
+ "epoch": 4.02,
81
+ "learning_rate": 2.9767194405746293e-07,
82
+ "loss": 0.3957,
83
+ "step": 275
84
+ },
85
+ {
86
+ "epoch": 4.04,
87
+ "learning_rate": 2.958130706200157e-07,
88
+ "loss": 0.2837,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.02,
93
+ "learning_rate": 2.934221411008129e-07,
94
+ "loss": 0.4078,
95
+ "step": 325
96
+ },
97
+ {
98
+ "epoch": 5.03,
99
+ "learning_rate": 2.905078797423817e-07,
100
+ "loss": 0.27,
101
+ "step": 350
102
+ },
103
+ {
104
+ "epoch": 6.01,
105
+ "learning_rate": 2.870809203683774e-07,
106
+ "loss": 0.3376,
107
+ "step": 375
108
+ },
109
+ {
110
+ "epoch": 6.03,
111
+ "learning_rate": 2.831537675819134e-07,
112
+ "loss": 0.3041,
113
+ "step": 400
114
+ },
115
+ {
116
+ "epoch": 6.03,
117
+ "eval_loss": 0.33349609375,
118
+ "eval_runtime": 737.3023,
119
+ "eval_samples_per_second": 3.008,
120
+ "eval_steps_per_second": 0.095,
121
+ "eval_wer": 29.573055028462996,
122
+ "step": 400
123
+ },
124
+ {
125
+ "epoch": 7.0,
126
+ "learning_rate": 2.78740751137621e-07,
127
+ "loss": 0.259,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 7.02,
132
+ "learning_rate": 2.738579736539286e-07,
133
+ "loss": 0.334,
134
+ "step": 450
135
+ },
136
+ {
137
+ "epoch": 7.04,
138
+ "learning_rate": 2.6852325185635355e-07,
139
+ "loss": 0.2305,
140
+ "step": 475
141
+ },
142
+ {
143
+ "epoch": 8.01,
144
+ "learning_rate": 2.6275605156620243e-07,
145
+ "loss": 0.3156,
146
+ "step": 500
147
+ },
148
+ {
149
+ "epoch": 8.03,
150
+ "learning_rate": 2.5657741667190066e-07,
151
+ "loss": 0.2408,
152
+ "step": 525
153
+ },
154
+ {
155
+ "epoch": 9.01,
156
+ "learning_rate": 2.500098923421254e-07,
157
+ "loss": 0.2577,
158
+ "step": 550
159
+ },
160
+ {
161
+ "epoch": 9.02,
162
+ "learning_rate": 2.4307744276092984e-07,
163
+ "loss": 0.2818,
164
+ "step": 575
165
+ },
166
+ {
167
+ "epoch": 9.04,
168
+ "learning_rate": 2.3580536368503515e-07,
169
+ "loss": 0.1961,
170
+ "step": 600
171
+ },
172
+ {
173
+ "epoch": 9.04,
174
+ "eval_loss": 0.318603515625,
175
+ "eval_runtime": 746.4809,
176
+ "eval_samples_per_second": 2.971,
177
+ "eval_steps_per_second": 0.094,
178
+ "eval_wer": 27.779886148007588,
179
+ "step": 600
180
+ },
181
+ {
182
+ "epoch": 10.02,
183
+ "learning_rate": 2.2822019014235677e-07,
184
+ "loss": 0.3018,
185
+ "step": 625
186
+ },
187
+ {
188
+ "epoch": 10.03,
189
+ "learning_rate": 2.2034959960856529e-07,
190
+ "loss": 0.2028,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 11.01,
195
+ "learning_rate": 2.1222231101497956e-07,
196
+ "loss": 0.2588,
197
+ "step": 675
198
+ },
199
+ {
200
+ "epoch": 11.03,
201
+ "learning_rate": 2.0386797995630034e-07,
202
+ "loss": 0.2308,
203
+ "step": 700
204
+ },
205
+ {
206
+ "epoch": 12.0,
207
+ "learning_rate": 1.953170904805618e-07,
208
+ "loss": 0.2022,
209
+ "step": 725
210
+ },
211
+ {
212
+ "epoch": 12.02,
213
+ "learning_rate": 1.8660084385614562e-07,
214
+ "loss": 0.2688,
215
+ "step": 750
216
+ },
217
+ {
218
+ "epoch": 12.04,
219
+ "learning_rate": 1.7775104472173675e-07,
220
+ "loss": 0.1752,
221
+ "step": 775
222
+ },
223
+ {
224
+ "epoch": 13.01,
225
+ "learning_rate": 1.6879998503464563e-07,
226
+ "loss": 0.2579,
227
+ "step": 800
228
+ },
229
+ {
230
+ "epoch": 13.01,
231
+ "eval_loss": 0.316650390625,
232
+ "eval_runtime": 737.1707,
233
+ "eval_samples_per_second": 3.009,
234
+ "eval_steps_per_second": 0.095,
235
+ "eval_wer": 27.571157495256166,
236
+ "step": 800
237
+ },
238
+ {
239
+ "epoch": 13.03,
240
+ "learning_rate": 1.5978032624095823e-07,
241
+ "loss": 0.195,
242
+ "step": 825
243
+ },
244
+ {
245
+ "epoch": 14.01,
246
+ "learning_rate": 1.5072498009746107e-07,
247
+ "loss": 0.2044,
248
+ "step": 850
249
+ },
250
+ {
251
+ "epoch": 14.02,
252
+ "learning_rate": 1.416669885802099e-07,
253
+ "loss": 0.2308,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 14.04,
258
+ "learning_rate": 1.3263940331794248e-07,
259
+ "loss": 0.1651,
260
+ "step": 900
261
+ },
262
+ {
263
+ "epoch": 15.02,
264
+ "learning_rate": 1.2367516499026965e-07,
265
+ "loss": 0.2565,
266
+ "step": 925
267
+ },
268
+ {
269
+ "epoch": 15.03,
270
+ "learning_rate": 1.1480698313070749e-07,
271
+ "loss": 0.1693,
272
+ "step": 950
273
+ },
274
+ {
275
+ "epoch": 16.01,
276
+ "learning_rate": 1.0606721677313713e-07,
277
+ "loss": 0.2152,
278
+ "step": 975
279
+ },
280
+ {
281
+ "epoch": 16.03,
282
+ "learning_rate": 9.748775637719916e-08,
283
+ "loss": 0.2034,
284
+ "step": 1000
285
+ },
286
+ {
287
+ "epoch": 16.03,
288
+ "eval_loss": 0.31787109375,
289
+ "eval_runtime": 744.5018,
290
+ "eval_samples_per_second": 2.979,
291
+ "eval_steps_per_second": 0.094,
292
+ "eval_wer": 27.47628083491461,
293
+ "step": 1000
294
+ },
295
+ {
296
+ "epoch": 17.0,
297
+ "learning_rate": 8.976315836886941e-08,
298
+ "loss": 0.1694,
299
+ "step": 1025
300
+ },
301
+ {
302
+ "epoch": 17.02,
303
+ "learning_rate": 8.157864194541929e-08,
304
+ "loss": 0.227,
305
+ "step": 1050
306
+ },
307
+ {
308
+ "epoch": 17.04,
309
+ "learning_rate": 7.36437876374443e-08,
310
+ "loss": 0.1607,
311
+ "step": 1075
312
+ },
313
+ {
314
+ "epoch": 18.01,
315
+ "learning_rate": 6.598754886780604e-08,
316
+ "loss": 0.2253,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 18.03,
321
+ "learning_rate": 5.863786242148148e-08,
322
+ "loss": 0.1737,
323
+ "step": 1125
324
+ },
325
+ {
326
+ "epoch": 19.01,
327
+ "learning_rate": 5.1621546507264605e-08,
328
+ "loss": 0.1891,
329
+ "step": 1150
330
+ },
331
+ {
332
+ "epoch": 19.02,
333
+ "learning_rate": 4.496420290103045e-08,
334
+ "loss": 0.2066,
335
+ "step": 1175
336
+ },
337
+ {
338
+ "epoch": 19.04,
339
+ "learning_rate": 3.869012352763169e-08,
340
+ "loss": 0.1478,
341
+ "step": 1200
342
+ },
343
+ {
344
+ "epoch": 19.04,
345
+ "eval_loss": 0.3193359375,
346
+ "eval_runtime": 742.6411,
347
+ "eval_samples_per_second": 2.987,
348
+ "eval_steps_per_second": 0.094,
349
+ "eval_wer": 27.523719165085385,
350
+ "step": 1200
351
+ },
352
+ {
353
+ "epoch": 20.02,
354
+ "learning_rate": 3.28222018222991e-08,
355
+ "loss": 0.2362,
356
+ "step": 1225
357
+ },
358
+ {
359
+ "epoch": 20.03,
360
+ "learning_rate": 2.738184919497899e-08,
361
+ "loss": 0.1564,
362
+ "step": 1250
363
+ },
364
+ {
365
+ "epoch": 21.01,
366
+ "learning_rate": 2.2388916902420573e-08,
367
+ "loss": 0.2014,
368
+ "step": 1275
369
+ },
370
+ {
371
+ "epoch": 21.03,
372
+ "learning_rate": 1.7861623613092718e-08,
373
+ "loss": 0.1918,
374
+ "step": 1300
375
+ },
376
+ {
377
+ "epoch": 22.0,
378
+ "learning_rate": 1.381648892923838e-08,
379
+ "loss": 0.1614,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 22.02,
384
+ "learning_rate": 1.0268273108637776e-08,
385
+ "loss": 0.2187,
386
+ "step": 1350
387
+ },
388
+ {
389
+ "epoch": 22.04,
390
+ "learning_rate": 7.229923206028154e-09,
391
+ "loss": 0.1492,
392
+ "step": 1375
393
+ },
394
+ {
395
+ "epoch": 23.01,
396
+ "learning_rate": 4.712525830705338e-09,
397
+ "loss": 0.2169,
398
+ "step": 1400
399
+ },
400
+ {
401
+ "epoch": 23.01,
402
+ "eval_loss": 0.31982421875,
403
+ "eval_runtime": 739.4797,
404
+ "eval_samples_per_second": 2.999,
405
+ "eval_steps_per_second": 0.095,
406
+ "eval_wer": 27.504743833017077,
407
+ "step": 1400
408
+ },
409
+ {
410
+ "epoch": 23.03,
411
+ "learning_rate": 2.7252666926886157e-09,
412
+ "loss": 0.1702,
413
+ "step": 1425
414
+ },
415
+ {
416
+ "epoch": 24.01,
417
+ "learning_rate": 1.2753970850610251e-09,
418
+ "loss": 0.1815,
419
+ "step": 1450
420
+ },
421
+ {
422
+ "epoch": 24.02,
423
+ "learning_rate": 3.682074247873257e-10,
424
+ "loss": 0.2022,
425
+ "step": 1475
426
+ },
427
+ {
428
+ "epoch": 24.04,
429
+ "learning_rate": 7.0079485561924665e-12,
430
+ "loss": 0.1448,
431
+ "step": 1500
432
+ },
433
+ {
434
+ "epoch": 24.04,
435
+ "step": 1500,
436
+ "total_flos": 2.0191356490809868e+20,
437
+ "train_loss": 0.31971333821614584,
438
+ "train_runtime": 22038.1709,
439
+ "train_samples_per_second": 4.356,
440
+ "train_steps_per_second": 0.068
441
+ }
442
+ ],
443
+ "max_steps": 1500,
444
+ "num_train_epochs": 9223372036854775807,
445
+ "total_flos": 2.0191356490809868e+20,
446
+ "trial_name": null,
447
+ "trial_params": null
448
+ }