Aditya3107 committed on
Commit
b11a2b9
1 Parent(s): 89cc42e

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +385 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 18.0,
3
+ "eval_loss": 0.3360544443130493,
4
+ "eval_runtime": 56.6507,
5
+ "eval_samples": 512,
6
+ "eval_samples_per_second": 9.038,
7
+ "eval_steps_per_second": 1.13,
8
+ "eval_wer": 0.19631901840490798,
9
+ "train_loss": 0.4896983008235647,
10
+ "train_runtime": 62213.9785,
11
+ "train_samples": 8619,
12
+ "train_samples_per_second": 2.494,
13
+ "train_steps_per_second": 0.104
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 18.0,
3
+ "eval_loss": 0.3360544443130493,
4
+ "eval_runtime": 56.6507,
5
+ "eval_samples": 512,
6
+ "eval_samples_per_second": 9.038,
7
+ "eval_steps_per_second": 1.13,
8
+ "eval_wer": 0.19631901840490798
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 18.0,
3
+ "train_loss": 0.4896983008235647,
4
+ "train_runtime": 62213.9785,
5
+ "train_samples": 8619,
6
+ "train_samples_per_second": 2.494,
7
+ "train_steps_per_second": 0.104
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 17.999072356215212,
5
+ "global_step": 6462,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.56,
12
+ "eval_loss": 2.8832294940948486,
13
+ "eval_runtime": 56.6037,
14
+ "eval_samples_per_second": 9.045,
15
+ "eval_steps_per_second": 1.131,
16
+ "eval_wer": 1.0,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 1.11,
21
+ "eval_loss": 1.1704888343811035,
22
+ "eval_runtime": 56.4819,
23
+ "eval_samples_per_second": 9.065,
24
+ "eval_steps_per_second": 1.133,
25
+ "eval_wer": 0.7788489628980426,
26
+ "step": 400
27
+ },
28
+ {
29
+ "epoch": 1.39,
30
+ "learning_rate": 0.00029939999999999996,
31
+ "loss": 3.3987,
32
+ "step": 500
33
+ },
34
+ {
35
+ "epoch": 1.67,
36
+ "eval_loss": 0.7739295959472656,
37
+ "eval_runtime": 57.0638,
38
+ "eval_samples_per_second": 8.972,
39
+ "eval_steps_per_second": 1.122,
40
+ "eval_wer": 0.5895413380075957,
41
+ "step": 600
42
+ },
43
+ {
44
+ "epoch": 2.23,
45
+ "eval_loss": 0.6045235395431519,
46
+ "eval_runtime": 56.2609,
47
+ "eval_samples_per_second": 9.1,
48
+ "eval_steps_per_second": 1.138,
49
+ "eval_wer": 0.4902132632193982,
50
+ "step": 800
51
+ },
52
+ {
53
+ "epoch": 2.78,
54
+ "learning_rate": 0.0002748909761824891,
55
+ "loss": 0.8313,
56
+ "step": 1000
57
+ },
58
+ {
59
+ "epoch": 2.78,
60
+ "eval_loss": 0.5234911441802979,
61
+ "eval_runtime": 56.3102,
62
+ "eval_samples_per_second": 9.092,
63
+ "eval_steps_per_second": 1.137,
64
+ "eval_wer": 0.43938066023955596,
65
+ "step": 1000
66
+ },
67
+ {
68
+ "epoch": 3.34,
69
+ "eval_loss": 0.48238545656204224,
70
+ "eval_runtime": 56.3832,
71
+ "eval_samples_per_second": 9.081,
72
+ "eval_steps_per_second": 1.135,
73
+ "eval_wer": 0.4002337131171487,
74
+ "step": 1200
75
+ },
76
+ {
77
+ "epoch": 3.9,
78
+ "eval_loss": 0.4378102719783783,
79
+ "eval_runtime": 56.2821,
80
+ "eval_samples_per_second": 9.097,
81
+ "eval_steps_per_second": 1.137,
82
+ "eval_wer": 0.37540169442009935,
83
+ "step": 1400
84
+ },
85
+ {
86
+ "epoch": 4.18,
87
+ "learning_rate": 0.00024973163367997313,
88
+ "loss": 0.5342,
89
+ "step": 1500
90
+ },
91
+ {
92
+ "epoch": 4.46,
93
+ "eval_loss": 0.44333964586257935,
94
+ "eval_runtime": 56.3381,
95
+ "eval_samples_per_second": 9.088,
96
+ "eval_steps_per_second": 1.136,
97
+ "eval_wer": 0.36342389716622847,
98
+ "step": 1600
99
+ },
100
+ {
101
+ "epoch": 5.01,
102
+ "eval_loss": 0.41031020879745483,
103
+ "eval_runtime": 56.4477,
104
+ "eval_samples_per_second": 9.07,
105
+ "eval_steps_per_second": 1.134,
106
+ "eval_wer": 0.34852468594799885,
107
+ "step": 1800
108
+ },
109
+ {
110
+ "epoch": 5.57,
111
+ "learning_rate": 0.00022457229117745723,
112
+ "loss": 0.3792,
113
+ "step": 2000
114
+ },
115
+ {
116
+ "epoch": 5.57,
117
+ "eval_loss": 0.3816453218460083,
118
+ "eval_runtime": 56.4551,
119
+ "eval_samples_per_second": 9.069,
120
+ "eval_steps_per_second": 1.134,
121
+ "eval_wer": 0.33099620216184633,
122
+ "step": 2000
123
+ },
124
+ {
125
+ "epoch": 6.13,
126
+ "eval_loss": 0.3952919840812683,
127
+ "eval_runtime": 56.4457,
128
+ "eval_samples_per_second": 9.071,
129
+ "eval_steps_per_second": 1.134,
130
+ "eval_wer": 0.32252410166520595,
131
+ "step": 2200
132
+ },
133
+ {
134
+ "epoch": 6.68,
135
+ "eval_loss": 0.39945441484451294,
136
+ "eval_runtime": 56.6634,
137
+ "eval_samples_per_second": 9.036,
138
+ "eval_steps_per_second": 1.129,
139
+ "eval_wer": 0.31317557697925796,
140
+ "step": 2400
141
+ },
142
+ {
143
+ "epoch": 6.96,
144
+ "learning_rate": 0.00019941294867494128,
145
+ "loss": 0.2924,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 7.24,
150
+ "eval_loss": 0.3906857967376709,
151
+ "eval_runtime": 56.3831,
152
+ "eval_samples_per_second": 9.081,
153
+ "eval_steps_per_second": 1.135,
154
+ "eval_wer": 0.2930178206251826,
155
+ "step": 2600
156
+ },
157
+ {
158
+ "epoch": 7.8,
159
+ "eval_loss": 0.35171157121658325,
160
+ "eval_runtime": 56.7082,
161
+ "eval_samples_per_second": 9.029,
162
+ "eval_steps_per_second": 1.129,
163
+ "eval_wer": 0.2740286298568507,
164
+ "step": 2800
165
+ },
166
+ {
167
+ "epoch": 8.36,
168
+ "learning_rate": 0.00017425360617242535,
169
+ "loss": 0.2217,
170
+ "step": 3000
171
+ },
172
+ {
173
+ "epoch": 8.36,
174
+ "eval_loss": 0.33607447147369385,
175
+ "eval_runtime": 56.5616,
176
+ "eval_samples_per_second": 9.052,
177
+ "eval_steps_per_second": 1.132,
178
+ "eval_wer": 0.2591294186386211,
179
+ "step": 3000
180
+ },
181
+ {
182
+ "epoch": 8.91,
183
+ "eval_loss": 0.3340049088001251,
184
+ "eval_runtime": 56.5741,
185
+ "eval_samples_per_second": 9.05,
186
+ "eval_steps_per_second": 1.131,
187
+ "eval_wer": 0.2451066316096991,
188
+ "step": 3200
189
+ },
190
+ {
191
+ "epoch": 9.47,
192
+ "eval_loss": 0.3125685453414917,
193
+ "eval_runtime": 56.4726,
194
+ "eval_samples_per_second": 9.066,
195
+ "eval_steps_per_second": 1.133,
196
+ "eval_wer": 0.2448144902132632,
197
+ "step": 3400
198
+ },
199
+ {
200
+ "epoch": 9.75,
201
+ "learning_rate": 0.00014909426366990943,
202
+ "loss": 0.1714,
203
+ "step": 3500
204
+ },
205
+ {
206
+ "epoch": 10.03,
207
+ "eval_loss": 0.34412676095962524,
208
+ "eval_runtime": 56.5983,
209
+ "eval_samples_per_second": 9.046,
210
+ "eval_steps_per_second": 1.131,
211
+ "eval_wer": 0.2556237218813906,
212
+ "step": 3600
213
+ },
214
+ {
215
+ "epoch": 10.58,
216
+ "eval_loss": 0.3404456079006195,
217
+ "eval_runtime": 56.5608,
218
+ "eval_samples_per_second": 9.052,
219
+ "eval_steps_per_second": 1.132,
220
+ "eval_wer": 0.2521180251241601,
221
+ "step": 3800
222
+ },
223
+ {
224
+ "epoch": 11.14,
225
+ "learning_rate": 0.0001239349211673935,
226
+ "loss": 0.1395,
227
+ "step": 4000
228
+ },
229
+ {
230
+ "epoch": 11.14,
231
+ "eval_loss": 0.3728441894054413,
232
+ "eval_runtime": 56.7823,
233
+ "eval_samples_per_second": 9.017,
234
+ "eval_steps_per_second": 1.127,
235
+ "eval_wer": 0.25182588372772424,
236
+ "step": 4000
237
+ },
238
+ {
239
+ "epoch": 11.7,
240
+ "eval_loss": 0.3828706741333008,
241
+ "eval_runtime": 56.4473,
242
+ "eval_samples_per_second": 9.07,
243
+ "eval_steps_per_second": 1.134,
244
+ "eval_wer": 0.23955594507741748,
245
+ "step": 4200
246
+ },
247
+ {
248
+ "epoch": 12.26,
249
+ "eval_loss": 0.3465881943702698,
250
+ "eval_runtime": 56.3386,
251
+ "eval_samples_per_second": 9.088,
252
+ "eval_steps_per_second": 1.136,
253
+ "eval_wer": 0.23605024832018698,
254
+ "step": 4400
255
+ },
256
+ {
257
+ "epoch": 12.53,
258
+ "learning_rate": 9.877557866487755e-05,
259
+ "loss": 0.1069,
260
+ "step": 4500
261
+ },
262
+ {
263
+ "epoch": 12.81,
264
+ "eval_loss": 0.3187991976737976,
265
+ "eval_runtime": 56.3638,
266
+ "eval_samples_per_second": 9.084,
267
+ "eval_steps_per_second": 1.135,
268
+ "eval_wer": 0.2240724510663161,
269
+ "step": 4600
270
+ },
271
+ {
272
+ "epoch": 13.37,
273
+ "eval_loss": 0.3395535349845886,
274
+ "eval_runtime": 56.3257,
275
+ "eval_samples_per_second": 9.09,
276
+ "eval_steps_per_second": 1.136,
277
+ "eval_wer": 0.21969033011977798,
278
+ "step": 4800
279
+ },
280
+ {
281
+ "epoch": 13.93,
282
+ "learning_rate": 7.361623616236162e-05,
283
+ "loss": 0.0845,
284
+ "step": 5000
285
+ },
286
+ {
287
+ "epoch": 13.93,
288
+ "eval_loss": 0.3364916741847992,
289
+ "eval_runtime": 56.321,
290
+ "eval_samples_per_second": 9.091,
291
+ "eval_steps_per_second": 1.136,
292
+ "eval_wer": 0.2205667543090856,
293
+ "step": 5000
294
+ },
295
+ {
296
+ "epoch": 14.48,
297
+ "eval_loss": 0.3458584249019623,
298
+ "eval_runtime": 56.3485,
299
+ "eval_samples_per_second": 9.086,
300
+ "eval_steps_per_second": 1.136,
301
+ "eval_wer": 0.22085889570552147,
302
+ "step": 5200
303
+ },
304
+ {
305
+ "epoch": 15.04,
306
+ "eval_loss": 0.3429270386695862,
307
+ "eval_runtime": 57.9716,
308
+ "eval_samples_per_second": 8.832,
309
+ "eval_steps_per_second": 1.104,
310
+ "eval_wer": 0.2193981887233421,
311
+ "step": 5400
312
+ },
313
+ {
314
+ "epoch": 15.32,
315
+ "learning_rate": 4.845689365984569e-05,
316
+ "loss": 0.0675,
317
+ "step": 5500
318
+ },
319
+ {
320
+ "epoch": 15.6,
321
+ "eval_loss": 0.3433798849582672,
322
+ "eval_runtime": 56.3488,
323
+ "eval_samples_per_second": 9.086,
324
+ "eval_steps_per_second": 1.136,
325
+ "eval_wer": 0.2182296231375986,
326
+ "step": 5600
327
+ },
328
+ {
329
+ "epoch": 16.16,
330
+ "eval_loss": 0.3434172570705414,
331
+ "eval_runtime": 56.3181,
332
+ "eval_samples_per_second": 9.091,
333
+ "eval_steps_per_second": 1.136,
334
+ "eval_wer": 0.20829681565877886,
335
+ "step": 5800
336
+ },
337
+ {
338
+ "epoch": 16.71,
339
+ "learning_rate": 2.329755115732975e-05,
340
+ "loss": 0.0561,
341
+ "step": 6000
342
+ },
343
+ {
344
+ "epoch": 16.71,
345
+ "eval_loss": 0.33747875690460205,
346
+ "eval_runtime": 56.3763,
347
+ "eval_samples_per_second": 9.082,
348
+ "eval_steps_per_second": 1.135,
349
+ "eval_wer": 0.20362255331580484,
350
+ "step": 6000
351
+ },
352
+ {
353
+ "epoch": 17.27,
354
+ "eval_loss": 0.3445747494697571,
355
+ "eval_runtime": 56.6382,
356
+ "eval_samples_per_second": 9.04,
357
+ "eval_steps_per_second": 1.13,
358
+ "eval_wer": 0.19865614957639496,
359
+ "step": 6200
360
+ },
361
+ {
362
+ "epoch": 17.83,
363
+ "eval_loss": 0.33622780442237854,
364
+ "eval_runtime": 56.3321,
365
+ "eval_samples_per_second": 9.089,
366
+ "eval_steps_per_second": 1.136,
367
+ "eval_wer": 0.19777972538708735,
368
+ "step": 6400
369
+ },
370
+ {
371
+ "epoch": 18.0,
372
+ "step": 6462,
373
+ "total_flos": 3.4883692787272507e+19,
374
+ "train_loss": 0.4896983008235647,
375
+ "train_runtime": 62213.9785,
376
+ "train_samples_per_second": 2.494,
377
+ "train_steps_per_second": 0.104
378
+ }
379
+ ],
380
+ "max_steps": 6462,
381
+ "num_train_epochs": 18,
382
+ "total_flos": 3.4883692787272507e+19,
383
+ "trial_name": null,
384
+ "trial_params": null
385
+ }