Mofe commited on
Commit
40ab21f
1 Parent(s): 5242e86

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +284 -122
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 89.98,
3
- "eval_loss": 0.7423784136772156,
4
- "eval_runtime": 31.8664,
5
  "eval_samples": 892,
6
- "eval_samples_per_second": 27.992,
7
- "eval_steps_per_second": 3.515,
8
- "eval_wer": 0.6190132717830352,
9
- "train_loss": 1.793763725845902,
10
- "train_runtime": 8224.5964,
11
  "train_samples": 1941,
12
- "train_samples_per_second": 21.24,
13
- "train_steps_per_second": 0.328
14
  }
 
1
  {
2
+ "epoch": 79.99,
3
+ "eval_loss": 0.49252477288246155,
4
+ "eval_runtime": 31.5756,
5
  "eval_samples": 892,
6
+ "eval_samples_per_second": 28.25,
7
+ "eval_steps_per_second": 3.547,
8
+ "eval_wer": 0.5714079630698211,
9
+ "train_loss": 1.795632479985555,
10
+ "train_runtime": 8730.4816,
11
  "train_samples": 1941,
12
+ "train_samples_per_second": 17.786,
13
+ "train_steps_per_second": 0.55
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 89.98,
3
- "eval_loss": 0.7423784136772156,
4
- "eval_runtime": 31.8664,
5
  "eval_samples": 892,
6
- "eval_samples_per_second": 27.992,
7
- "eval_steps_per_second": 3.515,
8
- "eval_wer": 0.6190132717830352
9
  }
 
1
  {
2
+ "epoch": 79.99,
3
+ "eval_loss": 0.49252477288246155,
4
+ "eval_runtime": 31.5756,
5
  "eval_samples": 892,
6
+ "eval_samples_per_second": 28.25,
7
+ "eval_steps_per_second": 3.547,
8
+ "eval_wer": 0.5714079630698211
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 89.98,
3
- "train_loss": 1.793763725845902,
4
- "train_runtime": 8224.5964,
5
  "train_samples": 1941,
6
- "train_samples_per_second": 21.24,
7
- "train_steps_per_second": 0.328
8
  }
 
1
  {
2
+ "epoch": 79.99,
3
+ "train_loss": 1.795632479985555,
4
+ "train_runtime": 8730.4816,
5
  "train_samples": 1941,
6
+ "train_samples_per_second": 17.786,
7
+ "train_steps_per_second": 0.55
8
  }
trainer_state.json CHANGED
@@ -1,232 +1,394 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 89.98360655737704,
5
- "global_step": 2700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 3.33,
12
- "learning_rate": 1.485e-05,
13
- "loss": 11.2315,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 6.66,
18
- "learning_rate": 2.985e-05,
19
- "loss": 4.0404,
20
  "step": 200
21
  },
22
  {
23
- "epoch": 9.98,
24
- "learning_rate": 4.484999999999999e-05,
25
- "loss": 3.056,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 13.33,
30
- "learning_rate": 5.985e-05,
31
- "loss": 2.8167,
32
  "step": 400
33
  },
34
  {
35
- "epoch": 16.66,
36
- "learning_rate": 7.484999999999999e-05,
37
- "loss": 2.721,
38
  "step": 500
39
  },
40
  {
41
- "epoch": 16.66,
42
- "eval_loss": 2.6876425743103027,
43
- "eval_runtime": 32.088,
44
- "eval_samples_per_second": 27.799,
45
- "eval_steps_per_second": 3.49,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
- "epoch": 19.98,
51
- "learning_rate": 8.984999999999999e-05,
52
- "loss": 2.6493,
53
  "step": 600
54
  },
55
  {
56
- "epoch": 23.33,
57
- "learning_rate": 0.00010484999999999999,
58
- "loss": 1.9825,
59
  "step": 700
60
  },
61
  {
62
- "epoch": 26.66,
63
- "learning_rate": 0.00011985,
64
- "loss": 1.4345,
65
  "step": 800
66
  },
67
  {
68
- "epoch": 29.98,
69
- "learning_rate": 0.00013485,
70
- "loss": 1.3429,
71
  "step": 900
72
  },
73
  {
74
- "epoch": 33.33,
75
- "learning_rate": 0.00014954999999999998,
76
- "loss": 1.2944,
77
  "step": 1000
78
  },
79
  {
80
- "epoch": 33.33,
81
- "eval_loss": 0.6482492089271545,
82
- "eval_runtime": 32.0457,
83
- "eval_samples_per_second": 27.835,
84
- "eval_steps_per_second": 3.495,
85
- "eval_wer": 0.7135025966532026,
86
  "step": 1000
87
  },
88
  {
89
- "epoch": 36.66,
90
- "learning_rate": 0.00016455,
91
- "loss": 1.2347,
92
  "step": 1100
93
  },
94
  {
95
- "epoch": 39.98,
96
- "learning_rate": 0.00017955,
97
- "loss": 1.1858,
98
  "step": 1200
99
  },
100
  {
101
- "epoch": 43.33,
102
- "learning_rate": 0.00019454999999999999,
103
- "loss": 1.1475,
104
  "step": 1300
105
  },
106
  {
107
- "epoch": 46.66,
108
- "learning_rate": 0.00020955,
109
- "loss": 1.0875,
110
  "step": 1400
111
  },
112
  {
113
- "epoch": 49.98,
114
- "learning_rate": 0.00022455,
115
- "loss": 1.0515,
116
  "step": 1500
117
  },
118
  {
119
- "epoch": 49.98,
120
- "eval_loss": 0.6644838452339172,
121
- "eval_runtime": 31.726,
122
- "eval_samples_per_second": 28.116,
123
- "eval_steps_per_second": 3.53,
124
- "eval_wer": 0.6754183496826313,
125
  "step": 1500
126
  },
127
  {
128
- "epoch": 53.33,
129
- "learning_rate": 0.00023954999999999997,
130
- "loss": 1.0217,
131
  "step": 1600
132
  },
133
  {
134
- "epoch": 56.66,
135
- "learning_rate": 0.00025455,
136
- "loss": 0.9991,
137
  "step": 1700
138
  },
139
  {
140
- "epoch": 59.98,
141
- "learning_rate": 0.00026954999999999997,
142
- "loss": 0.9918,
143
  "step": 1800
144
  },
145
  {
146
- "epoch": 63.33,
147
- "learning_rate": 0.00028455,
148
- "loss": 0.9552,
149
  "step": 1900
150
  },
151
  {
152
- "epoch": 66.66,
153
- "learning_rate": 0.00029955,
154
- "loss": 0.9153,
155
  "step": 2000
156
  },
157
  {
158
- "epoch": 66.66,
159
- "eval_loss": 0.7648739814758301,
160
- "eval_runtime": 31.6976,
161
- "eval_samples_per_second": 28.141,
162
- "eval_steps_per_second": 3.533,
163
- "eval_wer": 0.693594922100404,
164
  "step": 2000
165
  },
166
  {
167
- "epoch": 69.98,
168
- "learning_rate": 0.0002584285714285714,
169
- "loss": 0.8827,
170
  "step": 2100
171
  },
172
  {
173
- "epoch": 73.33,
174
- "learning_rate": 0.00021557142857142855,
175
- "loss": 0.8474,
176
  "step": 2200
177
  },
178
  {
179
- "epoch": 76.66,
180
- "learning_rate": 0.0001727142857142857,
181
- "loss": 0.8016,
182
  "step": 2300
183
  },
184
  {
185
- "epoch": 79.98,
186
- "learning_rate": 0.00012985714285714285,
187
- "loss": 0.7467,
188
  "step": 2400
189
  },
190
  {
191
- "epoch": 83.33,
192
- "learning_rate": 8.699999999999999e-05,
193
- "loss": 0.7056,
194
  "step": 2500
195
  },
196
  {
197
- "epoch": 83.33,
198
- "eval_loss": 0.8047966361045837,
199
- "eval_runtime": 31.4528,
200
- "eval_samples_per_second": 28.36,
201
- "eval_steps_per_second": 3.561,
202
- "eval_wer": 0.6755626081938835,
203
  "step": 2500
204
  },
205
  {
206
- "epoch": 86.66,
207
- "learning_rate": 4.414285714285714e-05,
208
- "loss": 0.6637,
209
  "step": 2600
210
  },
211
  {
212
- "epoch": 89.98,
213
- "learning_rate": 1.2857142857142856e-06,
214
- "loss": 0.6249,
215
  "step": 2700
216
  },
217
  {
218
- "epoch": 89.98,
219
- "step": 2700,
220
- "total_flos": 2.311805909026184e+19,
221
- "train_loss": 1.793763725845902,
222
- "train_runtime": 8224.5964,
223
- "train_samples_per_second": 21.24,
224
- "train_steps_per_second": 0.328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  }
226
  ],
227
- "max_steps": 2700,
228
- "num_train_epochs": 90,
229
- "total_flos": 2.311805909026184e+19,
230
  "trial_name": null,
231
  "trial_params": null
232
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 79.98765432098766,
5
+ "global_step": 4800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.66,
12
+ "learning_rate": 3.7125e-06,
13
+ "loss": 14.5868,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 3.33,
18
+ "learning_rate": 7.4625e-06,
19
+ "loss": 6.8756,
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 4.99,
24
+ "learning_rate": 1.1212499999999998e-05,
25
+ "loss": 4.2978,
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 6.66,
30
+ "learning_rate": 1.49625e-05,
31
+ "loss": 3.6126,
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 8.33,
36
+ "learning_rate": 1.8712499999999997e-05,
37
+ "loss": 3.1674,
38
  "step": 500
39
  },
40
  {
41
+ "epoch": 8.33,
42
+ "eval_loss": 3.0295047760009766,
43
+ "eval_runtime": 32.2697,
44
+ "eval_samples_per_second": 27.642,
45
+ "eval_steps_per_second": 3.471,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
+ "epoch": 9.99,
51
+ "learning_rate": 2.2462499999999997e-05,
52
+ "loss": 2.8989,
53
  "step": 600
54
  },
55
  {
56
+ "epoch": 11.66,
57
+ "learning_rate": 2.6212499999999997e-05,
58
+ "loss": 2.8318,
59
  "step": 700
60
  },
61
  {
62
+ "epoch": 13.33,
63
+ "learning_rate": 2.99625e-05,
64
+ "loss": 2.7744,
65
  "step": 800
66
  },
67
  {
68
+ "epoch": 14.99,
69
+ "learning_rate": 3.37125e-05,
70
+ "loss": 2.7043,
71
  "step": 900
72
  },
73
  {
74
+ "epoch": 16.66,
75
+ "learning_rate": 3.7462499999999996e-05,
76
+ "loss": 2.6987,
77
  "step": 1000
78
  },
79
  {
80
+ "epoch": 16.66,
81
+ "eval_loss": 2.687849760055542,
82
+ "eval_runtime": 32.1535,
83
+ "eval_samples_per_second": 27.742,
84
+ "eval_steps_per_second": 3.483,
85
+ "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
+ "epoch": 18.33,
90
+ "learning_rate": 4.12125e-05,
91
+ "loss": 2.6565,
92
  "step": 1100
93
  },
94
  {
95
+ "epoch": 19.99,
96
+ "learning_rate": 4.4962499999999995e-05,
97
+ "loss": 2.4086,
98
  "step": 1200
99
  },
100
  {
101
+ "epoch": 21.66,
102
+ "learning_rate": 4.871249999999999e-05,
103
+ "loss": 1.7625,
104
  "step": 1300
105
  },
106
  {
107
+ "epoch": 23.33,
108
+ "learning_rate": 5.2462499999999994e-05,
109
+ "loss": 1.4648,
110
  "step": 1400
111
  },
112
  {
113
+ "epoch": 24.99,
114
+ "learning_rate": 5.62125e-05,
115
+ "loss": 1.3454,
116
  "step": 1500
117
  },
118
  {
119
+ "epoch": 24.99,
120
+ "eval_loss": 0.6813644766807556,
121
+ "eval_runtime": 32.0646,
122
+ "eval_samples_per_second": 27.819,
123
+ "eval_steps_per_second": 3.493,
124
+ "eval_wer": 0.698066935949221,
125
  "step": 1500
126
  },
127
  {
128
+ "epoch": 26.66,
129
+ "learning_rate": 5.9962499999999994e-05,
130
+ "loss": 1.2913,
131
  "step": 1600
132
  },
133
  {
134
+ "epoch": 28.33,
135
+ "learning_rate": 6.37125e-05,
136
+ "loss": 1.2416,
137
  "step": 1700
138
  },
139
  {
140
+ "epoch": 29.99,
141
+ "learning_rate": 6.746249999999999e-05,
142
+ "loss": 1.1899,
143
  "step": 1800
144
  },
145
  {
146
+ "epoch": 31.66,
147
+ "learning_rate": 7.121249999999999e-05,
148
+ "loss": 1.1745,
149
  "step": 1900
150
  },
151
  {
152
+ "epoch": 33.33,
153
+ "learning_rate": 7.492499999999999e-05,
154
+ "loss": 1.1227,
155
  "step": 2000
156
  },
157
  {
158
+ "epoch": 33.33,
159
+ "eval_loss": 0.5790585875511169,
160
+ "eval_runtime": 32.0814,
161
+ "eval_samples_per_second": 27.804,
162
+ "eval_steps_per_second": 3.491,
163
+ "eval_wer": 0.6513271783035199,
164
  "step": 2000
165
  },
166
  {
167
+ "epoch": 34.99,
168
+ "learning_rate": 7.2375e-05,
169
+ "loss": 1.0795,
170
  "step": 2100
171
  },
172
  {
173
+ "epoch": 36.66,
174
+ "learning_rate": 6.972321428571428e-05,
175
+ "loss": 1.0646,
176
  "step": 2200
177
  },
178
  {
179
+ "epoch": 38.33,
180
+ "learning_rate": 6.704464285714285e-05,
181
+ "loss": 1.05,
182
  "step": 2300
183
  },
184
  {
185
+ "epoch": 39.99,
186
+ "learning_rate": 6.436607142857142e-05,
187
+ "loss": 1.0149,
188
  "step": 2400
189
  },
190
  {
191
+ "epoch": 41.66,
192
+ "learning_rate": 6.16875e-05,
193
+ "loss": 0.9972,
194
  "step": 2500
195
  },
196
  {
197
+ "epoch": 41.66,
198
+ "eval_loss": 0.5235142111778259,
199
+ "eval_runtime": 31.8053,
200
+ "eval_samples_per_second": 28.046,
201
+ "eval_steps_per_second": 3.521,
202
+ "eval_wer": 0.5718407386035776,
203
  "step": 2500
204
  },
205
  {
206
+ "epoch": 43.33,
207
+ "learning_rate": 5.9008928571428565e-05,
208
+ "loss": 0.9722,
209
  "step": 2600
210
  },
211
  {
212
+ "epoch": 44.99,
213
+ "learning_rate": 5.633035714285714e-05,
214
+ "loss": 0.9401,
215
  "step": 2700
216
  },
217
  {
218
+ "epoch": 46.66,
219
+ "learning_rate": 5.3651785714285706e-05,
220
+ "loss": 0.9439,
221
+ "step": 2800
222
+ },
223
+ {
224
+ "epoch": 48.33,
225
+ "learning_rate": 5.0973214285714276e-05,
226
+ "loss": 0.923,
227
+ "step": 2900
228
+ },
229
+ {
230
+ "epoch": 49.99,
231
+ "learning_rate": 4.829464285714285e-05,
232
+ "loss": 0.9123,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 49.99,
237
+ "eval_loss": 0.5104396939277649,
238
+ "eval_runtime": 31.7859,
239
+ "eval_samples_per_second": 28.063,
240
+ "eval_steps_per_second": 3.524,
241
+ "eval_wer": 0.5633294864396999,
242
+ "step": 3000
243
+ },
244
+ {
245
+ "epoch": 51.66,
246
+ "learning_rate": 4.561607142857142e-05,
247
+ "loss": 0.8979,
248
+ "step": 3100
249
+ },
250
+ {
251
+ "epoch": 53.33,
252
+ "learning_rate": 4.29375e-05,
253
+ "loss": 0.8814,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 54.99,
258
+ "learning_rate": 4.025892857142857e-05,
259
+ "loss": 0.8548,
260
+ "step": 3300
261
+ },
262
+ {
263
+ "epoch": 56.66,
264
+ "learning_rate": 3.760714285714286e-05,
265
+ "loss": 0.8515,
266
+ "step": 3400
267
+ },
268
+ {
269
+ "epoch": 58.33,
270
+ "learning_rate": 3.492857142857142e-05,
271
+ "loss": 0.836,
272
+ "step": 3500
273
+ },
274
+ {
275
+ "epoch": 58.33,
276
+ "eval_loss": 0.49266090989112854,
277
+ "eval_runtime": 31.7464,
278
+ "eval_samples_per_second": 28.098,
279
+ "eval_steps_per_second": 3.528,
280
+ "eval_wer": 0.5579919215233698,
281
+ "step": 3500
282
+ },
283
+ {
284
+ "epoch": 59.99,
285
+ "learning_rate": 3.225e-05,
286
+ "loss": 0.8045,
287
+ "step": 3600
288
+ },
289
+ {
290
+ "epoch": 61.66,
291
+ "learning_rate": 2.9571428571428568e-05,
292
+ "loss": 0.8036,
293
+ "step": 3700
294
+ },
295
+ {
296
+ "epoch": 63.33,
297
+ "learning_rate": 2.689285714285714e-05,
298
+ "loss": 0.7924,
299
+ "step": 3800
300
+ },
301
+ {
302
+ "epoch": 64.99,
303
+ "learning_rate": 2.4214285714285712e-05,
304
+ "loss": 0.7837,
305
+ "step": 3900
306
+ },
307
+ {
308
+ "epoch": 66.66,
309
+ "learning_rate": 2.1535714285714285e-05,
310
+ "loss": 0.7725,
311
+ "step": 4000
312
+ },
313
+ {
314
+ "epoch": 66.66,
315
+ "eval_loss": 0.5077508687973022,
316
+ "eval_runtime": 31.8976,
317
+ "eval_samples_per_second": 27.964,
318
+ "eval_steps_per_second": 3.511,
319
+ "eval_wer": 0.5778995960761685,
320
+ "step": 4000
321
+ },
322
+ {
323
+ "epoch": 68.33,
324
+ "learning_rate": 1.888392857142857e-05,
325
+ "loss": 0.7655,
326
+ "step": 4100
327
+ },
328
+ {
329
+ "epoch": 69.99,
330
+ "learning_rate": 1.6205357142857143e-05,
331
+ "loss": 0.741,
332
+ "step": 4200
333
+ },
334
+ {
335
+ "epoch": 71.66,
336
+ "learning_rate": 1.3526785714285713e-05,
337
+ "loss": 0.7499,
338
+ "step": 4300
339
+ },
340
+ {
341
+ "epoch": 73.33,
342
+ "learning_rate": 1.0848214285714287e-05,
343
+ "loss": 0.7479,
344
+ "step": 4400
345
+ },
346
+ {
347
+ "epoch": 74.99,
348
+ "learning_rate": 8.169642857142857e-06,
349
+ "loss": 0.7297,
350
+ "step": 4500
351
+ },
352
+ {
353
+ "epoch": 74.99,
354
+ "eval_loss": 0.4939458966255188,
355
+ "eval_runtime": 31.9903,
356
+ "eval_samples_per_second": 27.883,
357
+ "eval_steps_per_second": 3.501,
358
+ "eval_wer": 0.5737160992498558,
359
+ "step": 4500
360
+ },
361
+ {
362
+ "epoch": 76.66,
363
+ "learning_rate": 5.491071428571429e-06,
364
+ "loss": 0.7318,
365
+ "step": 4600
366
+ },
367
+ {
368
+ "epoch": 78.33,
369
+ "learning_rate": 2.8124999999999998e-06,
370
+ "loss": 0.7232,
371
+ "step": 4700
372
+ },
373
+ {
374
+ "epoch": 79.99,
375
+ "learning_rate": 1.3392857142857142e-07,
376
+ "loss": 0.7195,
377
+ "step": 4800
378
+ },
379
+ {
380
+ "epoch": 79.99,
381
+ "step": 4800,
382
+ "total_flos": 2.017023736432276e+19,
383
+ "train_loss": 1.795632479985555,
384
+ "train_runtime": 8730.4816,
385
+ "train_samples_per_second": 17.786,
386
+ "train_steps_per_second": 0.55
387
  }
388
  ],
389
+ "max_steps": 4800,
390
+ "num_train_epochs": 80,
391
+ "total_flos": 2.017023736432276e+19,
392
  "trial_name": null,
393
  "trial_params": null
394
  }