ihanif commited on
Commit
28dcb1d
1 Parent(s): de26b76

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 42.86,
3
- "eval_loss": 1.019495964050293,
4
- "eval_runtime": 253.9493,
5
- "eval_samples_per_second": 2.016,
6
- "eval_steps_per_second": 0.126,
7
- "eval_wer": 66.14709443099274,
8
- "train_loss": 0.6091492390632629,
9
- "train_runtime": 1414.6972,
10
- "train_samples_per_second": 13.572,
11
- "train_steps_per_second": 0.212
12
  }
 
1
  {
2
+ "epoch": 71.43,
3
+ "eval_loss": 1.0742337703704834,
4
+ "eval_runtime": 209.1132,
5
+ "eval_samples_per_second": 2.448,
6
+ "eval_steps_per_second": 0.153,
7
+ "eval_wer": 69.61259079903148,
8
+ "train_loss": 0.8564610816463828,
9
+ "train_runtime": 2065.7487,
10
+ "train_samples_per_second": 15.491,
11
+ "train_steps_per_second": 0.242
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 42.86,
3
- "eval_loss": 1.019495964050293,
4
- "eval_runtime": 253.9493,
5
- "eval_samples_per_second": 2.016,
6
- "eval_steps_per_second": 0.126,
7
- "eval_wer": 66.14709443099274
8
  }
 
1
  {
2
+ "epoch": 71.43,
3
+ "eval_loss": 1.0742337703704834,
4
+ "eval_runtime": 209.1132,
5
+ "eval_samples_per_second": 2.448,
6
+ "eval_steps_per_second": 0.153,
7
+ "eval_wer": 69.61259079903148
8
  }
runs/Dec18_15-43-50_129-146-179-188/events.out.tfevents.1671380557.129-146-179-188.133614.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ffd09a1a11a5b8b72475b2963cf7d969a36c6061e6f2f4e830fc51522fffd2
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 42.86,
3
- "train_loss": 0.6091492390632629,
4
- "train_runtime": 1414.6972,
5
- "train_samples_per_second": 13.572,
6
- "train_steps_per_second": 0.212
7
  }
 
1
  {
2
+ "epoch": 71.43,
3
+ "train_loss": 0.8564610816463828,
4
+ "train_runtime": 2065.7487,
5
+ "train_samples_per_second": 15.491,
6
+ "train_steps_per_second": 0.242
7
  }
trainer_state.json CHANGED
@@ -1,232 +1,370 @@
1
  {
2
- "best_metric": 1.019495964050293,
3
- "best_model_checkpoint": "./checkpoint-100",
4
- "epoch": 42.857142857142854,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.43,
12
- "learning_rate": 1.75e-06,
13
- "loss": 3.252,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 2.86,
18
- "learning_rate": 4.25e-06,
19
- "loss": 2.8169,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 4.29,
24
- "learning_rate": 6.750000000000001e-06,
25
- "loss": 2.2237,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 5.71,
30
- "learning_rate": 9.250000000000001e-06,
31
- "loss": 1.6944,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 7.14,
36
- "learning_rate": 9.730769230769231e-06,
37
- "loss": 1.2764,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 8.57,
42
- "learning_rate": 9.346153846153847e-06,
43
- "loss": 1.0164,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 10.0,
48
- "learning_rate": 8.961538461538462e-06,
49
- "loss": 0.854,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 11.43,
54
- "learning_rate": 8.576923076923077e-06,
55
- "loss": 0.7109,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 12.86,
60
- "learning_rate": 8.192307692307692e-06,
61
- "loss": 0.613,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 14.29,
66
- "learning_rate": 7.807692307692309e-06,
67
- "loss": 0.5307,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 14.29,
72
- "eval_loss": 1.019495964050293,
73
- "eval_runtime": 229.2012,
74
- "eval_samples_per_second": 2.234,
75
- "eval_steps_per_second": 0.14,
76
- "eval_wer": 66.14709443099274,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 15.71,
81
- "learning_rate": 7.423076923076924e-06,
82
- "loss": 0.457,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 17.14,
87
- "learning_rate": 7.038461538461539e-06,
88
- "loss": 0.3915,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 18.57,
93
- "learning_rate": 6.653846153846154e-06,
94
- "loss": 0.3438,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 20.0,
99
- "learning_rate": 6.26923076923077e-06,
100
- "loss": 0.2985,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 21.43,
105
- "learning_rate": 5.884615384615385e-06,
106
- "loss": 0.2591,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 22.86,
111
- "learning_rate": 5.500000000000001e-06,
112
- "loss": 0.2199,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 24.29,
117
- "learning_rate": 5.115384615384616e-06,
118
- "loss": 0.1928,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 25.71,
123
- "learning_rate": 4.730769230769231e-06,
124
- "loss": 0.1676,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 27.14,
129
- "learning_rate": 4.346153846153846e-06,
130
- "loss": 0.1435,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 28.57,
135
- "learning_rate": 3.961538461538462e-06,
136
- "loss": 0.1225,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 28.57,
141
- "eval_loss": 1.146507740020752,
142
- "eval_runtime": 276.2984,
143
- "eval_samples_per_second": 1.853,
144
- "eval_steps_per_second": 0.116,
145
- "eval_wer": 66.01846246973365,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 30.0,
150
- "learning_rate": 3.5769230769230773e-06,
151
- "loss": 0.108,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 31.43,
156
- "learning_rate": 3.192307692307692e-06,
157
- "loss": 0.0941,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 32.86,
162
- "learning_rate": 2.807692307692308e-06,
163
- "loss": 0.0811,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 34.29,
168
- "learning_rate": 2.4230769230769233e-06,
169
- "loss": 0.0729,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 35.71,
174
- "learning_rate": 2.0384615384615386e-06,
175
- "loss": 0.0655,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 37.14,
180
- "learning_rate": 1.653846153846154e-06,
181
- "loss": 0.0604,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 38.57,
186
- "learning_rate": 1.2692307692307692e-06,
187
- "loss": 0.055,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 40.0,
192
- "learning_rate": 8.846153846153848e-07,
193
- "loss": 0.0524,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 41.43,
198
- "learning_rate": 5.000000000000001e-07,
199
- "loss": 0.0505,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 42.86,
204
- "learning_rate": 1.153846153846154e-07,
205
- "loss": 0.0498,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 42.86,
210
- "eval_loss": 1.229990839958191,
211
- "eval_runtime": 273.4631,
212
- "eval_samples_per_second": 1.872,
213
- "eval_steps_per_second": 0.117,
214
- "eval_wer": 66.03359564164649,
215
  "step": 300
216
  },
217
  {
218
- "epoch": 42.86,
219
- "step": 300,
220
- "total_flos": 1.19900188901376e+18,
221
- "train_loss": 0.6091492390632629,
222
- "train_runtime": 1414.6972,
223
- "train_samples_per_second": 13.572,
224
- "train_steps_per_second": 0.212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  }
226
  ],
227
- "max_steps": 300,
228
- "num_train_epochs": 43,
229
- "total_flos": 1.19900188901376e+18,
230
  "trial_name": null,
231
  "trial_params": null
232
  }
 
1
  {
2
+ "best_metric": 1.0742337703704834,
3
+ "best_model_checkpoint": "./checkpoint-200",
4
+ "epoch": 71.42857142857143,
5
+ "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.43,
12
+ "learning_rate": 1.4e-07,
13
+ "loss": 3.2633,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 2.86,
18
+ "learning_rate": 3.4000000000000003e-07,
19
+ "loss": 3.235,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 4.29,
24
+ "learning_rate": 5.4e-07,
25
+ "loss": 3.164,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 5.71,
30
+ "learning_rate": 7.4e-07,
31
+ "loss": 2.9792,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 7.14,
36
+ "learning_rate": 9.400000000000001e-07,
37
+ "loss": 2.7533,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 8.57,
42
+ "learning_rate": 1.14e-06,
43
+ "loss": 2.5502,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 10.0,
48
+ "learning_rate": 1.34e-06,
49
+ "loss": 2.3845,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 11.43,
54
+ "learning_rate": 1.54e-06,
55
+ "loss": 2.161,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 12.86,
60
+ "learning_rate": 1.74e-06,
61
+ "loss": 1.9855,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 14.29,
66
+ "learning_rate": 1.94e-06,
67
+ "loss": 1.8225,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 14.29,
72
+ "eval_loss": 1.760784387588501,
73
+ "eval_runtime": 425.4331,
74
+ "eval_samples_per_second": 1.203,
75
+ "eval_steps_per_second": 0.075,
76
+ "eval_wer": 105.31930992736078,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 15.71,
81
+ "learning_rate": 2.1400000000000003e-06,
82
+ "loss": 1.6373,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 17.14,
87
+ "learning_rate": 2.3400000000000005e-06,
88
+ "loss": 1.4756,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 18.57,
93
+ "learning_rate": 2.5400000000000002e-06,
94
+ "loss": 1.3534,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 20.0,
99
+ "learning_rate": 2.7400000000000004e-06,
100
+ "loss": 1.2259,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 21.43,
105
+ "learning_rate": 2.9400000000000002e-06,
106
+ "loss": 1.1304,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 22.86,
111
+ "learning_rate": 3.1400000000000004e-06,
112
+ "loss": 1.0195,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 24.29,
117
+ "learning_rate": 3.3400000000000006e-06,
118
+ "loss": 0.9428,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 25.71,
123
+ "learning_rate": 3.54e-06,
124
+ "loss": 0.8721,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 27.14,
129
+ "learning_rate": 3.74e-06,
130
+ "loss": 0.7904,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 28.57,
135
+ "learning_rate": 3.94e-06,
136
+ "loss": 0.7281,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 28.57,
141
+ "eval_loss": 1.0742337703704834,
142
+ "eval_runtime": 174.027,
143
+ "eval_samples_per_second": 2.942,
144
+ "eval_steps_per_second": 0.184,
145
+ "eval_wer": 69.61259079903148,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 30.0,
150
+ "learning_rate": 4.14e-06,
151
+ "loss": 0.6704,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 31.43,
156
+ "learning_rate": 4.34e-06,
157
+ "loss": 0.6118,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 32.86,
162
+ "learning_rate": 4.540000000000001e-06,
163
+ "loss": 0.5494,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 34.29,
168
+ "learning_rate": 4.74e-06,
169
+ "loss": 0.5024,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 35.71,
174
+ "learning_rate": 4.94e-06,
175
+ "loss": 0.4511,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 37.14,
180
+ "learning_rate": 5.140000000000001e-06,
181
+ "loss": 0.4026,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 38.57,
186
+ "learning_rate": 5.3400000000000005e-06,
187
+ "loss": 0.3507,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 40.0,
192
+ "learning_rate": 5.540000000000001e-06,
193
+ "loss": 0.3108,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 41.43,
198
+ "learning_rate": 5.74e-06,
199
+ "loss": 0.2687,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 42.86,
204
+ "learning_rate": 5.94e-06,
205
+ "loss": 0.2329,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 42.86,
210
+ "eval_loss": 1.1192402839660645,
211
+ "eval_runtime": 175.174,
212
+ "eval_samples_per_second": 2.923,
213
+ "eval_steps_per_second": 0.183,
214
+ "eval_wer": 67.02481840193705,
215
  "step": 300
216
  },
217
  {
218
+ "epoch": 44.29,
219
+ "learning_rate": 6.1400000000000005e-06,
220
+ "loss": 0.1932,
221
+ "step": 310
222
+ },
223
+ {
224
+ "epoch": 45.71,
225
+ "learning_rate": 6.34e-06,
226
+ "loss": 0.1642,
227
+ "step": 320
228
+ },
229
+ {
230
+ "epoch": 47.14,
231
+ "learning_rate": 6.540000000000001e-06,
232
+ "loss": 0.134,
233
+ "step": 330
234
+ },
235
+ {
236
+ "epoch": 48.57,
237
+ "learning_rate": 6.740000000000001e-06,
238
+ "loss": 0.1047,
239
+ "step": 340
240
+ },
241
+ {
242
+ "epoch": 50.0,
243
+ "learning_rate": 6.9400000000000005e-06,
244
+ "loss": 0.0869,
245
+ "step": 350
246
+ },
247
+ {
248
+ "epoch": 51.43,
249
+ "learning_rate": 7.14e-06,
250
+ "loss": 0.0648,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 52.86,
255
+ "learning_rate": 7.340000000000001e-06,
256
+ "loss": 0.0517,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 54.29,
261
+ "learning_rate": 7.540000000000001e-06,
262
+ "loss": 0.0391,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 55.71,
267
+ "learning_rate": 7.74e-06,
268
+ "loss": 0.0311,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 57.14,
273
+ "learning_rate": 7.94e-06,
274
+ "loss": 0.0247,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 57.14,
279
+ "eval_loss": 1.3494515419006348,
280
+ "eval_runtime": 166.7411,
281
+ "eval_samples_per_second": 3.071,
282
+ "eval_steps_per_second": 0.192,
283
+ "eval_wer": 66.37409200968523,
284
+ "step": 400
285
+ },
286
+ {
287
+ "epoch": 58.57,
288
+ "learning_rate": 8.14e-06,
289
+ "loss": 0.0195,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 60.0,
294
+ "learning_rate": 8.34e-06,
295
+ "loss": 0.0157,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 61.43,
300
+ "learning_rate": 8.540000000000001e-06,
301
+ "loss": 0.0129,
302
+ "step": 430
303
+ },
304
+ {
305
+ "epoch": 62.86,
306
+ "learning_rate": 8.740000000000001e-06,
307
+ "loss": 0.0112,
308
+ "step": 440
309
+ },
310
+ {
311
+ "epoch": 64.29,
312
+ "learning_rate": 8.94e-06,
313
+ "loss": 0.0097,
314
+ "step": 450
315
+ },
316
+ {
317
+ "epoch": 65.71,
318
+ "learning_rate": 9.14e-06,
319
+ "loss": 0.0085,
320
+ "step": 460
321
+ },
322
+ {
323
+ "epoch": 67.14,
324
+ "learning_rate": 9.340000000000002e-06,
325
+ "loss": 0.0075,
326
+ "step": 470
327
+ },
328
+ {
329
+ "epoch": 68.57,
330
+ "learning_rate": 9.54e-06,
331
+ "loss": 0.0069,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 70.0,
336
+ "learning_rate": 9.74e-06,
337
+ "loss": 0.0062,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 71.43,
342
+ "learning_rate": 9.940000000000001e-06,
343
+ "loss": 0.0057,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 71.43,
348
+ "eval_loss": 1.5055396556854248,
349
+ "eval_runtime": 189.4887,
350
+ "eval_samples_per_second": 2.702,
351
+ "eval_steps_per_second": 0.169,
352
+ "eval_wer": 67.28964891041163,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 71.43,
357
+ "step": 500,
358
+ "total_flos": 1.99723386175488e+18,
359
+ "train_loss": 0.8564610816463828,
360
+ "train_runtime": 2065.7487,
361
+ "train_samples_per_second": 15.491,
362
+ "train_steps_per_second": 0.242
363
  }
364
  ],
365
+ "max_steps": 500,
366
+ "num_train_epochs": 72,
367
+ "total_flos": 1.99723386175488e+18,
368
  "trial_name": null,
369
  "trial_params": null
370
  }