SuperKogito commited on
Commit
56313bf
1 Parent(s): ec5d524

End of training

Browse files
Files changed (1) hide show
  1. trainer_state.json +132 -132
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 25.293262636395276,
3
  "best_model_checkpoint": "whisper-base-nl-2/checkpoint-1000",
4
- "epoch": 0.2139495079161318,
5
  "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
@@ -10,390 +10,390 @@
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 4.0000000000000003e-07,
13
- "loss": 1.3206,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
- "learning_rate": 8.8e-07,
19
- "loss": 1.247,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 1.3800000000000001e-06,
25
- "loss": 1.1224,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 1.8800000000000002e-06,
31
- "loss": 0.9567,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
  "learning_rate": 2.38e-06,
37
- "loss": 0.964,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 2.88e-06,
43
- "loss": 0.7427,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 3.3800000000000007e-06,
49
- "loss": 0.6921,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 0.03,
54
  "learning_rate": 3.88e-06,
55
- "loss": 0.6178,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
  "learning_rate": 4.38e-06,
61
- "loss": 0.6377,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 0.04,
66
  "learning_rate": 4.880000000000001e-06,
67
- "loss": 0.5442,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 0.04,
72
  "learning_rate": 5.380000000000001e-06,
73
- "loss": 0.483,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
  "learning_rate": 5.8800000000000005e-06,
79
- "loss": 0.5293,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 0.05,
84
  "learning_rate": 6.380000000000001e-06,
85
- "loss": 0.5293,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 0.05,
90
  "learning_rate": 6.88e-06,
91
- "loss": 0.5213,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
  "learning_rate": 7.3800000000000005e-06,
97
- "loss": 0.4849,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 0.06,
102
  "learning_rate": 7.88e-06,
103
- "loss": 0.5068,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 0.06,
108
  "learning_rate": 8.380000000000001e-06,
109
- "loss": 0.4787,
110
  "step": 425
111
  },
112
  {
113
- "epoch": 0.06,
114
  "learning_rate": 8.880000000000001e-06,
115
- "loss": 0.4744,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 0.07,
120
  "learning_rate": 9.38e-06,
121
- "loss": 0.4914,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 0.07,
126
  "learning_rate": 9.88e-06,
127
- "loss": 0.4643,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 0.07,
132
  "learning_rate": 9.810000000000001e-06,
133
- "loss": 0.4792,
134
  "step": 525
135
  },
136
  {
137
- "epoch": 0.08,
138
  "learning_rate": 9.56e-06,
139
- "loss": 0.4715,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 0.08,
144
  "learning_rate": 9.31e-06,
145
- "loss": 0.4405,
146
  "step": 575
147
  },
148
  {
149
- "epoch": 0.09,
150
  "learning_rate": 9.060000000000001e-06,
151
- "loss": 0.4577,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 0.09,
156
  "learning_rate": 8.81e-06,
157
- "loss": 0.4624,
158
  "step": 625
159
  },
160
  {
161
- "epoch": 0.09,
162
  "learning_rate": 8.560000000000001e-06,
163
- "loss": 0.3639,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 0.1,
168
  "learning_rate": 8.31e-06,
169
- "loss": 0.4156,
170
  "step": 675
171
  },
172
  {
173
- "epoch": 0.1,
174
  "learning_rate": 8.06e-06,
175
- "loss": 0.4121,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 0.1,
180
  "learning_rate": 7.810000000000001e-06,
181
- "loss": 0.3885,
182
  "step": 725
183
  },
184
  {
185
- "epoch": 0.11,
186
  "learning_rate": 7.5600000000000005e-06,
187
- "loss": 0.4401,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.11,
192
  "learning_rate": 7.31e-06,
193
- "loss": 0.4141,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 0.11,
198
  "learning_rate": 7.06e-06,
199
- "loss": 0.4179,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 0.12,
204
  "learning_rate": 6.810000000000001e-06,
205
- "loss": 0.4299,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 0.12,
210
  "learning_rate": 6.560000000000001e-06,
211
- "loss": 0.3733,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 0.12,
216
  "learning_rate": 6.3100000000000006e-06,
217
- "loss": 0.429,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 0.13,
222
  "learning_rate": 6.0600000000000004e-06,
223
- "loss": 0.3495,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 0.13,
228
  "learning_rate": 5.81e-06,
229
- "loss": 0.373,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 0.14,
234
  "learning_rate": 5.560000000000001e-06,
235
- "loss": 0.3427,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 0.14,
240
  "learning_rate": 5.310000000000001e-06,
241
- "loss": 0.3687,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 0.14,
246
  "learning_rate": 5.060000000000001e-06,
247
- "loss": 0.3749,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 0.14,
252
- "eval_cer": 8.575715070233361,
253
- "eval_loss": 0.5007154941558838,
254
- "eval_runtime": 2429.0625,
255
  "eval_samples_per_second": 0.804,
256
  "eval_steps_per_second": 0.402,
257
- "eval_wer": 25.293262636395276,
258
  "step": 1000
259
  },
260
  {
261
- "epoch": 0.15,
262
  "learning_rate": 4.8100000000000005e-06,
263
- "loss": 0.3366,
264
  "step": 1025
265
  },
266
  {
267
- "epoch": 0.15,
268
  "learning_rate": 4.56e-06,
269
- "loss": 0.3493,
270
  "step": 1050
271
  },
272
  {
273
- "epoch": 0.15,
274
  "learning_rate": 4.31e-06,
275
- "loss": 0.3721,
276
  "step": 1075
277
  },
278
  {
279
- "epoch": 0.16,
280
  "learning_rate": 4.060000000000001e-06,
281
- "loss": 0.3504,
282
  "step": 1100
283
  },
284
  {
285
- "epoch": 0.16,
286
  "learning_rate": 3.8100000000000004e-06,
287
- "loss": 0.3543,
288
  "step": 1125
289
  },
290
  {
291
- "epoch": 0.16,
292
  "learning_rate": 3.5600000000000002e-06,
293
- "loss": 0.395,
294
  "step": 1150
295
  },
296
  {
297
- "epoch": 0.17,
298
  "learning_rate": 3.3100000000000005e-06,
299
- "loss": 0.3789,
300
  "step": 1175
301
  },
302
  {
303
- "epoch": 0.17,
304
  "learning_rate": 3.0600000000000003e-06,
305
- "loss": 0.3223,
306
  "step": 1200
307
  },
308
  {
309
- "epoch": 0.17,
310
  "learning_rate": 2.8100000000000006e-06,
311
- "loss": 0.3242,
312
  "step": 1225
313
  },
314
  {
315
- "epoch": 0.18,
316
  "learning_rate": 2.56e-06,
317
- "loss": 0.3701,
318
  "step": 1250
319
  },
320
  {
321
- "epoch": 0.18,
322
  "learning_rate": 2.3100000000000003e-06,
323
- "loss": 0.3882,
324
  "step": 1275
325
  },
326
  {
327
- "epoch": 0.19,
328
  "learning_rate": 2.06e-06,
329
- "loss": 0.3678,
330
  "step": 1300
331
  },
332
  {
333
- "epoch": 0.19,
334
  "learning_rate": 1.81e-06,
335
- "loss": 0.3539,
336
  "step": 1325
337
  },
338
  {
339
- "epoch": 0.19,
340
  "learning_rate": 1.56e-06,
341
- "loss": 0.3723,
342
  "step": 1350
343
  },
344
  {
345
- "epoch": 0.2,
346
  "learning_rate": 1.3100000000000002e-06,
347
- "loss": 0.3184,
348
  "step": 1375
349
  },
350
  {
351
- "epoch": 0.2,
352
  "learning_rate": 1.06e-06,
353
- "loss": 0.3204,
354
  "step": 1400
355
  },
356
  {
357
- "epoch": 0.2,
358
  "learning_rate": 8.100000000000001e-07,
359
- "loss": 0.3884,
360
  "step": 1425
361
  },
362
  {
363
- "epoch": 0.21,
364
  "learning_rate": 5.6e-07,
365
- "loss": 0.3948,
366
  "step": 1450
367
  },
368
  {
369
- "epoch": 0.21,
370
  "learning_rate": 3.1000000000000005e-07,
371
- "loss": 0.3151,
372
  "step": 1475
373
  },
374
  {
375
- "epoch": 0.21,
376
  "learning_rate": 6.000000000000001e-08,
377
- "loss": 0.3676,
378
  "step": 1500
379
  },
380
  {
381
- "epoch": 0.21,
382
  "step": 1500,
383
  "total_flos": 9.728999424e+16,
384
- "train_loss": 0.4858832302093506,
385
- "train_runtime": 2744.5265,
386
- "train_samples_per_second": 0.547,
387
- "train_steps_per_second": 0.547
388
- },
389
- {
390
- "epoch": 0.21,
391
- "eval_cer": 8.575715070233361,
392
- "eval_loss": 0.5007154941558838,
393
- "eval_runtime": 2408.5648,
394
- "eval_samples_per_second": 0.811,
395
- "eval_steps_per_second": 0.406,
396
- "eval_wer": 25.293262636395276,
397
  "step": 1500
398
  }
399
  ],
 
1
  {
2
+ "best_metric": 25.674043441767154,
3
  "best_model_checkpoint": "whisper-base-nl-2/checkpoint-1000",
4
+ "epoch": 0.18277080541001584,
5
  "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
 
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 4.0000000000000003e-07,
13
+ "loss": 1.4165,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "learning_rate": 9.000000000000001e-07,
19
+ "loss": 1.2919,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 1.3800000000000001e-06,
25
+ "loss": 1.0487,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 1.8800000000000002e-06,
31
+ "loss": 1.0115,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
  "learning_rate": 2.38e-06,
37
+ "loss": 0.9841,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 2.88e-06,
43
+ "loss": 0.99,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 3.3800000000000007e-06,
49
+ "loss": 0.8826,
50
  "step": 175
51
  },
52
  {
53
+ "epoch": 0.02,
54
  "learning_rate": 3.88e-06,
55
+ "loss": 0.6872,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
  "learning_rate": 4.38e-06,
61
+ "loss": 0.6233,
62
  "step": 225
63
  },
64
  {
65
+ "epoch": 0.03,
66
  "learning_rate": 4.880000000000001e-06,
67
+ "loss": 0.6387,
68
  "step": 250
69
  },
70
  {
71
+ "epoch": 0.03,
72
  "learning_rate": 5.380000000000001e-06,
73
+ "loss": 0.7633,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
  "learning_rate": 5.8800000000000005e-06,
79
+ "loss": 0.5987,
80
  "step": 300
81
  },
82
  {
83
+ "epoch": 0.04,
84
  "learning_rate": 6.380000000000001e-06,
85
+ "loss": 0.5338,
86
  "step": 325
87
  },
88
  {
89
+ "epoch": 0.04,
90
  "learning_rate": 6.88e-06,
91
+ "loss": 0.5378,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
  "learning_rate": 7.3800000000000005e-06,
97
+ "loss": 0.5808,
98
  "step": 375
99
  },
100
  {
101
+ "epoch": 0.05,
102
  "learning_rate": 7.88e-06,
103
+ "loss": 0.5216,
104
  "step": 400
105
  },
106
  {
107
+ "epoch": 0.05,
108
  "learning_rate": 8.380000000000001e-06,
109
+ "loss": 0.503,
110
  "step": 425
111
  },
112
  {
113
+ "epoch": 0.05,
114
  "learning_rate": 8.880000000000001e-06,
115
+ "loss": 0.5549,
116
  "step": 450
117
  },
118
  {
119
+ "epoch": 0.06,
120
  "learning_rate": 9.38e-06,
121
+ "loss": 0.6421,
122
  "step": 475
123
  },
124
  {
125
+ "epoch": 0.06,
126
  "learning_rate": 9.88e-06,
127
+ "loss": 0.4607,
128
  "step": 500
129
  },
130
  {
131
+ "epoch": 0.06,
132
  "learning_rate": 9.810000000000001e-06,
133
+ "loss": 0.6395,
134
  "step": 525
135
  },
136
  {
137
+ "epoch": 0.07,
138
  "learning_rate": 9.56e-06,
139
+ "loss": 0.5217,
140
  "step": 550
141
  },
142
  {
143
+ "epoch": 0.07,
144
  "learning_rate": 9.31e-06,
145
+ "loss": 0.4829,
146
  "step": 575
147
  },
148
  {
149
+ "epoch": 0.07,
150
  "learning_rate": 9.060000000000001e-06,
151
+ "loss": 0.644,
152
  "step": 600
153
  },
154
  {
155
+ "epoch": 0.08,
156
  "learning_rate": 8.81e-06,
157
+ "loss": 0.4887,
158
  "step": 625
159
  },
160
  {
161
+ "epoch": 0.08,
162
  "learning_rate": 8.560000000000001e-06,
163
+ "loss": 0.5319,
164
  "step": 650
165
  },
166
  {
167
+ "epoch": 0.08,
168
  "learning_rate": 8.31e-06,
169
+ "loss": 0.4961,
170
  "step": 675
171
  },
172
  {
173
+ "epoch": 0.09,
174
  "learning_rate": 8.06e-06,
175
+ "loss": 0.4463,
176
  "step": 700
177
  },
178
  {
179
+ "epoch": 0.09,
180
  "learning_rate": 7.810000000000001e-06,
181
+ "loss": 0.4523,
182
  "step": 725
183
  },
184
  {
185
+ "epoch": 0.09,
186
  "learning_rate": 7.5600000000000005e-06,
187
+ "loss": 0.5192,
188
  "step": 750
189
  },
190
  {
191
+ "epoch": 0.09,
192
  "learning_rate": 7.31e-06,
193
+ "loss": 0.5411,
194
  "step": 775
195
  },
196
  {
197
+ "epoch": 0.1,
198
  "learning_rate": 7.06e-06,
199
+ "loss": 0.472,
200
  "step": 800
201
  },
202
  {
203
+ "epoch": 0.1,
204
  "learning_rate": 6.810000000000001e-06,
205
+ "loss": 0.5429,
206
  "step": 825
207
  },
208
  {
209
+ "epoch": 0.1,
210
  "learning_rate": 6.560000000000001e-06,
211
+ "loss": 0.4652,
212
  "step": 850
213
  },
214
  {
215
+ "epoch": 0.11,
216
  "learning_rate": 6.3100000000000006e-06,
217
+ "loss": 0.4493,
218
  "step": 875
219
  },
220
  {
221
+ "epoch": 0.11,
222
  "learning_rate": 6.0600000000000004e-06,
223
+ "loss": 0.5725,
224
  "step": 900
225
  },
226
  {
227
+ "epoch": 0.11,
228
  "learning_rate": 5.81e-06,
229
+ "loss": 0.5933,
230
  "step": 925
231
  },
232
  {
233
+ "epoch": 0.12,
234
  "learning_rate": 5.560000000000001e-06,
235
+ "loss": 0.4506,
236
  "step": 950
237
  },
238
  {
239
+ "epoch": 0.12,
240
  "learning_rate": 5.310000000000001e-06,
241
+ "loss": 0.457,
242
  "step": 975
243
  },
244
  {
245
+ "epoch": 0.12,
246
  "learning_rate": 5.060000000000001e-06,
247
+ "loss": 0.4709,
248
  "step": 1000
249
  },
250
  {
251
+ "epoch": 0.12,
252
+ "eval_cer": 8.48722037329875,
253
+ "eval_loss": 0.49462148547172546,
254
+ "eval_runtime": 2428.8628,
255
  "eval_samples_per_second": 0.804,
256
  "eval_steps_per_second": 0.402,
257
+ "eval_wer": 25.674043441767154,
258
  "step": 1000
259
  },
260
  {
261
+ "epoch": 0.12,
262
  "learning_rate": 4.8100000000000005e-06,
263
+ "loss": 0.3941,
264
  "step": 1025
265
  },
266
  {
267
+ "epoch": 0.13,
268
  "learning_rate": 4.56e-06,
269
+ "loss": 0.4826,
270
  "step": 1050
271
  },
272
  {
273
+ "epoch": 0.13,
274
  "learning_rate": 4.31e-06,
275
+ "loss": 0.4664,
276
  "step": 1075
277
  },
278
  {
279
+ "epoch": 0.13,
280
  "learning_rate": 4.060000000000001e-06,
281
+ "loss": 0.3897,
282
  "step": 1100
283
  },
284
  {
285
+ "epoch": 0.14,
286
  "learning_rate": 3.8100000000000004e-06,
287
+ "loss": 0.5067,
288
  "step": 1125
289
  },
290
  {
291
+ "epoch": 0.14,
292
  "learning_rate": 3.5600000000000002e-06,
293
+ "loss": 0.466,
294
  "step": 1150
295
  },
296
  {
297
+ "epoch": 0.14,
298
  "learning_rate": 3.3100000000000005e-06,
299
+ "loss": 0.495,
300
  "step": 1175
301
  },
302
  {
303
+ "epoch": 0.15,
304
  "learning_rate": 3.0600000000000003e-06,
305
+ "loss": 0.3934,
306
  "step": 1200
307
  },
308
  {
309
+ "epoch": 0.15,
310
  "learning_rate": 2.8100000000000006e-06,
311
+ "loss": 0.4138,
312
  "step": 1225
313
  },
314
  {
315
+ "epoch": 0.15,
316
  "learning_rate": 2.56e-06,
317
+ "loss": 0.4547,
318
  "step": 1250
319
  },
320
  {
321
+ "epoch": 0.16,
322
  "learning_rate": 2.3100000000000003e-06,
323
+ "loss": 0.4548,
324
  "step": 1275
325
  },
326
  {
327
+ "epoch": 0.16,
328
  "learning_rate": 2.06e-06,
329
+ "loss": 0.4612,
330
  "step": 1300
331
  },
332
  {
333
+ "epoch": 0.16,
334
  "learning_rate": 1.81e-06,
335
+ "loss": 0.5229,
336
  "step": 1325
337
  },
338
  {
339
+ "epoch": 0.16,
340
  "learning_rate": 1.56e-06,
341
+ "loss": 0.3348,
342
  "step": 1350
343
  },
344
  {
345
+ "epoch": 0.17,
346
  "learning_rate": 1.3100000000000002e-06,
347
+ "loss": 0.4843,
348
  "step": 1375
349
  },
350
  {
351
+ "epoch": 0.17,
352
  "learning_rate": 1.06e-06,
353
+ "loss": 0.3578,
354
  "step": 1400
355
  },
356
  {
357
+ "epoch": 0.17,
358
  "learning_rate": 8.100000000000001e-07,
359
+ "loss": 0.4753,
360
  "step": 1425
361
  },
362
  {
363
+ "epoch": 0.18,
364
  "learning_rate": 5.6e-07,
365
+ "loss": 0.441,
366
  "step": 1450
367
  },
368
  {
369
+ "epoch": 0.18,
370
  "learning_rate": 3.1000000000000005e-07,
371
+ "loss": 0.3252,
372
  "step": 1475
373
  },
374
  {
375
+ "epoch": 0.18,
376
  "learning_rate": 6.000000000000001e-08,
377
+ "loss": 0.4169,
378
  "step": 1500
379
  },
380
  {
381
+ "epoch": 0.18,
382
  "step": 1500,
383
  "total_flos": 9.728999424e+16,
384
+ "train_loss": 0.5707454249064128,
385
+ "train_runtime": 2748.8536,
386
+ "train_samples_per_second": 0.546,
387
+ "train_steps_per_second": 0.546
388
+ },
389
+ {
390
+ "epoch": 0.18,
391
+ "eval_cer": 8.48722037329875,
392
+ "eval_loss": 0.49462148547172546,
393
+ "eval_runtime": 2417.8085,
394
+ "eval_samples_per_second": 0.808,
395
+ "eval_steps_per_second": 0.404,
396
+ "eval_wer": 25.674043441767154,
397
  "step": 1500
398
  }
399
  ],