Wilber87vn committed
Commit d3ae7ac
1 parent: e49a1df

Upload folder using huggingface_hub

Files changed (5):
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +3 -0
  3. scheduler.pt +1 -1
  4. trainer_state.json +114 -114
  5. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ec41a0782b6318bc15c8f675366b016d127cbfdca60b12d2bf1a24c898b06082
+ oid sha256:c8812c64fe873ccccaa06bce81ca573f218d52e1edaec68ecada6595fd710905
  size 297614301
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58e212f3353754d87ac53835f03575cdce01b0392151759678950572d16504cf
+ size 151097459
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:63af957ee3b34e42669c1057cdc6588be7f6f80ecf40ef0ce11d46a18f4edccc
+ oid sha256:c697a902817aee34d66b970112c7ee2c24a5d924304347ce2a6944a1bab07de6
  size 623
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
  {
- "best_metric": 0.3898635477582846,
+ "best_metric": 0.3861003861003861,
  "best_model_checkpoint": "D:\\development\\whisper-finetune\\output_tiny\\checkpoint-1000",
  "epoch": 1000.0,
  "eval_steps": 1000,
@@ -11,64 +11,64 @@
  {
  "epoch": 33.33,
  "learning_rate": 9.88e-06,
- "loss": 0.8251,
+ "loss": 0.8261,
  "step": 500
  },
  {
  "epoch": 66.67,
  "learning_rate": 9.659310344827587e-06,
- "loss": 0.0035,
+ "loss": 0.0028,
  "step": 1000
  },
  {
  "epoch": 66.67,
- "eval_loss": 0.0014676946448162198,
- "eval_runtime": 20.2982,
- "eval_samples_per_second": 5.666,
- "eval_steps_per_second": 0.739,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.002442890079692006,
+ "eval_runtime": 21.9145,
+ "eval_samples_per_second": 5.339,
+ "eval_steps_per_second": 0.684,
+ "eval_wer": 0.3861003861003861,
  "step": 1000
  },
  {
  "epoch": 100.0,
  "learning_rate": 9.314482758620691e-06,
- "loss": 0.0014,
+ "loss": 0.0016,
  "step": 1500
  },
  {
  "epoch": 133.33,
  "learning_rate": 8.969655172413794e-06,
- "loss": 0.0013,
+ "loss": 0.0014,
  "step": 2000
  },
  {
  "epoch": 133.33,
- "eval_loss": 0.0020876836497336626,
- "eval_runtime": 19.7859,
- "eval_samples_per_second": 5.812,
- "eval_steps_per_second": 0.758,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0016565920086577535,
+ "eval_runtime": 21.8481,
+ "eval_samples_per_second": 5.355,
+ "eval_steps_per_second": 0.687,
+ "eval_wer": 0.3861003861003861,
  "step": 2000
  },
  {
  "epoch": 166.67,
  "learning_rate": 8.624827586206898e-06,
- "loss": 0.0013,
+ "loss": 0.0014,
  "step": 2500
  },
  {
  "epoch": 200.0,
  "learning_rate": 8.28e-06,
- "loss": 0.0013,
+ "loss": 0.0012,
  "step": 3000
  },
  {
  "epoch": 200.0,
- "eval_loss": 0.0014716371661052108,
- "eval_runtime": 21.0452,
- "eval_samples_per_second": 5.464,
- "eval_steps_per_second": 0.713,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0012075488921254873,
+ "eval_runtime": 21.8068,
+ "eval_samples_per_second": 5.365,
+ "eval_steps_per_second": 0.688,
+ "eval_wer": 0.3861003861003861,
  "step": 3000
  },
  {
@@ -80,247 +80,247 @@
  {
  "epoch": 266.67,
  "learning_rate": 7.590344827586208e-06,
- "loss": 0.0012,
+ "loss": 0.0011,
  "step": 4000
  },
  {
  "epoch": 266.67,
- "eval_loss": 0.0013525157701224089,
- "eval_runtime": 20.7843,
- "eval_samples_per_second": 5.533,
- "eval_steps_per_second": 0.722,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011881846003234386,
+ "eval_runtime": 22.3448,
+ "eval_samples_per_second": 5.236,
+ "eval_steps_per_second": 0.671,
+ "eval_wer": 0.3861003861003861,
  "step": 4000
  },
  {
  "epoch": 300.0,
- "learning_rate": 7.24551724137931e-06,
- "loss": 0.0011,
+ "learning_rate": 7.2468965517241385e-06,
+ "loss": 0.0013,
  "step": 4500
  },
  {
  "epoch": 333.33,
- "learning_rate": 6.9006896551724145e-06,
- "loss": 0.0012,
+ "learning_rate": 6.902068965517243e-06,
+ "loss": 0.0011,
  "step": 5000
  },
  {
  "epoch": 333.33,
- "eval_loss": 0.0016531223664060235,
- "eval_runtime": 21.1699,
- "eval_samples_per_second": 5.432,
- "eval_steps_per_second": 0.709,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0012107096845284104,
+ "eval_runtime": 21.9233,
+ "eval_samples_per_second": 5.337,
+ "eval_steps_per_second": 0.684,
+ "eval_wer": 0.3861003861003861,
  "step": 5000
  },
  {
  "epoch": 366.67,
- "learning_rate": 6.555862068965517e-06,
+ "learning_rate": 6.557241379310345e-06,
  "loss": 0.0011,
  "step": 5500
  },
  {
  "epoch": 400.0,
- "learning_rate": 6.211034482758621e-06,
- "loss": 0.0012,
+ "learning_rate": 6.2124137931034485e-06,
+ "loss": 0.0011,
  "step": 6000
  },
  {
  "epoch": 400.0,
- "eval_loss": 0.0016335018444806337,
- "eval_runtime": 22.773,
- "eval_samples_per_second": 5.05,
- "eval_steps_per_second": 0.659,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011809396091848612,
+ "eval_runtime": 21.8718,
+ "eval_samples_per_second": 5.349,
+ "eval_steps_per_second": 0.686,
+ "eval_wer": 0.3861003861003861,
  "step": 6000
  },
  {
  "epoch": 433.33,
- "learning_rate": 5.8662068965517245e-06,
+ "learning_rate": 5.868275862068966e-06,
  "loss": 0.0011,
  "step": 6500
  },
  {
  "epoch": 466.67,
- "learning_rate": 5.521379310344828e-06,
+ "learning_rate": 5.523448275862069e-06,
  "loss": 0.001,
  "step": 7000
  },
  {
  "epoch": 466.67,
- "eval_loss": 0.0016853931592777371,
- "eval_runtime": 21.6678,
- "eval_samples_per_second": 5.307,
- "eval_steps_per_second": 0.692,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011794030433520675,
+ "eval_runtime": 21.8283,
+ "eval_samples_per_second": 5.36,
+ "eval_steps_per_second": 0.687,
+ "eval_wer": 0.3861003861003861,
  "step": 7000
  },
  {
  "epoch": 500.0,
- "learning_rate": 5.176551724137931e-06,
- "loss": 0.0011,
+ "learning_rate": 5.178620689655173e-06,
+ "loss": 0.001,
  "step": 7500
  },
  {
  "epoch": 533.33,
- "learning_rate": 4.8317241379310345e-06,
- "loss": 0.003,
+ "learning_rate": 4.833793103448276e-06,
+ "loss": 0.001,
  "step": 8000
  },
  {
  "epoch": 533.33,
- "eval_loss": 0.0015577995218336582,
- "eval_runtime": 21.0956,
- "eval_samples_per_second": 5.451,
- "eval_steps_per_second": 0.711,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011808406561613083,
+ "eval_runtime": 21.7413,
+ "eval_samples_per_second": 5.381,
+ "eval_steps_per_second": 0.69,
+ "eval_wer": 0.3861003861003861,
  "step": 8000
  },
  {
  "epoch": 566.67,
- "learning_rate": 4.486896551724138e-06,
- "loss": 0.0011,
+ "learning_rate": 4.489655172413793e-06,
+ "loss": 0.001,
  "step": 8500
  },
  {
  "epoch": 600.0,
- "learning_rate": 4.142068965517242e-06,
+ "learning_rate": 4.144827586206897e-06,
  "loss": 0.001,
  "step": 9000
  },
  {
  "epoch": 600.0,
- "eval_loss": 0.0015839524567127228,
- "eval_runtime": 21.0168,
- "eval_samples_per_second": 5.472,
- "eval_steps_per_second": 0.714,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.00120567309204489,
+ "eval_runtime": 21.8405,
+ "eval_samples_per_second": 5.357,
+ "eval_steps_per_second": 0.687,
+ "eval_wer": 0.3861003861003861,
  "step": 9000
  },
  {
  "epoch": 633.33,
- "learning_rate": 3.7972413793103454e-06,
+ "learning_rate": 3.8000000000000005e-06,
  "loss": 0.001,
  "step": 9500
  },
  {
  "epoch": 666.67,
- "learning_rate": 3.4524137931034487e-06,
+ "learning_rate": 3.455172413793104e-06,
  "loss": 0.001,
  "step": 10000
  },
  {
  "epoch": 666.67,
- "eval_loss": 0.0015147783560678363,
- "eval_runtime": 21.21,
- "eval_samples_per_second": 5.422,
- "eval_steps_per_second": 0.707,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011991177452728152,
+ "eval_runtime": 21.7109,
+ "eval_samples_per_second": 5.389,
+ "eval_steps_per_second": 0.691,
+ "eval_wer": 0.3861003861003861,
  "step": 10000
  },
  {
  "epoch": 700.0,
- "learning_rate": 3.1082758620689657e-06,
+ "learning_rate": 3.111034482758621e-06,
  "loss": 0.001,
  "step": 10500
  },
  {
  "epoch": 733.33,
- "learning_rate": 2.763448275862069e-06,
+ "learning_rate": 2.766206896551724e-06,
  "loss": 0.001,
  "step": 11000
  },
  {
  "epoch": 733.33,
- "eval_loss": 0.0014770556008443236,
- "eval_runtime": 21.0935,
- "eval_samples_per_second": 5.452,
- "eval_steps_per_second": 0.711,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011818531202152371,
+ "eval_runtime": 21.7427,
+ "eval_samples_per_second": 5.381,
+ "eval_steps_per_second": 0.69,
+ "eval_wer": 0.3861003861003861,
  "step": 11000
  },
  {
  "epoch": 766.67,
- "learning_rate": 2.4186206896551724e-06,
- "loss": 0.001,
+ "learning_rate": 2.4220689655172416e-06,
+ "loss": 0.0009,
  "step": 11500
  },
  {
  "epoch": 800.0,
- "learning_rate": 2.073793103448276e-06,
+ "learning_rate": 2.077931034482759e-06,
  "loss": 0.001,
  "step": 12000
  },
  {
  "epoch": 800.0,
- "eval_loss": 0.0015582370106130838,
- "eval_runtime": 21.0889,
- "eval_samples_per_second": 5.453,
- "eval_steps_per_second": 0.711,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0011808592826128006,
+ "eval_runtime": 21.9404,
+ "eval_samples_per_second": 5.333,
+ "eval_steps_per_second": 0.684,
+ "eval_wer": 0.3861003861003861,
  "step": 12000
  },
  {
  "epoch": 833.33,
- "learning_rate": 1.7289655172413794e-06,
+ "learning_rate": 1.733103448275862e-06,
  "loss": 0.0009,
  "step": 12500
  },
  {
  "epoch": 866.67,
- "learning_rate": 1.3848275862068967e-06,
+ "learning_rate": 1.3882758620689656e-06,
  "loss": 0.0009,
  "step": 13000
  },
  {
  "epoch": 866.67,
- "eval_loss": 0.0016244335565716028,
- "eval_runtime": 21.0544,
- "eval_samples_per_second": 5.462,
- "eval_steps_per_second": 0.712,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0012090284144505858,
+ "eval_runtime": 21.6103,
+ "eval_samples_per_second": 5.414,
+ "eval_steps_per_second": 0.694,
+ "eval_wer": 0.3861003861003861,
  "step": 13000
  },
  {
  "epoch": 900.0,
- "learning_rate": 1.04e-06,
+ "learning_rate": 1.043448275862069e-06,
  "loss": 0.0009,
  "step": 13500
  },
  {
  "epoch": 933.33,
- "learning_rate": 6.951724137931034e-07,
+ "learning_rate": 6.993103448275862e-07,
  "loss": 0.0009,
  "step": 14000
  },
  {
  "epoch": 933.33,
- "eval_loss": 0.001572693814523518,
- "eval_runtime": 21.1472,
- "eval_samples_per_second": 5.438,
- "eval_steps_per_second": 0.709,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0012141974875703454,
+ "eval_runtime": 21.6816,
+ "eval_samples_per_second": 5.396,
+ "eval_steps_per_second": 0.692,
+ "eval_wer": 0.3861003861003861,
  "step": 14000
  },
  {
  "epoch": 966.67,
- "learning_rate": 3.50344827586207e-07,
- "loss": 0.001,
+ "learning_rate": 3.558620689655173e-07,
+ "loss": 0.0009,
  "step": 14500
  },
  {
  "epoch": 1000.0,
- "learning_rate": 6.206896551724139e-09,
+ "learning_rate": 1.1034482758620692e-08,
  "loss": 0.0009,
  "step": 15000
  },
  {
  "epoch": 1000.0,
- "eval_loss": 0.0015988650266081095,
- "eval_runtime": 21.2054,
- "eval_samples_per_second": 5.423,
- "eval_steps_per_second": 0.707,
- "eval_wer": 0.3898635477582846,
+ "eval_loss": 0.0012086295755580068,
+ "eval_runtime": 22.0752,
+ "eval_samples_per_second": 5.3,
+ "eval_steps_per_second": 0.679,
+ "eval_wer": 0.3861003861003861,
  "step": 15000
  }
  ],
@@ -329,7 +329,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1000,
  "save_steps": 1000,
- "total_flos": 2.8311717888e+18,
+ "total_flos": 2.88040955904e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:846d03b932f04329ac38183c3b8644cc26aaec372708364b91ec993b90cf9f75
+ oid sha256:6616deffac2284b0d54a1924417ac3aa669129ba55bdbb7a1b5ecda4b25bf76d
  size 4463