marinone94 committed
Commit cf9c148
Parent(s): e21f388

End of training

Files changed:
- all_results.json +17 -17
- config.json +1 -1
- eval_pretrained_results.json +4 -4
- generation_config.json +1 -1
- pytorch_model.bin +1 -1
- test_finetuned_results.json +5 -5
- test_pretrained_results.json +4 -4
- train_results.json +4 -4
- trainer_state.json +124 -124
- training_args.bin +2 -2
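To inspect this exact revision locally, the commit hash above can be passed as a revision to huggingface_hub. A minimal sketch in Python, assuming the repository id is marinone94/whisper-training-blog (inferred from the checkpoint path in trainer_state.json, not stated on this page):

    # Sketch: download the repo as of this commit (repo_id is an assumption).
    from huggingface_hub import snapshot_download

    local_dir = snapshot_download(
        repo_id="marinone94/whisper-training-blog",  # assumed repo id
        revision="cf9c148",                          # the commit shown above
    )
    print(local_dir)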
all_results.json
CHANGED
@@ -1,33 +1,33 @@
 {
   "epoch": 2.33,
   "eval_loss": 1.6191972494125366,
-  "eval_pretrained_loss": 1.
-  "eval_pretrained_runtime":
-  "eval_pretrained_samples_per_second":
-  "eval_pretrained_steps_per_second": 0.
+  "eval_pretrained_loss": 1.71565842628479,
+  "eval_pretrained_runtime": 197.0203,
+  "eval_pretrained_samples_per_second": 1.675,
+  "eval_pretrained_steps_per_second": 0.213,
   "eval_pretrained_wer": 264.42599393262014,
   "eval_runtime": 56.3363,
   "eval_samples_per_second": 0.071,
   "eval_steps_per_second": 0.036,
   "eval_wer": 153.2258064516129,
-  "test_finetuned_loss": 1.
-  "test_finetuned_runtime":
-  "test_finetuned_samples_per_second": 3.
-  "test_finetuned_steps_per_second": 0.
-  "test_finetuned_wer": 172.
+  "test_finetuned_loss": 1.0018519163131714,
+  "test_finetuned_runtime": 237.997,
+  "test_finetuned_samples_per_second": 3.189,
+  "test_finetuned_steps_per_second": 0.399,
+  "test_finetuned_wer": 172.96023368518888,
   "test_loss": 1.7568330764770508,
-  "test_pretrained_loss": 1.
-  "test_pretrained_runtime":
-  "test_pretrained_samples_per_second": 2.
-  "test_pretrained_steps_per_second": 0.
+  "test_pretrained_loss": 1.7249696254730225,
+  "test_pretrained_runtime": 273.2544,
+  "test_pretrained_samples_per_second": 2.778,
+  "test_pretrained_steps_per_second": 0.348,
   "test_pretrained_wer": 261.9066587001262,
   "test_runtime": 37.8582,
   "test_samples_per_second": 0.106,
   "test_steps_per_second": 0.053,
   "test_wer": 138.5964912280702,
   "total_flos": 1.7572960198656e+17,
-  "train_loss": 0.
-  "train_runtime":
-  "train_samples_per_second": 3.
-  "train_steps_per_second": 0.
+  "train_loss": 0.8751586728862354,
+  "train_runtime": 2260.8457,
+  "train_samples_per_second": 3.17,
+  "train_steps_per_second": 0.198
 }
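all_results.json keeps the pretrained and fine-tuned metrics side by side in one flat JSON object, so the before/after comparison is a matter of reading a few keys. A minimal sketch, assuming the file has been downloaded to the working directory:

    # Sketch: compare pretrained vs. fine-tuned WER from all_results.json.
    import json

    with open("all_results.json") as f:
        results = json.load(f)

    print("eval WER:", results["eval_pretrained_wer"], "->", results["eval_wer"])
    print("test WER:", results["test_pretrained_wer"], "->", results["test_finetuned_wer"])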
config.json
CHANGED
@@ -146,7 +146,7 @@
     50362
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.29.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865
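The only change in config.json (and in generation_config.json below) is the transformers_version stamp written at save time, now 4.29.2. A minimal loading sketch, again assuming the repository id marinone94/whisper-training-blog and a transformers install of 4.29.2 or newer:

    # Sketch: load the fine-tuned checkpoint at this commit (repo_id is an assumption).
    from transformers import WhisperForConditionalGeneration

    model = WhisperForConditionalGeneration.from_pretrained(
        "marinone94/whisper-training-blog", revision="cf9c148"
    )
    print(model.config.vocab_size)  # 51865, as in config.json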
eval_pretrained_results.json
CHANGED
@@ -1,7 +1,7 @@
 {
-  "eval_pretrained_loss": 1.
-  "eval_pretrained_runtime":
-  "eval_pretrained_samples_per_second":
-  "eval_pretrained_steps_per_second": 0.
+  "eval_pretrained_loss": 1.71565842628479,
+  "eval_pretrained_runtime": 197.0203,
+  "eval_pretrained_samples_per_second": 1.675,
+  "eval_pretrained_steps_per_second": 0.213,
   "eval_pretrained_wer": 264.42599393262014
 }
generation_config.json
CHANGED
@@ -221,5 +221,5 @@
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.
+  "transformers_version": "4.29.2"
 }
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e347b86221199e0a86cf1f1e77515b57a801546e73d0951d67d6f1773a1ddfee
 size 151098921
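pytorch_model.bin is stored through Git LFS, so the diff above only touches the pointer file: the spec version, the sha256 oid of the real weights, and their size in bytes. A small sketch for checking a downloaded weight file against such a pointer, using only the standard library:

    # Sketch: verify a resolved LFS file against the pointer's oid and size.
    import hashlib
    import os

    def sha256_of(path, chunk_size=1 << 20):
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                digest.update(block)
        return digest.hexdigest()

    path = "pytorch_model.bin"  # the resolved file, not the pointer text
    print(sha256_of(path) == "e347b86221199e0a86cf1f1e77515b57a801546e73d0951d67d6f1773a1ddfee")
    print(os.path.getsize(path) == 151098921)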
test_finetuned_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "epoch": 2.33,
-  "test_finetuned_loss": 1.
-  "test_finetuned_runtime":
-  "test_finetuned_samples_per_second": 3.
-  "test_finetuned_steps_per_second": 0.
-  "test_finetuned_wer": 172.
+  "test_finetuned_loss": 1.0018519163131714,
+  "test_finetuned_runtime": 237.997,
+  "test_finetuned_samples_per_second": 3.189,
+  "test_finetuned_steps_per_second": 0.399,
+  "test_finetuned_wer": 172.96023368518888
 }
test_pretrained_results.json
CHANGED
@@ -1,7 +1,7 @@
 {
-  "test_pretrained_loss": 1.
-  "test_pretrained_runtime":
-  "test_pretrained_samples_per_second": 2.
-  "test_pretrained_steps_per_second": 0.
+  "test_pretrained_loss": 1.7249696254730225,
+  "test_pretrained_runtime": 273.2544,
+  "test_pretrained_samples_per_second": 2.778,
+  "test_pretrained_steps_per_second": 0.348,
   "test_pretrained_wer": 261.9066587001262
 }
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "epoch": 2.33,
   "total_flos": 1.7572960198656e+17,
-  "train_loss": 0.
-  "train_runtime":
-  "train_samples_per_second": 3.
-  "train_steps_per_second": 0.
+  "train_loss": 0.8751586728862354,
+  "train_runtime": 2260.8457,
+  "train_samples_per_second": 3.17,
+  "train_steps_per_second": 0.198
 }
trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric":
+  "best_metric": 166.9327798179786,
   "best_model_checkpoint": "./whisper-training-blog/checkpoint-396",
   "epoch": 2.330357142857143,
   "global_step": 448,
@@ -22,7 +22,7 @@
     {
       "epoch": 0.03,
       "learning_rate": 5.555555555555555e-07,
-      "loss": 1.
+      "loss": 1.7119,
       "step": 12
     },
     {
@@ -34,25 +34,25 @@
     {
       "epoch": 0.04,
       "learning_rate": 1e-06,
-      "loss": 1.
+      "loss": 1.609,
       "step": 20
     },
     {
       "epoch": 0.05,
       "learning_rate": 1.2222222222222223e-06,
-      "loss": 1.
+      "loss": 1.5656,
       "step": 24
     },
     {
       "epoch": 0.06,
       "learning_rate": 1.4444444444444445e-06,
-      "loss": 1.
+      "loss": 1.6078,
       "step": 28
     },
     {
       "epoch": 0.07,
       "learning_rate": 1.6666666666666667e-06,
-      "loss": 1.
+      "loss": 1.5075,
       "step": 32
     },
     {
@@ -64,28 +64,28 @@
     {
       "epoch": 0.09,
       "learning_rate": 2.1111111111111114e-06,
-      "loss": 1.
+      "loss": 1.4949,
       "step": 40
     },
     {
       "epoch": 0.1,
       "learning_rate": 2.3333333333333336e-06,
-      "loss": 1.
+      "loss": 1.4111,
       "step": 44
     },
     {
       "epoch": 0.1,
-      "eval_loss": 1.
-      "eval_runtime": 119.
-      "eval_samples_per_second": 2.
-      "eval_steps_per_second": 0.
+      "eval_loss": 1.491944432258606,
+      "eval_runtime": 119.9494,
+      "eval_samples_per_second": 2.751,
+      "eval_steps_per_second": 0.35,
       "eval_wer": 245.34568098355422,
       "step": 44
     },
     {
       "epoch": 0.11,
       "learning_rate": 2.5555555555555557e-06,
-      "loss": 1.
+      "loss": 1.3192,
       "step": 48
     },
     {
@@ -103,7 +103,7 @@
     {
       "epoch": 0.13,
       "learning_rate": 3.2222222222222222e-06,
-      "loss": 1.
+      "loss": 1.2748,
       "step": 60
     },
     {
@@ -121,13 +121,13 @@
     {
       "epoch": 0.16,
       "learning_rate": 3.888888888888889e-06,
-      "loss": 1.
+      "loss": 1.2186,
       "step": 72
     },
     {
       "epoch": 0.17,
       "learning_rate": 4.111111111111111e-06,
-      "loss": 1.
+      "loss": 1.1661,
       "step": 76
     },
     {
@@ -139,28 +139,28 @@
     {
       "epoch": 0.19,
       "learning_rate": 4.555555555555555e-06,
-      "loss": 1.
+      "loss": 1.0729,
       "step": 84
     },
     {
       "epoch": 0.2,
       "learning_rate": 4.777777777777778e-06,
-      "loss": 1.
+      "loss": 1.0501,
       "step": 88
     },
     {
       "epoch": 0.2,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second": 2.
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.225460410118103,
+      "eval_runtime": 124.4384,
+      "eval_samples_per_second": 2.652,
+      "eval_steps_per_second": 0.338,
+      "eval_wer": 225.88216509659907,
       "step": 88
     },
     {
       "epoch": 0.21,
       "learning_rate": 4.9999999999999996e-06,
-      "loss": 1.
+      "loss": 1.1278,
       "step": 92
     },
     {
@@ -172,19 +172,19 @@
     {
       "epoch": 0.22,
       "learning_rate": 5.444444444444445e-06,
-      "loss": 0.
+      "loss": 0.9929,
       "step": 100
     },
     {
       "epoch": 0.23,
       "learning_rate": 5.666666666666667e-06,
-      "loss": 1.
+      "loss": 1.0878,
       "step": 104
     },
     {
       "epoch": 0.24,
       "learning_rate": 5.888888888888889e-06,
-      "loss": 1.
+      "loss": 1.0416,
       "step": 108
     },
     {
@@ -196,40 +196,40 @@
     {
       "epoch": 0.26,
       "learning_rate": 6.333333333333333e-06,
-      "loss": 0.
+      "loss": 0.9686,
       "step": 116
     },
     {
       "epoch": 0.27,
       "learning_rate": 6.555555555555556e-06,
-      "loss": 0.
+      "loss": 0.9557,
       "step": 120
     },
     {
       "epoch": 0.28,
       "learning_rate": 6.777777777777778e-06,
-      "loss": 1.
+      "loss": 1.0193,
       "step": 124
     },
     {
       "epoch": 0.29,
       "learning_rate": 7e-06,
-      "loss": 0.
+      "loss": 0.851,
       "step": 128
     },
     {
       "epoch": 0.29,
       "learning_rate": 7.222222222222222e-06,
-      "loss": 0.
+      "loss": 0.9032,
       "step": 132
     },
     {
       "epoch": 0.29,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.1202839612960815,
+      "eval_runtime": 115.008,
+      "eval_samples_per_second": 2.869,
+      "eval_steps_per_second": 0.365,
+      "eval_wer": 211.65575602746287,
       "step": 132
     },
     {
@@ -247,7 +247,7 @@
     {
       "epoch": 0.32,
       "learning_rate": 7.332268370607029e-06,
-      "loss": 0.
+      "loss": 0.8926,
       "step": 144
     },
     {
@@ -259,13 +259,13 @@
     {
       "epoch": 1.0,
       "learning_rate": 7.140575079872205e-06,
-      "loss": 1.
+      "loss": 1.239,
       "step": 152
     },
     {
       "epoch": 1.01,
       "learning_rate": 7.044728434504793e-06,
-      "loss": 0.
+      "loss": 0.9147,
       "step": 156
     },
     {
@@ -277,13 +277,13 @@
     {
       "epoch": 1.03,
       "learning_rate": 6.853035143769968e-06,
-      "loss": 0.
+      "loss": 0.9081,
       "step": 164
     },
     {
       "epoch": 1.04,
       "learning_rate": 6.7571884984025565e-06,
-      "loss": 0.
+      "loss": 0.8435,
       "step": 168
     },
     {
@@ -300,17 +300,17 @@
     },
     {
       "epoch": 1.06,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second": 3.
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.0674982070922852,
+      "eval_runtime": 102.5987,
+      "eval_samples_per_second": 3.216,
+      "eval_steps_per_second": 0.409,
+      "eval_wer": 184.62398211719625,
       "step": 176
     },
     {
       "epoch": 1.07,
       "learning_rate": 6.469648562300319e-06,
-      "loss": 0.
+      "loss": 0.8328,
       "step": 180
     },
     {
@@ -340,13 +340,13 @@
     {
       "epoch": 1.11,
       "learning_rate": 5.990415335463259e-06,
-      "loss": 0.
+      "loss": 0.7813,
       "step": 200
     },
     {
       "epoch": 1.12,
       "learning_rate": 5.894568690095847e-06,
-      "loss": 0.
+      "loss": 0.7541,
       "step": 204
     },
     {
@@ -358,13 +358,13 @@
     {
       "epoch": 1.14,
       "learning_rate": 5.702875399361023e-06,
-      "loss": 0.
+      "loss": 0.818,
       "step": 212
     },
     {
       "epoch": 1.15,
       "learning_rate": 5.607028753993611e-06,
-      "loss": 0.
+      "loss": 0.7461,
       "step": 216
     },
     {
@@ -375,17 +375,17 @@
     },
     {
       "epoch": 1.16,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second": 3.
-      "eval_steps_per_second": 0.
-      "eval_wer": 178.
+      "eval_loss": 1.0393612384796143,
+      "eval_runtime": 103.1889,
+      "eval_samples_per_second": 3.198,
+      "eval_steps_per_second": 0.407,
+      "eval_wer": 178.4129011655756,
       "step": 220
     },
     {
       "epoch": 1.17,
       "learning_rate": 5.415335463258786e-06,
-      "loss": 0.
+      "loss": 0.7541,
       "step": 224
     },
     {
@@ -397,25 +397,25 @@
     {
       "epoch": 1.18,
       "learning_rate": 5.223642172523962e-06,
-      "loss": 0.
+      "loss": 0.7253,
       "step": 232
     },
     {
       "epoch": 1.19,
       "learning_rate": 5.127795527156549e-06,
-      "loss": 0.
+      "loss": 0.6943,
       "step": 236
     },
     {
       "epoch": 1.2,
       "learning_rate": 5.031948881789138e-06,
-      "loss": 0.
+      "loss": 0.728,
       "step": 240
     },
     {
       "epoch": 1.21,
       "learning_rate": 4.936102236421725e-06,
-      "loss": 0.
+      "loss": 0.784,
       "step": 244
     },
     {
@@ -427,7 +427,7 @@
     {
       "epoch": 1.23,
       "learning_rate": 4.744408945686901e-06,
-      "loss": 0.
+      "loss": 0.7076,
       "step": 252
     },
     {
@@ -445,22 +445,22 @@
     {
       "epoch": 1.25,
       "learning_rate": 4.456869009584665e-06,
-      "loss": 0.
+      "loss": 0.6325,
       "step": 264
     },
     {
       "epoch": 1.25,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second": 2.
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.0301399230957031,
+      "eval_runtime": 117.4437,
+      "eval_samples_per_second": 2.81,
+      "eval_steps_per_second": 0.358,
+      "eval_wer": 216.63739422002237,
       "step": 264
     },
     {
       "epoch": 1.26,
       "learning_rate": 4.361022364217253e-06,
-      "loss": 0.
+      "loss": 0.6825,
       "step": 268
     },
     {
@@ -472,19 +472,19 @@
     {
       "epoch": 1.28,
       "learning_rate": 4.169329073482428e-06,
-      "loss": 0.
+      "loss": 0.6699,
       "step": 276
     },
     {
       "epoch": 1.29,
       "learning_rate": 4.0734824281150155e-06,
-      "loss": 0.
+      "loss": 0.5677,
       "step": 280
     },
     {
       "epoch": 1.3,
       "learning_rate": 3.977635782747604e-06,
-      "loss": 0.
+      "loss": 0.6832,
       "step": 284
     },
     {
@@ -496,64 +496,64 @@
     {
       "epoch": 1.32,
       "learning_rate": 3.7859424920127796e-06,
-      "loss": 0.
+      "loss": 0.6719,
       "step": 292
     },
     {
       "epoch": 1.33,
       "learning_rate": 3.6900958466453675e-06,
-      "loss": 0.
+      "loss": 0.712,
       "step": 296
     },
     {
       "epoch": 1.33,
       "learning_rate": 3.5942492012779555e-06,
-      "loss": 0.
+      "loss": 0.8767,
       "step": 300
     },
     {
       "epoch": 2.01,
       "learning_rate": 3.4984025559105434e-06,
-      "loss": 0.
+      "loss": 0.6725,
       "step": 304
     },
     {
       "epoch": 2.02,
       "learning_rate": 3.4025559105431313e-06,
-      "loss": 0.
+      "loss": 0.6971,
       "step": 308
     },
     {
       "epoch": 2.02,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.0135136842727661,
+      "eval_runtime": 114.2748,
+      "eval_samples_per_second": 2.888,
+      "eval_steps_per_second": 0.368,
+      "eval_wer": 184.4004470700942,
       "step": 308
     },
     {
       "epoch": 2.03,
       "learning_rate": 3.306709265175719e-06,
-      "loss": 0.
+      "loss": 0.7433,
       "step": 312
     },
     {
       "epoch": 2.04,
       "learning_rate": 3.2108626198083067e-06,
-      "loss": 0.
+      "loss": 0.6264,
       "step": 316
     },
     {
       "epoch": 2.04,
       "learning_rate": 3.1150159744408946e-06,
-      "loss": 0.
+      "loss": 0.6605,
       "step": 320
     },
     {
       "epoch": 2.05,
       "learning_rate": 3.0191693290734825e-06,
-      "loss": 0.
+      "loss": 0.6411,
       "step": 324
     },
     {
@@ -571,19 +571,19 @@
     {
       "epoch": 2.08,
       "learning_rate": 2.7316293929712462e-06,
-      "loss": 0.
+      "loss": 0.6827,
       "step": 336
     },
     {
       "epoch": 2.09,
       "learning_rate": 2.635782747603834e-06,
-      "loss": 0.
+      "loss": 0.6498,
       "step": 340
     },
     {
       "epoch": 2.1,
       "learning_rate": 2.539936102236422e-06,
-      "loss": 0.
+      "loss": 0.6641,
       "step": 344
     },
     {
@@ -595,22 +595,22 @@
     {
       "epoch": 2.12,
       "learning_rate": 2.3482428115015974e-06,
-      "loss": 0.
+      "loss": 0.6051,
       "step": 352
     },
     {
       "epoch": 2.12,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "eval_loss": 1.0065311193466187,
+      "eval_runtime": 110.2985,
+      "eval_samples_per_second": 2.992,
+      "eval_steps_per_second": 0.381,
       "eval_wer": 194.7149928149449,
       "step": 352
     },
     {
       "epoch": 2.12,
       "learning_rate": 2.2523961661341854e-06,
-      "loss": 0.
+      "loss": 0.6236,
       "step": 356
     },
     {
@@ -628,37 +628,37 @@
     {
       "epoch": 2.15,
       "learning_rate": 1.964856230031949e-06,
-      "loss": 0.
+      "loss": 0.6014,
       "step": 368
     },
     {
       "epoch": 2.16,
       "learning_rate": 1.8690095846645368e-06,
-      "loss": 0.
+      "loss": 0.6221,
       "step": 372
     },
     {
       "epoch": 2.17,
       "learning_rate": 1.7731629392971245e-06,
-      "loss": 0.
+      "loss": 0.6288,
       "step": 376
     },
     {
       "epoch": 2.18,
       "learning_rate": 1.6773162939297124e-06,
-      "loss": 0.
+      "loss": 0.6081,
       "step": 380
     },
     {
       "epoch": 2.19,
       "learning_rate": 1.5814696485623003e-06,
-      "loss": 0.
+      "loss": 0.5887,
       "step": 384
     },
     {
       "epoch": 2.2,
       "learning_rate": 1.4856230031948882e-06,
-      "loss": 0.
+      "loss": 0.5933,
       "step": 388
     },
     {
@@ -675,17 +675,17 @@
     },
     {
       "epoch": 2.21,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second": 3.
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.0029499530792236,
+      "eval_runtime": 97.7372,
+      "eval_samples_per_second": 3.376,
+      "eval_steps_per_second": 0.43,
+      "eval_wer": 166.9327798179786,
       "step": 396
     },
     {
       "epoch": 2.22,
       "learning_rate": 1.1980830670926517e-06,
-      "loss": 0.
+      "loss": 0.5773,
       "step": 400
     },
     {
@@ -697,25 +697,25 @@
     {
       "epoch": 2.24,
       "learning_rate": 1.0063897763578274e-06,
-      "loss": 0.
+      "loss": 0.6009,
       "step": 408
     },
     {
       "epoch": 2.25,
       "learning_rate": 9.105431309904153e-07,
-      "loss": 0.
+      "loss": 0.5616,
       "step": 412
     },
     {
       "epoch": 2.26,
       "learning_rate": 8.146964856230032e-07,
-      "loss": 0.
+      "loss": 0.5729,
       "step": 416
     },
     {
       "epoch": 2.27,
       "learning_rate": 7.188498402555911e-07,
-      "loss": 0.
+      "loss": 0.5754,
       "step": 420
     },
     {
@@ -739,22 +739,22 @@
     {
       "epoch": 2.3,
       "learning_rate": 3.3546325878594247e-07,
-      "loss": 0.
+      "loss": 0.5607,
       "step": 436
     },
     {
       "epoch": 2.31,
       "learning_rate": 2.3961661341853033e-07,
-      "loss": 0.
+      "loss": 0.585,
       "step": 440
     },
     {
       "epoch": 2.31,
-      "eval_loss": 1.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
-      "eval_wer":
+      "eval_loss": 1.0049597024917603,
+      "eval_runtime": 112.1305,
+      "eval_samples_per_second": 2.943,
+      "eval_steps_per_second": 0.375,
+      "eval_wer": 191.23423279578478,
       "step": 440
     },
     {
@@ -766,17 +766,17 @@
     {
       "epoch": 2.33,
       "learning_rate": 4.792332268370607e-08,
-      "loss": 0.
+      "loss": 0.6385,
       "step": 448
     },
     {
       "epoch": 2.33,
       "step": 448,
       "total_flos": 1.7572960198656e+17,
-      "train_loss": 0.
-      "train_runtime":
-      "train_samples_per_second": 3.
-      "train_steps_per_second": 0.
+      "train_loss": 0.8751586728862354,
+      "train_runtime": 2260.8457,
+      "train_samples_per_second": 3.17,
+      "train_steps_per_second": 0.198
     }
   ],
   "max_steps": 448,
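trainer_state.json is the Trainer's running log: every "loss" entry above is a training-log step and every "eval_*" block is an evaluation pass, with best_metric tracking the best eval WER (166.93 at checkpoint-396). A minimal sketch that pulls the curves back out of the file, assuming it has been downloaded locally:

    # Sketch: extract the loss curve and eval WER points from trainer_state.json.
    import json

    with open("trainer_state.json") as f:
        state = json.load(f)

    train_loss = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
    eval_wer = [(e["step"], e["eval_wer"]) for e in state["log_history"] if "eval_wer" in e]

    print("best metric (eval WER):", state["best_metric"])
    print("final train loss points:", train_loss[-3:])
    print("eval WER by step:", eval_wer)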
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:55dadabd854c053224338d1d5bf08deab9f76b217d00da7ec8d1fd3f5f27f892
+size 4091