marinone94 commited on
Commit
43dd921
1 Parent(s): 4123391

End of training

Browse files
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 9.09,
3
- "eval_loss": 1.8916987180709839,
4
- "eval_runtime": 96.9796,
5
- "eval_samples_per_second": 4.063,
6
- "eval_steps_per_second": 0.134,
7
- "eval_wer": 15.494331342191881,
8
- "test_loss": 0.5623113512992859,
9
- "test_runtime": 121.6703,
10
- "test_samples_per_second": 5.318,
11
- "test_steps_per_second": 0.173,
12
- "test_wer": 20.965372507869883,
13
- "train_loss": 0.35074408769753995,
14
- "train_runtime": 2707.3827,
15
- "train_samples_per_second": 9.621,
16
- "train_steps_per_second": 0.15
17
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.6191972494125366,
4
+ "eval_runtime": 56.3363,
5
+ "eval_samples_per_second": 0.071,
6
+ "eval_steps_per_second": 0.036,
7
+ "eval_wer": 153.2258064516129,
8
+ "test_loss": 1.7568330764770508,
9
+ "test_runtime": 37.8582,
10
+ "test_samples_per_second": 0.106,
11
+ "test_steps_per_second": 0.053,
12
+ "test_wer": 138.5964912280702,
13
+ "train_loss": 1.4339025020599365,
14
+ "train_runtime": 108.1566,
15
+ "train_samples_per_second": 0.074,
16
+ "train_steps_per_second": 0.018
17
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "eval_loss": 1.8916987180709839,
3
- "eval_runtime": 96.9796,
4
- "eval_samples_per_second": 4.063,
5
- "eval_steps_per_second": 0.134,
6
- "eval_wer": 15.494331342191881
7
  }
 
1
  {
2
+ "eval_loss": 1.6191972494125366,
3
+ "eval_runtime": 56.3363,
4
+ "eval_samples_per_second": 0.071,
5
+ "eval_steps_per_second": 0.036,
6
+ "eval_wer": 153.2258064516129
7
  }
huggingface_training.py CHANGED
@@ -1,6 +1,7 @@
1
  """ Whisper training script using Hugging Face Transformers. """
2
 
3
- import os # used to create output directory
 
4
  from dataclasses import dataclass # used to define data collator
5
  from math import ceil # used to round up decimals
6
 
@@ -321,60 +322,60 @@ I hope you haven't left yet. If you have, bad for you, as we are ready for train
321
  As Whisper is a pretrained model ready to be used off-the-shelf, it is advisable to evaluate it before training on both the validation and test sets. Let's make sure we make no harm to it.
322
  """
323
 
324
- eval_metrics = trainer.evaluate(
325
- eval_dataset=preprocessed_dataset["validation"],
326
- metric_key_prefix="eval",
327
- max_length=448,
328
- num_beams=1,
329
- # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
330
- )
331
 
332
- trainer.log_metrics("eval", eval_metrics)
333
- trainer.save_metrics("eval", eval_metrics)
334
- print(eval_metrics)
335
 
336
- test_metrics = trainer.evaluate(
337
- eval_dataset=preprocessed_dataset["test"],
338
- metric_key_prefix="test",
339
- max_length=448,
340
- num_beams=1,
341
- # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
342
- )
343
 
344
- trainer.log_metrics("test", test_metrics)
345
- trainer.save_metrics("test", test_metrics)
346
- print(test_metrics)
347
 
348
- train_result = trainer.train()
349
- trainer.save_model()
350
 
351
- metrics = train_result.metrics
352
- trainer.log_metrics("train", metrics)
353
- trainer.save_metrics("train", metrics)
354
- trainer.save_state()
355
- print(metrics)
356
 
357
- """ADD SOMETHING ABOUT THE TRAINING.
358
 
359
- Now let's evaluate the
360
- """
361
 
362
- final_metrics = trainer.evaluate(
363
- eval_dataset=preprocessed_dataset["test"],
364
- metric_key_prefix="test",
365
- max_length=448,
366
- num_beams=1,
367
- # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
368
- )
369
 
370
- trainer.log_metrics("test", final_metrics)
371
- trainer.save_metrics("test", final_metrics)
372
- print(final_metrics)
373
 
374
  # Pushing to hub during training slows down training
375
  # so we push it only in the end.
376
  # Since training is completed and best model has been saved, we first delete the checkpoints
377
  for filename in os.listdir("."):
378
  if filename.startswith("checkpoint-"):
379
- os.remove(f"./{filename}")
380
  trainer.push_to_hub()
 
1
  """ Whisper training script using Hugging Face Transformers. """
2
 
3
+ import os # used to find checkpoints
4
+ import shutil
5
  from dataclasses import dataclass # used to define data collator
6
  from math import ceil # used to round up decimals
7
 
 
322
  As Whisper is a pretrained model ready to be used off-the-shelf, it is advisable to evaluate it before training on both the validation and test sets. Let's make sure we make no harm to it.
323
  """
324
 
325
+ # eval_metrics = trainer.evaluate(
326
+ # eval_dataset=preprocessed_dataset["validation"],
327
+ # metric_key_prefix="eval",
328
+ # max_length=448,
329
+ # num_beams=1,
330
+ # # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
331
+ # )
332
 
333
+ # trainer.log_metrics("eval", eval_metrics)
334
+ # trainer.save_metrics("eval", eval_metrics)
335
+ # print(eval_metrics)
336
 
337
+ # test_metrics = trainer.evaluate(
338
+ # eval_dataset=preprocessed_dataset["test"],
339
+ # metric_key_prefix="test",
340
+ # max_length=448,
341
+ # num_beams=1,
342
+ # # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
343
+ # )
344
 
345
+ # trainer.log_metrics("test", test_metrics)
346
+ # trainer.save_metrics("test", test_metrics)
347
+ # print(test_metrics)
348
 
349
+ # train_result = trainer.train()
350
+ # trainer.save_model()
351
 
352
+ # metrics = train_result.metrics
353
+ # trainer.log_metrics("train", metrics)
354
+ # trainer.save_metrics("train", metrics)
355
+ # trainer.save_state()
356
+ # print(metrics)
357
 
358
+ # """ADD SOMETHING ABOUT THE TRAINING.
359
 
360
+ # Now let's evaluate the
361
+ # """
362
 
363
+ # final_metrics = trainer.evaluate(
364
+ # eval_dataset=preprocessed_dataset["test"],
365
+ # metric_key_prefix="test",
366
+ # max_length=448,
367
+ # num_beams=1,
368
+ # # gen_kwargs={"key": value} to provide additional generation specific arguments by keyword
369
+ # )
370
 
371
+ # trainer.log_metrics("test", final_metrics)
372
+ # trainer.save_metrics("test", final_metrics)
373
+ # print(final_metrics)
374
 
375
  # Pushing to hub during training slows down training
376
  # so we push it only in the end.
377
  # Since training is completed and best model has been saved, we first delete the checkpoints
378
  for filename in os.listdir("."):
379
  if filename.startswith("checkpoint-"):
380
+ shutil.rmtree(f"./{filename}")
381
  trainer.push_to_hub()
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21343063174657acd721a023a2780da91e0bede1cc15233f17e5468d93d0ae51
3
  size 151098921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3843686519777a4550909e8bd4961dcf7425e7183295f03d09a433a271f0887
3
  size 151098921
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.09,
3
- "test_loss": 0.5623113512992859,
4
- "test_runtime": 121.6703,
5
- "test_samples_per_second": 5.318,
6
- "test_steps_per_second": 0.173,
7
- "test_wer": 20.965372507869883
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "test_loss": 1.7568330764770508,
4
+ "test_runtime": 37.8582,
5
+ "test_samples_per_second": 0.106,
6
+ "test_steps_per_second": 0.053,
7
+ "test_wer": 138.5964912280702
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 9.09,
3
- "train_loss": 0.35074408769753995,
4
- "train_runtime": 2707.3827,
5
- "train_samples_per_second": 9.621,
6
- "train_steps_per_second": 0.15
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.4339025020599365,
4
+ "train_runtime": 108.1566,
5
+ "train_samples_per_second": 0.074,
6
+ "train_steps_per_second": 0.018
7
  }
trainer_state.json CHANGED
@@ -1,721 +1,55 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.093366093366093,
5
- "global_step": 407,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
12
  "learning_rate": 0.0,
13
- "loss": 1.8118,
14
- "step": 4
15
  },
16
  {
17
- "epoch": 0.02,
18
- "learning_rate": 3.6585365853658536e-07,
19
- "loss": 1.8122,
20
- "step": 8
 
 
 
21
  },
22
  {
23
- "epoch": 0.03,
24
- "learning_rate": 8.53658536585366e-07,
25
- "loss": 1.8174,
26
- "step": 12
27
  },
28
  {
29
- "epoch": 0.04,
30
- "learning_rate": 1.3414634146341465e-06,
31
- "loss": 1.7616,
32
- "step": 16
 
 
 
33
  },
34
  {
35
- "epoch": 0.05,
36
- "learning_rate": 1.8292682926829268e-06,
37
- "loss": 1.6875,
38
- "step": 20
39
- },
40
- {
41
- "epoch": 0.06,
42
- "learning_rate": 2.317073170731708e-06,
43
- "loss": 1.5201,
44
- "step": 24
45
- },
46
- {
47
- "epoch": 0.07,
48
- "learning_rate": 2.8048780487804884e-06,
49
- "loss": 1.3982,
50
- "step": 28
51
- },
52
- {
53
- "epoch": 0.08,
54
- "learning_rate": 3.292682926829269e-06,
55
- "loss": 1.3541,
56
- "step": 32
57
- },
58
- {
59
- "epoch": 0.09,
60
- "learning_rate": 3.780487804878049e-06,
61
- "loss": 1.2092,
62
- "step": 36
63
- },
64
- {
65
- "epoch": 0.1,
66
- "learning_rate": 4.268292682926829e-06,
67
- "loss": 1.1599,
68
- "step": 40
69
- },
70
- {
71
- "epoch": 0.1,
72
- "eval_loss": 1.142654299736023,
73
- "eval_runtime": 101.9854,
74
- "eval_samples_per_second": 3.863,
75
- "eval_steps_per_second": 0.127,
76
- "eval_wer": 15.213946117274169,
77
- "step": 40
78
- },
79
- {
80
- "epoch": 1.01,
81
- "learning_rate": 4.75609756097561e-06,
82
- "loss": 1.0124,
83
- "step": 44
84
- },
85
- {
86
- "epoch": 1.02,
87
- "learning_rate": 5.243902439024391e-06,
88
- "loss": 0.9171,
89
- "step": 48
90
- },
91
- {
92
- "epoch": 1.03,
93
- "learning_rate": 5.731707317073171e-06,
94
- "loss": 0.8027,
95
- "step": 52
96
- },
97
- {
98
- "epoch": 1.04,
99
- "learning_rate": 6.219512195121951e-06,
100
- "loss": 0.7284,
101
- "step": 56
102
- },
103
- {
104
- "epoch": 1.05,
105
- "learning_rate": 6.707317073170733e-06,
106
- "loss": 0.6185,
107
- "step": 60
108
- },
109
- {
110
- "epoch": 1.06,
111
- "learning_rate": 7.1951219512195125e-06,
112
- "loss": 0.57,
113
- "step": 64
114
- },
115
- {
116
- "epoch": 1.07,
117
- "learning_rate": 7.682926829268293e-06,
118
- "loss": 0.4985,
119
- "step": 68
120
- },
121
- {
122
- "epoch": 1.08,
123
- "learning_rate": 8.170731707317073e-06,
124
- "loss": 0.488,
125
- "step": 72
126
- },
127
- {
128
- "epoch": 1.09,
129
- "learning_rate": 8.658536585365854e-06,
130
- "loss": 0.4569,
131
- "step": 76
132
- },
133
- {
134
- "epoch": 1.1,
135
- "learning_rate": 9.146341463414635e-06,
136
- "loss": 0.4655,
137
- "step": 80
138
- },
139
- {
140
- "epoch": 1.1,
141
- "eval_loss": 0.5613037943840027,
142
- "eval_runtime": 91.9697,
143
- "eval_samples_per_second": 4.284,
144
- "eval_steps_per_second": 0.141,
145
- "eval_wer": 17.591125198098258,
146
- "step": 80
147
- },
148
- {
149
- "epoch": 2.0,
150
- "learning_rate": 9.634146341463415e-06,
151
- "loss": 0.425,
152
- "step": 84
153
- },
154
- {
155
- "epoch": 2.01,
156
- "learning_rate": 9.96923076923077e-06,
157
- "loss": 0.4162,
158
- "step": 88
159
- },
160
- {
161
- "epoch": 2.02,
162
- "learning_rate": 9.846153846153848e-06,
163
- "loss": 0.3809,
164
- "step": 92
165
- },
166
- {
167
- "epoch": 2.03,
168
- "learning_rate": 9.723076923076924e-06,
169
- "loss": 0.3533,
170
- "step": 96
171
- },
172
- {
173
- "epoch": 2.04,
174
- "learning_rate": 9.600000000000001e-06,
175
- "loss": 0.3511,
176
- "step": 100
177
- },
178
- {
179
- "epoch": 2.05,
180
- "learning_rate": 9.476923076923079e-06,
181
- "loss": 0.3475,
182
- "step": 104
183
- },
184
- {
185
- "epoch": 2.06,
186
- "learning_rate": 9.353846153846155e-06,
187
- "loss": 0.321,
188
- "step": 108
189
- },
190
- {
191
- "epoch": 2.07,
192
- "learning_rate": 9.230769230769232e-06,
193
- "loss": 0.2859,
194
- "step": 112
195
- },
196
- {
197
- "epoch": 2.08,
198
- "learning_rate": 9.107692307692308e-06,
199
- "loss": 0.3191,
200
- "step": 116
201
- },
202
- {
203
- "epoch": 2.09,
204
- "learning_rate": 8.984615384615386e-06,
205
- "loss": 0.2753,
206
- "step": 120
207
- },
208
- {
209
- "epoch": 2.09,
210
- "eval_loss": 0.5241264700889587,
211
- "eval_runtime": 88.0526,
212
- "eval_samples_per_second": 4.475,
213
- "eval_steps_per_second": 0.148,
214
- "eval_wer": 17.21321467755699,
215
- "step": 120
216
- },
217
- {
218
- "epoch": 3.0,
219
- "learning_rate": 8.861538461538463e-06,
220
- "loss": 0.3104,
221
- "step": 124
222
- },
223
- {
224
- "epoch": 3.01,
225
- "learning_rate": 8.73846153846154e-06,
226
- "loss": 0.2734,
227
- "step": 128
228
- },
229
- {
230
- "epoch": 3.02,
231
- "learning_rate": 8.615384615384617e-06,
232
- "loss": 0.2608,
233
- "step": 132
234
- },
235
- {
236
- "epoch": 3.03,
237
- "learning_rate": 8.492307692307693e-06,
238
- "loss": 0.2509,
239
- "step": 136
240
- },
241
- {
242
- "epoch": 3.04,
243
- "learning_rate": 8.36923076923077e-06,
244
- "loss": 0.2548,
245
- "step": 140
246
- },
247
- {
248
- "epoch": 3.05,
249
- "learning_rate": 8.246153846153848e-06,
250
- "loss": 0.2469,
251
- "step": 144
252
- },
253
- {
254
- "epoch": 3.06,
255
- "learning_rate": 8.123076923076924e-06,
256
- "loss": 0.2231,
257
- "step": 148
258
- },
259
- {
260
- "epoch": 3.07,
261
- "learning_rate": 8.000000000000001e-06,
262
- "loss": 0.2138,
263
- "step": 152
264
- },
265
- {
266
- "epoch": 3.08,
267
- "learning_rate": 7.876923076923077e-06,
268
- "loss": 0.2349,
269
- "step": 156
270
- },
271
- {
272
- "epoch": 3.09,
273
- "learning_rate": 7.753846153846155e-06,
274
- "loss": 0.2077,
275
- "step": 160
276
- },
277
- {
278
- "epoch": 3.09,
279
- "eval_loss": 0.5241798758506775,
280
- "eval_runtime": 88.5317,
281
- "eval_samples_per_second": 4.45,
282
- "eval_steps_per_second": 0.147,
283
- "eval_wer": 17.26197732536877,
284
- "step": 160
285
- },
286
- {
287
- "epoch": 3.1,
288
- "learning_rate": 7.630769230769232e-06,
289
- "loss": 0.2322,
290
- "step": 164
291
- },
292
- {
293
- "epoch": 4.01,
294
- "learning_rate": 7.507692307692308e-06,
295
- "loss": 0.2036,
296
- "step": 168
297
- },
298
- {
299
- "epoch": 4.02,
300
- "learning_rate": 7.384615384615386e-06,
301
- "loss": 0.2058,
302
- "step": 172
303
- },
304
- {
305
- "epoch": 4.03,
306
- "learning_rate": 7.261538461538462e-06,
307
- "loss": 0.1797,
308
- "step": 176
309
- },
310
- {
311
- "epoch": 4.04,
312
- "learning_rate": 7.1384615384615385e-06,
313
- "loss": 0.186,
314
- "step": 180
315
- },
316
- {
317
- "epoch": 4.05,
318
- "learning_rate": 7.015384615384616e-06,
319
- "loss": 0.2035,
320
- "step": 184
321
- },
322
- {
323
- "epoch": 4.06,
324
- "learning_rate": 6.892307692307693e-06,
325
- "loss": 0.1794,
326
- "step": 188
327
- },
328
- {
329
- "epoch": 4.07,
330
- "learning_rate": 6.76923076923077e-06,
331
- "loss": 0.1589,
332
- "step": 192
333
- },
334
- {
335
- "epoch": 4.08,
336
- "learning_rate": 6.646153846153846e-06,
337
- "loss": 0.1879,
338
- "step": 196
339
- },
340
- {
341
- "epoch": 4.09,
342
- "learning_rate": 6.523076923076923e-06,
343
- "loss": 0.1636,
344
- "step": 200
345
- },
346
- {
347
- "epoch": 4.09,
348
- "eval_loss": 0.5289868712425232,
349
- "eval_runtime": 95.5188,
350
- "eval_samples_per_second": 4.125,
351
- "eval_steps_per_second": 0.136,
352
- "eval_wer": 17.66426916981592,
353
- "step": 200
354
- },
355
- {
356
- "epoch": 4.1,
357
- "learning_rate": 6.4000000000000006e-06,
358
- "loss": 0.1767,
359
- "step": 204
360
- },
361
- {
362
- "epoch": 5.01,
363
- "learning_rate": 6.276923076923077e-06,
364
- "loss": 0.1657,
365
- "step": 208
366
- },
367
- {
368
- "epoch": 5.02,
369
- "learning_rate": 6.153846153846155e-06,
370
- "loss": 0.1607,
371
- "step": 212
372
- },
373
- {
374
- "epoch": 5.03,
375
- "learning_rate": 6.030769230769231e-06,
376
- "loss": 0.1458,
377
- "step": 216
378
- },
379
- {
380
- "epoch": 5.04,
381
- "learning_rate": 5.907692307692308e-06,
382
- "loss": 0.1541,
383
- "step": 220
384
- },
385
- {
386
- "epoch": 5.05,
387
- "learning_rate": 5.784615384615385e-06,
388
- "loss": 0.1494,
389
- "step": 224
390
- },
391
- {
392
- "epoch": 5.06,
393
- "learning_rate": 5.661538461538462e-06,
394
- "loss": 0.144,
395
- "step": 228
396
- },
397
- {
398
- "epoch": 5.07,
399
- "learning_rate": 5.538461538461539e-06,
400
- "loss": 0.1311,
401
- "step": 232
402
- },
403
- {
404
- "epoch": 5.08,
405
- "learning_rate": 5.415384615384615e-06,
406
- "loss": 0.1411,
407
- "step": 236
408
- },
409
- {
410
- "epoch": 5.09,
411
- "learning_rate": 5.292307692307693e-06,
412
- "loss": 0.1322,
413
- "step": 240
414
- },
415
- {
416
- "epoch": 5.09,
417
- "eval_loss": 0.5350630283355713,
418
- "eval_runtime": 92.5111,
419
- "eval_samples_per_second": 4.259,
420
- "eval_steps_per_second": 0.141,
421
- "eval_wer": 18.2128489576984,
422
- "step": 240
423
- },
424
- {
425
- "epoch": 5.1,
426
- "learning_rate": 5.16923076923077e-06,
427
- "loss": 0.1436,
428
- "step": 244
429
- },
430
- {
431
- "epoch": 6.0,
432
- "learning_rate": 5.046153846153846e-06,
433
- "loss": 0.1375,
434
- "step": 248
435
- },
436
- {
437
- "epoch": 6.01,
438
- "learning_rate": 4.923076923076924e-06,
439
- "loss": 0.1361,
440
- "step": 252
441
- },
442
- {
443
- "epoch": 6.02,
444
- "learning_rate": 4.800000000000001e-06,
445
- "loss": 0.129,
446
- "step": 256
447
- },
448
- {
449
- "epoch": 6.03,
450
- "learning_rate": 4.676923076923077e-06,
451
- "loss": 0.1127,
452
- "step": 260
453
- },
454
- {
455
- "epoch": 6.04,
456
- "learning_rate": 4.553846153846154e-06,
457
- "loss": 0.1266,
458
- "step": 264
459
- },
460
- {
461
- "epoch": 6.05,
462
- "learning_rate": 4.430769230769232e-06,
463
- "loss": 0.1193,
464
- "step": 268
465
- },
466
- {
467
- "epoch": 6.06,
468
- "learning_rate": 4.307692307692308e-06,
469
- "loss": 0.1127,
470
- "step": 272
471
- },
472
- {
473
- "epoch": 6.07,
474
- "learning_rate": 4.184615384615385e-06,
475
- "loss": 0.1064,
476
- "step": 276
477
- },
478
- {
479
- "epoch": 6.08,
480
- "learning_rate": 4.061538461538462e-06,
481
- "loss": 0.123,
482
- "step": 280
483
- },
484
- {
485
- "epoch": 6.08,
486
- "eval_loss": 0.5429388284683228,
487
- "eval_runtime": 91.5818,
488
- "eval_samples_per_second": 4.302,
489
- "eval_steps_per_second": 0.142,
490
- "eval_wer": 18.907716689016212,
491
- "step": 280
492
- },
493
- {
494
- "epoch": 6.09,
495
- "learning_rate": 3.938461538461539e-06,
496
- "loss": 0.1057,
497
- "step": 284
498
- },
499
- {
500
- "epoch": 7.0,
501
- "learning_rate": 3.815384615384616e-06,
502
- "loss": 0.1258,
503
- "step": 288
504
- },
505
- {
506
- "epoch": 7.01,
507
- "learning_rate": 3.692307692307693e-06,
508
- "loss": 0.1108,
509
- "step": 292
510
- },
511
- {
512
- "epoch": 7.02,
513
- "learning_rate": 3.5692307692307692e-06,
514
- "loss": 0.1115,
515
- "step": 296
516
- },
517
- {
518
- "epoch": 7.03,
519
- "learning_rate": 3.4461538461538464e-06,
520
- "loss": 0.0998,
521
- "step": 300
522
- },
523
- {
524
- "epoch": 7.04,
525
- "learning_rate": 3.323076923076923e-06,
526
- "loss": 0.1106,
527
- "step": 304
528
- },
529
- {
530
- "epoch": 7.05,
531
- "learning_rate": 3.2000000000000003e-06,
532
- "loss": 0.1045,
533
- "step": 308
534
- },
535
- {
536
- "epoch": 7.06,
537
- "learning_rate": 3.0769230769230774e-06,
538
- "loss": 0.0908,
539
- "step": 312
540
- },
541
- {
542
- "epoch": 7.07,
543
- "learning_rate": 2.953846153846154e-06,
544
- "loss": 0.0931,
545
- "step": 316
546
- },
547
- {
548
- "epoch": 7.08,
549
- "learning_rate": 2.830769230769231e-06,
550
- "loss": 0.1074,
551
- "step": 320
552
- },
553
- {
554
- "epoch": 7.08,
555
- "eval_loss": 0.5500437021255493,
556
- "eval_runtime": 104.0907,
557
- "eval_samples_per_second": 3.785,
558
- "eval_steps_per_second": 0.125,
559
- "eval_wer": 19.054004632451544,
560
- "step": 320
561
- },
562
- {
563
- "epoch": 7.09,
564
- "learning_rate": 2.7076923076923076e-06,
565
- "loss": 0.0937,
566
- "step": 324
567
- },
568
- {
569
- "epoch": 7.1,
570
- "learning_rate": 2.584615384615385e-06,
571
- "loss": 0.1091,
572
- "step": 328
573
- },
574
- {
575
- "epoch": 8.01,
576
- "learning_rate": 2.461538461538462e-06,
577
- "loss": 0.0951,
578
- "step": 332
579
- },
580
- {
581
- "epoch": 8.02,
582
- "learning_rate": 2.3384615384615387e-06,
583
- "loss": 0.1003,
584
- "step": 336
585
- },
586
- {
587
- "epoch": 8.03,
588
- "learning_rate": 2.215384615384616e-06,
589
- "loss": 0.0836,
590
- "step": 340
591
- },
592
- {
593
- "epoch": 8.04,
594
- "learning_rate": 2.0923076923076926e-06,
595
- "loss": 0.0907,
596
- "step": 344
597
- },
598
- {
599
- "epoch": 8.05,
600
- "learning_rate": 1.9692307692307693e-06,
601
- "loss": 0.1013,
602
- "step": 348
603
- },
604
- {
605
- "epoch": 8.06,
606
- "learning_rate": 1.8461538461538465e-06,
607
- "loss": 0.0891,
608
- "step": 352
609
- },
610
- {
611
- "epoch": 8.07,
612
- "learning_rate": 1.7230769230769232e-06,
613
- "loss": 0.077,
614
- "step": 356
615
- },
616
- {
617
- "epoch": 8.08,
618
- "learning_rate": 1.6000000000000001e-06,
619
- "loss": 0.1007,
620
- "step": 360
621
- },
622
- {
623
- "epoch": 8.08,
624
- "eval_loss": 0.5552565455436707,
625
- "eval_runtime": 88.458,
626
- "eval_samples_per_second": 4.454,
627
- "eval_steps_per_second": 0.147,
628
- "eval_wer": 19.310008533463368,
629
- "step": 360
630
- },
631
- {
632
- "epoch": 8.09,
633
- "learning_rate": 1.476923076923077e-06,
634
- "loss": 0.0849,
635
- "step": 364
636
- },
637
- {
638
- "epoch": 8.1,
639
- "learning_rate": 1.3538461538461538e-06,
640
- "loss": 0.0971,
641
- "step": 368
642
- },
643
- {
644
- "epoch": 9.01,
645
- "learning_rate": 1.230769230769231e-06,
646
- "loss": 0.0876,
647
- "step": 372
648
- },
649
- {
650
- "epoch": 9.02,
651
- "learning_rate": 1.107692307692308e-06,
652
- "loss": 0.0879,
653
- "step": 376
654
- },
655
- {
656
- "epoch": 9.03,
657
- "learning_rate": 9.846153846153847e-07,
658
- "loss": 0.0805,
659
- "step": 380
660
- },
661
- {
662
- "epoch": 9.04,
663
- "learning_rate": 8.615384615384616e-07,
664
- "loss": 0.0888,
665
- "step": 384
666
- },
667
- {
668
- "epoch": 9.05,
669
- "learning_rate": 7.384615384615385e-07,
670
- "loss": 0.0858,
671
- "step": 388
672
- },
673
- {
674
- "epoch": 9.06,
675
- "learning_rate": 6.153846153846155e-07,
676
- "loss": 0.0825,
677
- "step": 392
678
- },
679
- {
680
- "epoch": 9.07,
681
- "learning_rate": 4.923076923076923e-07,
682
- "loss": 0.0748,
683
- "step": 396
684
- },
685
- {
686
- "epoch": 9.08,
687
- "learning_rate": 3.6923076923076927e-07,
688
- "loss": 0.0876,
689
- "step": 400
690
- },
691
- {
692
- "epoch": 9.08,
693
- "eval_loss": 0.5568162202835083,
694
- "eval_runtime": 89.7223,
695
- "eval_samples_per_second": 4.391,
696
- "eval_steps_per_second": 0.145,
697
- "eval_wer": 19.3465805193222,
698
- "step": 400
699
- },
700
- {
701
- "epoch": 9.09,
702
- "learning_rate": 2.4615384615384616e-07,
703
- "loss": 0.0802,
704
- "step": 404
705
- },
706
- {
707
- "epoch": 9.09,
708
- "step": 407,
709
- "total_flos": 6.36398180352e+17,
710
- "train_loss": 0.35074408769753995,
711
- "train_runtime": 2707.3827,
712
- "train_samples_per_second": 9.621,
713
- "train_steps_per_second": 0.15
714
  }
715
  ],
716
- "max_steps": 407,
717
  "num_train_epochs": 9223372036854775807,
718
- "total_flos": 6.36398180352e+17,
719
  "trial_name": null,
720
  "trial_params": null
721
  }
 
1
  {
2
+ "best_metric": 153.2258064516129,
3
+ "best_model_checkpoint": "./checkpoint-1",
4
+ "epoch": 1.0,
5
+ "global_step": 2,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.5,
12
  "learning_rate": 0.0,
13
+ "loss": 1.565,
14
+ "step": 1
15
  },
16
  {
17
+ "epoch": 0.5,
18
+ "eval_loss": 1.6191972494125366,
19
+ "eval_runtime": 41.7522,
20
+ "eval_samples_per_second": 0.096,
21
+ "eval_steps_per_second": 0.048,
22
+ "eval_wer": 153.2258064516129,
23
+ "step": 1
24
  },
25
  {
26
+ "epoch": 1.0,
27
+ "learning_rate": 1e-05,
28
+ "loss": 1.3028,
29
+ "step": 2
30
  },
31
  {
32
+ "epoch": 1.0,
33
+ "eval_loss": 1.6191972494125366,
34
+ "eval_runtime": 37.2469,
35
+ "eval_samples_per_second": 0.107,
36
+ "eval_steps_per_second": 0.054,
37
+ "eval_wer": 153.2258064516129,
38
+ "step": 2
39
  },
40
  {
41
+ "epoch": 1.0,
42
+ "step": 2,
43
+ "total_flos": 196951080960000.0,
44
+ "train_loss": 1.4339025020599365,
45
+ "train_runtime": 108.1566,
46
+ "train_samples_per_second": 0.074,
47
+ "train_steps_per_second": 0.018
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
  ],
50
+ "max_steps": 2,
51
  "num_train_epochs": 9223372036854775807,
52
+ "total_flos": 196951080960000.0,
53
  "trial_name": null,
54
  "trial_params": null
55
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcbca0d141969bcb1c3cd0ef5a009221139334753b899d88e4d5003bd23f4b5f
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081bb11364a81f07b31bca0107ccb5d62d955d95e115f206089558cf85595e34
3
  size 3579