{
  "best_metric": 19.97153700189753,
  "best_model_checkpoint": "./checkpoint-800",
  "epoch": 15.016666666666667,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 2e-08,
      "loss": 1.2498,
      "step": 25
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.5e-08,
      "loss": 0.9037,
      "step": 50
    },
    {
      "epoch": 1.02,
      "learning_rate": 6.999999999999999e-08,
      "loss": 1.0907,
      "step": 75
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.499999999999999e-08,
      "loss": 0.927,
      "step": 100
    },
    {
      "epoch": 2.01,
      "learning_rate": 9.983007697636658e-08,
      "loss": 0.8001,
      "step": 125
    },
    {
      "epoch": 2.04,
      "learning_rate": 9.918643144747679e-08,
      "loss": 0.8719,
      "step": 150
    },
    {
      "epoch": 2.06,
      "learning_rate": 9.812276182268236e-08,
      "loss": 0.5706,
      "step": 175
    },
    {
      "epoch": 3.03,
      "learning_rate": 9.656074673794016e-08,
      "loss": 0.7919,
      "step": 200
    },
    {
      "epoch": 3.03,
      "eval_loss": 0.279296875,
      "eval_runtime": 701.0908,
      "eval_samples_per_second": 3.164,
      "eval_steps_per_second": 0.1,
      "eval_wer": 22.58064516129032,
      "step": 200
    },
    {
      "epoch": 3.05,
      "learning_rate": 9.455032620941839e-08,
      "loss": 0.5192,
      "step": 225
    },
    {
      "epoch": 4.02,
      "learning_rate": 9.200467769494708e-08,
      "loss": 0.6054,
      "step": 250
    },
    {
      "epoch": 4.04,
      "learning_rate": 8.914398527981508e-08,
      "loss": 0.5903,
      "step": 275
    },
    {
      "epoch": 5.01,
      "learning_rate": 8.590631488815944e-08,
      "loss": 0.469,
      "step": 300
    },
    {
      "epoch": 5.03,
      "learning_rate": 8.23228470557425e-08,
      "loss": 0.6128,
      "step": 325
    },
    {
      "epoch": 5.06,
      "learning_rate": 7.84280925367132e-08,
      "loss": 0.4026,
      "step": 350
    },
    {
      "epoch": 6.02,
      "learning_rate": 7.408768370508576e-08,
      "loss": 0.5923,
      "step": 375
    },
    {
      "epoch": 6.05,
      "learning_rate": 6.967704118273256e-08,
      "loss": 0.4409,
      "step": 400
    },
    {
      "epoch": 6.05,
      "eval_loss": 0.26513671875,
      "eval_runtime": 702.5872,
      "eval_samples_per_second": 3.157,
      "eval_steps_per_second": 0.1,
      "eval_wer": 20.60721062618596,
      "step": 400
    },
    {
      "epoch": 7.01,
      "learning_rate": 6.507689799722478e-08,
      "loss": 0.4854,
      "step": 425
    },
    {
      "epoch": 7.04,
      "learning_rate": 6.03315560421329e-08,
      "loss": 0.4989,
      "step": 450
    },
    {
      "epoch": 8.0,
      "learning_rate": 5.5291509972967234e-08,
      "loss": 0.3982,
      "step": 475
    },
    {
      "epoch": 8.03,
      "learning_rate": 5.0392695044435566e-08,
      "loss": 0.5425,
      "step": 500
    },
    {
      "epoch": 8.06,
      "learning_rate": 4.5490098247957034e-08,
      "loss": 0.3773,
      "step": 525
    },
    {
      "epoch": 9.02,
      "learning_rate": 4.0630934270713755e-08,
      "loss": 0.5168,
      "step": 550
    },
    {
      "epoch": 9.05,
      "learning_rate": 3.586199951809582e-08,
      "loss": 0.4259,
      "step": 575
    },
    {
      "epoch": 10.01,
      "learning_rate": 3.1047378773808274e-08,
      "loss": 0.4393,
      "step": 600
    },
    {
      "epoch": 10.01,
      "eval_loss": 0.260009765625,
      "eval_runtime": 701.0779,
      "eval_samples_per_second": 3.164,
      "eval_steps_per_second": 0.1,
      "eval_wer": 20.06641366223909,
      "step": 600
    },
    {
      "epoch": 10.04,
      "learning_rate": 2.6603509286971338e-08,
      "loss": 0.497,
      "step": 625
    },
    {
      "epoch": 11.0,
      "learning_rate": 2.238496079552367e-08,
      "loss": 0.3649,
      "step": 650
    },
    {
      "epoch": 11.03,
      "learning_rate": 1.8432360227531113e-08,
      "loss": 0.5444,
      "step": 675
    },
    {
      "epoch": 11.06,
      "learning_rate": 1.4783773287174683e-08,
      "loss": 0.3612,
      "step": 700
    },
    {
      "epoch": 12.02,
      "learning_rate": 1.134947733186315e-08,
      "loss": 0.4897,
      "step": 725
    },
    {
      "epoch": 12.05,
      "learning_rate": 8.426519384872732e-09,
      "loss": 0.4383,
      "step": 750
    },
    {
      "epoch": 13.01,
      "learning_rate": 5.903936782582253e-09,
      "loss": 0.4205,
      "step": 775
    },
    {
      "epoch": 13.04,
      "learning_rate": 3.8060233744356625e-09,
      "loss": 0.4975,
      "step": 800
    },
    {
      "epoch": 13.04,
      "eval_loss": 0.25927734375,
      "eval_runtime": 700.0881,
      "eval_samples_per_second": 3.168,
      "eval_steps_per_second": 0.1,
      "eval_wer": 19.97153700189753,
      "step": 800
    },
    {
      "epoch": 13.06,
      "learning_rate": 2.152983213389559e-09,
      "loss": 0.3587,
      "step": 825
    },
    {
      "epoch": 14.03,
      "learning_rate": 9.228083061983804e-10,
      "loss": 0.5509,
      "step": 850
    },
    {
      "epoch": 14.05,
      "learning_rate": 2.2190176984600017e-10,
      "loss": 0.3727,
      "step": 875
    },
    {
      "epoch": 15.02,
      "learning_rate": 3.8553092647219954e-13,
      "loss": 0.4754,
      "step": 900
    },
    {
      "epoch": 15.02,
      "step": 900,
      "total_flos": 1.2318638198947106e+20,
      "train_loss": 0.5692690700954861,
      "train_runtime": 21274.2497,
      "train_samples_per_second": 2.707,
      "train_steps_per_second": 0.042
    }
  ],
  "max_steps": 900,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 1.2318638198947106e+20,
  "trial_name": null,
  "trial_params": null
}