|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 200.0, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 5.82e-05, |
|
"loss": 9.8118, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0001182, |
|
"loss": 3.4789, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.00017819999999999997, |
|
"loss": 3.114, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 0.0002382, |
|
"loss": 2.721, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 0.0002982, |
|
"loss": 1.7294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"eval_loss": 0.8540233373641968, |
|
"eval_runtime": 10.9769, |
|
"eval_samples_per_second": 19.313, |
|
"eval_steps_per_second": 0.638, |
|
"eval_wer": 0.9943609022556391, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.0002906129032258064, |
|
"loss": 1.351, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"learning_rate": 0.0002809354838709677, |
|
"loss": 1.1775, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 0.000271258064516129, |
|
"loss": 1.0539, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0002615806451612903, |
|
"loss": 0.9587, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 0.0002519032258064516, |
|
"loss": 0.8863, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"eval_loss": 0.7282317280769348, |
|
"eval_runtime": 11.4744, |
|
"eval_samples_per_second": 18.476, |
|
"eval_steps_per_second": 0.61, |
|
"eval_wer": 0.731203007518797, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"learning_rate": 0.00024222580645161287, |
|
"loss": 0.7985, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 0.00023254838709677416, |
|
"loss": 0.7323, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"learning_rate": 0.00022287096774193548, |
|
"loss": 0.6712, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 77.78, |
|
"learning_rate": 0.00021319354838709678, |
|
"loss": 0.6214, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 0.00020351612903225804, |
|
"loss": 0.5789, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_loss": 0.817844033241272, |
|
"eval_runtime": 11.023, |
|
"eval_samples_per_second": 19.232, |
|
"eval_steps_per_second": 0.635, |
|
"eval_wer": 0.8101503759398496, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"learning_rate": 0.00019383870967741934, |
|
"loss": 0.5519, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 94.44, |
|
"learning_rate": 0.00018416129032258063, |
|
"loss": 0.4957, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.00017448387096774193, |
|
"loss": 0.4599, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 105.56, |
|
"learning_rate": 0.0001648064516129032, |
|
"loss": 0.4272, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"learning_rate": 0.0001551290322580645, |
|
"loss": 0.3899, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"eval_loss": 0.803404688835144, |
|
"eval_runtime": 10.8752, |
|
"eval_samples_per_second": 19.494, |
|
"eval_steps_per_second": 0.644, |
|
"eval_wer": 0.7700501253132832, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 0.0001454516129032258, |
|
"loss": 0.3725, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 122.22, |
|
"learning_rate": 0.00013577419354838708, |
|
"loss": 0.3373, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 127.78, |
|
"learning_rate": 0.00012609677419354837, |
|
"loss": 0.3134, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 0.00011641935483870967, |
|
"loss": 0.3057, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"learning_rate": 0.00010674193548387096, |
|
"loss": 0.2869, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"eval_loss": 0.9060825705528259, |
|
"eval_runtime": 10.9301, |
|
"eval_samples_per_second": 19.396, |
|
"eval_steps_per_second": 0.64, |
|
"eval_wer": 0.699874686716792, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 144.44, |
|
"learning_rate": 9.706451612903224e-05, |
|
"loss": 0.2579, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 8.738709677419355e-05, |
|
"loss": 0.2383, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 155.56, |
|
"learning_rate": 7.770967741935483e-05, |
|
"loss": 0.2253, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 161.11, |
|
"learning_rate": 6.803225806451613e-05, |
|
"loss": 0.2144, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 5.835483870967742e-05, |
|
"loss": 0.1934, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"eval_loss": 0.9400397539138794, |
|
"eval_runtime": 11.0289, |
|
"eval_samples_per_second": 19.222, |
|
"eval_steps_per_second": 0.635, |
|
"eval_wer": 0.7105263157894737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 172.22, |
|
"learning_rate": 4.8677419354838705e-05, |
|
"loss": 0.1874, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 177.78, |
|
"learning_rate": 3.9e-05, |
|
"loss": 0.1778, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 183.33, |
|
"learning_rate": 2.932258064516129e-05, |
|
"loss": 0.171, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 188.89, |
|
"learning_rate": 1.9645161290322582e-05, |
|
"loss": 0.1568, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 194.44, |
|
"learning_rate": 9.96774193548387e-06, |
|
"loss": 0.1551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 194.44, |
|
"eval_loss": 0.966656506061554, |
|
"eval_runtime": 11.4149, |
|
"eval_samples_per_second": 18.572, |
|
"eval_steps_per_second": 0.613, |
|
"eval_wer": 0.6954887218045113, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 2.903225806451613e-07, |
|
"loss": 0.145, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"step": 3600, |
|
"total_flos": 2.405940879390409e+19, |
|
"train_loss": 0.9929841698540581, |
|
"train_runtime": 8863.3906, |
|
"train_samples_per_second": 12.501, |
|
"train_steps_per_second": 0.406 |
|
} |
|
], |
|
"max_steps": 3600, |
|
"num_train_epochs": 200, |
|
"total_flos": 2.405940879390409e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|