{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.303030303030305,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 0.6655681133270264,
      "eval_runtime": 31.9535,
      "eval_samples_per_second": 16.242,
      "eval_steps_per_second": 1.033,
      "step": 33
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.34202826023101807,
      "eval_runtime": 32.6272,
      "eval_samples_per_second": 15.907,
      "eval_steps_per_second": 1.011,
      "step": 66
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.1993253082036972,
      "eval_runtime": 34.1947,
      "eval_samples_per_second": 15.178,
      "eval_steps_per_second": 0.965,
      "step": 99
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.12108779698610306,
      "eval_runtime": 33.5003,
      "eval_samples_per_second": 15.492,
      "eval_steps_per_second": 0.985,
      "step": 132
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.08059267699718475,
      "eval_runtime": 33.0363,
      "eval_samples_per_second": 15.71,
      "eval_steps_per_second": 0.999,
      "step": 165
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.0539543516933918,
      "eval_runtime": 33.3191,
      "eval_samples_per_second": 15.577,
      "eval_steps_per_second": 0.99,
      "step": 198
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.033514220267534256,
      "eval_runtime": 33.9315,
      "eval_samples_per_second": 15.296,
      "eval_steps_per_second": 0.973,
      "step": 231
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.02793893776834011,
      "eval_runtime": 33.1021,
      "eval_samples_per_second": 15.679,
      "eval_steps_per_second": 0.997,
      "step": 264
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.017579322680830956,
      "eval_runtime": 33.1929,
      "eval_samples_per_second": 15.636,
      "eval_steps_per_second": 0.994,
      "step": 297
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.017992401495575905,
      "eval_runtime": 33.5989,
      "eval_samples_per_second": 15.447,
      "eval_steps_per_second": 0.982,
      "step": 330
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.011186002753674984,
      "eval_runtime": 33.2114,
      "eval_samples_per_second": 15.627,
      "eval_steps_per_second": 0.994,
      "step": 363
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.009413644671440125,
      "eval_runtime": 33.6973,
      "eval_samples_per_second": 15.402,
      "eval_steps_per_second": 0.979,
      "step": 396
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.008357277140021324,
      "eval_runtime": 34.2116,
      "eval_samples_per_second": 15.17,
      "eval_steps_per_second": 0.965,
      "step": 429
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.006698057986795902,
      "eval_runtime": 32.3567,
      "eval_samples_per_second": 16.04,
      "eval_steps_per_second": 1.02,
      "step": 462
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.005631112959235907,
      "eval_runtime": 30.3383,
      "eval_samples_per_second": 17.107,
      "eval_steps_per_second": 1.088,
      "step": 495
    },
    {
      "epoch": 15.15,
      "learning_rate": 1.2424242424242425e-05,
      "loss": 0.1575,
      "step": 500
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.00457022013142705,
      "eval_runtime": 30.6053,
      "eval_samples_per_second": 16.958,
      "eval_steps_per_second": 1.078,
      "step": 528
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.005157523322850466,
      "eval_runtime": 30.0767,
      "eval_samples_per_second": 17.256,
      "eval_steps_per_second": 1.097,
      "step": 561
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.0044335490092635155,
      "eval_runtime": 29.0299,
      "eval_samples_per_second": 17.878,
      "eval_steps_per_second": 1.137,
      "step": 594
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.003722449066117406,
      "eval_runtime": 28.4937,
      "eval_samples_per_second": 18.215,
      "eval_steps_per_second": 1.158,
      "step": 627
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.004425578285008669,
      "eval_runtime": 32.1425,
      "eval_samples_per_second": 16.147,
      "eval_steps_per_second": 1.027,
      "step": 660
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.0040681445971131325,
      "eval_runtime": 28.8069,
      "eval_samples_per_second": 18.017,
      "eval_steps_per_second": 1.146,
      "step": 693
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.003019771073013544,
      "eval_runtime": 28.6404,
      "eval_samples_per_second": 18.121,
      "eval_steps_per_second": 1.152,
      "step": 726
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.002829624805599451,
      "eval_runtime": 29.787,
      "eval_samples_per_second": 17.424,
      "eval_steps_per_second": 1.108,
      "step": 759
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.002751641208305955,
      "eval_runtime": 28.377,
      "eval_samples_per_second": 18.289,
      "eval_steps_per_second": 1.163,
      "step": 792
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.002945221494883299,
      "eval_runtime": 29.5958,
      "eval_samples_per_second": 17.536,
      "eval_steps_per_second": 1.115,
      "step": 825
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.0026160639245063066,
      "eval_runtime": 29.0161,
      "eval_samples_per_second": 17.887,
      "eval_steps_per_second": 1.137,
      "step": 858
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.002537393243983388,
      "eval_runtime": 28.4904,
      "eval_samples_per_second": 18.217,
      "eval_steps_per_second": 1.158,
      "step": 891
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.00242584478110075,
      "eval_runtime": 29.437,
      "eval_samples_per_second": 17.631,
      "eval_steps_per_second": 1.121,
      "step": 924
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.0026495754718780518,
      "eval_runtime": 28.3889,
      "eval_samples_per_second": 18.282,
      "eval_steps_per_second": 1.162,
      "step": 957
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.0023259243462234735,
      "eval_runtime": 28.1977,
      "eval_samples_per_second": 18.406,
      "eval_steps_per_second": 1.17,
      "step": 990
    },
    {
      "epoch": 30.3,
      "learning_rate": 4.848484848484849e-06,
      "loss": 0.0065,
      "step": 1000
    }
  ],
  "logging_steps": 500,
  "max_steps": 1320,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 278856790097838.0,
  "trial_name": null,
  "trial_params": null
}