|
{ |
|
"best_metric": 0.4556220769882202, |
|
"best_model_checkpoint": "output/checkpoint-600", |
|
"epoch": 0.807061790668348, |
|
"eval_steps": 50, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.5465, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.7053, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.6561679790026247, |
|
"eval_loss": 0.6764907836914062, |
|
"eval_runtime": 59.7766, |
|
"eval_samples_per_second": 6.374, |
|
"eval_steps_per_second": 1.606, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00015666666666666666, |
|
"loss": 0.6576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7217847769028871, |
|
"eval_loss": 0.574826717376709, |
|
"eval_runtime": 59.8708, |
|
"eval_samples_per_second": 6.364, |
|
"eval_steps_per_second": 1.603, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.5874, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.7480314960629921, |
|
"eval_loss": 0.5501317381858826, |
|
"eval_runtime": 55.7487, |
|
"eval_samples_per_second": 6.834, |
|
"eval_steps_per_second": 1.722, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002948780487804878, |
|
"loss": 0.5775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7401574803149606, |
|
"eval_loss": 0.5592061877250671, |
|
"eval_runtime": 54.7265, |
|
"eval_samples_per_second": 6.962, |
|
"eval_steps_per_second": 1.754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00027658536585365855, |
|
"loss": 0.5821, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.7664041994750657, |
|
"eval_loss": 0.5005777478218079, |
|
"eval_runtime": 54.7418, |
|
"eval_samples_per_second": 6.96, |
|
"eval_steps_per_second": 1.754, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00025829268292682926, |
|
"loss": 0.5699, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.6745406824146981, |
|
"eval_loss": 0.7000961303710938, |
|
"eval_runtime": 54.7375, |
|
"eval_samples_per_second": 6.96, |
|
"eval_steps_per_second": 1.754, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.5079, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.7322834645669292, |
|
"eval_loss": 0.5262107253074646, |
|
"eval_runtime": 54.7143, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.755, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00022170731707317072, |
|
"loss": 0.5826, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.7296587926509186, |
|
"eval_loss": 0.5798280835151672, |
|
"eval_runtime": 54.7163, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.755, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00020341463414634146, |
|
"loss": 0.5009, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.7506561679790026, |
|
"eval_loss": 0.5315915942192078, |
|
"eval_runtime": 54.7235, |
|
"eval_samples_per_second": 6.962, |
|
"eval_steps_per_second": 1.754, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018512195121951218, |
|
"loss": 0.4756, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7349081364829396, |
|
"eval_loss": 0.5232318639755249, |
|
"eval_runtime": 54.7185, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00016682926829268292, |
|
"loss": 0.4791, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.7847769028871391, |
|
"eval_loss": 0.447153776884079, |
|
"eval_runtime": 54.7183, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.754, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00014853658536585364, |
|
"loss": 0.4415, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7847769028871391, |
|
"eval_loss": 0.4556220769882202, |
|
"eval_runtime": 54.7334, |
|
"eval_samples_per_second": 6.961, |
|
"eval_steps_per_second": 1.754, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013024390243902438, |
|
"loss": 0.4528, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.7690288713910761, |
|
"eval_loss": 0.47037920355796814, |
|
"eval_runtime": 54.7397, |
|
"eval_samples_per_second": 6.96, |
|
"eval_steps_per_second": 1.754, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00011195121951219511, |
|
"loss": 0.5177, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.7742782152230971, |
|
"eval_loss": 0.4634643793106079, |
|
"eval_runtime": 54.7337, |
|
"eval_samples_per_second": 6.961, |
|
"eval_steps_per_second": 1.754, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.365853658536585e-05, |
|
"loss": 0.4753, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.7847769028871391, |
|
"eval_loss": 0.49703294038772583, |
|
"eval_runtime": 54.7189, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.754, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.536585365853658e-05, |
|
"loss": 0.452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.7821522309711286, |
|
"eval_loss": 0.46870630979537964, |
|
"eval_runtime": 54.7224, |
|
"eval_samples_per_second": 6.962, |
|
"eval_steps_per_second": 1.754, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1000, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 1.3840268699529216e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|