|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.0, |
|
"global_step": 6144, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.755859375e-05, |
|
"loss": 228.0322, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 95.51275634765625, |
|
"eval_runtime": 19.2239, |
|
"eval_samples_per_second": 84.738, |
|
"eval_steps_per_second": 5.306, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.5117187500000005e-05, |
|
"loss": 102.3494, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 94.07129669189453, |
|
"eval_runtime": 19.254, |
|
"eval_samples_per_second": 84.606, |
|
"eval_steps_per_second": 5.298, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.267578125e-05, |
|
"loss": 90.6642, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 83.28936767578125, |
|
"eval_runtime": 19.2397, |
|
"eval_samples_per_second": 84.669, |
|
"eval_steps_per_second": 5.302, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.0234375e-05, |
|
"loss": 78.5213, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 83.8617172241211, |
|
"eval_runtime": 19.2535, |
|
"eval_samples_per_second": 84.608, |
|
"eval_steps_per_second": 5.298, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 3.7792968750000005e-05, |
|
"loss": 70.4173, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 79.97139739990234, |
|
"eval_runtime": 19.2503, |
|
"eval_samples_per_second": 84.622, |
|
"eval_steps_per_second": 5.299, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3.53515625e-05, |
|
"loss": 63.1215, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 81.74008178710938, |
|
"eval_runtime": 19.2347, |
|
"eval_samples_per_second": 84.69, |
|
"eval_steps_per_second": 5.303, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 3.291015625e-05, |
|
"loss": 55.0817, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 80.47997283935547, |
|
"eval_runtime": 19.239, |
|
"eval_samples_per_second": 84.672, |
|
"eval_steps_per_second": 5.302, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 3.0468750000000002e-05, |
|
"loss": 49.4482, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 78.9246597290039, |
|
"eval_runtime": 19.2369, |
|
"eval_samples_per_second": 84.681, |
|
"eval_steps_per_second": 5.302, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.802734375e-05, |
|
"loss": 45.7534, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 79.2200927734375, |
|
"eval_runtime": 19.2428, |
|
"eval_samples_per_second": 84.655, |
|
"eval_steps_per_second": 5.301, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.55859375e-05, |
|
"loss": 43.1915, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 81.24749755859375, |
|
"eval_runtime": 19.2441, |
|
"eval_samples_per_second": 84.649, |
|
"eval_steps_per_second": 5.3, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 2.3144531250000002e-05, |
|
"loss": 41.0389, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 78.5303955078125, |
|
"eval_runtime": 19.2504, |
|
"eval_samples_per_second": 84.622, |
|
"eval_steps_per_second": 5.299, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 2.0703125e-05, |
|
"loss": 38.4756, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 77.77259826660156, |
|
"eval_runtime": 19.2175, |
|
"eval_samples_per_second": 84.766, |
|
"eval_steps_per_second": 5.308, |
|
"step": 6144 |
|
} |
|
], |
|
"max_steps": 10240, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.582451944655667e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|