|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.616636528028933, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8e-05, |
|
"loss": 2.7434, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.814986123959297e-05, |
|
"loss": 1.6287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.629972247918594e-05, |
|
"loss": 1.5903, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.444958371877891e-05, |
|
"loss": 1.5706, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.259944495837189e-05, |
|
"loss": 1.556, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.074930619796485e-05, |
|
"loss": 1.5469, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.889916743755783e-05, |
|
"loss": 1.5393, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.70490286771508e-05, |
|
"loss": 1.5316, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.519888991674377e-05, |
|
"loss": 1.5272, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.334875115633673e-05, |
|
"loss": 1.5229, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.9172784090042114, |
|
"eval_runtime": 137.7477, |
|
"eval_samples_per_second": 884.196, |
|
"eval_steps_per_second": 3.456, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.14986123959297e-05, |
|
"loss": 1.5171, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5.964847363552267e-05, |
|
"loss": 1.507, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5.779833487511564e-05, |
|
"loss": 1.5052, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.5948196114708607e-05, |
|
"loss": 1.5016, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.4098057354301575e-05, |
|
"loss": 1.5007, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.224791859389455e-05, |
|
"loss": 1.4962, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5.039777983348751e-05, |
|
"loss": 1.494, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.8547641073080486e-05, |
|
"loss": 1.4907, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.6697502312673454e-05, |
|
"loss": 1.4908, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.484736355226642e-05, |
|
"loss": 1.4895, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 1.9569236040115356, |
|
"eval_runtime": 138.1502, |
|
"eval_samples_per_second": 881.62, |
|
"eval_steps_per_second": 3.446, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.299722479185939e-05, |
|
"loss": 1.4883, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.1147086031452366e-05, |
|
"loss": 1.4843, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9296947271045334e-05, |
|
"loss": 1.4751, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.74468085106383e-05, |
|
"loss": 1.4734, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.559666975023127e-05, |
|
"loss": 1.471, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.374653098982424e-05, |
|
"loss": 1.4732, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.1896392229417213e-05, |
|
"loss": 1.4739, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0046253469010178e-05, |
|
"loss": 1.4692, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.8196114708603146e-05, |
|
"loss": 1.4645, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.6345975948196118e-05, |
|
"loss": 1.4667, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 1.9881402254104614, |
|
"eval_runtime": 137.335, |
|
"eval_samples_per_second": 886.854, |
|
"eval_steps_per_second": 3.466, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.4495837187789086e-05, |
|
"loss": 1.4677, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.2645698427382054e-05, |
|
"loss": 1.4639, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.0795559666975026e-05, |
|
"loss": 1.4615, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.8945420906567994e-05, |
|
"loss": 1.4551, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.7095282146160962e-05, |
|
"loss": 1.4539, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.5245143385753934e-05, |
|
"loss": 1.4535, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.33950046253469e-05, |
|
"loss": 1.4506, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.1544865864939872e-05, |
|
"loss": 1.4522, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 9.69472710453284e-06, |
|
"loss": 1.4504, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 7.84458834412581e-06, |
|
"loss": 1.4489, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_loss": 2.012910842895508, |
|
"eval_runtime": 137.8372, |
|
"eval_samples_per_second": 883.622, |
|
"eval_steps_per_second": 3.453, |
|
"step": 8000 |
|
} |
|
], |
|
"max_steps": 8848, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.5605846428483584e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|