|
{ |
|
"best_metric": 1.818081021308899, |
|
"best_model_checkpoint": "math/checkpoint-17040", |
|
"epoch": 5.0, |
|
"global_step": 17040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.853286384976526e-05, |
|
"loss": 2.4525, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.706572769953052e-05, |
|
"loss": 2.3303, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.559859154929578e-05, |
|
"loss": 2.2788, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.413145539906103e-05, |
|
"loss": 2.2405, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.26643192488263e-05, |
|
"loss": 2.2089, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.119718309859155e-05, |
|
"loss": 2.1796, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.94922935962677, |
|
"eval_runtime": 38.2996, |
|
"eval_samples_per_second": 79.113, |
|
"eval_steps_per_second": 9.896, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.973004694835681e-05, |
|
"loss": 2.1575, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.826291079812207e-05, |
|
"loss": 2.1208, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.679577464788733e-05, |
|
"loss": 2.0958, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.532863849765258e-05, |
|
"loss": 2.0928, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.386150234741784e-05, |
|
"loss": 2.0898, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.23943661971831e-05, |
|
"loss": 2.0751, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.092723004694836e-05, |
|
"loss": 2.0669, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.8773746490478516, |
|
"eval_runtime": 38.3673, |
|
"eval_samples_per_second": 78.973, |
|
"eval_steps_per_second": 9.878, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.9460093896713615e-05, |
|
"loss": 2.0433, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.7992957746478874e-05, |
|
"loss": 2.0275, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.6525821596244134e-05, |
|
"loss": 2.0168, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.505868544600939e-05, |
|
"loss": 2.0208, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.359154929577465e-05, |
|
"loss": 2.0111, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.2124413145539908e-05, |
|
"loss": 2.0058, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.0657276995305167e-05, |
|
"loss": 2.0019, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.8432753086090088, |
|
"eval_runtime": 38.3414, |
|
"eval_samples_per_second": 79.027, |
|
"eval_steps_per_second": 9.885, |
|
"step": 10224 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.9190140845070423e-05, |
|
"loss": 1.9746, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.7723004694835683e-05, |
|
"loss": 1.9727, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.625586854460094e-05, |
|
"loss": 1.9677, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.4788732394366198e-05, |
|
"loss": 1.9785, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.3321596244131457e-05, |
|
"loss": 1.9694, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.1854460093896715e-05, |
|
"loss": 1.9653, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.0387323943661972e-05, |
|
"loss": 1.9702, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.8244620561599731, |
|
"eval_runtime": 38.37, |
|
"eval_samples_per_second": 78.968, |
|
"eval_steps_per_second": 9.878, |
|
"step": 13632 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 8.92018779342723e-06, |
|
"loss": 1.9433, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 7.453051643192488e-06, |
|
"loss": 1.9495, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5.9859154929577465e-06, |
|
"loss": 1.9531, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 4.518779342723005e-06, |
|
"loss": 1.9457, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.051643192488263e-06, |
|
"loss": 1.9329, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.5845070422535212e-06, |
|
"loss": 1.9425, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.1737089201877935e-07, |
|
"loss": 1.9458, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.818081021308899, |
|
"eval_runtime": 38.3351, |
|
"eval_samples_per_second": 79.04, |
|
"eval_steps_per_second": 9.886, |
|
"step": 17040 |
|
} |
|
], |
|
"max_steps": 17040, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.312631697641472e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|