|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9748106591865358, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.765037593984963e-05, |
|
"loss": 12.2573, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.7165932655334473, |
|
"eval_runtime": 138.6905, |
|
"eval_samples_per_second": 13.483, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.295112781954887e-05, |
|
"loss": 2.6565, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.7028679847717285, |
|
"eval_runtime": 135.8765, |
|
"eval_samples_per_second": 13.762, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.825187969924812e-05, |
|
"loss": 2.6054, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.677929162979126, |
|
"eval_runtime": 138.76, |
|
"eval_samples_per_second": 13.477, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.355263157894737e-05, |
|
"loss": 2.6296, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.667823553085327, |
|
"eval_runtime": 138.8745, |
|
"eval_samples_per_second": 13.465, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8853383458646617e-05, |
|
"loss": 2.6148, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.6645262241363525, |
|
"eval_runtime": 138.612, |
|
"eval_samples_per_second": 13.491, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.4154135338345866e-05, |
|
"loss": 2.5843, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.6597423553466797, |
|
"eval_runtime": 138.5965, |
|
"eval_samples_per_second": 13.492, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.9454887218045115e-05, |
|
"loss": 2.5548, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 2.653554677963257, |
|
"eval_runtime": 138.5895, |
|
"eval_samples_per_second": 13.493, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4755639097744361e-05, |
|
"loss": 2.5588, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.6523187160491943, |
|
"eval_runtime": 138.5665, |
|
"eval_samples_per_second": 13.495, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.005639097744361e-05, |
|
"loss": 2.5814, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.64888858795166, |
|
"eval_runtime": 138.6375, |
|
"eval_samples_per_second": 13.488, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 2.569, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.6475982666015625, |
|
"eval_runtime": 138.5945, |
|
"eval_samples_per_second": 13.493, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.578947368421053e-07, |
|
"loss": 2.5401, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.6439733505249023, |
|
"eval_runtime": 138.082, |
|
"eval_samples_per_second": 13.543, |
|
"step": 2200 |
|
} |
|
], |
|
"max_steps": 2228, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.691387690319872e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|