|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.991695501730105, |
|
"global_step": 4332, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00027499999999999996, |
|
"loss": 2.3243, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.3360440731048584, |
|
"eval_runtime": 81.7171, |
|
"eval_samples_per_second": 457.077, |
|
"eval_steps_per_second": 57.136, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00025, |
|
"loss": 1.9943, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.3849265575408936, |
|
"eval_runtime": 80.8133, |
|
"eval_samples_per_second": 462.189, |
|
"eval_steps_per_second": 57.775, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.000225, |
|
"loss": 1.7331, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.5010716915130615, |
|
"eval_runtime": 80.8638, |
|
"eval_samples_per_second": 461.9, |
|
"eval_steps_per_second": 57.739, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00019993074792243765, |
|
"loss": 1.463, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.7065136432647705, |
|
"eval_runtime": 80.5929, |
|
"eval_samples_per_second": 463.453, |
|
"eval_steps_per_second": 57.933, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00017493074792243764, |
|
"loss": 1.1969, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.9511666297912598, |
|
"eval_runtime": 80.4169, |
|
"eval_samples_per_second": 464.467, |
|
"eval_steps_per_second": 58.06, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00014993074792243766, |
|
"loss": 0.9339, |
|
"step": 2167 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 3.2412941455841064, |
|
"eval_runtime": 80.5491, |
|
"eval_samples_per_second": 463.705, |
|
"eval_steps_per_second": 57.965, |
|
"step": 2167 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00012493074792243767, |
|
"loss": 0.6915, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 3.56394362449646, |
|
"eval_runtime": 79.8139, |
|
"eval_samples_per_second": 467.976, |
|
"eval_steps_per_second": 58.499, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.986149584487533e-05, |
|
"loss": 0.4811, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.897730827331543, |
|
"eval_runtime": 80.0943, |
|
"eval_samples_per_second": 466.338, |
|
"eval_steps_per_second": 58.294, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 7.486149584487535e-05, |
|
"loss": 0.3195, |
|
"step": 3251 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 4.2111005783081055, |
|
"eval_runtime": 79.9229, |
|
"eval_samples_per_second": 467.338, |
|
"eval_steps_per_second": 58.419, |
|
"step": 3251 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.986149584487534e-05, |
|
"loss": 0.211, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 4.46787166595459, |
|
"eval_runtime": 80.4105, |
|
"eval_samples_per_second": 464.504, |
|
"eval_steps_per_second": 58.065, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.4861495844875343e-05, |
|
"loss": 0.1515, |
|
"step": 3973 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 4.6477370262146, |
|
"eval_runtime": 80.1153, |
|
"eval_samples_per_second": 466.216, |
|
"eval_steps_per_second": 58.279, |
|
"step": 3973 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.1233, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_loss": 4.7357659339904785, |
|
"eval_runtime": 80.0925, |
|
"eval_samples_per_second": 466.348, |
|
"eval_steps_per_second": 58.295, |
|
"step": 4332 |
|
} |
|
], |
|
"max_steps": 4332, |
|
"num_train_epochs": 12, |
|
"total_flos": 1.705389820458624e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|