|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.0, |
|
"global_step": 1368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.965881397238018e-05, |
|
"loss": 1.3459, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.5954508529650693e-05, |
|
"loss": 0.7067, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.22502030869212e-05, |
|
"loss": 0.2494, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.8545897644191714e-05, |
|
"loss": 0.0634, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.4841592201462226e-05, |
|
"loss": 0.0222, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.1137286758732738e-05, |
|
"loss": 0.0062, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.4329813160032494e-06, |
|
"loss": 0.0047, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.728675873273761e-06, |
|
"loss": 0.002, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.437043054427295e-08, |
|
"loss": 0.0017, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 1368, |
|
"total_flos": 3267933886955520.0, |
|
"train_loss": 0.26691262162568274, |
|
"train_runtime": 934.9288, |
|
"train_samples_per_second": 13.169, |
|
"train_steps_per_second": 1.463 |
|
} |
|
], |
|
"max_steps": 1368, |
|
"num_train_epochs": 9, |
|
"total_flos": 3267933886955520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|