|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9880878960910772e-05, |
|
"loss": 0.9998, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.916316904487005e-05, |
|
"loss": 0.909, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7841198065767107e-05, |
|
"loss": 0.8932, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6002142805483686e-05, |
|
"loss": 0.8899, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3767278936351853e-05, |
|
"loss": 0.8681, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1283983551465512e-05, |
|
"loss": 0.866, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.71601644853449e-06, |
|
"loss": 0.8606, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.232721063648148e-06, |
|
"loss": 0.8603, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.997857194516321e-06, |
|
"loss": 0.8592, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.158801934232897e-06, |
|
"loss": 0.8497, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.368309551299536e-07, |
|
"loss": 0.8514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.1912103908922945e-07, |
|
"loss": 0.8521, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 126, |
|
"total_flos": 3.8892807999371346e+18, |
|
"train_loss": 0.877318367125496, |
|
"train_runtime": 1975.1648, |
|
"train_samples_per_second": 97.517, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"max_steps": 126, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.8892807999371346e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|