|
{ |
|
"best_metric": 1.5348279476165771, |
|
"best_model_checkpoint": "../saved_models_new/gptNEO_author_RB_epochs15_lr5e-05/checkpoint-4000", |
|
"epoch": 14.934660858743, |
|
"global_step": 24000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.6896, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.5621881484985352, |
|
"eval_runtime": 14.0653, |
|
"eval_samples_per_second": 25.382, |
|
"eval_steps_per_second": 12.726, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.7630893153281216e-05, |
|
"loss": 1.3351, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 1.5348279476165771, |
|
"eval_runtime": 14.063, |
|
"eval_samples_per_second": 25.386, |
|
"eval_steps_per_second": 12.728, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.289267945984364e-05, |
|
"loss": 1.154, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 1.5707106590270996, |
|
"eval_runtime": 14.0614, |
|
"eval_samples_per_second": 25.389, |
|
"eval_steps_per_second": 12.73, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 3.815446576640607e-05, |
|
"loss": 0.9265, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_loss": 1.6517491340637207, |
|
"eval_runtime": 14.0589, |
|
"eval_samples_per_second": 25.393, |
|
"eval_steps_per_second": 12.732, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.341625207296849e-05, |
|
"loss": 0.6998, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 1.8329756259918213, |
|
"eval_runtime": 14.0588, |
|
"eval_samples_per_second": 25.393, |
|
"eval_steps_per_second": 12.732, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 2.867803837953092e-05, |
|
"loss": 0.5298, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"eval_loss": 1.9407192468643188, |
|
"eval_runtime": 14.0594, |
|
"eval_samples_per_second": 25.392, |
|
"eval_steps_per_second": 12.732, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 2.3939824686093343e-05, |
|
"loss": 0.391, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"eval_loss": 2.0445475578308105, |
|
"eval_runtime": 14.0538, |
|
"eval_samples_per_second": 25.402, |
|
"eval_steps_per_second": 12.737, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 1.920161099265577e-05, |
|
"loss": 0.2698, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_loss": 2.14052414894104, |
|
"eval_runtime": 14.0566, |
|
"eval_samples_per_second": 25.397, |
|
"eval_steps_per_second": 12.734, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 1.4463397299218195e-05, |
|
"loss": 0.1802, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_loss": 2.2577126026153564, |
|
"eval_runtime": 14.0589, |
|
"eval_samples_per_second": 25.393, |
|
"eval_steps_per_second": 12.732, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 9.725183605780622e-06, |
|
"loss": 0.1244, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"eval_loss": 2.3212833404541016, |
|
"eval_runtime": 14.0654, |
|
"eval_samples_per_second": 25.381, |
|
"eval_steps_per_second": 12.726, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 4.986969912343047e-06, |
|
"loss": 0.0874, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"eval_loss": 2.3692986965179443, |
|
"eval_runtime": 14.0588, |
|
"eval_samples_per_second": 25.393, |
|
"eval_steps_per_second": 12.732, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 2.4875621890547267e-07, |
|
"loss": 0.0666, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_loss": 2.398601531982422, |
|
"eval_runtime": 14.0655, |
|
"eval_samples_per_second": 25.381, |
|
"eval_steps_per_second": 12.726, |
|
"step": 24000 |
|
} |
|
], |
|
"max_steps": 24105, |
|
"num_train_epochs": 15, |
|
"total_flos": 6267141886574592.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|