|
{ |
|
"best_metric": 1.3794686794281006, |
|
"best_model_checkpoint": "../saved_models_new/gptNEO_author_VJ_epochs15_lr5e-05/checkpoint-2000", |
|
"epoch": 14.37125748502994, |
|
"global_step": 24000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.5146, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.3794686794281006, |
|
"eval_runtime": 14.6483, |
|
"eval_samples_per_second": 25.395, |
|
"eval_steps_per_second": 12.698, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.773242630385488e-05, |
|
"loss": 1.1782, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.3970938920974731, |
|
"eval_runtime": 14.6463, |
|
"eval_samples_per_second": 25.399, |
|
"eval_steps_per_second": 12.699, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 4.319727891156463e-05, |
|
"loss": 1.0049, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 1.4491934776306152, |
|
"eval_runtime": 14.6504, |
|
"eval_samples_per_second": 25.392, |
|
"eval_steps_per_second": 12.696, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 3.8662131519274384e-05, |
|
"loss": 0.8259, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_loss": 1.5493556261062622, |
|
"eval_runtime": 14.6505, |
|
"eval_samples_per_second": 25.392, |
|
"eval_steps_per_second": 12.696, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 3.412698412698413e-05, |
|
"loss": 0.6638, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 1.6542575359344482, |
|
"eval_runtime": 14.6609, |
|
"eval_samples_per_second": 25.374, |
|
"eval_steps_per_second": 12.687, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 2.959183673469388e-05, |
|
"loss": 0.4904, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"eval_loss": 1.8380136489868164, |
|
"eval_runtime": 14.6527, |
|
"eval_samples_per_second": 25.388, |
|
"eval_steps_per_second": 12.694, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 2.5056689342403626e-05, |
|
"loss": 0.3432, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"eval_loss": 1.9379925727844238, |
|
"eval_runtime": 14.6501, |
|
"eval_samples_per_second": 25.392, |
|
"eval_steps_per_second": 12.696, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 2.052154195011338e-05, |
|
"loss": 0.2499, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"eval_loss": 2.04109787940979, |
|
"eval_runtime": 14.6527, |
|
"eval_samples_per_second": 25.388, |
|
"eval_steps_per_second": 12.694, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 1.5986394557823133e-05, |
|
"loss": 0.1809, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"eval_loss": 2.1205220222473145, |
|
"eval_runtime": 14.6526, |
|
"eval_samples_per_second": 25.388, |
|
"eval_steps_per_second": 12.694, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 1.145124716553288e-05, |
|
"loss": 0.1246, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_loss": 2.1967058181762695, |
|
"eval_runtime": 14.6441, |
|
"eval_samples_per_second": 25.403, |
|
"eval_steps_per_second": 12.701, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 6.9160997732426305e-06, |
|
"loss": 0.0824, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"eval_loss": 2.2765705585479736, |
|
"eval_runtime": 14.6502, |
|
"eval_samples_per_second": 25.392, |
|
"eval_steps_per_second": 12.696, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 0.0615, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"eval_loss": 2.3161871433258057, |
|
"eval_runtime": 14.6483, |
|
"eval_samples_per_second": 25.395, |
|
"eval_steps_per_second": 12.698, |
|
"step": 24000 |
|
} |
|
], |
|
"max_steps": 25050, |
|
"num_train_epochs": 15, |
|
"total_flos": 6268970336256000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|