AlekseyKorshuk's picture
End of training
8c9e33b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 6e-06,
"loss": 2.718,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 6e-06,
"loss": 2.5793,
"step": 6
},
{
"epoch": 0.08,
"eval_accuracy": 0.27131470139518427,
"eval_loss": 2.572265625,
"eval_runtime": 75.3129,
"eval_samples_per_second": 4.833,
"eval_steps_per_second": 0.611,
"step": 6
},
{
"epoch": 0.16,
"learning_rate": 6e-06,
"loss": 2.5612,
"step": 12
},
{
"epoch": 0.16,
"eval_accuracy": 0.27501824131602803,
"eval_loss": 2.5,
"eval_runtime": 75.9396,
"eval_samples_per_second": 4.793,
"eval_steps_per_second": 0.606,
"step": 12
},
{
"epoch": 0.24,
"learning_rate": 6e-06,
"loss": 2.5235,
"step": 18
},
{
"epoch": 0.24,
"eval_accuracy": 0.27841775930307117,
"eval_loss": 2.447265625,
"eval_runtime": 75.8369,
"eval_samples_per_second": 4.8,
"eval_steps_per_second": 0.607,
"step": 18
},
{
"epoch": 0.32,
"learning_rate": 6e-06,
"loss": 2.4961,
"step": 24
},
{
"epoch": 0.32,
"eval_accuracy": 0.2818393879762089,
"eval_loss": 2.41015625,
"eval_runtime": 75.7164,
"eval_samples_per_second": 4.807,
"eval_steps_per_second": 0.608,
"step": 24
},
{
"epoch": 0.4,
"learning_rate": 6e-06,
"loss": 2.4488,
"step": 30
},
{
"epoch": 0.4,
"eval_accuracy": 0.2848740796426913,
"eval_loss": 2.3671875,
"eval_runtime": 75.8548,
"eval_samples_per_second": 4.799,
"eval_steps_per_second": 0.606,
"step": 30
},
{
"epoch": 0.48,
"learning_rate": 6e-06,
"loss": 2.4121,
"step": 36
},
{
"epoch": 0.48,
"eval_accuracy": 0.2877761071926062,
"eval_loss": 2.33203125,
"eval_runtime": 75.8441,
"eval_samples_per_second": 4.799,
"eval_steps_per_second": 0.607,
"step": 36
},
{
"epoch": 0.56,
"learning_rate": 6e-06,
"loss": 2.3901,
"step": 42
},
{
"epoch": 0.56,
"eval_accuracy": 0.2902580317067239,
"eval_loss": 2.302734375,
"eval_runtime": 75.8507,
"eval_samples_per_second": 4.799,
"eval_steps_per_second": 0.606,
"step": 42
},
{
"epoch": 0.64,
"learning_rate": 6e-06,
"loss": 2.2845,
"step": 48
},
{
"epoch": 0.64,
"eval_accuracy": 0.29269573484865236,
"eval_loss": 2.271484375,
"eval_runtime": 75.7801,
"eval_samples_per_second": 4.803,
"eval_steps_per_second": 0.607,
"step": 48
},
{
"epoch": 0.72,
"learning_rate": 6e-06,
"loss": 2.3032,
"step": 54
},
{
"epoch": 0.72,
"eval_accuracy": 0.29554248568333075,
"eval_loss": 2.2421875,
"eval_runtime": 75.8262,
"eval_samples_per_second": 4.8,
"eval_steps_per_second": 0.607,
"step": 54
},
{
"epoch": 0.8,
"learning_rate": 6e-06,
"loss": 2.2954,
"step": 60
},
{
"epoch": 0.8,
"eval_accuracy": 0.2984555685762929,
"eval_loss": 2.208984375,
"eval_runtime": 75.8901,
"eval_samples_per_second": 4.796,
"eval_steps_per_second": 0.606,
"step": 60
},
{
"epoch": 0.88,
"learning_rate": 6e-06,
"loss": 2.3908,
"step": 66
},
{
"epoch": 0.88,
"eval_accuracy": 0.30088774404669777,
"eval_loss": 2.18359375,
"eval_runtime": 76.1084,
"eval_samples_per_second": 4.783,
"eval_steps_per_second": 0.604,
"step": 66
},
{
"epoch": 0.96,
"learning_rate": 6e-06,
"loss": 2.2676,
"step": 72
},
{
"epoch": 0.96,
"eval_accuracy": 0.3035631370641431,
"eval_loss": 2.150390625,
"eval_runtime": 75.8379,
"eval_samples_per_second": 4.8,
"eval_steps_per_second": 0.607,
"step": 72
},
{
"epoch": 1.0,
"step": 75,
"total_flos": 4974649540608.0,
"train_loss": 2.4108121744791666,
"train_runtime": 14351.1542,
"train_samples_per_second": 0.042,
"train_steps_per_second": 0.005
}
],
"max_steps": 75,
"num_train_epochs": 1,
"total_flos": 4974649540608.0,
"trial_name": null,
"trial_params": null
}