AlekseyKorshuk's picture
End of training
4f0db4d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 56,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 6e-06,
"loss": 2.5647,
"step": 1
},
{
"epoch": 0.11,
"learning_rate": 6e-06,
"loss": 2.6377,
"step": 6
},
{
"epoch": 0.11,
"eval_accuracy": 0.3015994648178556,
"eval_loss": 2.46875,
"eval_runtime": 64.0845,
"eval_samples_per_second": 4.915,
"eval_steps_per_second": 0.624,
"step": 6
},
{
"epoch": 0.21,
"learning_rate": 6e-06,
"loss": 2.5046,
"step": 12
},
{
"epoch": 0.21,
"eval_accuracy": 0.3096454418293499,
"eval_loss": 2.384765625,
"eval_runtime": 64.5855,
"eval_samples_per_second": 4.877,
"eval_steps_per_second": 0.619,
"step": 12
},
{
"epoch": 0.32,
"learning_rate": 6e-06,
"loss": 2.4755,
"step": 18
},
{
"epoch": 0.32,
"eval_accuracy": 0.315568935109165,
"eval_loss": 2.322265625,
"eval_runtime": 64.83,
"eval_samples_per_second": 4.859,
"eval_steps_per_second": 0.617,
"step": 18
},
{
"epoch": 0.43,
"learning_rate": 6e-06,
"loss": 2.459,
"step": 24
},
{
"epoch": 0.43,
"eval_accuracy": 0.32014839141275925,
"eval_loss": 2.271484375,
"eval_runtime": 64.7722,
"eval_samples_per_second": 4.863,
"eval_steps_per_second": 0.618,
"step": 24
},
{
"epoch": 0.54,
"learning_rate": 6e-06,
"loss": 2.3602,
"step": 30
},
{
"epoch": 0.54,
"eval_accuracy": 0.3243203794927933,
"eval_loss": 2.224609375,
"eval_runtime": 63.7938,
"eval_samples_per_second": 4.938,
"eval_steps_per_second": 0.627,
"step": 30
},
{
"epoch": 0.64,
"learning_rate": 6e-06,
"loss": 2.3829,
"step": 36
},
{
"epoch": 0.64,
"eval_accuracy": 0.3275071458979505,
"eval_loss": 2.189453125,
"eval_runtime": 64.8117,
"eval_samples_per_second": 4.86,
"eval_steps_per_second": 0.617,
"step": 36
},
{
"epoch": 0.75,
"learning_rate": 6e-06,
"loss": 2.3188,
"step": 42
},
{
"epoch": 0.75,
"eval_accuracy": 0.331490603904397,
"eval_loss": 2.146484375,
"eval_runtime": 63.7849,
"eval_samples_per_second": 4.938,
"eval_steps_per_second": 0.627,
"step": 42
},
{
"epoch": 0.86,
"learning_rate": 6e-06,
"loss": 2.2895,
"step": 48
},
{
"epoch": 0.86,
"eval_accuracy": 0.3365140181232135,
"eval_loss": 2.103515625,
"eval_runtime": 64.7342,
"eval_samples_per_second": 4.866,
"eval_steps_per_second": 0.618,
"step": 48
},
{
"epoch": 0.96,
"learning_rate": 6e-06,
"loss": 2.3062,
"step": 54
},
{
"epoch": 0.96,
"eval_accuracy": 0.341166453810132,
"eval_loss": 2.05859375,
"eval_runtime": 64.7609,
"eval_samples_per_second": 4.864,
"eval_steps_per_second": 0.618,
"step": 54
},
{
"epoch": 1.0,
"step": 56,
"total_flos": 3701894676480.0,
"train_loss": 2.4000658307756697,
"train_runtime": 10898.1431,
"train_samples_per_second": 0.041,
"train_steps_per_second": 0.005
}
],
"max_steps": 56,
"num_train_epochs": 1,
"total_flos": 3701894676480.0,
"trial_name": null,
"trial_params": null
}