AlekseyKorshuk's picture
End of training
4996e40
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3e-07,
"loss": 2.718,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 3e-07,
"loss": 2.5856,
"step": 6
},
{
"epoch": 0.08,
"eval_accuracy": 0.2696508722665664,
"eval_loss": 2.595703125,
"eval_runtime": 73.0734,
"eval_samples_per_second": 4.981,
"eval_steps_per_second": 0.63,
"step": 6
},
{
"epoch": 0.16,
"learning_rate": 3e-07,
"loss": 2.6027,
"step": 12
},
{
"epoch": 0.16,
"eval_accuracy": 0.26980564706922855,
"eval_loss": 2.59375,
"eval_runtime": 73.5378,
"eval_samples_per_second": 4.95,
"eval_steps_per_second": 0.626,
"step": 12
},
{
"epoch": 0.24,
"learning_rate": 3e-07,
"loss": 2.619,
"step": 18
},
{
"epoch": 0.24,
"eval_accuracy": 0.26998806022950894,
"eval_loss": 2.587890625,
"eval_runtime": 73.5544,
"eval_samples_per_second": 4.949,
"eval_steps_per_second": 0.625,
"step": 18
},
{
"epoch": 0.32,
"learning_rate": 3e-07,
"loss": 2.6121,
"step": 24
},
{
"epoch": 0.32,
"eval_accuracy": 0.27019258407588387,
"eval_loss": 2.583984375,
"eval_runtime": 73.6493,
"eval_samples_per_second": 4.942,
"eval_steps_per_second": 0.625,
"step": 24
},
{
"epoch": 0.4,
"learning_rate": 3e-07,
"loss": 2.6024,
"step": 30
},
{
"epoch": 0.4,
"eval_accuracy": 0.27057399341101557,
"eval_loss": 2.576171875,
"eval_runtime": 73.5651,
"eval_samples_per_second": 4.948,
"eval_steps_per_second": 0.625,
"step": 30
},
{
"epoch": 0.48,
"learning_rate": 3e-07,
"loss": 2.5878,
"step": 36
},
{
"epoch": 0.48,
"eval_accuracy": 0.270739823556725,
"eval_loss": 2.5703125,
"eval_runtime": 73.2671,
"eval_samples_per_second": 4.968,
"eval_steps_per_second": 0.628,
"step": 36
},
{
"epoch": 0.56,
"learning_rate": 3e-07,
"loss": 2.5541,
"step": 42
},
{
"epoch": 0.56,
"eval_accuracy": 0.2710327901474783,
"eval_loss": 2.5625,
"eval_runtime": 73.5901,
"eval_samples_per_second": 4.946,
"eval_steps_per_second": 0.625,
"step": 42
},
{
"epoch": 0.64,
"learning_rate": 3e-07,
"loss": 2.5207,
"step": 48
},
{
"epoch": 0.64,
"eval_accuracy": 0.27128706303756606,
"eval_loss": 2.556640625,
"eval_runtime": 73.6385,
"eval_samples_per_second": 4.943,
"eval_steps_per_second": 0.625,
"step": 48
},
{
"epoch": 0.72,
"learning_rate": 3e-07,
"loss": 2.4577,
"step": 54
},
{
"epoch": 0.72,
"eval_accuracy": 0.27152475291308287,
"eval_loss": 2.548828125,
"eval_runtime": 73.672,
"eval_samples_per_second": 4.941,
"eval_steps_per_second": 0.624,
"step": 54
},
{
"epoch": 0.8,
"learning_rate": 3e-07,
"loss": 2.5614,
"step": 60
},
{
"epoch": 0.8,
"eval_accuracy": 0.27176797046012335,
"eval_loss": 2.54296875,
"eval_runtime": 73.5346,
"eval_samples_per_second": 4.95,
"eval_steps_per_second": 0.626,
"step": 60
},
{
"epoch": 0.88,
"learning_rate": 3e-07,
"loss": 2.6959,
"step": 66
},
{
"epoch": 0.88,
"eval_accuracy": 0.27219360116744423,
"eval_loss": 2.53515625,
"eval_runtime": 73.3529,
"eval_samples_per_second": 4.962,
"eval_steps_per_second": 0.627,
"step": 66
},
{
"epoch": 0.96,
"learning_rate": 3e-07,
"loss": 2.5084,
"step": 72
},
{
"epoch": 0.96,
"eval_accuracy": 0.2725086784442921,
"eval_loss": 2.529296875,
"eval_runtime": 73.6241,
"eval_samples_per_second": 4.944,
"eval_steps_per_second": 0.625,
"step": 72
},
{
"epoch": 1.0,
"step": 75,
"total_flos": 4974649540608.0,
"train_loss": 2.57614501953125,
"train_runtime": 16094.8553,
"train_samples_per_second": 0.037,
"train_steps_per_second": 0.005
}
],
"max_steps": 75,
"num_train_epochs": 1,
"total_flos": 4974649540608.0,
"trial_name": null,
"trial_params": null
}