nfliu's picture
End of training
c5e08b8
{
"best_metric": 0.5417497754096985,
"best_model_checkpoint": "./MiniLMv2-L6-H768-distilled-from-RoBERTa-Large_boolq/checkpoint-1250",
"epoch": 5.0,
"global_step": 1475,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.85,
"eval_accuracy": 0.618960244648318,
"eval_loss": 0.6579468846321106,
"eval_runtime": 18.5057,
"eval_samples_per_second": 176.702,
"eval_steps_per_second": 22.101,
"step": 250
},
{
"epoch": 1.69,
"learning_rate": 6.610169491525424e-06,
"loss": 0.6352,
"step": 500
},
{
"epoch": 1.69,
"eval_accuracy": 0.6840978593272171,
"eval_loss": 0.5906558632850647,
"eval_runtime": 18.6795,
"eval_samples_per_second": 175.058,
"eval_steps_per_second": 21.896,
"step": 500
},
{
"epoch": 2.54,
"eval_accuracy": 0.7195718654434251,
"eval_loss": 0.5612974166870117,
"eval_runtime": 18.6317,
"eval_samples_per_second": 175.507,
"eval_steps_per_second": 21.952,
"step": 750
},
{
"epoch": 3.39,
"learning_rate": 3.2203389830508473e-06,
"loss": 0.535,
"step": 1000
},
{
"epoch": 3.39,
"eval_accuracy": 0.7373088685015291,
"eval_loss": 0.5444376468658447,
"eval_runtime": 18.6673,
"eval_samples_per_second": 175.173,
"eval_steps_per_second": 21.91,
"step": 1000
},
{
"epoch": 4.24,
"eval_accuracy": 0.7379204892966361,
"eval_loss": 0.5417497754096985,
"eval_runtime": 18.6845,
"eval_samples_per_second": 175.012,
"eval_steps_per_second": 21.89,
"step": 1250
},
{
"epoch": 5.0,
"step": 1475,
"total_flos": 6243850835650560.0,
"train_loss": 0.5471283062433793,
"train_runtime": 1069.2073,
"train_samples_per_second": 44.084,
"train_steps_per_second": 1.38
}
],
"max_steps": 1475,
"num_train_epochs": 5,
"total_flos": 6243850835650560.0,
"trial_name": null,
"trial_params": null
}