gokuls's picture
End of training
f896a26
{
"best_metric": 0.9328168630599976,
"best_model_checkpoint": "distilbert_sa_GLUE_Experiment_data_aug_mnli_96/checkpoint-31440",
"epoch": 6.0,
"global_step": 188640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.900028625954199e-05,
"loss": 0.9142,
"step": 31440
},
{
"epoch": 1.0,
"eval_accuracy": 0.5686194600101885,
"eval_loss": 0.9328168630599976,
"eval_runtime": 6.4992,
"eval_samples_per_second": 1510.195,
"eval_steps_per_second": 6.001,
"step": 31440
},
{
"epoch": 2.0,
"learning_rate": 4.8000731552162856e-05,
"loss": 0.8099,
"step": 62880
},
{
"epoch": 2.0,
"eval_accuracy": 0.5752419765664799,
"eval_loss": 0.9523168802261353,
"eval_runtime": 6.3789,
"eval_samples_per_second": 1538.662,
"eval_steps_per_second": 6.114,
"step": 62880
},
{
"epoch": 3.0,
"learning_rate": 4.7001145038167944e-05,
"loss": 0.7371,
"step": 94320
},
{
"epoch": 3.0,
"eval_accuracy": 0.573713703515028,
"eval_loss": 1.007168173789978,
"eval_runtime": 6.2854,
"eval_samples_per_second": 1561.551,
"eval_steps_per_second": 6.205,
"step": 94320
},
{
"epoch": 4.0,
"learning_rate": 4.6001526717557254e-05,
"loss": 0.6756,
"step": 125760
},
{
"epoch": 4.0,
"eval_accuracy": 0.5750382068262863,
"eval_loss": 1.060626745223999,
"eval_runtime": 6.3969,
"eval_samples_per_second": 1534.325,
"eval_steps_per_second": 6.097,
"step": 125760
},
{
"epoch": 5.0,
"learning_rate": 4.500194020356234e-05,
"loss": 0.6229,
"step": 157200
},
{
"epoch": 5.0,
"eval_accuracy": 0.5739174732552216,
"eval_loss": 1.111612319946289,
"eval_runtime": 6.3646,
"eval_samples_per_second": 1542.126,
"eval_steps_per_second": 6.128,
"step": 157200
},
{
"epoch": 6.0,
"learning_rate": 4.400232188295166e-05,
"loss": 0.5784,
"step": 188640
},
{
"epoch": 6.0,
"eval_accuracy": 0.5795211411105451,
"eval_loss": 1.1395729780197144,
"eval_runtime": 6.2786,
"eval_samples_per_second": 1563.255,
"eval_steps_per_second": 6.212,
"step": 188640
},
{
"epoch": 6.0,
"step": 188640,
"total_flos": 2.8138783182422016e+17,
"train_loss": 0.7230182159139101,
"train_runtime": 46377.2172,
"train_samples_per_second": 8677.28,
"train_steps_per_second": 33.896
}
],
"max_steps": 1572000,
"num_train_epochs": 50,
"total_flos": 2.8138783182422016e+17,
"trial_name": null,
"trial_params": null
}