electra-base-avg-2e-5-lcc / trainer_state.json
EhsanAghazadeh's picture
Add model and its dependencies
3c47de8
raw history blame
No virus
4.32 kB
{
"best_metric": 0.8575587868690491,
"best_model_checkpoint": "./electra-base-discriminator/fine_tuned_models/checkpoint-3729",
"epoch": 10.0,
"global_step": 12430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.9998212370396856e-05,
"loss": 0.5211,
"step": 1243
},
{
"epoch": 1.0,
"eval_accuracy": 0.8279955387115479,
"eval_loss": 0.3951820731163025,
"eval_runtime": 7.0965,
"eval_samples_per_second": 629.183,
"eval_steps_per_second": 78.771,
"step": 1243
},
{
"epoch": 2.0,
"learning_rate": 1.7776188773686092e-05,
"loss": 0.3301,
"step": 2486
},
{
"epoch": 2.0,
"eval_accuracy": 0.8539753556251526,
"eval_loss": 0.356336385011673,
"eval_runtime": 7.2692,
"eval_samples_per_second": 614.237,
"eval_steps_per_second": 76.9,
"step": 2486
},
{
"epoch": 3.0,
"learning_rate": 1.5554165176975332e-05,
"loss": 0.2267,
"step": 3729
},
{
"epoch": 3.0,
"eval_accuracy": 0.8575587868690491,
"eval_loss": 0.3921225965023041,
"eval_runtime": 7.1896,
"eval_samples_per_second": 621.038,
"eval_steps_per_second": 77.752,
"step": 3729
},
{
"epoch": 4.0,
"learning_rate": 1.333214158026457e-05,
"loss": 0.1557,
"step": 4972
},
{
"epoch": 4.0,
"eval_accuracy": 0.8546472787857056,
"eval_loss": 0.46923860907554626,
"eval_runtime": 7.2458,
"eval_samples_per_second": 616.217,
"eval_steps_per_second": 77.148,
"step": 4972
},
{
"epoch": 5.0,
"learning_rate": 1.1110117983553808e-05,
"loss": 0.1155,
"step": 6215
},
{
"epoch": 5.0,
"eval_accuracy": 0.8559910655021667,
"eval_loss": 0.5102965831756592,
"eval_runtime": 7.4396,
"eval_samples_per_second": 600.169,
"eval_steps_per_second": 75.139,
"step": 6215
},
{
"epoch": 6.0,
"learning_rate": 8.888094386843046e-06,
"loss": 0.0894,
"step": 7458
},
{
"epoch": 6.0,
"eval_accuracy": 0.856662929058075,
"eval_loss": 0.6104263663291931,
"eval_runtime": 7.2873,
"eval_samples_per_second": 612.707,
"eval_steps_per_second": 76.708,
"step": 7458
},
{
"epoch": 7.0,
"learning_rate": 6.666070790132285e-06,
"loss": 0.0702,
"step": 8701
},
{
"epoch": 7.0,
"eval_accuracy": 0.8533034920692444,
"eval_loss": 0.6788500547409058,
"eval_runtime": 7.0995,
"eval_samples_per_second": 628.92,
"eval_steps_per_second": 78.738,
"step": 8701
},
{
"epoch": 8.0,
"learning_rate": 4.444047193421523e-06,
"loss": 0.0594,
"step": 9944
},
{
"epoch": 8.0,
"eval_accuracy": 0.8524076342582703,
"eval_loss": 0.7277155518531799,
"eval_runtime": 7.218,
"eval_samples_per_second": 618.59,
"eval_steps_per_second": 77.445,
"step": 9944
},
{
"epoch": 9.0,
"learning_rate": 2.2220235967107616e-06,
"loss": 0.0472,
"step": 11187
},
{
"epoch": 9.0,
"eval_accuracy": 0.8546472787857056,
"eval_loss": 0.8223575949668884,
"eval_runtime": 7.1336,
"eval_samples_per_second": 625.908,
"eval_steps_per_second": 78.361,
"step": 11187
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.0379,
"step": 12430
},
{
"epoch": 10.0,
"eval_accuracy": 0.8521836400032043,
"eval_loss": 0.8311443328857422,
"eval_runtime": 7.1227,
"eval_samples_per_second": 626.866,
"eval_steps_per_second": 78.481,
"step": 12430
},
{
"epoch": 10.0,
"step": 12430,
"total_flos": 1.671959181036288e+16,
"train_loss": 0.16532029376716645,
"train_runtime": 1732.3817,
"train_samples_per_second": 229.563,
"train_steps_per_second": 7.175
}
],
"max_steps": 12430,
"num_train_epochs": 10,
"total_flos": 1.671959181036288e+16,
"trial_name": null,
"trial_params": null
}