bert-large-cased-scsmall / trainer_state.json
ZongqianLi's picture
Upload 130 files
d80431a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 37.0,
"global_step": 999,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.2262943855309169e-05,
"loss": 3.1653,
"step": 27
},
{
"epoch": 1.0,
"eval_accuracy": 0.547526790579403,
"eval_loss": 2.819357395172119,
"eval_runtime": 8.7539,
"eval_samples_per_second": 50.72,
"eval_steps_per_second": 0.228,
"step": 27
},
{
"epoch": 2.0,
"learning_rate": 1.4841962570206113e-05,
"loss": 2.7725,
"step": 54
},
{
"epoch": 2.0,
"eval_accuracy": 0.5574515806383172,
"eval_loss": 2.6864047050476074,
"eval_runtime": 7.4551,
"eval_samples_per_second": 59.556,
"eval_steps_per_second": 0.268,
"step": 54
},
{
"epoch": 3.0,
"learning_rate": 1.6350591807078892e-05,
"loss": 2.6256,
"step": 81
},
{
"epoch": 3.0,
"eval_accuracy": 0.5682560989844753,
"eval_loss": 2.602532148361206,
"eval_runtime": 7.5503,
"eval_samples_per_second": 58.806,
"eval_steps_per_second": 0.265,
"step": 81
},
{
"epoch": 4.0,
"learning_rate": 1.7420981285103056e-05,
"loss": 2.5044,
"step": 108
},
{
"epoch": 4.0,
"eval_accuracy": 0.601172152498224,
"eval_loss": 2.341265916824341,
"eval_runtime": 7.5499,
"eval_samples_per_second": 58.809,
"eval_steps_per_second": 0.265,
"step": 108
},
{
"epoch": 5.0,
"learning_rate": 1.825123986666868e-05,
"loss": 2.4348,
"step": 135
},
{
"epoch": 5.0,
"eval_accuracy": 0.6008952865794787,
"eval_loss": 2.3422300815582275,
"eval_runtime": 7.5767,
"eval_samples_per_second": 58.601,
"eval_steps_per_second": 0.264,
"step": 135
},
{
"epoch": 6.0,
"learning_rate": 1.892961052197583e-05,
"loss": 2.42,
"step": 162
},
{
"epoch": 6.0,
"eval_accuracy": 0.5931487564523698,
"eval_loss": 2.3707046508789062,
"eval_runtime": 7.5549,
"eval_samples_per_second": 58.77,
"eval_steps_per_second": 0.265,
"step": 162
},
{
"epoch": 7.0,
"learning_rate": 1.9503164738653782e-05,
"loss": 2.334,
"step": 189
},
{
"epoch": 7.0,
"eval_accuracy": 0.6161689510150978,
"eval_loss": 2.23409366607666,
"eval_runtime": 7.5922,
"eval_samples_per_second": 58.481,
"eval_steps_per_second": 0.263,
"step": 189
},
{
"epoch": 8.0,
"learning_rate": 1.9999999999999998e-05,
"loss": 2.288,
"step": 216
},
{
"epoch": 8.0,
"eval_accuracy": 0.6175385999100584,
"eval_loss": 2.2085084915161133,
"eval_runtime": 7.5018,
"eval_samples_per_second": 59.186,
"eval_steps_per_second": 0.267,
"step": 216
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 2.2954,
"step": 243
},
{
"epoch": 9.0,
"eval_accuracy": 0.6139404159640247,
"eval_loss": 2.2152740955352783,
"eval_runtime": 7.565,
"eval_samples_per_second": 58.691,
"eval_steps_per_second": 0.264,
"step": 243
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 2.2934,
"step": 270
},
{
"epoch": 10.0,
"eval_accuracy": 0.6153211306628938,
"eval_loss": 2.232224225997925,
"eval_runtime": 7.5858,
"eval_samples_per_second": 58.531,
"eval_steps_per_second": 0.264,
"step": 270
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 2.2283,
"step": 297
},
{
"epoch": 11.0,
"eval_accuracy": 0.6139165373025295,
"eval_loss": 2.231621503829956,
"eval_runtime": 6.8069,
"eval_samples_per_second": 65.228,
"eval_steps_per_second": 0.294,
"step": 297
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 2.2228,
"step": 324
},
{
"epoch": 12.0,
"eval_accuracy": 0.6216490772616965,
"eval_loss": 2.1547019481658936,
"eval_runtime": 7.5498,
"eval_samples_per_second": 58.809,
"eval_steps_per_second": 0.265,
"step": 324
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 2.2178,
"step": 351
},
{
"epoch": 13.0,
"eval_accuracy": 0.6255061354433901,
"eval_loss": 2.1324307918548584,
"eval_runtime": 7.037,
"eval_samples_per_second": 63.095,
"eval_steps_per_second": 0.284,
"step": 351
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 2.1743,
"step": 378
},
{
"epoch": 14.0,
"eval_accuracy": 0.6387313908007641,
"eval_loss": 2.018895387649536,
"eval_runtime": 7.5158,
"eval_samples_per_second": 59.076,
"eval_steps_per_second": 0.266,
"step": 378
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 2.0488,
"step": 405
},
{
"epoch": 15.0,
"eval_accuracy": 0.6706725076959508,
"eval_loss": 1.7761304378509521,
"eval_runtime": 6.6483,
"eval_samples_per_second": 66.784,
"eval_steps_per_second": 0.301,
"step": 405
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.9293,
"step": 432
},
{
"epoch": 16.0,
"eval_accuracy": 0.6976124177972629,
"eval_loss": 1.5710580348968506,
"eval_runtime": 7.0351,
"eval_samples_per_second": 63.112,
"eval_steps_per_second": 0.284,
"step": 432
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.824,
"step": 459
},
{
"epoch": 17.0,
"eval_accuracy": 0.7190793783123921,
"eval_loss": 1.4179129600524902,
"eval_runtime": 7.5577,
"eval_samples_per_second": 58.748,
"eval_steps_per_second": 0.265,
"step": 459
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.677,
"step": 486
},
{
"epoch": 18.0,
"eval_accuracy": 0.7287676129762578,
"eval_loss": 1.3340463638305664,
"eval_runtime": 7.6139,
"eval_samples_per_second": 58.315,
"eval_steps_per_second": 0.263,
"step": 486
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.5681,
"step": 513
},
{
"epoch": 19.0,
"eval_accuracy": 0.7393688419248894,
"eval_loss": 1.262519121170044,
"eval_runtime": 7.6652,
"eval_samples_per_second": 57.924,
"eval_steps_per_second": 0.261,
"step": 513
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.4292,
"step": 540
},
{
"epoch": 20.0,
"eval_accuracy": 0.7635153050702649,
"eval_loss": 1.123979091644287,
"eval_runtime": 7.5316,
"eval_samples_per_second": 58.951,
"eval_steps_per_second": 0.266,
"step": 540
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.3347,
"step": 567
},
{
"epoch": 21.0,
"eval_accuracy": 0.7732615942452775,
"eval_loss": 1.0541319847106934,
"eval_runtime": 7.5273,
"eval_samples_per_second": 58.985,
"eval_steps_per_second": 0.266,
"step": 567
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.2435,
"step": 594
},
{
"epoch": 22.0,
"eval_accuracy": 0.7811072373166627,
"eval_loss": 1.0116688013076782,
"eval_runtime": 7.6371,
"eval_samples_per_second": 58.137,
"eval_steps_per_second": 0.262,
"step": 594
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.1747,
"step": 621
},
{
"epoch": 23.0,
"eval_accuracy": 0.7842111556166677,
"eval_loss": 0.9821351766586304,
"eval_runtime": 7.6429,
"eval_samples_per_second": 58.093,
"eval_steps_per_second": 0.262,
"step": 621
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.1536,
"step": 648
},
{
"epoch": 24.0,
"eval_accuracy": 0.7867322173128384,
"eval_loss": 0.964832603931427,
"eval_runtime": 6.912,
"eval_samples_per_second": 64.236,
"eval_steps_per_second": 0.289,
"step": 648
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.0971,
"step": 675
},
{
"epoch": 25.0,
"eval_accuracy": 0.7866705302810779,
"eval_loss": 0.9783045053482056,
"eval_runtime": 7.5744,
"eval_samples_per_second": 58.618,
"eval_steps_per_second": 0.264,
"step": 675
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.0809,
"step": 702
},
{
"epoch": 26.0,
"eval_accuracy": 0.7918671517427532,
"eval_loss": 0.9400666356086731,
"eval_runtime": 7.1416,
"eval_samples_per_second": 62.171,
"eval_steps_per_second": 0.28,
"step": 702
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 1.0653,
"step": 729
},
{
"epoch": 27.0,
"eval_accuracy": 0.7834480395236599,
"eval_loss": 0.9835608005523682,
"eval_runtime": 7.5584,
"eval_samples_per_second": 58.743,
"eval_steps_per_second": 0.265,
"step": 729
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 1.0341,
"step": 756
},
{
"epoch": 28.0,
"eval_accuracy": 0.7897628339837887,
"eval_loss": 0.9648067951202393,
"eval_runtime": 7.5415,
"eval_samples_per_second": 58.874,
"eval_steps_per_second": 0.265,
"step": 756
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 1.0056,
"step": 783
},
{
"epoch": 29.0,
"eval_accuracy": 0.7985819252651742,
"eval_loss": 0.9186079502105713,
"eval_runtime": 7.5907,
"eval_samples_per_second": 58.492,
"eval_steps_per_second": 0.263,
"step": 783
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 0.9993,
"step": 810
},
{
"epoch": 30.0,
"eval_accuracy": 0.7981126511353583,
"eval_loss": 0.9101386666297913,
"eval_runtime": 7.191,
"eval_samples_per_second": 61.744,
"eval_steps_per_second": 0.278,
"step": 810
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 0.9849,
"step": 837
},
{
"epoch": 31.0,
"eval_accuracy": 0.7893089053803339,
"eval_loss": 0.9685505628585815,
"eval_runtime": 7.5619,
"eval_samples_per_second": 58.716,
"eval_steps_per_second": 0.264,
"step": 837
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 0.9799,
"step": 864
},
{
"epoch": 32.0,
"eval_accuracy": 0.7988627229152401,
"eval_loss": 0.9036659002304077,
"eval_runtime": 7.5514,
"eval_samples_per_second": 58.797,
"eval_steps_per_second": 0.265,
"step": 864
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 0.957,
"step": 891
},
{
"epoch": 33.0,
"eval_accuracy": 0.7937915742793792,
"eval_loss": 0.9258220195770264,
"eval_runtime": 7.548,
"eval_samples_per_second": 58.824,
"eval_steps_per_second": 0.265,
"step": 891
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 0.9535,
"step": 918
},
{
"epoch": 34.0,
"eval_accuracy": 0.7996154854496198,
"eval_loss": 0.8886759877204895,
"eval_runtime": 7.5277,
"eval_samples_per_second": 58.982,
"eval_steps_per_second": 0.266,
"step": 918
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 0.9343,
"step": 945
},
{
"epoch": 35.0,
"eval_accuracy": 0.8085179621980223,
"eval_loss": 0.8577904105186462,
"eval_runtime": 6.7518,
"eval_samples_per_second": 65.76,
"eval_steps_per_second": 0.296,
"step": 945
},
{
"epoch": 36.0,
"learning_rate": 2e-05,
"loss": 0.9346,
"step": 972
},
{
"epoch": 36.0,
"eval_accuracy": 0.8009545281527245,
"eval_loss": 0.8876005411148071,
"eval_runtime": 6.6328,
"eval_samples_per_second": 66.94,
"eval_steps_per_second": 0.302,
"step": 972
},
{
"epoch": 37.0,
"learning_rate": 2e-05,
"loss": 0.9187,
"step": 999
},
{
"epoch": 37.0,
"eval_accuracy": 0.8115537848605577,
"eval_loss": 0.8389037251472473,
"eval_runtime": 7.5847,
"eval_samples_per_second": 58.539,
"eval_steps_per_second": 0.264,
"step": 999
}
],
"max_steps": 1080,
"num_train_epochs": 40,
"total_flos": 311085194280960.0,
"trial_name": null,
"trial_params": null
}