20230826022810 / trainer_state.json
dkqjrm's picture
End of training
4083c33
raw
history blame contribute delete
No virus
28.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 80.0,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5,
"eval_loss": 0.4473404288291931,
"eval_runtime": 2.9685,
"eval_samples_per_second": 33.687,
"eval_steps_per_second": 4.379,
"step": 25
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.5,
"epoch": 1.0,
"step": 25
},
{
"epoch": 2.0,
"eval_accuracy": 0.6,
"eval_loss": 0.37501347064971924,
"eval_runtime": 3.0237,
"eval_samples_per_second": 33.072,
"eval_steps_per_second": 4.299,
"step": 50
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.6,
"epoch": 2.0,
"step": 50
},
{
"epoch": 3.0,
"eval_accuracy": 0.63,
"eval_loss": 0.3427189886569977,
"eval_runtime": 3.0795,
"eval_samples_per_second": 32.473,
"eval_steps_per_second": 4.221,
"step": 75
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.63,
"epoch": 3.0,
"step": 75
},
{
"epoch": 4.0,
"eval_accuracy": 0.63,
"eval_loss": 0.2967395484447479,
"eval_runtime": 3.1247,
"eval_samples_per_second": 32.003,
"eval_steps_per_second": 4.16,
"step": 100
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.63,
"epoch": 4.0,
"step": 100
},
{
"epoch": 5.0,
"eval_accuracy": 0.57,
"eval_loss": 0.2981172800064087,
"eval_runtime": 3.1551,
"eval_samples_per_second": 31.695,
"eval_steps_per_second": 4.12,
"step": 125
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.63,
"epoch": 5.0,
"step": 125
},
{
"epoch": 6.0,
"eval_accuracy": 0.56,
"eval_loss": 0.32641762495040894,
"eval_runtime": 3.1755,
"eval_samples_per_second": 31.491,
"eval_steps_per_second": 4.094,
"step": 150
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.63,
"epoch": 6.0,
"step": 150
},
{
"epoch": 7.0,
"eval_accuracy": 0.58,
"eval_loss": 0.2917565405368805,
"eval_runtime": 3.1839,
"eval_samples_per_second": 31.408,
"eval_steps_per_second": 4.083,
"step": 175
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.63,
"epoch": 7.0,
"step": 175
},
{
"epoch": 8.0,
"eval_accuracy": 0.66,
"eval_loss": 0.30622953176498413,
"eval_runtime": 3.1953,
"eval_samples_per_second": 31.296,
"eval_steps_per_second": 4.068,
"step": 200
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 8.0,
"step": 200
},
{
"epoch": 9.0,
"eval_accuracy": 0.58,
"eval_loss": 0.2885332703590393,
"eval_runtime": 3.2033,
"eval_samples_per_second": 31.218,
"eval_steps_per_second": 4.058,
"step": 225
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 9.0,
"step": 225
},
{
"epoch": 10.0,
"eval_accuracy": 0.6,
"eval_loss": 0.288424015045166,
"eval_runtime": 3.2092,
"eval_samples_per_second": 31.16,
"eval_steps_per_second": 4.051,
"step": 250
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 10.0,
"step": 250
},
{
"epoch": 11.0,
"eval_accuracy": 0.55,
"eval_loss": 0.2963046133518219,
"eval_runtime": 3.2093,
"eval_samples_per_second": 31.16,
"eval_steps_per_second": 4.051,
"step": 275
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 11.0,
"step": 275
},
{
"epoch": 12.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28954315185546875,
"eval_runtime": 3.2079,
"eval_samples_per_second": 31.173,
"eval_steps_per_second": 4.052,
"step": 300
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 12.0,
"step": 300
},
{
"epoch": 13.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28725749254226685,
"eval_runtime": 3.2054,
"eval_samples_per_second": 31.197,
"eval_steps_per_second": 4.056,
"step": 325
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 13.0,
"step": 325
},
{
"epoch": 14.0,
"eval_accuracy": 0.58,
"eval_loss": 0.28840315341949463,
"eval_runtime": 3.2071,
"eval_samples_per_second": 31.181,
"eval_steps_per_second": 4.053,
"step": 350
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 14.0,
"step": 350
},
{
"epoch": 15.0,
"eval_accuracy": 0.59,
"eval_loss": 0.2870934009552002,
"eval_runtime": 3.2042,
"eval_samples_per_second": 31.209,
"eval_steps_per_second": 4.057,
"step": 375
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 15.0,
"step": 375
},
{
"epoch": 16.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28593483567237854,
"eval_runtime": 3.1986,
"eval_samples_per_second": 31.264,
"eval_steps_per_second": 4.064,
"step": 400
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 16.0,
"step": 400
},
{
"epoch": 17.0,
"eval_accuracy": 0.53,
"eval_loss": 0.29119229316711426,
"eval_runtime": 3.2008,
"eval_samples_per_second": 31.242,
"eval_steps_per_second": 4.061,
"step": 425
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 17.0,
"step": 425
},
{
"epoch": 18.0,
"eval_accuracy": 0.61,
"eval_loss": 0.2840566337108612,
"eval_runtime": 3.2065,
"eval_samples_per_second": 31.186,
"eval_steps_per_second": 4.054,
"step": 450
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 18.0,
"step": 450
},
{
"epoch": 19.0,
"eval_accuracy": 0.61,
"eval_loss": 0.28335732221603394,
"eval_runtime": 3.2057,
"eval_samples_per_second": 31.195,
"eval_steps_per_second": 4.055,
"step": 475
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 19.0,
"step": 475
},
{
"epoch": 20.0,
"learning_rate": 0.0075,
"loss": 0.5493,
"step": 500
},
{
"epoch": 20.0,
"eval_accuracy": 0.64,
"eval_loss": 0.2825382947921753,
"eval_runtime": 3.2031,
"eval_samples_per_second": 31.22,
"eval_steps_per_second": 4.059,
"step": 500
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 20.0,
"step": 500
},
{
"epoch": 21.0,
"eval_accuracy": 0.62,
"eval_loss": 0.28468167781829834,
"eval_runtime": 3.2095,
"eval_samples_per_second": 31.158,
"eval_steps_per_second": 4.051,
"step": 525
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 21.0,
"step": 525
},
{
"epoch": 22.0,
"eval_accuracy": 0.62,
"eval_loss": 0.2782175540924072,
"eval_runtime": 3.2053,
"eval_samples_per_second": 31.198,
"eval_steps_per_second": 4.056,
"step": 550
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 22.0,
"step": 550
},
{
"epoch": 23.0,
"eval_accuracy": 0.62,
"eval_loss": 0.2759018838405609,
"eval_runtime": 3.2029,
"eval_samples_per_second": 31.221,
"eval_steps_per_second": 4.059,
"step": 575
},
{
"best_epoch": 7,
"best_eval_accuracy": 0.66,
"epoch": 23.0,
"step": 575
},
{
"epoch": 24.0,
"eval_accuracy": 0.67,
"eval_loss": 0.27502644062042236,
"eval_runtime": 3.2041,
"eval_samples_per_second": 31.21,
"eval_steps_per_second": 4.057,
"step": 600
},
{
"best_epoch": 23,
"best_eval_accuracy": 0.67,
"epoch": 24.0,
"step": 600
},
{
"epoch": 25.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2744516432285309,
"eval_runtime": 3.2088,
"eval_samples_per_second": 31.164,
"eval_steps_per_second": 4.051,
"step": 625
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 25.0,
"step": 625
},
{
"epoch": 26.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2720719873905182,
"eval_runtime": 3.2072,
"eval_samples_per_second": 31.18,
"eval_steps_per_second": 4.053,
"step": 650
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 26.0,
"step": 650
},
{
"epoch": 27.0,
"eval_accuracy": 0.65,
"eval_loss": 0.2727718949317932,
"eval_runtime": 3.2062,
"eval_samples_per_second": 31.189,
"eval_steps_per_second": 4.055,
"step": 675
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 27.0,
"step": 675
},
{
"epoch": 28.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2848474383354187,
"eval_runtime": 3.2049,
"eval_samples_per_second": 31.202,
"eval_steps_per_second": 4.056,
"step": 700
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 28.0,
"step": 700
},
{
"epoch": 29.0,
"eval_accuracy": 0.65,
"eval_loss": 0.2726779580116272,
"eval_runtime": 3.2066,
"eval_samples_per_second": 31.185,
"eval_steps_per_second": 4.054,
"step": 725
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 29.0,
"step": 725
},
{
"epoch": 30.0,
"eval_accuracy": 0.66,
"eval_loss": 0.273853600025177,
"eval_runtime": 3.2057,
"eval_samples_per_second": 31.194,
"eval_steps_per_second": 4.055,
"step": 750
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 30.0,
"step": 750
},
{
"epoch": 31.0,
"eval_accuracy": 0.66,
"eval_loss": 0.271501362323761,
"eval_runtime": 3.2048,
"eval_samples_per_second": 31.204,
"eval_steps_per_second": 4.056,
"step": 775
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 31.0,
"step": 775
},
{
"epoch": 32.0,
"eval_accuracy": 0.67,
"eval_loss": 0.2950344383716583,
"eval_runtime": 3.2044,
"eval_samples_per_second": 31.207,
"eval_steps_per_second": 4.057,
"step": 800
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 32.0,
"step": 800
},
{
"epoch": 33.0,
"eval_accuracy": 0.68,
"eval_loss": 0.27638041973114014,
"eval_runtime": 3.2097,
"eval_samples_per_second": 31.156,
"eval_steps_per_second": 4.05,
"step": 825
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 33.0,
"step": 825
},
{
"epoch": 34.0,
"eval_accuracy": 0.68,
"eval_loss": 0.26928141713142395,
"eval_runtime": 3.206,
"eval_samples_per_second": 31.191,
"eval_steps_per_second": 4.055,
"step": 850
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 34.0,
"step": 850
},
{
"epoch": 35.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2686476409435272,
"eval_runtime": 3.2046,
"eval_samples_per_second": 31.205,
"eval_steps_per_second": 4.057,
"step": 875
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 35.0,
"step": 875
},
{
"epoch": 36.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2793463468551636,
"eval_runtime": 3.2054,
"eval_samples_per_second": 31.197,
"eval_steps_per_second": 4.056,
"step": 900
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 36.0,
"step": 900
},
{
"epoch": 37.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2699900269508362,
"eval_runtime": 3.2061,
"eval_samples_per_second": 31.191,
"eval_steps_per_second": 4.055,
"step": 925
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 37.0,
"step": 925
},
{
"epoch": 38.0,
"eval_accuracy": 0.68,
"eval_loss": 0.27441737055778503,
"eval_runtime": 3.2047,
"eval_samples_per_second": 31.205,
"eval_steps_per_second": 4.057,
"step": 950
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 38.0,
"step": 950
},
{
"epoch": 39.0,
"eval_accuracy": 0.71,
"eval_loss": 0.27893561124801636,
"eval_runtime": 3.2019,
"eval_samples_per_second": 31.232,
"eval_steps_per_second": 4.06,
"step": 975
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 39.0,
"step": 975
},
{
"epoch": 40.0,
"learning_rate": 0.005,
"loss": 0.4987,
"step": 1000
},
{
"epoch": 40.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2756670415401459,
"eval_runtime": 3.2056,
"eval_samples_per_second": 31.195,
"eval_steps_per_second": 4.055,
"step": 1000
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 40.0,
"step": 1000
},
{
"epoch": 41.0,
"eval_accuracy": 0.69,
"eval_loss": 0.27051329612731934,
"eval_runtime": 3.206,
"eval_samples_per_second": 31.192,
"eval_steps_per_second": 4.055,
"step": 1025
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 41.0,
"step": 1025
},
{
"epoch": 42.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2836475074291229,
"eval_runtime": 3.2118,
"eval_samples_per_second": 31.135,
"eval_steps_per_second": 4.048,
"step": 1050
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 42.0,
"step": 1050
},
{
"epoch": 43.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28077641129493713,
"eval_runtime": 3.2076,
"eval_samples_per_second": 31.176,
"eval_steps_per_second": 4.053,
"step": 1075
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 43.0,
"step": 1075
},
{
"epoch": 44.0,
"eval_accuracy": 0.71,
"eval_loss": 0.27339887619018555,
"eval_runtime": 3.2087,
"eval_samples_per_second": 31.165,
"eval_steps_per_second": 4.052,
"step": 1100
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 44.0,
"step": 1100
},
{
"epoch": 45.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2703300416469574,
"eval_runtime": 3.2077,
"eval_samples_per_second": 31.175,
"eval_steps_per_second": 4.053,
"step": 1125
},
{
"best_epoch": 38,
"best_eval_accuracy": 0.71,
"epoch": 45.0,
"step": 1125
},
{
"epoch": 46.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2787483334541321,
"eval_runtime": 3.2167,
"eval_samples_per_second": 31.088,
"eval_steps_per_second": 4.041,
"step": 1150
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 46.0,
"step": 1150
},
{
"epoch": 47.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2683820128440857,
"eval_runtime": 3.2087,
"eval_samples_per_second": 31.165,
"eval_steps_per_second": 4.051,
"step": 1175
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 47.0,
"step": 1175
},
{
"epoch": 48.0,
"eval_accuracy": 0.7,
"eval_loss": 0.27374398708343506,
"eval_runtime": 3.2081,
"eval_samples_per_second": 31.171,
"eval_steps_per_second": 4.052,
"step": 1200
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 48.0,
"step": 1200
},
{
"epoch": 49.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2792060375213623,
"eval_runtime": 3.2041,
"eval_samples_per_second": 31.21,
"eval_steps_per_second": 4.057,
"step": 1225
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 49.0,
"step": 1225
},
{
"epoch": 50.0,
"eval_accuracy": 0.71,
"eval_loss": 0.273674875497818,
"eval_runtime": 3.2047,
"eval_samples_per_second": 31.205,
"eval_steps_per_second": 4.057,
"step": 1250
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 50.0,
"step": 1250
},
{
"epoch": 51.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2722756266593933,
"eval_runtime": 3.2013,
"eval_samples_per_second": 31.238,
"eval_steps_per_second": 4.061,
"step": 1275
},
{
"best_epoch": 45,
"best_eval_accuracy": 0.72,
"epoch": 51.0,
"step": 1275
},
{
"epoch": 52.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2725033164024353,
"eval_runtime": 3.2035,
"eval_samples_per_second": 31.216,
"eval_steps_per_second": 4.058,
"step": 1300
},
{
"best_epoch": 51,
"best_eval_accuracy": 0.73,
"epoch": 52.0,
"step": 1300
},
{
"epoch": 53.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2722368836402893,
"eval_runtime": 3.207,
"eval_samples_per_second": 31.181,
"eval_steps_per_second": 4.054,
"step": 1325
},
{
"best_epoch": 51,
"best_eval_accuracy": 0.73,
"epoch": 53.0,
"step": 1325
},
{
"epoch": 54.0,
"eval_accuracy": 0.7,
"eval_loss": 0.28004658222198486,
"eval_runtime": 3.2057,
"eval_samples_per_second": 31.194,
"eval_steps_per_second": 4.055,
"step": 1350
},
{
"best_epoch": 51,
"best_eval_accuracy": 0.73,
"epoch": 54.0,
"step": 1350
},
{
"epoch": 55.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2768723666667938,
"eval_runtime": 3.205,
"eval_samples_per_second": 31.201,
"eval_steps_per_second": 4.056,
"step": 1375
},
{
"best_epoch": 51,
"best_eval_accuracy": 0.73,
"epoch": 55.0,
"step": 1375
},
{
"epoch": 56.0,
"eval_accuracy": 0.76,
"eval_loss": 0.27720382809638977,
"eval_runtime": 3.2049,
"eval_samples_per_second": 31.202,
"eval_steps_per_second": 4.056,
"step": 1400
},
{
"best_epoch": 55,
"best_eval_accuracy": 0.76,
"epoch": 56.0,
"step": 1400
},
{
"epoch": 57.0,
"eval_accuracy": 0.77,
"eval_loss": 0.27145129442214966,
"eval_runtime": 3.2036,
"eval_samples_per_second": 31.215,
"eval_steps_per_second": 4.058,
"step": 1425
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 57.0,
"step": 1425
},
{
"epoch": 58.0,
"eval_accuracy": 0.75,
"eval_loss": 0.27938613295555115,
"eval_runtime": 3.2025,
"eval_samples_per_second": 31.226,
"eval_steps_per_second": 4.059,
"step": 1450
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 58.0,
"step": 1450
},
{
"epoch": 59.0,
"eval_accuracy": 0.73,
"eval_loss": 0.277148574590683,
"eval_runtime": 3.2053,
"eval_samples_per_second": 31.198,
"eval_steps_per_second": 4.056,
"step": 1475
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 59.0,
"step": 1475
},
{
"epoch": 60.0,
"learning_rate": 0.0025,
"loss": 0.447,
"step": 1500
},
{
"epoch": 60.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2797820270061493,
"eval_runtime": 3.2029,
"eval_samples_per_second": 31.221,
"eval_steps_per_second": 4.059,
"step": 1500
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 60.0,
"step": 1500
},
{
"epoch": 61.0,
"eval_accuracy": 0.74,
"eval_loss": 0.2717023193836212,
"eval_runtime": 3.2014,
"eval_samples_per_second": 31.236,
"eval_steps_per_second": 4.061,
"step": 1525
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 61.0,
"step": 1525
},
{
"epoch": 62.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2990992069244385,
"eval_runtime": 3.2016,
"eval_samples_per_second": 31.234,
"eval_steps_per_second": 4.06,
"step": 1550
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 62.0,
"step": 1550
},
{
"epoch": 63.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2718846797943115,
"eval_runtime": 3.2014,
"eval_samples_per_second": 31.236,
"eval_steps_per_second": 4.061,
"step": 1575
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 63.0,
"step": 1575
},
{
"epoch": 64.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2761527895927429,
"eval_runtime": 3.2015,
"eval_samples_per_second": 31.235,
"eval_steps_per_second": 4.061,
"step": 1600
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 64.0,
"step": 1600
},
{
"epoch": 65.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2833251953125,
"eval_runtime": 3.2011,
"eval_samples_per_second": 31.24,
"eval_steps_per_second": 4.061,
"step": 1625
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 65.0,
"step": 1625
},
{
"epoch": 66.0,
"eval_accuracy": 0.74,
"eval_loss": 0.2772473096847534,
"eval_runtime": 3.2001,
"eval_samples_per_second": 31.249,
"eval_steps_per_second": 4.062,
"step": 1650
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 66.0,
"step": 1650
},
{
"epoch": 67.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2806840240955353,
"eval_runtime": 3.2008,
"eval_samples_per_second": 31.242,
"eval_steps_per_second": 4.061,
"step": 1675
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 67.0,
"step": 1675
},
{
"epoch": 68.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2740679979324341,
"eval_runtime": 3.1997,
"eval_samples_per_second": 31.253,
"eval_steps_per_second": 4.063,
"step": 1700
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 68.0,
"step": 1700
},
{
"epoch": 69.0,
"eval_accuracy": 0.72,
"eval_loss": 0.27646538615226746,
"eval_runtime": 3.2009,
"eval_samples_per_second": 31.241,
"eval_steps_per_second": 4.061,
"step": 1725
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 69.0,
"step": 1725
},
{
"epoch": 70.0,
"eval_accuracy": 0.73,
"eval_loss": 0.27863335609436035,
"eval_runtime": 3.2011,
"eval_samples_per_second": 31.239,
"eval_steps_per_second": 4.061,
"step": 1750
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 70.0,
"step": 1750
},
{
"epoch": 71.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2795208692550659,
"eval_runtime": 3.2,
"eval_samples_per_second": 31.25,
"eval_steps_per_second": 4.062,
"step": 1775
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 71.0,
"step": 1775
},
{
"epoch": 72.0,
"eval_accuracy": 0.74,
"eval_loss": 0.27518370747566223,
"eval_runtime": 3.2014,
"eval_samples_per_second": 31.236,
"eval_steps_per_second": 4.061,
"step": 1800
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 72.0,
"step": 1800
},
{
"epoch": 73.0,
"eval_accuracy": 0.71,
"eval_loss": 0.283783495426178,
"eval_runtime": 3.2004,
"eval_samples_per_second": 31.246,
"eval_steps_per_second": 4.062,
"step": 1825
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 73.0,
"step": 1825
},
{
"epoch": 74.0,
"eval_accuracy": 0.74,
"eval_loss": 0.2762831747531891,
"eval_runtime": 3.2012,
"eval_samples_per_second": 31.239,
"eval_steps_per_second": 4.061,
"step": 1850
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 74.0,
"step": 1850
},
{
"epoch": 75.0,
"eval_accuracy": 0.73,
"eval_loss": 0.27643781900405884,
"eval_runtime": 3.1996,
"eval_samples_per_second": 31.254,
"eval_steps_per_second": 4.063,
"step": 1875
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 75.0,
"step": 1875
},
{
"epoch": 76.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2755553722381592,
"eval_runtime": 3.1982,
"eval_samples_per_second": 31.268,
"eval_steps_per_second": 4.065,
"step": 1900
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 76.0,
"step": 1900
},
{
"epoch": 77.0,
"eval_accuracy": 0.74,
"eval_loss": 0.2737685441970825,
"eval_runtime": 3.2108,
"eval_samples_per_second": 31.145,
"eval_steps_per_second": 4.049,
"step": 1925
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 77.0,
"step": 1925
},
{
"epoch": 78.0,
"eval_accuracy": 0.74,
"eval_loss": 0.27428507804870605,
"eval_runtime": 3.2072,
"eval_samples_per_second": 31.18,
"eval_steps_per_second": 4.053,
"step": 1950
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 78.0,
"step": 1950
},
{
"epoch": 79.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2778934836387634,
"eval_runtime": 3.2047,
"eval_samples_per_second": 31.204,
"eval_steps_per_second": 4.056,
"step": 1975
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 79.0,
"step": 1975
},
{
"epoch": 80.0,
"learning_rate": 0.0,
"loss": 0.4199,
"step": 2000
},
{
"epoch": 80.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2766960859298706,
"eval_runtime": 3.1925,
"eval_samples_per_second": 31.324,
"eval_steps_per_second": 4.072,
"step": 2000
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.77,
"epoch": 80.0,
"step": 2000
},
{
"epoch": 80.0,
"step": 2000,
"total_flos": 2.9821702864896e+16,
"train_loss": 0.4787121353149414,
"train_runtime": 1702.5964,
"train_samples_per_second": 18.795,
"train_steps_per_second": 1.175
}
],
"max_steps": 2000,
"num_train_epochs": 80,
"total_flos": 2.9821702864896e+16,
"trial_name": null,
"trial_params": null
}