20230826035826 / trainer_state.json
dkqjrm's picture
End of training
14dd094
raw
history blame contribute delete
No virus
28.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 80.0,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.4,
"eval_loss": 0.32287806272506714,
"eval_runtime": 2.9676,
"eval_samples_per_second": 33.697,
"eval_steps_per_second": 4.381,
"step": 25
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.4,
"epoch": 1.0,
"step": 25
},
{
"epoch": 2.0,
"eval_accuracy": 0.63,
"eval_loss": 0.35066673159599304,
"eval_runtime": 3.0298,
"eval_samples_per_second": 33.006,
"eval_steps_per_second": 4.291,
"step": 50
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 2.0,
"step": 50
},
{
"epoch": 3.0,
"eval_accuracy": 0.39,
"eval_loss": 0.31648871302604675,
"eval_runtime": 3.0781,
"eval_samples_per_second": 32.487,
"eval_steps_per_second": 4.223,
"step": 75
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 3.0,
"step": 75
},
{
"epoch": 4.0,
"eval_accuracy": 0.59,
"eval_loss": 0.31587904691696167,
"eval_runtime": 3.1134,
"eval_samples_per_second": 32.119,
"eval_steps_per_second": 4.176,
"step": 100
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 4.0,
"step": 100
},
{
"epoch": 5.0,
"eval_accuracy": 0.35,
"eval_loss": 0.3276338279247284,
"eval_runtime": 3.1438,
"eval_samples_per_second": 31.809,
"eval_steps_per_second": 4.135,
"step": 125
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 5.0,
"step": 125
},
{
"epoch": 6.0,
"eval_accuracy": 0.37,
"eval_loss": 0.3255067765712738,
"eval_runtime": 3.1564,
"eval_samples_per_second": 31.681,
"eval_steps_per_second": 4.119,
"step": 150
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 6.0,
"step": 150
},
{
"epoch": 7.0,
"eval_accuracy": 0.63,
"eval_loss": 0.2893008887767792,
"eval_runtime": 3.169,
"eval_samples_per_second": 31.555,
"eval_steps_per_second": 4.102,
"step": 175
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 7.0,
"step": 175
},
{
"epoch": 8.0,
"eval_accuracy": 0.63,
"eval_loss": 0.30655354261398315,
"eval_runtime": 3.1806,
"eval_samples_per_second": 31.441,
"eval_steps_per_second": 4.087,
"step": 200
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.63,
"epoch": 8.0,
"step": 200
},
{
"epoch": 9.0,
"eval_accuracy": 0.64,
"eval_loss": 0.3015369772911072,
"eval_runtime": 3.1746,
"eval_samples_per_second": 31.5,
"eval_steps_per_second": 4.095,
"step": 225
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 9.0,
"step": 225
},
{
"epoch": 10.0,
"eval_accuracy": 0.62,
"eval_loss": 0.293341726064682,
"eval_runtime": 3.1847,
"eval_samples_per_second": 31.4,
"eval_steps_per_second": 4.082,
"step": 250
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 10.0,
"step": 250
},
{
"epoch": 11.0,
"eval_accuracy": 0.45,
"eval_loss": 0.29533302783966064,
"eval_runtime": 3.1804,
"eval_samples_per_second": 31.443,
"eval_steps_per_second": 4.088,
"step": 275
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 11.0,
"step": 275
},
{
"epoch": 12.0,
"eval_accuracy": 0.62,
"eval_loss": 0.29426100850105286,
"eval_runtime": 3.1802,
"eval_samples_per_second": 31.444,
"eval_steps_per_second": 4.088,
"step": 300
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 12.0,
"step": 300
},
{
"epoch": 13.0,
"eval_accuracy": 0.62,
"eval_loss": 0.28667622804641724,
"eval_runtime": 3.1818,
"eval_samples_per_second": 31.429,
"eval_steps_per_second": 4.086,
"step": 325
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 13.0,
"step": 325
},
{
"epoch": 14.0,
"eval_accuracy": 0.59,
"eval_loss": 0.28816643357276917,
"eval_runtime": 3.183,
"eval_samples_per_second": 31.417,
"eval_steps_per_second": 4.084,
"step": 350
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 14.0,
"step": 350
},
{
"epoch": 15.0,
"eval_accuracy": 0.63,
"eval_loss": 0.29221782088279724,
"eval_runtime": 3.1833,
"eval_samples_per_second": 31.414,
"eval_steps_per_second": 4.084,
"step": 375
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 15.0,
"step": 375
},
{
"epoch": 16.0,
"eval_accuracy": 0.59,
"eval_loss": 0.28952255845069885,
"eval_runtime": 3.1873,
"eval_samples_per_second": 31.374,
"eval_steps_per_second": 4.079,
"step": 400
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.64,
"epoch": 16.0,
"step": 400
},
{
"epoch": 17.0,
"eval_accuracy": 0.65,
"eval_loss": 0.29009494185447693,
"eval_runtime": 3.1886,
"eval_samples_per_second": 31.362,
"eval_steps_per_second": 4.077,
"step": 425
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 17.0,
"step": 425
},
{
"epoch": 18.0,
"eval_accuracy": 0.64,
"eval_loss": 0.28774628043174744,
"eval_runtime": 3.1894,
"eval_samples_per_second": 31.354,
"eval_steps_per_second": 4.076,
"step": 450
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 18.0,
"step": 450
},
{
"epoch": 19.0,
"eval_accuracy": 0.6,
"eval_loss": 0.2908971309661865,
"eval_runtime": 3.1888,
"eval_samples_per_second": 31.36,
"eval_steps_per_second": 4.077,
"step": 475
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 19.0,
"step": 475
},
{
"epoch": 20.0,
"learning_rate": 0.0075,
"loss": 0.5537,
"step": 500
},
{
"epoch": 20.0,
"eval_accuracy": 0.62,
"eval_loss": 0.2870675325393677,
"eval_runtime": 3.1962,
"eval_samples_per_second": 31.287,
"eval_steps_per_second": 4.067,
"step": 500
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 20.0,
"step": 500
},
{
"epoch": 21.0,
"eval_accuracy": 0.61,
"eval_loss": 0.28545090556144714,
"eval_runtime": 3.1985,
"eval_samples_per_second": 31.264,
"eval_steps_per_second": 4.064,
"step": 525
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 21.0,
"step": 525
},
{
"epoch": 22.0,
"eval_accuracy": 0.64,
"eval_loss": 0.286283940076828,
"eval_runtime": 3.1885,
"eval_samples_per_second": 31.363,
"eval_steps_per_second": 4.077,
"step": 550
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 22.0,
"step": 550
},
{
"epoch": 23.0,
"eval_accuracy": 0.61,
"eval_loss": 0.28589385747909546,
"eval_runtime": 3.1956,
"eval_samples_per_second": 31.293,
"eval_steps_per_second": 4.068,
"step": 575
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 23.0,
"step": 575
},
{
"epoch": 24.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28539586067199707,
"eval_runtime": 3.1953,
"eval_samples_per_second": 31.296,
"eval_steps_per_second": 4.068,
"step": 600
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 24.0,
"step": 600
},
{
"epoch": 25.0,
"eval_accuracy": 0.59,
"eval_loss": 0.2839178442955017,
"eval_runtime": 3.1925,
"eval_samples_per_second": 31.323,
"eval_steps_per_second": 4.072,
"step": 625
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 25.0,
"step": 625
},
{
"epoch": 26.0,
"eval_accuracy": 0.56,
"eval_loss": 0.2858905494213104,
"eval_runtime": 3.1947,
"eval_samples_per_second": 31.302,
"eval_steps_per_second": 4.069,
"step": 650
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 26.0,
"step": 650
},
{
"epoch": 27.0,
"eval_accuracy": 0.58,
"eval_loss": 0.28208523988723755,
"eval_runtime": 3.1973,
"eval_samples_per_second": 31.276,
"eval_steps_per_second": 4.066,
"step": 675
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 27.0,
"step": 675
},
{
"epoch": 28.0,
"eval_accuracy": 0.64,
"eval_loss": 0.2830556631088257,
"eval_runtime": 3.1983,
"eval_samples_per_second": 31.266,
"eval_steps_per_second": 4.065,
"step": 700
},
{
"best_epoch": 16,
"best_eval_accuracy": 0.65,
"epoch": 28.0,
"step": 700
},
{
"epoch": 29.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2813069522380829,
"eval_runtime": 3.2015,
"eval_samples_per_second": 31.236,
"eval_steps_per_second": 4.061,
"step": 725
},
{
"best_epoch": 28,
"best_eval_accuracy": 0.66,
"epoch": 29.0,
"step": 725
},
{
"epoch": 30.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28120794892311096,
"eval_runtime": 3.1996,
"eval_samples_per_second": 31.254,
"eval_steps_per_second": 4.063,
"step": 750
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 30.0,
"step": 750
},
{
"epoch": 31.0,
"eval_accuracy": 0.64,
"eval_loss": 0.27902308106422424,
"eval_runtime": 3.201,
"eval_samples_per_second": 31.24,
"eval_steps_per_second": 4.061,
"step": 775
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 31.0,
"step": 775
},
{
"epoch": 32.0,
"eval_accuracy": 0.64,
"eval_loss": 0.2801484763622284,
"eval_runtime": 3.2027,
"eval_samples_per_second": 31.224,
"eval_steps_per_second": 4.059,
"step": 800
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 32.0,
"step": 800
},
{
"epoch": 33.0,
"eval_accuracy": 0.65,
"eval_loss": 0.28051745891571045,
"eval_runtime": 3.1989,
"eval_samples_per_second": 31.26,
"eval_steps_per_second": 4.064,
"step": 825
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 33.0,
"step": 825
},
{
"epoch": 34.0,
"eval_accuracy": 0.64,
"eval_loss": 0.28497397899627686,
"eval_runtime": 3.1984,
"eval_samples_per_second": 31.265,
"eval_steps_per_second": 4.064,
"step": 850
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 34.0,
"step": 850
},
{
"epoch": 35.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2781234681606293,
"eval_runtime": 3.1956,
"eval_samples_per_second": 31.293,
"eval_steps_per_second": 4.068,
"step": 875
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 35.0,
"step": 875
},
{
"epoch": 36.0,
"eval_accuracy": 0.65,
"eval_loss": 0.280036598443985,
"eval_runtime": 3.1973,
"eval_samples_per_second": 31.276,
"eval_steps_per_second": 4.066,
"step": 900
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 36.0,
"step": 900
},
{
"epoch": 37.0,
"eval_accuracy": 0.64,
"eval_loss": 0.28637388348579407,
"eval_runtime": 3.1965,
"eval_samples_per_second": 31.284,
"eval_steps_per_second": 4.067,
"step": 925
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 37.0,
"step": 925
},
{
"epoch": 38.0,
"eval_accuracy": 0.65,
"eval_loss": 0.28161221742630005,
"eval_runtime": 3.1956,
"eval_samples_per_second": 31.293,
"eval_steps_per_second": 4.068,
"step": 950
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 38.0,
"step": 950
},
{
"epoch": 39.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28857657313346863,
"eval_runtime": 3.203,
"eval_samples_per_second": 31.221,
"eval_steps_per_second": 4.059,
"step": 975
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 39.0,
"step": 975
},
{
"epoch": 40.0,
"learning_rate": 0.005,
"loss": 0.5047,
"step": 1000
},
{
"epoch": 40.0,
"eval_accuracy": 0.67,
"eval_loss": 0.310051292181015,
"eval_runtime": 3.1962,
"eval_samples_per_second": 31.287,
"eval_steps_per_second": 4.067,
"step": 1000
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 40.0,
"step": 1000
},
{
"epoch": 41.0,
"eval_accuracy": 0.66,
"eval_loss": 0.28261107206344604,
"eval_runtime": 3.1977,
"eval_samples_per_second": 31.273,
"eval_steps_per_second": 4.065,
"step": 1025
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 41.0,
"step": 1025
},
{
"epoch": 42.0,
"eval_accuracy": 0.62,
"eval_loss": 0.2800942361354828,
"eval_runtime": 3.1967,
"eval_samples_per_second": 31.282,
"eval_steps_per_second": 4.067,
"step": 1050
},
{
"best_epoch": 29,
"best_eval_accuracy": 0.67,
"epoch": 42.0,
"step": 1050
},
{
"epoch": 43.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2907128632068634,
"eval_runtime": 3.1994,
"eval_samples_per_second": 31.256,
"eval_steps_per_second": 4.063,
"step": 1075
},
{
"best_epoch": 42,
"best_eval_accuracy": 0.68,
"epoch": 43.0,
"step": 1075
},
{
"epoch": 44.0,
"eval_accuracy": 0.64,
"eval_loss": 0.2894176244735718,
"eval_runtime": 3.1965,
"eval_samples_per_second": 31.284,
"eval_steps_per_second": 4.067,
"step": 1100
},
{
"best_epoch": 42,
"best_eval_accuracy": 0.68,
"epoch": 44.0,
"step": 1100
},
{
"epoch": 45.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2854782044887543,
"eval_runtime": 3.2014,
"eval_samples_per_second": 31.236,
"eval_steps_per_second": 4.061,
"step": 1125
},
{
"best_epoch": 42,
"best_eval_accuracy": 0.68,
"epoch": 45.0,
"step": 1125
},
{
"epoch": 46.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28108128905296326,
"eval_runtime": 3.1981,
"eval_samples_per_second": 31.269,
"eval_steps_per_second": 4.065,
"step": 1150
},
{
"best_epoch": 42,
"best_eval_accuracy": 0.68,
"epoch": 46.0,
"step": 1150
},
{
"epoch": 47.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2946772575378418,
"eval_runtime": 3.2021,
"eval_samples_per_second": 31.23,
"eval_steps_per_second": 4.06,
"step": 1175
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 47.0,
"step": 1175
},
{
"epoch": 48.0,
"eval_accuracy": 0.69,
"eval_loss": 0.29523247480392456,
"eval_runtime": 3.2001,
"eval_samples_per_second": 31.249,
"eval_steps_per_second": 4.062,
"step": 1200
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 48.0,
"step": 1200
},
{
"epoch": 49.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2832265794277191,
"eval_runtime": 3.2085,
"eval_samples_per_second": 31.167,
"eval_steps_per_second": 4.052,
"step": 1225
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 49.0,
"step": 1225
},
{
"epoch": 50.0,
"eval_accuracy": 0.68,
"eval_loss": 0.29542115330696106,
"eval_runtime": 3.1989,
"eval_samples_per_second": 31.261,
"eval_steps_per_second": 4.064,
"step": 1250
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 50.0,
"step": 1250
},
{
"epoch": 51.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2839832007884979,
"eval_runtime": 3.2036,
"eval_samples_per_second": 31.215,
"eval_steps_per_second": 4.058,
"step": 1275
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 51.0,
"step": 1275
},
{
"epoch": 52.0,
"eval_accuracy": 0.67,
"eval_loss": 0.30789753794670105,
"eval_runtime": 3.2016,
"eval_samples_per_second": 31.235,
"eval_steps_per_second": 4.06,
"step": 1300
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 52.0,
"step": 1300
},
{
"epoch": 53.0,
"eval_accuracy": 0.66,
"eval_loss": 0.27960124611854553,
"eval_runtime": 3.2024,
"eval_samples_per_second": 31.226,
"eval_steps_per_second": 4.059,
"step": 1325
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 53.0,
"step": 1325
},
{
"epoch": 54.0,
"eval_accuracy": 0.67,
"eval_loss": 0.286190003156662,
"eval_runtime": 3.2013,
"eval_samples_per_second": 31.237,
"eval_steps_per_second": 4.061,
"step": 1350
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 54.0,
"step": 1350
},
{
"epoch": 55.0,
"eval_accuracy": 0.69,
"eval_loss": 0.28528231382369995,
"eval_runtime": 3.2039,
"eval_samples_per_second": 31.212,
"eval_steps_per_second": 4.058,
"step": 1375
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 55.0,
"step": 1375
},
{
"epoch": 56.0,
"eval_accuracy": 0.69,
"eval_loss": 0.29692474007606506,
"eval_runtime": 3.2026,
"eval_samples_per_second": 31.225,
"eval_steps_per_second": 4.059,
"step": 1400
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 56.0,
"step": 1400
},
{
"epoch": 57.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2865641117095947,
"eval_runtime": 3.2043,
"eval_samples_per_second": 31.208,
"eval_steps_per_second": 4.057,
"step": 1425
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 57.0,
"step": 1425
},
{
"epoch": 58.0,
"eval_accuracy": 0.69,
"eval_loss": 0.28954190015792847,
"eval_runtime": 3.2033,
"eval_samples_per_second": 31.218,
"eval_steps_per_second": 4.058,
"step": 1450
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 58.0,
"step": 1450
},
{
"epoch": 59.0,
"eval_accuracy": 0.69,
"eval_loss": 0.305752158164978,
"eval_runtime": 3.2026,
"eval_samples_per_second": 31.225,
"eval_steps_per_second": 4.059,
"step": 1475
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 59.0,
"step": 1475
},
{
"epoch": 60.0,
"learning_rate": 0.0025,
"loss": 0.4502,
"step": 1500
},
{
"epoch": 60.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2998403310775757,
"eval_runtime": 3.2116,
"eval_samples_per_second": 31.137,
"eval_steps_per_second": 4.048,
"step": 1500
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 60.0,
"step": 1500
},
{
"epoch": 61.0,
"eval_accuracy": 0.69,
"eval_loss": 0.29743996262550354,
"eval_runtime": 3.2044,
"eval_samples_per_second": 31.207,
"eval_steps_per_second": 4.057,
"step": 1525
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 61.0,
"step": 1525
},
{
"epoch": 62.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2788439095020294,
"eval_runtime": 3.2048,
"eval_samples_per_second": 31.203,
"eval_steps_per_second": 4.056,
"step": 1550
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 62.0,
"step": 1550
},
{
"epoch": 63.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2882141172885895,
"eval_runtime": 3.2042,
"eval_samples_per_second": 31.209,
"eval_steps_per_second": 4.057,
"step": 1575
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 63.0,
"step": 1575
},
{
"epoch": 64.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2892961800098419,
"eval_runtime": 3.2113,
"eval_samples_per_second": 31.14,
"eval_steps_per_second": 4.048,
"step": 1600
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 64.0,
"step": 1600
},
{
"epoch": 65.0,
"eval_accuracy": 0.7,
"eval_loss": 0.28336700797080994,
"eval_runtime": 3.2095,
"eval_samples_per_second": 31.157,
"eval_steps_per_second": 4.05,
"step": 1625
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.7,
"epoch": 65.0,
"step": 1625
},
{
"epoch": 66.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2888612747192383,
"eval_runtime": 3.205,
"eval_samples_per_second": 31.201,
"eval_steps_per_second": 4.056,
"step": 1650
},
{
"best_epoch": 65,
"best_eval_accuracy": 0.72,
"epoch": 66.0,
"step": 1650
},
{
"epoch": 67.0,
"eval_accuracy": 0.73,
"eval_loss": 0.2850661873817444,
"eval_runtime": 3.2056,
"eval_samples_per_second": 31.195,
"eval_steps_per_second": 4.055,
"step": 1675
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 67.0,
"step": 1675
},
{
"epoch": 68.0,
"eval_accuracy": 0.7,
"eval_loss": 0.27734076976776123,
"eval_runtime": 3.2023,
"eval_samples_per_second": 31.228,
"eval_steps_per_second": 4.06,
"step": 1700
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 68.0,
"step": 1700
},
{
"epoch": 69.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2854936420917511,
"eval_runtime": 3.2033,
"eval_samples_per_second": 31.218,
"eval_steps_per_second": 4.058,
"step": 1725
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 69.0,
"step": 1725
},
{
"epoch": 70.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2902901768684387,
"eval_runtime": 3.2035,
"eval_samples_per_second": 31.216,
"eval_steps_per_second": 4.058,
"step": 1750
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 70.0,
"step": 1750
},
{
"epoch": 71.0,
"eval_accuracy": 0.7,
"eval_loss": 0.28505316376686096,
"eval_runtime": 3.2006,
"eval_samples_per_second": 31.244,
"eval_steps_per_second": 4.062,
"step": 1775
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 71.0,
"step": 1775
},
{
"epoch": 72.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2892363369464874,
"eval_runtime": 3.2052,
"eval_samples_per_second": 31.2,
"eval_steps_per_second": 4.056,
"step": 1800
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 72.0,
"step": 1800
},
{
"epoch": 73.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2811330556869507,
"eval_runtime": 3.2021,
"eval_samples_per_second": 31.23,
"eval_steps_per_second": 4.06,
"step": 1825
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 73.0,
"step": 1825
},
{
"epoch": 74.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2880754768848419,
"eval_runtime": 3.201,
"eval_samples_per_second": 31.241,
"eval_steps_per_second": 4.061,
"step": 1850
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 74.0,
"step": 1850
},
{
"epoch": 75.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2892223000526428,
"eval_runtime": 3.2049,
"eval_samples_per_second": 31.202,
"eval_steps_per_second": 4.056,
"step": 1875
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 75.0,
"step": 1875
},
{
"epoch": 76.0,
"eval_accuracy": 0.71,
"eval_loss": 0.2834882438182831,
"eval_runtime": 3.1993,
"eval_samples_per_second": 31.257,
"eval_steps_per_second": 4.063,
"step": 1900
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 76.0,
"step": 1900
},
{
"epoch": 77.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2799616754055023,
"eval_runtime": 3.1962,
"eval_samples_per_second": 31.287,
"eval_steps_per_second": 4.067,
"step": 1925
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 77.0,
"step": 1925
},
{
"epoch": 78.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2808959484100342,
"eval_runtime": 3.199,
"eval_samples_per_second": 31.259,
"eval_steps_per_second": 4.064,
"step": 1950
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 78.0,
"step": 1950
},
{
"epoch": 79.0,
"eval_accuracy": 0.71,
"eval_loss": 0.28005915880203247,
"eval_runtime": 3.1973,
"eval_samples_per_second": 31.277,
"eval_steps_per_second": 4.066,
"step": 1975
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 79.0,
"step": 1975
},
{
"epoch": 80.0,
"learning_rate": 0.0,
"loss": 0.4329,
"step": 2000
},
{
"epoch": 80.0,
"eval_accuracy": 0.72,
"eval_loss": 0.2806113362312317,
"eval_runtime": 3.1967,
"eval_samples_per_second": 31.282,
"eval_steps_per_second": 4.067,
"step": 2000
},
{
"best_epoch": 66,
"best_eval_accuracy": 0.73,
"epoch": 80.0,
"step": 2000
},
{
"epoch": 80.0,
"step": 2000,
"total_flos": 2.9821702864896e+16,
"train_loss": 0.4853669128417969,
"train_runtime": 1702.0431,
"train_samples_per_second": 18.801,
"train_steps_per_second": 1.175
}
],
"max_steps": 2000,
"num_train_epochs": 80,
"total_flos": 2.9821702864896e+16,
"trial_name": null,
"trial_params": null
}