2_6e-3_1_0.1 / trainer_state.json
Onutoa's picture
Model save
13a23b1
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 60.0,
"global_step": 35400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.85,
"learning_rate": 0.005915423728813559,
"loss": 0.9831,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.6217125382262997,
"eval_loss": 0.8378309011459351,
"eval_runtime": 43.2844,
"eval_samples_per_second": 75.547,
"eval_steps_per_second": 9.449,
"step": 590
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 1.0,
"step": 590
},
{
"epoch": 1.69,
"learning_rate": 0.005830847457627119,
"loss": 0.965,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.3782874617737003,
"eval_loss": 1.149783968925476,
"eval_runtime": 43.2275,
"eval_samples_per_second": 75.646,
"eval_steps_per_second": 9.462,
"step": 1180
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 2.0,
"step": 1180
},
{
"epoch": 2.54,
"learning_rate": 0.005746101694915255,
"loss": 0.8937,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6217125382262997,
"eval_loss": 1.2835785150527954,
"eval_runtime": 43.2406,
"eval_samples_per_second": 75.623,
"eval_steps_per_second": 9.459,
"step": 1770
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 3.0,
"step": 1770
},
{
"epoch": 3.39,
"learning_rate": 0.00566135593220339,
"loss": 0.9435,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.6217125382262997,
"eval_loss": 0.8481320142745972,
"eval_runtime": 43.2628,
"eval_samples_per_second": 75.585,
"eval_steps_per_second": 9.454,
"step": 2360
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 4.0,
"step": 2360
},
{
"epoch": 4.24,
"learning_rate": 0.005576779661016949,
"loss": 0.8566,
"step": 2500
},
{
"epoch": 5.0,
"eval_accuracy": 0.3834862385321101,
"eval_loss": 0.9289329648017883,
"eval_runtime": 43.2414,
"eval_samples_per_second": 75.622,
"eval_steps_per_second": 9.459,
"step": 2950
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 5.0,
"step": 2950
},
{
"epoch": 5.08,
"learning_rate": 0.005492033898305085,
"loss": 0.8917,
"step": 3000
},
{
"epoch": 5.93,
"learning_rate": 0.00540728813559322,
"loss": 0.8868,
"step": 3500
},
{
"epoch": 6.0,
"eval_accuracy": 0.591743119266055,
"eval_loss": 0.651913583278656,
"eval_runtime": 43.1168,
"eval_samples_per_second": 75.84,
"eval_steps_per_second": 9.486,
"step": 3540
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 6.0,
"step": 3540
},
{
"epoch": 6.78,
"learning_rate": 0.0053225423728813556,
"loss": 0.8905,
"step": 4000
},
{
"epoch": 7.0,
"eval_accuracy": 0.3785932721712538,
"eval_loss": 1.771241307258606,
"eval_runtime": 43.2692,
"eval_samples_per_second": 75.573,
"eval_steps_per_second": 9.452,
"step": 4130
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 7.0,
"step": 4130
},
{
"epoch": 7.63,
"learning_rate": 0.005237796610169492,
"loss": 0.84,
"step": 4500
},
{
"epoch": 8.0,
"eval_accuracy": 0.6217125382262997,
"eval_loss": 0.9782317876815796,
"eval_runtime": 43.2151,
"eval_samples_per_second": 75.668,
"eval_steps_per_second": 9.464,
"step": 4720
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.6217125382262997,
"epoch": 8.0,
"step": 4720
},
{
"epoch": 8.47,
"learning_rate": 0.005153220338983051,
"loss": 0.7962,
"step": 5000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6568807339449542,
"eval_loss": 0.6086090803146362,
"eval_runtime": 43.139,
"eval_samples_per_second": 75.801,
"eval_steps_per_second": 9.481,
"step": 5310
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.6568807339449542,
"epoch": 9.0,
"step": 5310
},
{
"epoch": 9.32,
"learning_rate": 0.005068474576271187,
"loss": 0.8003,
"step": 5500
},
{
"epoch": 10.0,
"eval_accuracy": 0.6220183486238532,
"eval_loss": 0.8011331558227539,
"eval_runtime": 43.025,
"eval_samples_per_second": 76.002,
"eval_steps_per_second": 9.506,
"step": 5900
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.6568807339449542,
"epoch": 10.0,
"step": 5900
},
{
"epoch": 10.17,
"learning_rate": 0.004983728813559322,
"loss": 0.793,
"step": 6000
},
{
"epoch": 11.0,
"eval_accuracy": 0.6,
"eval_loss": 0.6698857545852661,
"eval_runtime": 43.1878,
"eval_samples_per_second": 75.716,
"eval_steps_per_second": 9.47,
"step": 6490
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.6568807339449542,
"epoch": 11.0,
"step": 6490
},
{
"epoch": 11.02,
"learning_rate": 0.004899152542372881,
"loss": 0.7706,
"step": 6500
},
{
"epoch": 11.86,
"learning_rate": 0.004814406779661017,
"loss": 0.7558,
"step": 7000
},
{
"epoch": 12.0,
"eval_accuracy": 0.6244648318042814,
"eval_loss": 0.670046329498291,
"eval_runtime": 43.1604,
"eval_samples_per_second": 75.764,
"eval_steps_per_second": 9.476,
"step": 7080
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.6568807339449542,
"epoch": 12.0,
"step": 7080
},
{
"epoch": 12.71,
"learning_rate": 0.004729661016949153,
"loss": 0.7947,
"step": 7500
},
{
"epoch": 13.0,
"eval_accuracy": 0.43149847094801225,
"eval_loss": 1.0037223100662231,
"eval_runtime": 43.2404,
"eval_samples_per_second": 75.624,
"eval_steps_per_second": 9.459,
"step": 7670
},
{
"best_epoch": 8,
"best_eval_accuracy": 0.6568807339449542,
"epoch": 13.0,
"step": 7670
},
{
"epoch": 13.56,
"learning_rate": 0.004644915254237288,
"loss": 0.7465,
"step": 8000
},
{
"epoch": 14.0,
"eval_accuracy": 0.6902140672782875,
"eval_loss": 0.6232324242591858,
"eval_runtime": 43.2231,
"eval_samples_per_second": 75.654,
"eval_steps_per_second": 9.463,
"step": 8260
},
{
"best_epoch": 13,
"best_eval_accuracy": 0.6902140672782875,
"epoch": 14.0,
"step": 8260
},
{
"epoch": 14.41,
"learning_rate": 0.004560169491525424,
"loss": 0.6835,
"step": 8500
},
{
"epoch": 15.0,
"eval_accuracy": 0.6889908256880733,
"eval_loss": 0.6589847207069397,
"eval_runtime": 43.0731,
"eval_samples_per_second": 75.917,
"eval_steps_per_second": 9.495,
"step": 8850
},
{
"best_epoch": 13,
"best_eval_accuracy": 0.6902140672782875,
"epoch": 15.0,
"step": 8850
},
{
"epoch": 15.25,
"learning_rate": 0.004475593220338983,
"loss": 0.7494,
"step": 9000
},
{
"epoch": 16.0,
"eval_accuracy": 0.6862385321100918,
"eval_loss": 0.7069215178489685,
"eval_runtime": 43.2435,
"eval_samples_per_second": 75.618,
"eval_steps_per_second": 9.458,
"step": 9440
},
{
"best_epoch": 13,
"best_eval_accuracy": 0.6902140672782875,
"epoch": 16.0,
"step": 9440
},
{
"epoch": 16.1,
"learning_rate": 0.004390847457627119,
"loss": 0.7499,
"step": 9500
},
{
"epoch": 16.95,
"learning_rate": 0.004306101694915254,
"loss": 0.6775,
"step": 10000
},
{
"epoch": 17.0,
"eval_accuracy": 0.4856269113149847,
"eval_loss": 0.9627411365509033,
"eval_runtime": 43.2672,
"eval_samples_per_second": 75.577,
"eval_steps_per_second": 9.453,
"step": 10030
},
{
"best_epoch": 13,
"best_eval_accuracy": 0.6902140672782875,
"epoch": 17.0,
"step": 10030
},
{
"epoch": 17.8,
"learning_rate": 0.0042213559322033896,
"loss": 0.6928,
"step": 10500
},
{
"epoch": 18.0,
"eval_accuracy": 0.5663608562691131,
"eval_loss": 1.088120460510254,
"eval_runtime": 43.2117,
"eval_samples_per_second": 75.674,
"eval_steps_per_second": 9.465,
"step": 10620
},
{
"best_epoch": 13,
"best_eval_accuracy": 0.6902140672782875,
"epoch": 18.0,
"step": 10620
},
{
"epoch": 18.64,
"learning_rate": 0.004136610169491526,
"loss": 0.6991,
"step": 11000
},
{
"epoch": 19.0,
"eval_accuracy": 0.7024464831804281,
"eval_loss": 0.5778092741966248,
"eval_runtime": 43.3502,
"eval_samples_per_second": 75.432,
"eval_steps_per_second": 9.435,
"step": 11210
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 19.0,
"step": 11210
},
{
"epoch": 19.49,
"learning_rate": 0.004051864406779661,
"loss": 0.6594,
"step": 11500
},
{
"epoch": 20.0,
"eval_accuracy": 0.6051987767584098,
"eval_loss": 0.7909632325172424,
"eval_runtime": 43.1541,
"eval_samples_per_second": 75.775,
"eval_steps_per_second": 9.478,
"step": 11800
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 20.0,
"step": 11800
},
{
"epoch": 20.34,
"learning_rate": 0.003967118644067796,
"loss": 0.6327,
"step": 12000
},
{
"epoch": 21.0,
"eval_accuracy": 0.6966360856269113,
"eval_loss": 0.6203939914703369,
"eval_runtime": 43.0351,
"eval_samples_per_second": 75.984,
"eval_steps_per_second": 9.504,
"step": 12390
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 21.0,
"step": 12390
},
{
"epoch": 21.19,
"learning_rate": 0.0038823728813559325,
"loss": 0.6201,
"step": 12500
},
{
"epoch": 22.0,
"eval_accuracy": 0.6792048929663609,
"eval_loss": 0.5992993712425232,
"eval_runtime": 43.2326,
"eval_samples_per_second": 75.637,
"eval_steps_per_second": 9.46,
"step": 12980
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 22.0,
"step": 12980
},
{
"epoch": 22.03,
"learning_rate": 0.0037977966101694917,
"loss": 0.629,
"step": 13000
},
{
"epoch": 22.88,
"learning_rate": 0.0037130508474576274,
"loss": 0.6026,
"step": 13500
},
{
"epoch": 23.0,
"eval_accuracy": 0.6633027522935779,
"eval_loss": 0.6735050082206726,
"eval_runtime": 43.216,
"eval_samples_per_second": 75.666,
"eval_steps_per_second": 9.464,
"step": 13570
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 23.0,
"step": 13570
},
{
"epoch": 23.73,
"learning_rate": 0.0036283050847457626,
"loss": 0.5826,
"step": 14000
},
{
"epoch": 24.0,
"eval_accuracy": 0.6605504587155964,
"eval_loss": 0.6619319319725037,
"eval_runtime": 43.1433,
"eval_samples_per_second": 75.794,
"eval_steps_per_second": 9.48,
"step": 14160
},
{
"best_epoch": 18,
"best_eval_accuracy": 0.7024464831804281,
"epoch": 24.0,
"step": 14160
},
{
"epoch": 24.58,
"learning_rate": 0.0035435593220338986,
"loss": 0.5831,
"step": 14500
},
{
"epoch": 25.0,
"eval_accuracy": 0.7073394495412844,
"eval_loss": 0.7766701579093933,
"eval_runtime": 43.2655,
"eval_samples_per_second": 75.58,
"eval_steps_per_second": 9.453,
"step": 14750
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.7073394495412844,
"epoch": 25.0,
"step": 14750
},
{
"epoch": 25.42,
"learning_rate": 0.003458813559322034,
"loss": 0.5809,
"step": 15000
},
{
"epoch": 26.0,
"eval_accuracy": 0.5425076452599389,
"eval_loss": 1.2840725183486938,
"eval_runtime": 43.2156,
"eval_samples_per_second": 75.667,
"eval_steps_per_second": 9.464,
"step": 15340
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.7073394495412844,
"epoch": 26.0,
"step": 15340
},
{
"epoch": 26.27,
"learning_rate": 0.0033740677966101694,
"loss": 0.6095,
"step": 15500
},
{
"epoch": 27.0,
"eval_accuracy": 0.6400611620795107,
"eval_loss": 0.8816479444503784,
"eval_runtime": 43.1544,
"eval_samples_per_second": 75.774,
"eval_steps_per_second": 9.478,
"step": 15930
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.7073394495412844,
"epoch": 27.0,
"step": 15930
},
{
"epoch": 27.12,
"learning_rate": 0.0032893220338983055,
"loss": 0.5729,
"step": 16000
},
{
"epoch": 27.97,
"learning_rate": 0.0032045762711864407,
"loss": 0.5478,
"step": 16500
},
{
"epoch": 28.0,
"eval_accuracy": 0.7189602446483181,
"eval_loss": 0.6825653910636902,
"eval_runtime": 43.238,
"eval_samples_per_second": 75.628,
"eval_steps_per_second": 9.459,
"step": 16520
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 28.0,
"step": 16520
},
{
"epoch": 28.81,
"learning_rate": 0.0031198305084745763,
"loss": 0.5516,
"step": 17000
},
{
"epoch": 29.0,
"eval_accuracy": 0.7168195718654434,
"eval_loss": 0.6076229214668274,
"eval_runtime": 43.2007,
"eval_samples_per_second": 75.693,
"eval_steps_per_second": 9.467,
"step": 17110
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 29.0,
"step": 17110
},
{
"epoch": 29.66,
"learning_rate": 0.0030352542372881356,
"loss": 0.5538,
"step": 17500
},
{
"epoch": 30.0,
"eval_accuracy": 0.6859327217125383,
"eval_loss": 0.9477331042289734,
"eval_runtime": 43.2891,
"eval_samples_per_second": 75.539,
"eval_steps_per_second": 9.448,
"step": 17700
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 30.0,
"step": 17700
},
{
"epoch": 30.51,
"learning_rate": 0.002950508474576271,
"loss": 0.5516,
"step": 18000
},
{
"epoch": 31.0,
"eval_accuracy": 0.7137614678899082,
"eval_loss": 0.6786766052246094,
"eval_runtime": 43.2599,
"eval_samples_per_second": 75.59,
"eval_steps_per_second": 9.454,
"step": 18290
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 31.0,
"step": 18290
},
{
"epoch": 31.36,
"learning_rate": 0.002865762711864407,
"loss": 0.5296,
"step": 18500
},
{
"epoch": 32.0,
"eval_accuracy": 0.7006116207951071,
"eval_loss": 0.8120760917663574,
"eval_runtime": 43.2461,
"eval_samples_per_second": 75.614,
"eval_steps_per_second": 9.458,
"step": 18880
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 32.0,
"step": 18880
},
{
"epoch": 32.2,
"learning_rate": 0.002781186440677966,
"loss": 0.5209,
"step": 19000
},
{
"epoch": 33.0,
"eval_accuracy": 0.7018348623853211,
"eval_loss": 0.8754389882087708,
"eval_runtime": 43.2296,
"eval_samples_per_second": 75.643,
"eval_steps_per_second": 9.461,
"step": 19470
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 33.0,
"step": 19470
},
{
"epoch": 33.05,
"learning_rate": 0.0026964406779661017,
"loss": 0.5172,
"step": 19500
},
{
"epoch": 33.9,
"learning_rate": 0.0026116949152542373,
"loss": 0.4932,
"step": 20000
},
{
"epoch": 34.0,
"eval_accuracy": 0.7097859327217125,
"eval_loss": 0.6252529621124268,
"eval_runtime": 43.3313,
"eval_samples_per_second": 75.465,
"eval_steps_per_second": 9.439,
"step": 20060
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 34.0,
"step": 20060
},
{
"epoch": 34.75,
"learning_rate": 0.002527118644067797,
"loss": 0.4914,
"step": 20500
},
{
"epoch": 35.0,
"eval_accuracy": 0.7039755351681957,
"eval_loss": 0.6481243968009949,
"eval_runtime": 43.2111,
"eval_samples_per_second": 75.675,
"eval_steps_per_second": 9.465,
"step": 20650
},
{
"best_epoch": 27,
"best_eval_accuracy": 0.7189602446483181,
"epoch": 35.0,
"step": 20650
},
{
"epoch": 35.59,
"learning_rate": 0.002442372881355932,
"loss": 0.4845,
"step": 21000
},
{
"epoch": 36.0,
"eval_accuracy": 0.7207951070336391,
"eval_loss": 0.6696820855140686,
"eval_runtime": 43.2359,
"eval_samples_per_second": 75.632,
"eval_steps_per_second": 9.46,
"step": 21240
},
{
"best_epoch": 35,
"best_eval_accuracy": 0.7207951070336391,
"epoch": 36.0,
"step": 21240
},
{
"epoch": 36.44,
"learning_rate": 0.002357627118644068,
"loss": 0.4836,
"step": 21500
},
{
"epoch": 37.0,
"eval_accuracy": 0.7363914373088685,
"eval_loss": 0.6275990605354309,
"eval_runtime": 43.254,
"eval_samples_per_second": 75.6,
"eval_steps_per_second": 9.456,
"step": 21830
},
{
"best_epoch": 36,
"best_eval_accuracy": 0.7363914373088685,
"epoch": 37.0,
"step": 21830
},
{
"epoch": 37.29,
"learning_rate": 0.0022728813559322034,
"loss": 0.4592,
"step": 22000
},
{
"epoch": 38.0,
"eval_accuracy": 0.7342507645259939,
"eval_loss": 0.5963826775550842,
"eval_runtime": 43.2046,
"eval_samples_per_second": 75.686,
"eval_steps_per_second": 9.467,
"step": 22420
},
{
"best_epoch": 36,
"best_eval_accuracy": 0.7363914373088685,
"epoch": 38.0,
"step": 22420
},
{
"epoch": 38.14,
"learning_rate": 0.002188135593220339,
"loss": 0.4692,
"step": 22500
},
{
"epoch": 38.98,
"learning_rate": 0.0021033898305084747,
"loss": 0.4642,
"step": 23000
},
{
"epoch": 39.0,
"eval_accuracy": 0.7363914373088685,
"eval_loss": 0.5508460402488708,
"eval_runtime": 43.299,
"eval_samples_per_second": 75.521,
"eval_steps_per_second": 9.446,
"step": 23010
},
{
"best_epoch": 36,
"best_eval_accuracy": 0.7363914373088685,
"epoch": 39.0,
"step": 23010
},
{
"epoch": 39.83,
"learning_rate": 0.002018813559322034,
"loss": 0.4704,
"step": 23500
},
{
"epoch": 40.0,
"eval_accuracy": 0.708256880733945,
"eval_loss": 0.8355740308761597,
"eval_runtime": 43.2934,
"eval_samples_per_second": 75.531,
"eval_steps_per_second": 9.447,
"step": 23600
},
{
"best_epoch": 36,
"best_eval_accuracy": 0.7363914373088685,
"epoch": 40.0,
"step": 23600
},
{
"epoch": 40.68,
"learning_rate": 0.0019340677966101694,
"loss": 0.4556,
"step": 24000
},
{
"epoch": 41.0,
"eval_accuracy": 0.7339449541284404,
"eval_loss": 0.6307940483093262,
"eval_runtime": 43.1769,
"eval_samples_per_second": 75.735,
"eval_steps_per_second": 9.473,
"step": 24190
},
{
"best_epoch": 36,
"best_eval_accuracy": 0.7363914373088685,
"epoch": 41.0,
"step": 24190
},
{
"epoch": 41.53,
"learning_rate": 0.0018494915254237288,
"loss": 0.4583,
"step": 24500
},
{
"epoch": 42.0,
"eval_accuracy": 0.7373088685015291,
"eval_loss": 0.5991156697273254,
"eval_runtime": 43.2189,
"eval_samples_per_second": 75.661,
"eval_steps_per_second": 9.463,
"step": 24780
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 42.0,
"step": 24780
},
{
"epoch": 42.37,
"learning_rate": 0.0017647457627118644,
"loss": 0.4445,
"step": 25000
},
{
"epoch": 43.0,
"eval_accuracy": 0.7247706422018348,
"eval_loss": 0.6277905106544495,
"eval_runtime": 43.2757,
"eval_samples_per_second": 75.562,
"eval_steps_per_second": 9.451,
"step": 25370
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 43.0,
"step": 25370
},
{
"epoch": 43.22,
"learning_rate": 0.0016800000000000003,
"loss": 0.4298,
"step": 25500
},
{
"epoch": 44.0,
"eval_accuracy": 0.6880733944954128,
"eval_loss": 0.7619650363922119,
"eval_runtime": 43.2274,
"eval_samples_per_second": 75.646,
"eval_steps_per_second": 9.462,
"step": 25960
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 44.0,
"step": 25960
},
{
"epoch": 44.07,
"learning_rate": 0.0015952542372881355,
"loss": 0.4346,
"step": 26000
},
{
"epoch": 44.92,
"learning_rate": 0.0015105084745762713,
"loss": 0.4319,
"step": 26500
},
{
"epoch": 45.0,
"eval_accuracy": 0.7311926605504587,
"eval_loss": 0.6154680848121643,
"eval_runtime": 43.3006,
"eval_samples_per_second": 75.519,
"eval_steps_per_second": 9.446,
"step": 26550
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 45.0,
"step": 26550
},
{
"epoch": 45.76,
"learning_rate": 0.0014257627118644067,
"loss": 0.4178,
"step": 27000
},
{
"epoch": 46.0,
"eval_accuracy": 0.736085626911315,
"eval_loss": 0.6141914129257202,
"eval_runtime": 43.3021,
"eval_samples_per_second": 75.516,
"eval_steps_per_second": 9.445,
"step": 27140
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 46.0,
"step": 27140
},
{
"epoch": 46.61,
"learning_rate": 0.0013410169491525424,
"loss": 0.4204,
"step": 27500
},
{
"epoch": 47.0,
"eval_accuracy": 0.7321100917431193,
"eval_loss": 0.6599806547164917,
"eval_runtime": 43.3421,
"eval_samples_per_second": 75.446,
"eval_steps_per_second": 9.437,
"step": 27730
},
{
"best_epoch": 41,
"best_eval_accuracy": 0.7373088685015291,
"epoch": 47.0,
"step": 27730
},
{
"epoch": 47.46,
"learning_rate": 0.001256271186440678,
"loss": 0.4204,
"step": 28000
},
{
"epoch": 48.0,
"eval_accuracy": 0.7403669724770642,
"eval_loss": 0.601150631904602,
"eval_runtime": 43.197,
"eval_samples_per_second": 75.7,
"eval_steps_per_second": 9.468,
"step": 28320
},
{
"best_epoch": 47,
"best_eval_accuracy": 0.7403669724770642,
"epoch": 48.0,
"step": 28320
},
{
"epoch": 48.31,
"learning_rate": 0.0011715254237288136,
"loss": 0.4038,
"step": 28500
},
{
"epoch": 49.0,
"eval_accuracy": 0.7391437308868501,
"eval_loss": 0.6091906428337097,
"eval_runtime": 43.2721,
"eval_samples_per_second": 75.568,
"eval_steps_per_second": 9.452,
"step": 28910
},
{
"best_epoch": 47,
"best_eval_accuracy": 0.7403669724770642,
"epoch": 49.0,
"step": 28910
},
{
"epoch": 49.15,
"learning_rate": 0.001086779661016949,
"loss": 0.4017,
"step": 29000
},
{
"epoch": 50.0,
"learning_rate": 0.0010022033898305085,
"loss": 0.4103,
"step": 29500
},
{
"epoch": 50.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.6660399436950684,
"eval_runtime": 43.2592,
"eval_samples_per_second": 75.591,
"eval_steps_per_second": 9.455,
"step": 29500
},
{
"best_epoch": 47,
"best_eval_accuracy": 0.7403669724770642,
"epoch": 50.0,
"step": 29500
},
{
"epoch": 50.85,
"learning_rate": 0.000917627118644068,
"loss": 0.3979,
"step": 30000
},
{
"epoch": 51.0,
"eval_accuracy": 0.7363914373088685,
"eval_loss": 0.6605736613273621,
"eval_runtime": 43.1587,
"eval_samples_per_second": 75.767,
"eval_steps_per_second": 9.477,
"step": 30090
},
{
"best_epoch": 47,
"best_eval_accuracy": 0.7403669724770642,
"epoch": 51.0,
"step": 30090
},
{
"epoch": 51.69,
"learning_rate": 0.0008328813559322035,
"loss": 0.3946,
"step": 30500
},
{
"epoch": 52.0,
"eval_accuracy": 0.7223241590214067,
"eval_loss": 0.7039574384689331,
"eval_runtime": 43.2757,
"eval_samples_per_second": 75.562,
"eval_steps_per_second": 9.451,
"step": 30680
},
{
"best_epoch": 47,
"best_eval_accuracy": 0.7403669724770642,
"epoch": 52.0,
"step": 30680
},
{
"epoch": 52.54,
"learning_rate": 0.000748135593220339,
"loss": 0.3857,
"step": 31000
},
{
"epoch": 53.0,
"eval_accuracy": 0.7412844036697248,
"eval_loss": 0.6302646398544312,
"eval_runtime": 43.1428,
"eval_samples_per_second": 75.795,
"eval_steps_per_second": 9.48,
"step": 31270
},
{
"best_epoch": 52,
"best_eval_accuracy": 0.7412844036697248,
"epoch": 53.0,
"step": 31270
},
{
"epoch": 53.39,
"learning_rate": 0.0006633898305084746,
"loss": 0.3837,
"step": 31500
},
{
"epoch": 54.0,
"eval_accuracy": 0.7269113149847095,
"eval_loss": 0.6580860614776611,
"eval_runtime": 43.2755,
"eval_samples_per_second": 75.562,
"eval_steps_per_second": 9.451,
"step": 31860
},
{
"best_epoch": 52,
"best_eval_accuracy": 0.7412844036697248,
"epoch": 54.0,
"step": 31860
},
{
"epoch": 54.24,
"learning_rate": 0.0005786440677966102,
"loss": 0.3803,
"step": 32000
},
{
"epoch": 55.0,
"eval_accuracy": 0.7281345565749235,
"eval_loss": 0.6364992260932922,
"eval_runtime": 43.2674,
"eval_samples_per_second": 75.577,
"eval_steps_per_second": 9.453,
"step": 32450
},
{
"best_epoch": 52,
"best_eval_accuracy": 0.7412844036697248,
"epoch": 55.0,
"step": 32450
},
{
"epoch": 55.08,
"learning_rate": 0.0004938983050847458,
"loss": 0.3847,
"step": 32500
},
{
"epoch": 55.93,
"learning_rate": 0.0004091525423728814,
"loss": 0.3792,
"step": 33000
},
{
"epoch": 56.0,
"eval_accuracy": 0.7302752293577982,
"eval_loss": 0.6349842548370361,
"eval_runtime": 43.2291,
"eval_samples_per_second": 75.643,
"eval_steps_per_second": 9.461,
"step": 33040
},
{
"best_epoch": 52,
"best_eval_accuracy": 0.7412844036697248,
"epoch": 56.0,
"step": 33040
},
{
"epoch": 56.78,
"learning_rate": 0.00032440677966101696,
"loss": 0.3826,
"step": 33500
},
{
"epoch": 57.0,
"eval_accuracy": 0.7415902140672783,
"eval_loss": 0.6233869791030884,
"eval_runtime": 43.2253,
"eval_samples_per_second": 75.65,
"eval_steps_per_second": 9.462,
"step": 33630
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.7415902140672783,
"epoch": 57.0,
"step": 33630
},
{
"epoch": 57.63,
"learning_rate": 0.00023966101694915254,
"loss": 0.3784,
"step": 34000
},
{
"epoch": 58.0,
"eval_accuracy": 0.735474006116208,
"eval_loss": 0.6312357187271118,
"eval_runtime": 43.257,
"eval_samples_per_second": 75.595,
"eval_steps_per_second": 9.455,
"step": 34220
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.7415902140672783,
"epoch": 58.0,
"step": 34220
},
{
"epoch": 58.47,
"learning_rate": 0.00015491525423728814,
"loss": 0.373,
"step": 34500
},
{
"epoch": 59.0,
"eval_accuracy": 0.7403669724770642,
"eval_loss": 0.6151607632637024,
"eval_runtime": 43.2076,
"eval_samples_per_second": 75.681,
"eval_steps_per_second": 9.466,
"step": 34810
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.7415902140672783,
"epoch": 59.0,
"step": 34810
},
{
"epoch": 59.32,
"learning_rate": 7.016949152542373e-05,
"loss": 0.3713,
"step": 35000
},
{
"epoch": 60.0,
"eval_accuracy": 0.735474006116208,
"eval_loss": 0.6204875707626343,
"eval_runtime": 20.923,
"eval_samples_per_second": 156.287,
"eval_steps_per_second": 19.548,
"step": 35400
},
{
"best_epoch": 56,
"best_eval_accuracy": 0.7415902140672783,
"epoch": 60.0,
"step": 35400
},
{
"epoch": 60.0,
"step": 35400,
"total_flos": 2.6355950886279168e+17,
"train_loss": 0.581377860031559,
"train_runtime": 12400.4956,
"train_samples_per_second": 45.613,
"train_steps_per_second": 2.855
}
],
"max_steps": 35400,
"num_train_epochs": 60,
"total_flos": 2.6355950886279168e+17,
"trial_name": null,
"trial_params": null
}