|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 35400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.005915423728813559, |
|
"loss": 0.9831, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6217125382262997, |
|
"eval_loss": 0.8378309011459351, |
|
"eval_runtime": 43.2844, |
|
"eval_samples_per_second": 75.547, |
|
"eval_steps_per_second": 9.449, |
|
"step": 590 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 1.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.005830847457627119, |
|
"loss": 0.965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3782874617737003, |
|
"eval_loss": 1.149783968925476, |
|
"eval_runtime": 43.2275, |
|
"eval_samples_per_second": 75.646, |
|
"eval_steps_per_second": 9.462, |
|
"step": 1180 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 2.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.005746101694915255, |
|
"loss": 0.8937, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6217125382262997, |
|
"eval_loss": 1.2835785150527954, |
|
"eval_runtime": 43.2406, |
|
"eval_samples_per_second": 75.623, |
|
"eval_steps_per_second": 9.459, |
|
"step": 1770 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 3.0, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00566135593220339, |
|
"loss": 0.9435, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6217125382262997, |
|
"eval_loss": 0.8481320142745972, |
|
"eval_runtime": 43.2628, |
|
"eval_samples_per_second": 75.585, |
|
"eval_steps_per_second": 9.454, |
|
"step": 2360 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 4.0, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.005576779661016949, |
|
"loss": 0.8566, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3834862385321101, |
|
"eval_loss": 0.9289329648017883, |
|
"eval_runtime": 43.2414, |
|
"eval_samples_per_second": 75.622, |
|
"eval_steps_per_second": 9.459, |
|
"step": 2950 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 5.0, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.005492033898305085, |
|
"loss": 0.8917, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00540728813559322, |
|
"loss": 0.8868, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.591743119266055, |
|
"eval_loss": 0.651913583278656, |
|
"eval_runtime": 43.1168, |
|
"eval_samples_per_second": 75.84, |
|
"eval_steps_per_second": 9.486, |
|
"step": 3540 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 6.0, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0053225423728813556, |
|
"loss": 0.8905, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.3785932721712538, |
|
"eval_loss": 1.771241307258606, |
|
"eval_runtime": 43.2692, |
|
"eval_samples_per_second": 75.573, |
|
"eval_steps_per_second": 9.452, |
|
"step": 4130 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 7.0, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.005237796610169492, |
|
"loss": 0.84, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6217125382262997, |
|
"eval_loss": 0.9782317876815796, |
|
"eval_runtime": 43.2151, |
|
"eval_samples_per_second": 75.668, |
|
"eval_steps_per_second": 9.464, |
|
"step": 4720 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 8.0, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.005153220338983051, |
|
"loss": 0.7962, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6568807339449542, |
|
"eval_loss": 0.6086090803146362, |
|
"eval_runtime": 43.139, |
|
"eval_samples_per_second": 75.801, |
|
"eval_steps_per_second": 9.481, |
|
"step": 5310 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6568807339449542, |
|
"epoch": 9.0, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.005068474576271187, |
|
"loss": 0.8003, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6220183486238532, |
|
"eval_loss": 0.8011331558227539, |
|
"eval_runtime": 43.025, |
|
"eval_samples_per_second": 76.002, |
|
"eval_steps_per_second": 9.506, |
|
"step": 5900 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6568807339449542, |
|
"epoch": 10.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.004983728813559322, |
|
"loss": 0.793, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6698857545852661, |
|
"eval_runtime": 43.1878, |
|
"eval_samples_per_second": 75.716, |
|
"eval_steps_per_second": 9.47, |
|
"step": 6490 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6568807339449542, |
|
"epoch": 11.0, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.004899152542372881, |
|
"loss": 0.7706, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.004814406779661017, |
|
"loss": 0.7558, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6244648318042814, |
|
"eval_loss": 0.670046329498291, |
|
"eval_runtime": 43.1604, |
|
"eval_samples_per_second": 75.764, |
|
"eval_steps_per_second": 9.476, |
|
"step": 7080 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6568807339449542, |
|
"epoch": 12.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.004729661016949153, |
|
"loss": 0.7947, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.43149847094801225, |
|
"eval_loss": 1.0037223100662231, |
|
"eval_runtime": 43.2404, |
|
"eval_samples_per_second": 75.624, |
|
"eval_steps_per_second": 9.459, |
|
"step": 7670 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6568807339449542, |
|
"epoch": 13.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.004644915254237288, |
|
"loss": 0.7465, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6902140672782875, |
|
"eval_loss": 0.6232324242591858, |
|
"eval_runtime": 43.2231, |
|
"eval_samples_per_second": 75.654, |
|
"eval_steps_per_second": 9.463, |
|
"step": 8260 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6902140672782875, |
|
"epoch": 14.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.004560169491525424, |
|
"loss": 0.6835, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6889908256880733, |
|
"eval_loss": 0.6589847207069397, |
|
"eval_runtime": 43.0731, |
|
"eval_samples_per_second": 75.917, |
|
"eval_steps_per_second": 9.495, |
|
"step": 8850 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6902140672782875, |
|
"epoch": 15.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.004475593220338983, |
|
"loss": 0.7494, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6862385321100918, |
|
"eval_loss": 0.7069215178489685, |
|
"eval_runtime": 43.2435, |
|
"eval_samples_per_second": 75.618, |
|
"eval_steps_per_second": 9.458, |
|
"step": 9440 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6902140672782875, |
|
"epoch": 16.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 0.004390847457627119, |
|
"loss": 0.7499, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.004306101694915254, |
|
"loss": 0.6775, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4856269113149847, |
|
"eval_loss": 0.9627411365509033, |
|
"eval_runtime": 43.2672, |
|
"eval_samples_per_second": 75.577, |
|
"eval_steps_per_second": 9.453, |
|
"step": 10030 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6902140672782875, |
|
"epoch": 17.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 0.0042213559322033896, |
|
"loss": 0.6928, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5663608562691131, |
|
"eval_loss": 1.088120460510254, |
|
"eval_runtime": 43.2117, |
|
"eval_samples_per_second": 75.674, |
|
"eval_steps_per_second": 9.465, |
|
"step": 10620 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6902140672782875, |
|
"epoch": 18.0, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.004136610169491526, |
|
"loss": 0.6991, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7024464831804281, |
|
"eval_loss": 0.5778092741966248, |
|
"eval_runtime": 43.3502, |
|
"eval_samples_per_second": 75.432, |
|
"eval_steps_per_second": 9.435, |
|
"step": 11210 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 19.0, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 0.004051864406779661, |
|
"loss": 0.6594, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6051987767584098, |
|
"eval_loss": 0.7909632325172424, |
|
"eval_runtime": 43.1541, |
|
"eval_samples_per_second": 75.775, |
|
"eval_steps_per_second": 9.478, |
|
"step": 11800 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 20.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 0.003967118644067796, |
|
"loss": 0.6327, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6966360856269113, |
|
"eval_loss": 0.6203939914703369, |
|
"eval_runtime": 43.0351, |
|
"eval_samples_per_second": 75.984, |
|
"eval_steps_per_second": 9.504, |
|
"step": 12390 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 21.0, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 0.0038823728813559325, |
|
"loss": 0.6201, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6792048929663609, |
|
"eval_loss": 0.5992993712425232, |
|
"eval_runtime": 43.2326, |
|
"eval_samples_per_second": 75.637, |
|
"eval_steps_per_second": 9.46, |
|
"step": 12980 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 22.0, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 0.0037977966101694917, |
|
"loss": 0.629, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 0.0037130508474576274, |
|
"loss": 0.6026, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6633027522935779, |
|
"eval_loss": 0.6735050082206726, |
|
"eval_runtime": 43.216, |
|
"eval_samples_per_second": 75.666, |
|
"eval_steps_per_second": 9.464, |
|
"step": 13570 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 23.0, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 0.0036283050847457626, |
|
"loss": 0.5826, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6605504587155964, |
|
"eval_loss": 0.6619319319725037, |
|
"eval_runtime": 43.1433, |
|
"eval_samples_per_second": 75.794, |
|
"eval_steps_per_second": 9.48, |
|
"step": 14160 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7024464831804281, |
|
"epoch": 24.0, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 0.0035435593220338986, |
|
"loss": 0.5831, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7073394495412844, |
|
"eval_loss": 0.7766701579093933, |
|
"eval_runtime": 43.2655, |
|
"eval_samples_per_second": 75.58, |
|
"eval_steps_per_second": 9.453, |
|
"step": 14750 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7073394495412844, |
|
"epoch": 25.0, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 0.003458813559322034, |
|
"loss": 0.5809, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5425076452599389, |
|
"eval_loss": 1.2840725183486938, |
|
"eval_runtime": 43.2156, |
|
"eval_samples_per_second": 75.667, |
|
"eval_steps_per_second": 9.464, |
|
"step": 15340 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7073394495412844, |
|
"epoch": 26.0, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 0.0033740677966101694, |
|
"loss": 0.6095, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6400611620795107, |
|
"eval_loss": 0.8816479444503784, |
|
"eval_runtime": 43.1544, |
|
"eval_samples_per_second": 75.774, |
|
"eval_steps_per_second": 9.478, |
|
"step": 15930 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7073394495412844, |
|
"epoch": 27.0, |
|
"step": 15930 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 0.0032893220338983055, |
|
"loss": 0.5729, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 0.0032045762711864407, |
|
"loss": 0.5478, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7189602446483181, |
|
"eval_loss": 0.6825653910636902, |
|
"eval_runtime": 43.238, |
|
"eval_samples_per_second": 75.628, |
|
"eval_steps_per_second": 9.459, |
|
"step": 16520 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 28.0, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 0.0031198305084745763, |
|
"loss": 0.5516, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7168195718654434, |
|
"eval_loss": 0.6076229214668274, |
|
"eval_runtime": 43.2007, |
|
"eval_samples_per_second": 75.693, |
|
"eval_steps_per_second": 9.467, |
|
"step": 17110 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 29.0, |
|
"step": 17110 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"learning_rate": 0.0030352542372881356, |
|
"loss": 0.5538, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6859327217125383, |
|
"eval_loss": 0.9477331042289734, |
|
"eval_runtime": 43.2891, |
|
"eval_samples_per_second": 75.539, |
|
"eval_steps_per_second": 9.448, |
|
"step": 17700 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 30.0, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 30.51, |
|
"learning_rate": 0.002950508474576271, |
|
"loss": 0.5516, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7137614678899082, |
|
"eval_loss": 0.6786766052246094, |
|
"eval_runtime": 43.2599, |
|
"eval_samples_per_second": 75.59, |
|
"eval_steps_per_second": 9.454, |
|
"step": 18290 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 31.0, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 0.002865762711864407, |
|
"loss": 0.5296, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7006116207951071, |
|
"eval_loss": 0.8120760917663574, |
|
"eval_runtime": 43.2461, |
|
"eval_samples_per_second": 75.614, |
|
"eval_steps_per_second": 9.458, |
|
"step": 18880 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 32.0, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 0.002781186440677966, |
|
"loss": 0.5209, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7018348623853211, |
|
"eval_loss": 0.8754389882087708, |
|
"eval_runtime": 43.2296, |
|
"eval_samples_per_second": 75.643, |
|
"eval_steps_per_second": 9.461, |
|
"step": 19470 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 33.0, |
|
"step": 19470 |
|
}, |
|
{ |
|
"epoch": 33.05, |
|
"learning_rate": 0.0026964406779661017, |
|
"loss": 0.5172, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"learning_rate": 0.0026116949152542373, |
|
"loss": 0.4932, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7097859327217125, |
|
"eval_loss": 0.6252529621124268, |
|
"eval_runtime": 43.3313, |
|
"eval_samples_per_second": 75.465, |
|
"eval_steps_per_second": 9.439, |
|
"step": 20060 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 34.0, |
|
"step": 20060 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"learning_rate": 0.002527118644067797, |
|
"loss": 0.4914, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7039755351681957, |
|
"eval_loss": 0.6481243968009949, |
|
"eval_runtime": 43.2111, |
|
"eval_samples_per_second": 75.675, |
|
"eval_steps_per_second": 9.465, |
|
"step": 20650 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7189602446483181, |
|
"epoch": 35.0, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 35.59, |
|
"learning_rate": 0.002442372881355932, |
|
"loss": 0.4845, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7207951070336391, |
|
"eval_loss": 0.6696820855140686, |
|
"eval_runtime": 43.2359, |
|
"eval_samples_per_second": 75.632, |
|
"eval_steps_per_second": 9.46, |
|
"step": 21240 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7207951070336391, |
|
"epoch": 36.0, |
|
"step": 21240 |
|
}, |
|
{ |
|
"epoch": 36.44, |
|
"learning_rate": 0.002357627118644068, |
|
"loss": 0.4836, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7363914373088685, |
|
"eval_loss": 0.6275990605354309, |
|
"eval_runtime": 43.254, |
|
"eval_samples_per_second": 75.6, |
|
"eval_steps_per_second": 9.456, |
|
"step": 21830 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.7363914373088685, |
|
"epoch": 37.0, |
|
"step": 21830 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"learning_rate": 0.0022728813559322034, |
|
"loss": 0.4592, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7342507645259939, |
|
"eval_loss": 0.5963826775550842, |
|
"eval_runtime": 43.2046, |
|
"eval_samples_per_second": 75.686, |
|
"eval_steps_per_second": 9.467, |
|
"step": 22420 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.7363914373088685, |
|
"epoch": 38.0, |
|
"step": 22420 |
|
}, |
|
{ |
|
"epoch": 38.14, |
|
"learning_rate": 0.002188135593220339, |
|
"loss": 0.4692, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"learning_rate": 0.0021033898305084747, |
|
"loss": 0.4642, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7363914373088685, |
|
"eval_loss": 0.5508460402488708, |
|
"eval_runtime": 43.299, |
|
"eval_samples_per_second": 75.521, |
|
"eval_steps_per_second": 9.446, |
|
"step": 23010 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.7363914373088685, |
|
"epoch": 39.0, |
|
"step": 23010 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"learning_rate": 0.002018813559322034, |
|
"loss": 0.4704, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.708256880733945, |
|
"eval_loss": 0.8355740308761597, |
|
"eval_runtime": 43.2934, |
|
"eval_samples_per_second": 75.531, |
|
"eval_steps_per_second": 9.447, |
|
"step": 23600 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.7363914373088685, |
|
"epoch": 40.0, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 0.0019340677966101694, |
|
"loss": 0.4556, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7339449541284404, |
|
"eval_loss": 0.6307940483093262, |
|
"eval_runtime": 43.1769, |
|
"eval_samples_per_second": 75.735, |
|
"eval_steps_per_second": 9.473, |
|
"step": 24190 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.7363914373088685, |
|
"epoch": 41.0, |
|
"step": 24190 |
|
}, |
|
{ |
|
"epoch": 41.53, |
|
"learning_rate": 0.0018494915254237288, |
|
"loss": 0.4583, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7373088685015291, |
|
"eval_loss": 0.5991156697273254, |
|
"eval_runtime": 43.2189, |
|
"eval_samples_per_second": 75.661, |
|
"eval_steps_per_second": 9.463, |
|
"step": 24780 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 42.0, |
|
"step": 24780 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"learning_rate": 0.0017647457627118644, |
|
"loss": 0.4445, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.6277905106544495, |
|
"eval_runtime": 43.2757, |
|
"eval_samples_per_second": 75.562, |
|
"eval_steps_per_second": 9.451, |
|
"step": 25370 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 43.0, |
|
"step": 25370 |
|
}, |
|
{ |
|
"epoch": 43.22, |
|
"learning_rate": 0.0016800000000000003, |
|
"loss": 0.4298, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6880733944954128, |
|
"eval_loss": 0.7619650363922119, |
|
"eval_runtime": 43.2274, |
|
"eval_samples_per_second": 75.646, |
|
"eval_steps_per_second": 9.462, |
|
"step": 25960 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 44.0, |
|
"step": 25960 |
|
}, |
|
{ |
|
"epoch": 44.07, |
|
"learning_rate": 0.0015952542372881355, |
|
"loss": 0.4346, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 44.92, |
|
"learning_rate": 0.0015105084745762713, |
|
"loss": 0.4319, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7311926605504587, |
|
"eval_loss": 0.6154680848121643, |
|
"eval_runtime": 43.3006, |
|
"eval_samples_per_second": 75.519, |
|
"eval_steps_per_second": 9.446, |
|
"step": 26550 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 45.0, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 0.0014257627118644067, |
|
"loss": 0.4178, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.736085626911315, |
|
"eval_loss": 0.6141914129257202, |
|
"eval_runtime": 43.3021, |
|
"eval_samples_per_second": 75.516, |
|
"eval_steps_per_second": 9.445, |
|
"step": 27140 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 46.0, |
|
"step": 27140 |
|
}, |
|
{ |
|
"epoch": 46.61, |
|
"learning_rate": 0.0013410169491525424, |
|
"loss": 0.4204, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7321100917431193, |
|
"eval_loss": 0.6599806547164917, |
|
"eval_runtime": 43.3421, |
|
"eval_samples_per_second": 75.446, |
|
"eval_steps_per_second": 9.437, |
|
"step": 27730 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7373088685015291, |
|
"epoch": 47.0, |
|
"step": 27730 |
|
}, |
|
{ |
|
"epoch": 47.46, |
|
"learning_rate": 0.001256271186440678, |
|
"loss": 0.4204, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7403669724770642, |
|
"eval_loss": 0.601150631904602, |
|
"eval_runtime": 43.197, |
|
"eval_samples_per_second": 75.7, |
|
"eval_steps_per_second": 9.468, |
|
"step": 28320 |
|
}, |
|
{ |
|
"best_epoch": 47, |
|
"best_eval_accuracy": 0.7403669724770642, |
|
"epoch": 48.0, |
|
"step": 28320 |
|
}, |
|
{ |
|
"epoch": 48.31, |
|
"learning_rate": 0.0011715254237288136, |
|
"loss": 0.4038, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7391437308868501, |
|
"eval_loss": 0.6091906428337097, |
|
"eval_runtime": 43.2721, |
|
"eval_samples_per_second": 75.568, |
|
"eval_steps_per_second": 9.452, |
|
"step": 28910 |
|
}, |
|
{ |
|
"best_epoch": 47, |
|
"best_eval_accuracy": 0.7403669724770642, |
|
"epoch": 49.0, |
|
"step": 28910 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 0.001086779661016949, |
|
"loss": 0.4017, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0010022033898305085, |
|
"loss": 0.4103, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6660399436950684, |
|
"eval_runtime": 43.2592, |
|
"eval_samples_per_second": 75.591, |
|
"eval_steps_per_second": 9.455, |
|
"step": 29500 |
|
}, |
|
{ |
|
"best_epoch": 47, |
|
"best_eval_accuracy": 0.7403669724770642, |
|
"epoch": 50.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"learning_rate": 0.000917627118644068, |
|
"loss": 0.3979, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7363914373088685, |
|
"eval_loss": 0.6605736613273621, |
|
"eval_runtime": 43.1587, |
|
"eval_samples_per_second": 75.767, |
|
"eval_steps_per_second": 9.477, |
|
"step": 30090 |
|
}, |
|
{ |
|
"best_epoch": 47, |
|
"best_eval_accuracy": 0.7403669724770642, |
|
"epoch": 51.0, |
|
"step": 30090 |
|
}, |
|
{ |
|
"epoch": 51.69, |
|
"learning_rate": 0.0008328813559322035, |
|
"loss": 0.3946, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7223241590214067, |
|
"eval_loss": 0.7039574384689331, |
|
"eval_runtime": 43.2757, |
|
"eval_samples_per_second": 75.562, |
|
"eval_steps_per_second": 9.451, |
|
"step": 30680 |
|
}, |
|
{ |
|
"best_epoch": 47, |
|
"best_eval_accuracy": 0.7403669724770642, |
|
"epoch": 52.0, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 52.54, |
|
"learning_rate": 0.000748135593220339, |
|
"loss": 0.3857, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7412844036697248, |
|
"eval_loss": 0.6302646398544312, |
|
"eval_runtime": 43.1428, |
|
"eval_samples_per_second": 75.795, |
|
"eval_steps_per_second": 9.48, |
|
"step": 31270 |
|
}, |
|
{ |
|
"best_epoch": 52, |
|
"best_eval_accuracy": 0.7412844036697248, |
|
"epoch": 53.0, |
|
"step": 31270 |
|
}, |
|
{ |
|
"epoch": 53.39, |
|
"learning_rate": 0.0006633898305084746, |
|
"loss": 0.3837, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7269113149847095, |
|
"eval_loss": 0.6580860614776611, |
|
"eval_runtime": 43.2755, |
|
"eval_samples_per_second": 75.562, |
|
"eval_steps_per_second": 9.451, |
|
"step": 31860 |
|
}, |
|
{ |
|
"best_epoch": 52, |
|
"best_eval_accuracy": 0.7412844036697248, |
|
"epoch": 54.0, |
|
"step": 31860 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 0.0005786440677966102, |
|
"loss": 0.3803, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7281345565749235, |
|
"eval_loss": 0.6364992260932922, |
|
"eval_runtime": 43.2674, |
|
"eval_samples_per_second": 75.577, |
|
"eval_steps_per_second": 9.453, |
|
"step": 32450 |
|
}, |
|
{ |
|
"best_epoch": 52, |
|
"best_eval_accuracy": 0.7412844036697248, |
|
"epoch": 55.0, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 55.08, |
|
"learning_rate": 0.0004938983050847458, |
|
"loss": 0.3847, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 55.93, |
|
"learning_rate": 0.0004091525423728814, |
|
"loss": 0.3792, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7302752293577982, |
|
"eval_loss": 0.6349842548370361, |
|
"eval_runtime": 43.2291, |
|
"eval_samples_per_second": 75.643, |
|
"eval_steps_per_second": 9.461, |
|
"step": 33040 |
|
}, |
|
{ |
|
"best_epoch": 52, |
|
"best_eval_accuracy": 0.7412844036697248, |
|
"epoch": 56.0, |
|
"step": 33040 |
|
}, |
|
{ |
|
"epoch": 56.78, |
|
"learning_rate": 0.00032440677966101696, |
|
"loss": 0.3826, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7415902140672783, |
|
"eval_loss": 0.6233869791030884, |
|
"eval_runtime": 43.2253, |
|
"eval_samples_per_second": 75.65, |
|
"eval_steps_per_second": 9.462, |
|
"step": 33630 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.7415902140672783, |
|
"epoch": 57.0, |
|
"step": 33630 |
|
}, |
|
{ |
|
"epoch": 57.63, |
|
"learning_rate": 0.00023966101694915254, |
|
"loss": 0.3784, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.735474006116208, |
|
"eval_loss": 0.6312357187271118, |
|
"eval_runtime": 43.257, |
|
"eval_samples_per_second": 75.595, |
|
"eval_steps_per_second": 9.455, |
|
"step": 34220 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.7415902140672783, |
|
"epoch": 58.0, |
|
"step": 34220 |
|
}, |
|
{ |
|
"epoch": 58.47, |
|
"learning_rate": 0.00015491525423728814, |
|
"loss": 0.373, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7403669724770642, |
|
"eval_loss": 0.6151607632637024, |
|
"eval_runtime": 43.2076, |
|
"eval_samples_per_second": 75.681, |
|
"eval_steps_per_second": 9.466, |
|
"step": 34810 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.7415902140672783, |
|
"epoch": 59.0, |
|
"step": 34810 |
|
}, |
|
{ |
|
"epoch": 59.32, |
|
"learning_rate": 7.016949152542373e-05, |
|
"loss": 0.3713, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.735474006116208, |
|
"eval_loss": 0.6204875707626343, |
|
"eval_runtime": 20.923, |
|
"eval_samples_per_second": 156.287, |
|
"eval_steps_per_second": 19.548, |
|
"step": 35400 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.7415902140672783, |
|
"epoch": 60.0, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 35400, |
|
"total_flos": 2.6355950886279168e+17, |
|
"train_loss": 0.581377860031559, |
|
"train_runtime": 12400.4956, |
|
"train_samples_per_second": 45.613, |
|
"train_steps_per_second": 2.855 |
|
} |
|
], |
|
"max_steps": 35400, |
|
"num_train_epochs": 60, |
|
"total_flos": 2.6355950886279168e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|