|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3570820987224579, |
|
"eval_runtime": 4.3773, |
|
"eval_samples_per_second": 63.281, |
|
"eval_steps_per_second": 7.996, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.048664529914529915, |
|
"loss": 6.8951, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 1.4464904069900513, |
|
"eval_runtime": 4.4679, |
|
"eval_samples_per_second": 61.998, |
|
"eval_steps_per_second": 7.834, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.4737317562103271, |
|
"eval_runtime": 4.4886, |
|
"eval_samples_per_second": 61.712, |
|
"eval_steps_per_second": 7.798, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.04732905982905983, |
|
"loss": 4.4457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.4591030478477478, |
|
"eval_runtime": 4.4956, |
|
"eval_samples_per_second": 61.615, |
|
"eval_steps_per_second": 7.785, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.04599358974358975, |
|
"loss": 3.2957, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.40218400955200195, |
|
"eval_runtime": 4.4894, |
|
"eval_samples_per_second": 61.7, |
|
"eval_steps_per_second": 7.796, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.2355494499206543, |
|
"eval_runtime": 4.4719, |
|
"eval_samples_per_second": 61.942, |
|
"eval_steps_per_second": 7.827, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.044658119658119666, |
|
"loss": 3.7646, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 9.376609802246094, |
|
"eval_runtime": 4.487, |
|
"eval_samples_per_second": 61.733, |
|
"eval_steps_per_second": 7.8, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3763505816459656, |
|
"eval_runtime": 4.4865, |
|
"eval_samples_per_second": 61.741, |
|
"eval_steps_per_second": 7.801, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.04332264957264958, |
|
"loss": 3.3825, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 4.616496562957764, |
|
"eval_runtime": 4.4867, |
|
"eval_samples_per_second": 61.738, |
|
"eval_steps_per_second": 7.801, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.04198717948717949, |
|
"loss": 2.7848, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 3.261960506439209, |
|
"eval_runtime": 4.486, |
|
"eval_samples_per_second": 61.748, |
|
"eval_steps_per_second": 7.802, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 2.300961971282959, |
|
"eval_runtime": 4.482, |
|
"eval_samples_per_second": 61.803, |
|
"eval_steps_per_second": 7.809, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0406517094017094, |
|
"loss": 2.3837, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34843170642852783, |
|
"eval_runtime": 4.4826, |
|
"eval_samples_per_second": 61.794, |
|
"eval_steps_per_second": 7.808, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.039316239316239315, |
|
"loss": 2.1666, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.43984633684158325, |
|
"eval_runtime": 4.4884, |
|
"eval_samples_per_second": 61.714, |
|
"eval_steps_per_second": 7.798, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.4702966213226318, |
|
"eval_runtime": 4.4859, |
|
"eval_samples_per_second": 61.749, |
|
"eval_steps_per_second": 7.802, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.037980769230769235, |
|
"loss": 2.107, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 1.0550079345703125, |
|
"eval_runtime": 4.4897, |
|
"eval_samples_per_second": 61.697, |
|
"eval_steps_per_second": 7.796, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.000760793685913, |
|
"eval_runtime": 4.4924, |
|
"eval_samples_per_second": 61.659, |
|
"eval_steps_per_second": 7.791, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.03664529914529915, |
|
"loss": 2.161, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.7809963822364807, |
|
"eval_runtime": 4.4932, |
|
"eval_samples_per_second": 61.649, |
|
"eval_steps_per_second": 7.79, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.035309829059829066, |
|
"loss": 1.927, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.84182208776474, |
|
"eval_runtime": 4.4906, |
|
"eval_samples_per_second": 61.684, |
|
"eval_steps_per_second": 7.794, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.5165618062019348, |
|
"eval_runtime": 4.4892, |
|
"eval_samples_per_second": 61.703, |
|
"eval_steps_per_second": 7.796, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.03397435897435898, |
|
"loss": 1.8072, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34929215908050537, |
|
"eval_runtime": 4.4926, |
|
"eval_samples_per_second": 61.657, |
|
"eval_steps_per_second": 7.791, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.03263888888888889, |
|
"loss": 1.7187, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 1.4220978021621704, |
|
"eval_runtime": 4.4942, |
|
"eval_samples_per_second": 61.636, |
|
"eval_steps_per_second": 7.788, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 2.9355978965759277, |
|
"eval_runtime": 4.4848, |
|
"eval_samples_per_second": 61.764, |
|
"eval_steps_per_second": 7.804, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0313034188034188, |
|
"loss": 2.1333, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.8474471569061279, |
|
"eval_runtime": 4.4899, |
|
"eval_samples_per_second": 61.693, |
|
"eval_steps_per_second": 7.795, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 5.1220269203186035, |
|
"eval_runtime": 4.4961, |
|
"eval_samples_per_second": 61.609, |
|
"eval_steps_per_second": 7.785, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.02996794871794872, |
|
"loss": 2.0017, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3589232265949249, |
|
"eval_runtime": 4.4864, |
|
"eval_samples_per_second": 61.742, |
|
"eval_steps_per_second": 7.801, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.02863247863247863, |
|
"loss": 1.6518, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.39960214495658875, |
|
"eval_runtime": 4.4875, |
|
"eval_samples_per_second": 61.726, |
|
"eval_steps_per_second": 7.799, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.535088062286377, |
|
"eval_runtime": 4.4855, |
|
"eval_samples_per_second": 61.755, |
|
"eval_steps_per_second": 7.803, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.027297008547008547, |
|
"loss": 1.5012, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478943407535553, |
|
"eval_runtime": 4.4915, |
|
"eval_samples_per_second": 61.672, |
|
"eval_steps_per_second": 7.792, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.025961538461538466, |
|
"loss": 1.4194, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3492168188095093, |
|
"eval_runtime": 4.4867, |
|
"eval_samples_per_second": 61.738, |
|
"eval_steps_per_second": 7.801, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.6942079663276672, |
|
"eval_runtime": 4.4877, |
|
"eval_samples_per_second": 61.725, |
|
"eval_steps_per_second": 7.799, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.02462606837606838, |
|
"loss": 1.3048, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.5088778734207153, |
|
"eval_runtime": 4.4883, |
|
"eval_samples_per_second": 61.716, |
|
"eval_steps_per_second": 7.798, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 1.1509357690811157, |
|
"eval_runtime": 4.4831, |
|
"eval_samples_per_second": 61.787, |
|
"eval_steps_per_second": 7.807, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.02329059829059829, |
|
"loss": 1.2972, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.1207164525985718, |
|
"eval_runtime": 4.4926, |
|
"eval_samples_per_second": 61.657, |
|
"eval_steps_per_second": 7.791, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.021955128205128207, |
|
"loss": 1.1774, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 1.4442700147628784, |
|
"eval_runtime": 4.4967, |
|
"eval_samples_per_second": 61.601, |
|
"eval_steps_per_second": 7.783, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 2.3752989768981934, |
|
"eval_runtime": 4.4944, |
|
"eval_samples_per_second": 61.633, |
|
"eval_steps_per_second": 7.788, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.02061965811965812, |
|
"loss": 1.492, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.36219045519828796, |
|
"eval_runtime": 4.4975, |
|
"eval_samples_per_second": 61.59, |
|
"eval_steps_per_second": 7.782, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.019284188034188035, |
|
"loss": 1.3617, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 1.356360673904419, |
|
"eval_runtime": 4.4982, |
|
"eval_samples_per_second": 61.58, |
|
"eval_steps_per_second": 7.781, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.6943920254707336, |
|
"eval_runtime": 4.4948, |
|
"eval_samples_per_second": 61.627, |
|
"eval_steps_per_second": 7.787, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.01794871794871795, |
|
"loss": 1.4582, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.5509728193283081, |
|
"eval_runtime": 4.502, |
|
"eval_samples_per_second": 61.528, |
|
"eval_steps_per_second": 7.774, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3659651577472687, |
|
"eval_runtime": 4.4965, |
|
"eval_samples_per_second": 61.603, |
|
"eval_steps_per_second": 7.784, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.016613247863247863, |
|
"loss": 1.0904, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3479963541030884, |
|
"eval_runtime": 4.4948, |
|
"eval_samples_per_second": 61.627, |
|
"eval_steps_per_second": 7.787, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.015277777777777779, |
|
"loss": 0.9409, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.48353341221809387, |
|
"eval_runtime": 4.4928, |
|
"eval_samples_per_second": 61.654, |
|
"eval_steps_per_second": 7.79, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.6226072311401367, |
|
"eval_runtime": 4.4898, |
|
"eval_samples_per_second": 61.696, |
|
"eval_steps_per_second": 7.795, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.013942307692307693, |
|
"loss": 0.9404, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.402121365070343, |
|
"eval_runtime": 4.4924, |
|
"eval_samples_per_second": 61.659, |
|
"eval_steps_per_second": 7.791, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.012606837606837607, |
|
"loss": 0.8008, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.5381026268005371, |
|
"eval_runtime": 4.4922, |
|
"eval_samples_per_second": 61.663, |
|
"eval_steps_per_second": 7.791, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3887210786342621, |
|
"eval_runtime": 4.4883, |
|
"eval_samples_per_second": 61.716, |
|
"eval_steps_per_second": 7.798, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.011271367521367523, |
|
"loss": 0.841, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3763456344604492, |
|
"eval_runtime": 4.4925, |
|
"eval_samples_per_second": 61.658, |
|
"eval_steps_per_second": 7.791, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.36665645241737366, |
|
"eval_runtime": 4.492, |
|
"eval_samples_per_second": 61.665, |
|
"eval_steps_per_second": 7.792, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.009935897435897435, |
|
"loss": 0.6912, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.44896015524864197, |
|
"eval_runtime": 4.4907, |
|
"eval_samples_per_second": 61.684, |
|
"eval_steps_per_second": 7.794, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.008600427350427351, |
|
"loss": 0.6381, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.7096900939941406, |
|
"eval_runtime": 4.489, |
|
"eval_samples_per_second": 61.707, |
|
"eval_steps_per_second": 7.797, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.36391106247901917, |
|
"eval_runtime": 4.4886, |
|
"eval_samples_per_second": 61.712, |
|
"eval_steps_per_second": 7.798, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.007264957264957266, |
|
"loss": 0.5792, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.379825621843338, |
|
"eval_runtime": 4.4914, |
|
"eval_samples_per_second": 61.673, |
|
"eval_steps_per_second": 7.793, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.005929487179487179, |
|
"loss": 0.53, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.38536888360977173, |
|
"eval_runtime": 4.4972, |
|
"eval_samples_per_second": 61.594, |
|
"eval_steps_per_second": 7.783, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3884303867816925, |
|
"eval_runtime": 4.493, |
|
"eval_samples_per_second": 61.652, |
|
"eval_steps_per_second": 7.79, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.004594017094017094, |
|
"loss": 0.4977, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3898393511772156, |
|
"eval_runtime": 4.497, |
|
"eval_samples_per_second": 61.597, |
|
"eval_steps_per_second": 7.783, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3479559123516083, |
|
"eval_runtime": 4.4975, |
|
"eval_samples_per_second": 61.59, |
|
"eval_steps_per_second": 7.782, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.003258547008547009, |
|
"loss": 0.4596, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3541661202907562, |
|
"eval_runtime": 4.494, |
|
"eval_samples_per_second": 61.638, |
|
"eval_steps_per_second": 7.788, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0019230769230769232, |
|
"loss": 0.4228, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3539113700389862, |
|
"eval_runtime": 4.4902, |
|
"eval_samples_per_second": 61.689, |
|
"eval_steps_per_second": 7.795, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34990349411964417, |
|
"eval_runtime": 4.4855, |
|
"eval_samples_per_second": 61.755, |
|
"eval_steps_per_second": 7.803, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 0.0005876068376068377, |
|
"loss": 0.3933, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.353112131357193, |
|
"eval_runtime": 4.4599, |
|
"eval_samples_per_second": 62.109, |
|
"eval_steps_per_second": 7.848, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 7.25686341470208e+16, |
|
"train_loss": 1.7262601273691551, |
|
"train_runtime": 4126.2043, |
|
"train_samples_per_second": 36.208, |
|
"train_steps_per_second": 4.537 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 7.25686341470208e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|