{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3570820987224579, "eval_runtime": 4.3773, "eval_samples_per_second": 63.281, "eval_steps_per_second": 7.996, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.4729241877256318, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.048664529914529915, "loss": 6.8951, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 1.4464904069900513, "eval_runtime": 4.4679, "eval_samples_per_second": 61.998, "eval_steps_per_second": 7.834, "step": 624 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.4737317562103271, "eval_runtime": 4.4886, "eval_samples_per_second": 61.712, "eval_steps_per_second": 7.798, "step": 936 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.04732905982905983, "loss": 4.4457, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.4591030478477478, "eval_runtime": 4.4956, "eval_samples_per_second": 61.615, "eval_steps_per_second": 7.785, "step": 1248 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.04599358974358975, "loss": 3.2957, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.40218400955200195, "eval_runtime": 4.4894, "eval_samples_per_second": 61.7, "eval_steps_per_second": 7.796, "step": 1560 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.2355494499206543, "eval_runtime": 4.4719, "eval_samples_per_second": 61.942, "eval_steps_per_second": 7.827, "step": 1872 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.044658119658119666, "loss": 3.7646, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 9.376609802246094, "eval_runtime": 4.487, "eval_samples_per_second": 61.733, "eval_steps_per_second": 7.8, "step": 2184 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3763505816459656, "eval_runtime": 4.4865, "eval_samples_per_second": 61.741, "eval_steps_per_second": 7.801, "step": 2496 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.04332264957264958, "loss": 3.3825, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 4.616496562957764, "eval_runtime": 4.4867, "eval_samples_per_second": 61.738, "eval_steps_per_second": 7.801, "step": 2808 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.04198717948717949, "loss": 2.7848, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 3.261960506439209, "eval_runtime": 4.486, "eval_samples_per_second": 61.748, "eval_steps_per_second": 7.802, "step": 3120 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 2.300961971282959, "eval_runtime": 4.482, "eval_samples_per_second": 61.803, "eval_steps_per_second": 7.809, "step": 3432 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.0406517094017094, "loss": 2.3837, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.34843170642852783, "eval_runtime": 4.4826, "eval_samples_per_second": 61.794, "eval_steps_per_second": 7.808, "step": 3744 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.039316239316239315, "loss": 2.1666, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.43984633684158325, "eval_runtime": 4.4884, "eval_samples_per_second": 61.714, "eval_steps_per_second": 7.798, "step": 4056 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.4702966213226318, "eval_runtime": 4.4859, "eval_samples_per_second": 61.749, "eval_steps_per_second": 7.802, "step": 4368 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.037980769230769235, "loss": 2.107, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 1.0550079345703125, "eval_runtime": 4.4897, "eval_samples_per_second": 61.697, "eval_steps_per_second": 7.796, "step": 4680 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.000760793685913, "eval_runtime": 4.4924, "eval_samples_per_second": 61.659, "eval_steps_per_second": 7.791, "step": 4992 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.03664529914529915, "loss": 2.161, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7809963822364807, "eval_runtime": 4.4932, "eval_samples_per_second": 61.649, "eval_steps_per_second": 7.79, "step": 5304 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.035309829059829066, "loss": 1.927, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.84182208776474, "eval_runtime": 4.4906, "eval_samples_per_second": 61.684, "eval_steps_per_second": 7.794, "step": 5616 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.5165618062019348, "eval_runtime": 4.4892, "eval_samples_per_second": 61.703, "eval_steps_per_second": 7.796, "step": 5928 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.03397435897435898, "loss": 1.8072, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.34929215908050537, "eval_runtime": 4.4926, "eval_samples_per_second": 61.657, "eval_steps_per_second": 7.791, "step": 6240 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.03263888888888889, "loss": 1.7187, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 1.4220978021621704, "eval_runtime": 4.4942, "eval_samples_per_second": 61.636, "eval_steps_per_second": 7.788, "step": 6552 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 2.9355978965759277, "eval_runtime": 4.4848, "eval_samples_per_second": 61.764, "eval_steps_per_second": 7.804, "step": 6864 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0313034188034188, "loss": 2.1333, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.8474471569061279, "eval_runtime": 4.4899, "eval_samples_per_second": 61.693, "eval_steps_per_second": 7.795, "step": 7176 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 5.1220269203186035, "eval_runtime": 4.4961, "eval_samples_per_second": 61.609, "eval_steps_per_second": 7.785, "step": 7488 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.02996794871794872, "loss": 2.0017, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3589232265949249, "eval_runtime": 4.4864, "eval_samples_per_second": 61.742, "eval_steps_per_second": 7.801, "step": 7800 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.02863247863247863, "loss": 1.6518, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.39960214495658875, "eval_runtime": 4.4875, "eval_samples_per_second": 61.726, "eval_steps_per_second": 7.799, "step": 8112 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.535088062286377, "eval_runtime": 4.4855, "eval_samples_per_second": 61.755, "eval_steps_per_second": 7.803, "step": 8424 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.027297008547008547, "loss": 1.5012, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3478943407535553, "eval_runtime": 4.4915, "eval_samples_per_second": 61.672, "eval_steps_per_second": 7.792, "step": 8736 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.025961538461538466, "loss": 1.4194, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3492168188095093, "eval_runtime": 4.4867, "eval_samples_per_second": 61.738, "eval_steps_per_second": 7.801, "step": 9048 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6942079663276672, "eval_runtime": 4.4877, "eval_samples_per_second": 61.725, "eval_steps_per_second": 7.799, "step": 9360 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.02462606837606838, "loss": 1.3048, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.5088778734207153, "eval_runtime": 4.4883, "eval_samples_per_second": 61.716, "eval_steps_per_second": 7.798, "step": 9672 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 1.1509357690811157, "eval_runtime": 4.4831, "eval_samples_per_second": 61.787, "eval_steps_per_second": 7.807, "step": 9984 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.02329059829059829, "loss": 1.2972, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.1207164525985718, "eval_runtime": 4.4926, "eval_samples_per_second": 61.657, "eval_steps_per_second": 7.791, "step": 10296 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.021955128205128207, "loss": 1.1774, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 1.4442700147628784, "eval_runtime": 4.4967, "eval_samples_per_second": 61.601, "eval_steps_per_second": 7.783, "step": 10608 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 2.3752989768981934, "eval_runtime": 4.4944, "eval_samples_per_second": 61.633, "eval_steps_per_second": 7.788, "step": 10920 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.02061965811965812, "loss": 1.492, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.36219045519828796, "eval_runtime": 4.4975, "eval_samples_per_second": 61.59, "eval_steps_per_second": 7.782, "step": 11232 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.019284188034188035, "loss": 1.3617, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 1.356360673904419, "eval_runtime": 4.4982, "eval_samples_per_second": 61.58, "eval_steps_per_second": 7.781, "step": 11544 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6943920254707336, "eval_runtime": 4.4948, "eval_samples_per_second": 61.627, "eval_steps_per_second": 7.787, "step": 11856 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.01794871794871795, "loss": 1.4582, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.5509728193283081, "eval_runtime": 4.502, "eval_samples_per_second": 61.528, "eval_steps_per_second": 7.774, "step": 12168 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3659651577472687, "eval_runtime": 4.4965, "eval_samples_per_second": 61.603, "eval_steps_per_second": 7.784, "step": 12480 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.016613247863247863, "loss": 1.0904, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3479963541030884, "eval_runtime": 4.4948, "eval_samples_per_second": 61.627, "eval_steps_per_second": 7.787, "step": 12792 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.015277777777777779, "loss": 0.9409, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.48353341221809387, "eval_runtime": 4.4928, "eval_samples_per_second": 61.654, "eval_steps_per_second": 7.79, "step": 13104 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6226072311401367, "eval_runtime": 4.4898, "eval_samples_per_second": 61.696, "eval_steps_per_second": 7.795, "step": 13416 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.013942307692307693, "loss": 0.9404, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.402121365070343, "eval_runtime": 4.4924, "eval_samples_per_second": 61.659, "eval_steps_per_second": 7.791, "step": 13728 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.012606837606837607, "loss": 0.8008, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.5381026268005371, "eval_runtime": 4.4922, "eval_samples_per_second": 61.663, "eval_steps_per_second": 7.791, "step": 14040 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3887210786342621, "eval_runtime": 4.4883, "eval_samples_per_second": 61.716, "eval_steps_per_second": 7.798, "step": 14352 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.011271367521367523, "loss": 0.841, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3763456344604492, "eval_runtime": 4.4925, "eval_samples_per_second": 61.658, "eval_steps_per_second": 7.791, "step": 14664 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.36665645241737366, "eval_runtime": 4.492, "eval_samples_per_second": 61.665, "eval_steps_per_second": 7.792, "step": 14976 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.009935897435897435, "loss": 0.6912, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.44896015524864197, "eval_runtime": 4.4907, "eval_samples_per_second": 61.684, "eval_steps_per_second": 7.794, "step": 15288 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.008600427350427351, "loss": 0.6381, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.7096900939941406, "eval_runtime": 4.489, "eval_samples_per_second": 61.707, "eval_steps_per_second": 7.797, "step": 15600 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.36391106247901917, "eval_runtime": 4.4886, "eval_samples_per_second": 61.712, "eval_steps_per_second": 7.798, "step": 15912 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.007264957264957266, "loss": 0.5792, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.379825621843338, "eval_runtime": 4.4914, "eval_samples_per_second": 61.673, "eval_steps_per_second": 7.793, "step": 16224 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.005929487179487179, "loss": 0.53, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.38536888360977173, "eval_runtime": 4.4972, "eval_samples_per_second": 61.594, "eval_steps_per_second": 7.783, "step": 16536 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3884303867816925, "eval_runtime": 4.493, "eval_samples_per_second": 61.652, "eval_steps_per_second": 7.79, "step": 16848 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.004594017094017094, "loss": 0.4977, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3898393511772156, "eval_runtime": 4.497, "eval_samples_per_second": 61.597, "eval_steps_per_second": 7.783, "step": 17160 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3479559123516083, "eval_runtime": 4.4975, "eval_samples_per_second": 61.59, "eval_steps_per_second": 7.782, "step": 17472 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.003258547008547009, "loss": 0.4596, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3541661202907562, "eval_runtime": 4.494, "eval_samples_per_second": 61.638, "eval_steps_per_second": 7.788, "step": 17784 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.0019230769230769232, "loss": 0.4228, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.3539113700389862, "eval_runtime": 4.4902, "eval_samples_per_second": 61.689, "eval_steps_per_second": 7.795, "step": 18096 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.34990349411964417, "eval_runtime": 4.4855, "eval_samples_per_second": 61.755, "eval_steps_per_second": 7.803, "step": 18408 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 0.0005876068376068377, "loss": 0.3933, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.353112131357193, "eval_runtime": 4.4599, "eval_samples_per_second": 62.109, "eval_steps_per_second": 7.848, "step": 18720 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 7.25686341470208e+16, "train_loss": 1.7262601273691551, "train_runtime": 4126.2043, "train_samples_per_second": 36.208, "train_steps_per_second": 4.537 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 7.25686341470208e+16, "trial_name": null, "trial_params": null }