{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 500, "global_step": 7400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.218454865161452, "eval_loss": 6.010812759399414, "eval_runtime": 12.8832, "eval_samples_per_second": 90.738, "eval_steps_per_second": 2.872, "step": 148 }, { "epoch": 2.0, "eval_accuracy": 0.2297482956165591, "eval_loss": 5.830421447753906, "eval_runtime": 12.8689, "eval_samples_per_second": 90.839, "eval_steps_per_second": 2.875, "step": 296 }, { "epoch": 3.0, "eval_accuracy": 0.23940862863018553, "eval_loss": 5.686750888824463, "eval_runtime": 12.8635, "eval_samples_per_second": 90.877, "eval_steps_per_second": 2.876, "step": 444 }, { "epoch": 3.38, "learning_rate": 4.6621621621621625e-06, "loss": 5.9726, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.24915536891584797, "eval_loss": 5.57318639755249, "eval_runtime": 12.8537, "eval_samples_per_second": 90.947, "eval_steps_per_second": 2.879, "step": 592 }, { "epoch": 5.0, "eval_accuracy": 0.2586860910214204, "eval_loss": 5.474750518798828, "eval_runtime": 12.849, "eval_samples_per_second": 90.98, "eval_steps_per_second": 2.88, "step": 740 }, { "epoch": 6.0, "eval_accuracy": 0.26733545895222544, "eval_loss": 5.387089729309082, "eval_runtime": 12.8597, "eval_samples_per_second": 90.904, "eval_steps_per_second": 2.877, "step": 888 }, { "epoch": 6.76, "learning_rate": 4.324324324324325e-06, "loss": 5.5397, "step": 1000 }, { "epoch": 7.0, "eval_accuracy": 0.27559599415886843, "eval_loss": 5.308462142944336, "eval_runtime": 12.8451, "eval_samples_per_second": 91.008, "eval_steps_per_second": 2.88, "step": 1036 }, { "epoch": 8.0, "eval_accuracy": 0.2826554682842108, "eval_loss": 5.240093231201172, "eval_runtime": 12.8308, "eval_samples_per_second": 91.109, "eval_steps_per_second": 2.884, "step": 1184 }, { "epoch": 9.0, "eval_accuracy": 0.2887731031443606, "eval_loss": 5.181127071380615, "eval_runtime": 12.8466, "eval_samples_per_second": 90.997, "eval_steps_per_second": 2.88, "step": 1332 }, { "epoch": 10.0, "eval_accuracy": 0.2933094849262514, "eval_loss": 5.127747535705566, "eval_runtime": 12.8511, "eval_samples_per_second": 90.965, "eval_steps_per_second": 2.879, "step": 1480 }, { "epoch": 10.14, "learning_rate": 3.986486486486487e-06, "loss": 5.2883, "step": 1500 }, { "epoch": 11.0, "eval_accuracy": 0.29827790306832225, "eval_loss": 5.07957124710083, "eval_runtime": 12.8494, "eval_samples_per_second": 90.977, "eval_steps_per_second": 2.88, "step": 1628 }, { "epoch": 12.0, "eval_accuracy": 0.3029957401214886, "eval_loss": 5.035754680633545, "eval_runtime": 12.8272, "eval_samples_per_second": 91.135, "eval_steps_per_second": 2.885, "step": 1776 }, { "epoch": 13.0, "eval_accuracy": 0.30672853427344443, "eval_loss": 4.995058059692383, "eval_runtime": 12.8564, "eval_samples_per_second": 90.927, "eval_steps_per_second": 2.878, "step": 1924 }, { "epoch": 13.51, "learning_rate": 3.648648648648649e-06, "loss": 5.1076, "step": 2000 }, { "epoch": 14.0, "eval_accuracy": 0.31034035824454986, "eval_loss": 4.957174777984619, "eval_runtime": 12.8689, "eval_samples_per_second": 90.839, "eval_steps_per_second": 2.875, "step": 2072 }, { "epoch": 15.0, "eval_accuracy": 0.3138916971252301, "eval_loss": 4.919981479644775, "eval_runtime": 12.8448, "eval_samples_per_second": 91.009, "eval_steps_per_second": 2.881, "step": 2220 }, { "epoch": 16.0, "eval_accuracy": 0.3172183770986166, "eval_loss": 4.887693405151367, "eval_runtime": 12.8684, "eval_samples_per_second": 90.842, "eval_steps_per_second": 2.875, "step": 2368 }, { "epoch": 16.89, "learning_rate": 3.310810810810811e-06, "loss": 4.9674, "step": 2500 }, { "epoch": 17.0, "eval_accuracy": 0.32033767961911674, "eval_loss": 4.855071544647217, "eval_runtime": 12.87, "eval_samples_per_second": 90.832, "eval_steps_per_second": 2.875, "step": 2516 }, { "epoch": 18.0, "eval_accuracy": 0.32318911959630525, "eval_loss": 4.825829982757568, "eval_runtime": 12.8411, "eval_samples_per_second": 91.036, "eval_steps_per_second": 2.881, "step": 2664 }, { "epoch": 19.0, "eval_accuracy": 0.3264725959336738, "eval_loss": 4.800779342651367, "eval_runtime": 12.8589, "eval_samples_per_second": 90.91, "eval_steps_per_second": 2.877, "step": 2812 }, { "epoch": 20.0, "eval_accuracy": 0.3288919995506822, "eval_loss": 4.774311542510986, "eval_runtime": 12.8368, "eval_samples_per_second": 91.066, "eval_steps_per_second": 2.882, "step": 2960 }, { "epoch": 20.27, "learning_rate": 2.9729729729729736e-06, "loss": 4.858, "step": 3000 }, { "epoch": 21.0, "eval_accuracy": 0.3316915951646491, "eval_loss": 4.749689102172852, "eval_runtime": 12.8577, "eval_samples_per_second": 90.919, "eval_steps_per_second": 2.878, "step": 3108 }, { "epoch": 22.0, "eval_accuracy": 0.33381721405673503, "eval_loss": 4.727055549621582, "eval_runtime": 12.849, "eval_samples_per_second": 90.98, "eval_steps_per_second": 2.88, "step": 3256 }, { "epoch": 23.0, "eval_accuracy": 0.33634030640018664, "eval_loss": 4.705799102783203, "eval_runtime": 12.8414, "eval_samples_per_second": 91.034, "eval_steps_per_second": 2.881, "step": 3404 }, { "epoch": 23.65, "learning_rate": 2.6351351351351353e-06, "loss": 4.76, "step": 3500 }, { "epoch": 24.0, "eval_accuracy": 0.3384486438378654, "eval_loss": 4.686633586883545, "eval_runtime": 12.852, "eval_samples_per_second": 90.959, "eval_steps_per_second": 2.879, "step": 3552 }, { "epoch": 25.0, "eval_accuracy": 0.3403841667314721, "eval_loss": 4.668373107910156, "eval_runtime": 12.8631, "eval_samples_per_second": 90.88, "eval_steps_per_second": 2.876, "step": 3700 }, { "epoch": 26.0, "eval_accuracy": 0.34248386344194726, "eval_loss": 4.648622512817383, "eval_runtime": 12.8633, "eval_samples_per_second": 90.879, "eval_steps_per_second": 2.876, "step": 3848 }, { "epoch": 27.0, "eval_accuracy": 0.3443416197907216, "eval_loss": 4.632272243499756, "eval_runtime": 12.8435, "eval_samples_per_second": 91.019, "eval_steps_per_second": 2.881, "step": 3996 }, { "epoch": 27.03, "learning_rate": 2.297297297297298e-06, "loss": 4.6863, "step": 4000 }, { "epoch": 28.0, "eval_accuracy": 0.34594015432338787, "eval_loss": 4.615506172180176, "eval_runtime": 12.8661, "eval_samples_per_second": 90.859, "eval_steps_per_second": 2.876, "step": 4144 }, { "epoch": 29.0, "eval_accuracy": 0.34756461103766495, "eval_loss": 4.601576805114746, "eval_runtime": 12.8739, "eval_samples_per_second": 90.804, "eval_steps_per_second": 2.874, "step": 4292 }, { "epoch": 30.0, "eval_accuracy": 0.34898169029905557, "eval_loss": 4.5874128341674805, "eval_runtime": 12.8754, "eval_samples_per_second": 90.793, "eval_steps_per_second": 2.874, "step": 4440 }, { "epoch": 30.41, "learning_rate": 1.9594594594594595e-06, "loss": 4.6168, "step": 4500 }, { "epoch": 31.0, "eval_accuracy": 0.3504592546508714, "eval_loss": 4.574199676513672, "eval_runtime": 12.8467, "eval_samples_per_second": 90.996, "eval_steps_per_second": 2.88, "step": 4588 }, { "epoch": 32.0, "eval_accuracy": 0.35180720809463323, "eval_loss": 4.562849998474121, "eval_runtime": 12.8628, "eval_samples_per_second": 90.882, "eval_steps_per_second": 2.877, "step": 4736 }, { "epoch": 33.0, "eval_accuracy": 0.3534230240817067, "eval_loss": 4.550704479217529, "eval_runtime": 12.8788, "eval_samples_per_second": 90.769, "eval_steps_per_second": 2.873, "step": 4884 }, { "epoch": 33.78, "learning_rate": 1.6216216216216219e-06, "loss": 4.5684, "step": 5000 }, { "epoch": 34.0, "eval_accuracy": 0.35426117462045603, "eval_loss": 4.541166305541992, "eval_runtime": 12.9077, "eval_samples_per_second": 90.566, "eval_steps_per_second": 2.866, "step": 5032 }, { "epoch": 35.0, "eval_accuracy": 0.3557905833354935, "eval_loss": 4.531555652618408, "eval_runtime": 12.8613, "eval_samples_per_second": 90.893, "eval_steps_per_second": 2.877, "step": 5180 }, { "epoch": 36.0, "eval_accuracy": 0.3569570815079797, "eval_loss": 4.520727157592773, "eval_runtime": 12.9563, "eval_samples_per_second": 90.226, "eval_steps_per_second": 2.856, "step": 5328 }, { "epoch": 37.0, "eval_accuracy": 0.3579594058635975, "eval_loss": 4.513218879699707, "eval_runtime": 12.9069, "eval_samples_per_second": 90.572, "eval_steps_per_second": 2.867, "step": 5476 }, { "epoch": 37.16, "learning_rate": 1.2837837837837838e-06, "loss": 4.5277, "step": 5500 }, { "epoch": 38.0, "eval_accuracy": 0.3587629934935324, "eval_loss": 4.505405426025391, "eval_runtime": 12.8784, "eval_samples_per_second": 90.772, "eval_steps_per_second": 2.873, "step": 5624 }, { "epoch": 39.0, "eval_accuracy": 0.35967891057711415, "eval_loss": 4.499256610870361, "eval_runtime": 12.8813, "eval_samples_per_second": 90.752, "eval_steps_per_second": 2.872, "step": 5772 }, { "epoch": 40.0, "eval_accuracy": 0.3604306538438275, "eval_loss": 4.493128776550293, "eval_runtime": 12.8448, "eval_samples_per_second": 91.009, "eval_steps_per_second": 2.881, "step": 5920 }, { "epoch": 40.54, "learning_rate": 9.459459459459461e-07, "loss": 4.4886, "step": 6000 }, { "epoch": 41.0, "eval_accuracy": 0.3610959898385048, "eval_loss": 4.487875461578369, "eval_runtime": 12.8279, "eval_samples_per_second": 91.13, "eval_steps_per_second": 2.884, "step": 6068 }, { "epoch": 42.0, "eval_accuracy": 0.3616749185611461, "eval_loss": 4.482149124145508, "eval_runtime": 12.9187, "eval_samples_per_second": 90.489, "eval_steps_per_second": 2.864, "step": 6216 }, { "epoch": 43.0, "eval_accuracy": 0.3622365658293802, "eval_loss": 4.477830410003662, "eval_runtime": 12.8586, "eval_samples_per_second": 90.912, "eval_steps_per_second": 2.877, "step": 6364 }, { "epoch": 43.92, "learning_rate": 6.081081081081082e-07, "loss": 4.4727, "step": 6500 }, { "epoch": 44.0, "eval_accuracy": 0.36262539855354226, "eval_loss": 4.474149703979492, "eval_runtime": 12.8383, "eval_samples_per_second": 91.056, "eval_steps_per_second": 2.882, "step": 6512 }, { "epoch": 45.0, "eval_accuracy": 0.3629969498232971, "eval_loss": 4.471028804779053, "eval_runtime": 12.8363, "eval_samples_per_second": 91.07, "eval_steps_per_second": 2.882, "step": 6660 }, { "epoch": 46.0, "eval_accuracy": 0.36328209382101595, "eval_loss": 4.469077110290527, "eval_runtime": 12.8487, "eval_samples_per_second": 90.982, "eval_steps_per_second": 2.88, "step": 6808 }, { "epoch": 47.0, "eval_accuracy": 0.3634462676378844, "eval_loss": 4.4663591384887695, "eval_runtime": 12.8695, "eval_samples_per_second": 90.835, "eval_steps_per_second": 2.875, "step": 6956 }, { "epoch": 47.3, "learning_rate": 2.702702702702703e-07, "loss": 4.4542, "step": 7000 }, { "epoch": 48.0, "eval_accuracy": 0.36362772290916, "eval_loss": 4.465246200561523, "eval_runtime": 12.8565, "eval_samples_per_second": 90.926, "eval_steps_per_second": 2.878, "step": 7104 }, { "epoch": 49.0, "eval_accuracy": 0.36365364509077086, "eval_loss": 4.464395046234131, "eval_runtime": 12.8452, "eval_samples_per_second": 91.007, "eval_steps_per_second": 2.88, "step": 7252 }, { "epoch": 50.0, "eval_accuracy": 0.36367956727238165, "eval_loss": 4.464205741882324, "eval_runtime": 12.8483, "eval_samples_per_second": 90.984, "eval_steps_per_second": 2.88, "step": 7400 }, { "epoch": 50.0, "step": 7400, "total_flos": 1.00265577216e+17, "train_loss": 4.855868909681165, "train_runtime": 6356.4736, "train_samples_per_second": 37.112, "train_steps_per_second": 1.164 } ], "logging_steps": 500, "max_steps": 7400, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.00265577216e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }