|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 7400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.218454865161452, |
|
"eval_loss": 6.010812759399414, |
|
"eval_runtime": 12.8832, |
|
"eval_samples_per_second": 90.738, |
|
"eval_steps_per_second": 2.872, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.2297482956165591, |
|
"eval_loss": 5.830421447753906, |
|
"eval_runtime": 12.8689, |
|
"eval_samples_per_second": 90.839, |
|
"eval_steps_per_second": 2.875, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.23940862863018553, |
|
"eval_loss": 5.686750888824463, |
|
"eval_runtime": 12.8635, |
|
"eval_samples_per_second": 90.877, |
|
"eval_steps_per_second": 2.876, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.6621621621621625e-06, |
|
"loss": 5.9726, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.24915536891584797, |
|
"eval_loss": 5.57318639755249, |
|
"eval_runtime": 12.8537, |
|
"eval_samples_per_second": 90.947, |
|
"eval_steps_per_second": 2.879, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2586860910214204, |
|
"eval_loss": 5.474750518798828, |
|
"eval_runtime": 12.849, |
|
"eval_samples_per_second": 90.98, |
|
"eval_steps_per_second": 2.88, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.26733545895222544, |
|
"eval_loss": 5.387089729309082, |
|
"eval_runtime": 12.8597, |
|
"eval_samples_per_second": 90.904, |
|
"eval_steps_per_second": 2.877, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 5.5397, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.27559599415886843, |
|
"eval_loss": 5.308462142944336, |
|
"eval_runtime": 12.8451, |
|
"eval_samples_per_second": 91.008, |
|
"eval_steps_per_second": 2.88, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2826554682842108, |
|
"eval_loss": 5.240093231201172, |
|
"eval_runtime": 12.8308, |
|
"eval_samples_per_second": 91.109, |
|
"eval_steps_per_second": 2.884, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.2887731031443606, |
|
"eval_loss": 5.181127071380615, |
|
"eval_runtime": 12.8466, |
|
"eval_samples_per_second": 90.997, |
|
"eval_steps_per_second": 2.88, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2933094849262514, |
|
"eval_loss": 5.127747535705566, |
|
"eval_runtime": 12.8511, |
|
"eval_samples_per_second": 90.965, |
|
"eval_steps_per_second": 2.879, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 3.986486486486487e-06, |
|
"loss": 5.2883, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.29827790306832225, |
|
"eval_loss": 5.07957124710083, |
|
"eval_runtime": 12.8494, |
|
"eval_samples_per_second": 90.977, |
|
"eval_steps_per_second": 2.88, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3029957401214886, |
|
"eval_loss": 5.035754680633545, |
|
"eval_runtime": 12.8272, |
|
"eval_samples_per_second": 91.135, |
|
"eval_steps_per_second": 2.885, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.30672853427344443, |
|
"eval_loss": 4.995058059692383, |
|
"eval_runtime": 12.8564, |
|
"eval_samples_per_second": 90.927, |
|
"eval_steps_per_second": 2.878, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.648648648648649e-06, |
|
"loss": 5.1076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.31034035824454986, |
|
"eval_loss": 4.957174777984619, |
|
"eval_runtime": 12.8689, |
|
"eval_samples_per_second": 90.839, |
|
"eval_steps_per_second": 2.875, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.3138916971252301, |
|
"eval_loss": 4.919981479644775, |
|
"eval_runtime": 12.8448, |
|
"eval_samples_per_second": 91.009, |
|
"eval_steps_per_second": 2.881, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.3172183770986166, |
|
"eval_loss": 4.887693405151367, |
|
"eval_runtime": 12.8684, |
|
"eval_samples_per_second": 90.842, |
|
"eval_steps_per_second": 2.875, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 3.310810810810811e-06, |
|
"loss": 4.9674, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.32033767961911674, |
|
"eval_loss": 4.855071544647217, |
|
"eval_runtime": 12.87, |
|
"eval_samples_per_second": 90.832, |
|
"eval_steps_per_second": 2.875, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.32318911959630525, |
|
"eval_loss": 4.825829982757568, |
|
"eval_runtime": 12.8411, |
|
"eval_samples_per_second": 91.036, |
|
"eval_steps_per_second": 2.881, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.3264725959336738, |
|
"eval_loss": 4.800779342651367, |
|
"eval_runtime": 12.8589, |
|
"eval_samples_per_second": 90.91, |
|
"eval_steps_per_second": 2.877, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.3288919995506822, |
|
"eval_loss": 4.774311542510986, |
|
"eval_runtime": 12.8368, |
|
"eval_samples_per_second": 91.066, |
|
"eval_steps_per_second": 2.882, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 2.9729729729729736e-06, |
|
"loss": 4.858, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.3316915951646491, |
|
"eval_loss": 4.749689102172852, |
|
"eval_runtime": 12.8577, |
|
"eval_samples_per_second": 90.919, |
|
"eval_steps_per_second": 2.878, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.33381721405673503, |
|
"eval_loss": 4.727055549621582, |
|
"eval_runtime": 12.849, |
|
"eval_samples_per_second": 90.98, |
|
"eval_steps_per_second": 2.88, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.33634030640018664, |
|
"eval_loss": 4.705799102783203, |
|
"eval_runtime": 12.8414, |
|
"eval_samples_per_second": 91.034, |
|
"eval_steps_per_second": 2.881, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 23.65, |
|
"learning_rate": 2.6351351351351353e-06, |
|
"loss": 4.76, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.3384486438378654, |
|
"eval_loss": 4.686633586883545, |
|
"eval_runtime": 12.852, |
|
"eval_samples_per_second": 90.959, |
|
"eval_steps_per_second": 2.879, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.3403841667314721, |
|
"eval_loss": 4.668373107910156, |
|
"eval_runtime": 12.8631, |
|
"eval_samples_per_second": 90.88, |
|
"eval_steps_per_second": 2.876, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.34248386344194726, |
|
"eval_loss": 4.648622512817383, |
|
"eval_runtime": 12.8633, |
|
"eval_samples_per_second": 90.879, |
|
"eval_steps_per_second": 2.876, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.3443416197907216, |
|
"eval_loss": 4.632272243499756, |
|
"eval_runtime": 12.8435, |
|
"eval_samples_per_second": 91.019, |
|
"eval_steps_per_second": 2.881, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 2.297297297297298e-06, |
|
"loss": 4.6863, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.34594015432338787, |
|
"eval_loss": 4.615506172180176, |
|
"eval_runtime": 12.8661, |
|
"eval_samples_per_second": 90.859, |
|
"eval_steps_per_second": 2.876, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.34756461103766495, |
|
"eval_loss": 4.601576805114746, |
|
"eval_runtime": 12.8739, |
|
"eval_samples_per_second": 90.804, |
|
"eval_steps_per_second": 2.874, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.34898169029905557, |
|
"eval_loss": 4.5874128341674805, |
|
"eval_runtime": 12.8754, |
|
"eval_samples_per_second": 90.793, |
|
"eval_steps_per_second": 2.874, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"learning_rate": 1.9594594594594595e-06, |
|
"loss": 4.6168, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.3504592546508714, |
|
"eval_loss": 4.574199676513672, |
|
"eval_runtime": 12.8467, |
|
"eval_samples_per_second": 90.996, |
|
"eval_steps_per_second": 2.88, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.35180720809463323, |
|
"eval_loss": 4.562849998474121, |
|
"eval_runtime": 12.8628, |
|
"eval_samples_per_second": 90.882, |
|
"eval_steps_per_second": 2.877, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.3534230240817067, |
|
"eval_loss": 4.550704479217529, |
|
"eval_runtime": 12.8788, |
|
"eval_samples_per_second": 90.769, |
|
"eval_steps_per_second": 2.873, |
|
"step": 4884 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"learning_rate": 1.6216216216216219e-06, |
|
"loss": 4.5684, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.35426117462045603, |
|
"eval_loss": 4.541166305541992, |
|
"eval_runtime": 12.9077, |
|
"eval_samples_per_second": 90.566, |
|
"eval_steps_per_second": 2.866, |
|
"step": 5032 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.3557905833354935, |
|
"eval_loss": 4.531555652618408, |
|
"eval_runtime": 12.8613, |
|
"eval_samples_per_second": 90.893, |
|
"eval_steps_per_second": 2.877, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.3569570815079797, |
|
"eval_loss": 4.520727157592773, |
|
"eval_runtime": 12.9563, |
|
"eval_samples_per_second": 90.226, |
|
"eval_steps_per_second": 2.856, |
|
"step": 5328 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.3579594058635975, |
|
"eval_loss": 4.513218879699707, |
|
"eval_runtime": 12.9069, |
|
"eval_samples_per_second": 90.572, |
|
"eval_steps_per_second": 2.867, |
|
"step": 5476 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 1.2837837837837838e-06, |
|
"loss": 4.5277, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.3587629934935324, |
|
"eval_loss": 4.505405426025391, |
|
"eval_runtime": 12.8784, |
|
"eval_samples_per_second": 90.772, |
|
"eval_steps_per_second": 2.873, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.35967891057711415, |
|
"eval_loss": 4.499256610870361, |
|
"eval_runtime": 12.8813, |
|
"eval_samples_per_second": 90.752, |
|
"eval_steps_per_second": 2.872, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.3604306538438275, |
|
"eval_loss": 4.493128776550293, |
|
"eval_runtime": 12.8448, |
|
"eval_samples_per_second": 91.009, |
|
"eval_steps_per_second": 2.881, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"learning_rate": 9.459459459459461e-07, |
|
"loss": 4.4886, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.3610959898385048, |
|
"eval_loss": 4.487875461578369, |
|
"eval_runtime": 12.8279, |
|
"eval_samples_per_second": 91.13, |
|
"eval_steps_per_second": 2.884, |
|
"step": 6068 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.3616749185611461, |
|
"eval_loss": 4.482149124145508, |
|
"eval_runtime": 12.9187, |
|
"eval_samples_per_second": 90.489, |
|
"eval_steps_per_second": 2.864, |
|
"step": 6216 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.3622365658293802, |
|
"eval_loss": 4.477830410003662, |
|
"eval_runtime": 12.8586, |
|
"eval_samples_per_second": 90.912, |
|
"eval_steps_per_second": 2.877, |
|
"step": 6364 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 6.081081081081082e-07, |
|
"loss": 4.4727, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.36262539855354226, |
|
"eval_loss": 4.474149703979492, |
|
"eval_runtime": 12.8383, |
|
"eval_samples_per_second": 91.056, |
|
"eval_steps_per_second": 2.882, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.3629969498232971, |
|
"eval_loss": 4.471028804779053, |
|
"eval_runtime": 12.8363, |
|
"eval_samples_per_second": 91.07, |
|
"eval_steps_per_second": 2.882, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.36328209382101595, |
|
"eval_loss": 4.469077110290527, |
|
"eval_runtime": 12.8487, |
|
"eval_samples_per_second": 90.982, |
|
"eval_steps_per_second": 2.88, |
|
"step": 6808 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.3634462676378844, |
|
"eval_loss": 4.4663591384887695, |
|
"eval_runtime": 12.8695, |
|
"eval_samples_per_second": 90.835, |
|
"eval_steps_per_second": 2.875, |
|
"step": 6956 |
|
}, |
|
{ |
|
"epoch": 47.3, |
|
"learning_rate": 2.702702702702703e-07, |
|
"loss": 4.4542, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.36362772290916, |
|
"eval_loss": 4.465246200561523, |
|
"eval_runtime": 12.8565, |
|
"eval_samples_per_second": 90.926, |
|
"eval_steps_per_second": 2.878, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.36365364509077086, |
|
"eval_loss": 4.464395046234131, |
|
"eval_runtime": 12.8452, |
|
"eval_samples_per_second": 91.007, |
|
"eval_steps_per_second": 2.88, |
|
"step": 7252 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.36367956727238165, |
|
"eval_loss": 4.464205741882324, |
|
"eval_runtime": 12.8483, |
|
"eval_samples_per_second": 90.984, |
|
"eval_steps_per_second": 2.88, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 7400, |
|
"total_flos": 1.00265577216e+17, |
|
"train_loss": 4.855868909681165, |
|
"train_runtime": 6356.4736, |
|
"train_samples_per_second": 37.112, |
|
"train_steps_per_second": 1.164 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.00265577216e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|