{ "best_metric": null, "best_model_checkpoint": null, "epoch": 38.0, "global_step": 4028, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.3832186408159307e-05, "loss": 1.7921, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.7072680181157848, "eval_loss": 1.4908801317214966, "eval_runtime": 35.2427, "eval_samples_per_second": 50.223, "eval_steps_per_second": 0.17, "step": 106 }, { "epoch": 2.0, "learning_rate": 1.5888124272106204e-05, "loss": 1.4864, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.7269994857792617, "eval_loss": 1.3473752737045288, "eval_runtime": 34.9927, "eval_samples_per_second": 50.582, "eval_steps_per_second": 0.171, "step": 212 }, { "epoch": 3.0, "learning_rate": 1.7090770826327895e-05, "loss": 1.3756, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.7408165661368673, "eval_loss": 1.2464056015014648, "eval_runtime": 34.6698, "eval_samples_per_second": 51.053, "eval_steps_per_second": 0.173, "step": 318 }, { "epoch": 4.0, "learning_rate": 1.7944062136053104e-05, "loss": 1.3032, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.7535117507143178, "eval_loss": 1.1721432209014893, "eval_runtime": 34.7274, "eval_samples_per_second": 50.968, "eval_steps_per_second": 0.173, "step": 424 }, { "epoch": 5.0, "learning_rate": 1.860592629580032e-05, "loss": 1.2584, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.7601500829124033, "eval_loss": 1.1279706954956055, "eval_runtime": 34.8917, "eval_samples_per_second": 50.728, "eval_steps_per_second": 0.172, "step": 530 }, { "epoch": 6.0, "learning_rate": 1.9146708690274792e-05, "loss": 1.2103, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.7582043276054062, "eval_loss": 1.1379011869430542, "eval_runtime": 34.6334, "eval_samples_per_second": 51.107, "eval_steps_per_second": 0.173, "step": 636 }, { "epoch": 7.0, "learning_rate": 1.9603933689955228e-05, "loss": 1.183, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.7704412195286752, "eval_loss": 1.062961220741272, "eval_runtime": 34.3317, "eval_samples_per_second": 51.556, "eval_steps_per_second": 0.175, "step": 742 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.1546, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.7686654368803604, "eval_loss": 1.0676072835922241, "eval_runtime": 34.7431, "eval_samples_per_second": 50.945, "eval_steps_per_second": 0.173, "step": 848 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.1263, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.7708787569856306, "eval_loss": 1.0571365356445312, "eval_runtime": 34.8256, "eval_samples_per_second": 50.825, "eval_steps_per_second": 0.172, "step": 954 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.1191, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.781194386249465, "eval_loss": 0.9872472286224365, "eval_runtime": 34.7655, "eval_samples_per_second": 50.913, "eval_steps_per_second": 0.173, "step": 1060 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.0948, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.7812378898109896, "eval_loss": 0.9977697134017944, "eval_runtime": 34.7432, "eval_samples_per_second": 50.945, "eval_steps_per_second": 0.173, "step": 1166 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.0841, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.7803615513259536, "eval_loss": 0.9978513717651367, "eval_runtime": 34.6545, "eval_samples_per_second": 51.076, "eval_steps_per_second": 0.173, "step": 1272 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.0688, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.7858584354172097, "eval_loss": 0.9791596531867981, "eval_runtime": 34.9372, "eval_samples_per_second": 50.662, "eval_steps_per_second": 0.172, "step": 1378 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.0605, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.7874798359386868, "eval_loss": 0.9556354284286499, "eval_runtime": 34.6859, "eval_samples_per_second": 51.029, "eval_steps_per_second": 0.173, "step": 1484 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.0499, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.7894171406706451, "eval_loss": 0.9544544816017151, "eval_runtime": 34.7413, "eval_samples_per_second": 50.948, "eval_steps_per_second": 0.173, "step": 1590 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.0351, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.7894932603326688, "eval_loss": 0.9460939168930054, "eval_runtime": 34.704, "eval_samples_per_second": 51.003, "eval_steps_per_second": 0.173, "step": 1696 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.0286, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.7885454720602293, "eval_loss": 0.9521207809448242, "eval_runtime": 34.7606, "eval_samples_per_second": 50.92, "eval_steps_per_second": 0.173, "step": 1802 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.0173, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.7890063270306843, "eval_loss": 0.9481790661811829, "eval_runtime": 34.6779, "eval_samples_per_second": 51.041, "eval_steps_per_second": 0.173, "step": 1908 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.0079, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.7932532825907861, "eval_loss": 0.9254797101020813, "eval_runtime": 34.6588, "eval_samples_per_second": 51.069, "eval_steps_per_second": 0.173, "step": 2014 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.0, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.7949952307142005, "eval_loss": 0.9181823134422302, "eval_runtime": 34.812, "eval_samples_per_second": 50.845, "eval_steps_per_second": 0.172, "step": 2120 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 0.993, "step": 2226 }, { "epoch": 21.0, "eval_accuracy": 0.7947981315379723, "eval_loss": 0.9146238565444946, "eval_runtime": 34.6312, "eval_samples_per_second": 51.11, "eval_steps_per_second": 0.173, "step": 2226 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 0.9814, "step": 2332 }, { "epoch": 22.0, "eval_accuracy": 0.7955541733309638, "eval_loss": 0.9044105410575867, "eval_runtime": 35.6117, "eval_samples_per_second": 49.703, "eval_steps_per_second": 0.168, "step": 2332 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 0.9733, "step": 2438 }, { "epoch": 23.0, "eval_accuracy": 0.7970804770891298, "eval_loss": 0.8979274034500122, "eval_runtime": 35.0401, "eval_samples_per_second": 50.514, "eval_steps_per_second": 0.171, "step": 2438 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 0.9725, "step": 2544 }, { "epoch": 24.0, "eval_accuracy": 0.8038572415242785, "eval_loss": 0.867423415184021, "eval_runtime": 34.3534, "eval_samples_per_second": 51.523, "eval_steps_per_second": 0.175, "step": 2544 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 0.963, "step": 2650 }, { "epoch": 25.0, "eval_accuracy": 0.7997166910097103, "eval_loss": 0.8823758959770203, "eval_runtime": 34.7324, "eval_samples_per_second": 50.961, "eval_steps_per_second": 0.173, "step": 2650 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 0.9587, "step": 2756 }, { "epoch": 26.0, "eval_accuracy": 0.8008366256100395, "eval_loss": 0.8787974119186401, "eval_runtime": 34.68, "eval_samples_per_second": 51.038, "eval_steps_per_second": 0.173, "step": 2756 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 0.9523, "step": 2862 }, { "epoch": 27.0, "eval_accuracy": 0.802576527423399, "eval_loss": 0.8776472806930542, "eval_runtime": 34.5847, "eval_samples_per_second": 51.179, "eval_steps_per_second": 0.173, "step": 2862 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 0.9503, "step": 2968 }, { "epoch": 28.0, "eval_accuracy": 0.8046389019572702, "eval_loss": 0.8658241629600525, "eval_runtime": 34.7046, "eval_samples_per_second": 51.002, "eval_steps_per_second": 0.173, "step": 2968 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 0.9357, "step": 3074 }, { "epoch": 29.0, "eval_accuracy": 0.8068694155258628, "eval_loss": 0.8559547066688538, "eval_runtime": 34.6364, "eval_samples_per_second": 51.102, "eval_steps_per_second": 0.173, "step": 3074 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.9404, "step": 3180 }, { "epoch": 30.0, "eval_accuracy": 0.8065936664154513, "eval_loss": 0.8534895777702332, "eval_runtime": 34.816, "eval_samples_per_second": 50.839, "eval_steps_per_second": 0.172, "step": 3180 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 0.931, "step": 3286 }, { "epoch": 31.0, "eval_accuracy": 0.8132200449776263, "eval_loss": 0.8150569796562195, "eval_runtime": 34.836, "eval_samples_per_second": 50.81, "eval_steps_per_second": 0.172, "step": 3286 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 0.925, "step": 3392 }, { "epoch": 32.0, "eval_accuracy": 0.8086711486831303, "eval_loss": 0.8455161452293396, "eval_runtime": 35.717, "eval_samples_per_second": 49.556, "eval_steps_per_second": 0.168, "step": 3392 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 0.9159, "step": 3498 }, { "epoch": 33.0, "eval_accuracy": 0.8050653384287288, "eval_loss": 0.8572449088096619, "eval_runtime": 34.373, "eval_samples_per_second": 51.494, "eval_steps_per_second": 0.175, "step": 3498 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 0.9102, "step": 3604 }, { "epoch": 34.0, "eval_accuracy": 0.8064940892606243, "eval_loss": 0.8592977523803711, "eval_runtime": 34.6394, "eval_samples_per_second": 51.098, "eval_steps_per_second": 0.173, "step": 3604 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 0.9128, "step": 3710 }, { "epoch": 35.0, "eval_accuracy": 0.8136768783591909, "eval_loss": 0.813452959060669, "eval_runtime": 34.7174, "eval_samples_per_second": 50.983, "eval_steps_per_second": 0.173, "step": 3710 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 0.9067, "step": 3816 }, { "epoch": 36.0, "eval_accuracy": 0.8131441385250527, "eval_loss": 0.821983277797699, "eval_runtime": 34.8233, "eval_samples_per_second": 50.828, "eval_steps_per_second": 0.172, "step": 3816 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 0.8989, "step": 3922 }, { "epoch": 37.0, "eval_accuracy": 0.8117314414207155, "eval_loss": 0.827382504940033, "eval_runtime": 34.7538, "eval_samples_per_second": 50.93, "eval_steps_per_second": 0.173, "step": 3922 }, { "epoch": 38.0, "learning_rate": 2e-05, "loss": 0.8928, "step": 4028 }, { "epoch": 38.0, "eval_accuracy": 0.8077465845969074, "eval_loss": 0.8441253900527954, "eval_runtime": 34.7385, "eval_samples_per_second": 50.952, "eval_steps_per_second": 0.173, "step": 4028 } ], "max_steps": 4240, "num_train_epochs": 40, "total_flos": 1274410698801152.0, "trial_name": null, "trial_params": null }