{ "best_metric": null, "best_model_checkpoint": null, "epoch": 39.0, "global_step": 4134, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.3832186408159307e-05, "loss": 2.0685, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.6720276077614272, "eval_loss": 1.7388017177581787, "eval_runtime": 36.8297, "eval_samples_per_second": 48.059, "eval_steps_per_second": 0.163, "step": 106 }, { "epoch": 2.0, "learning_rate": 1.5888124272106204e-05, "loss": 1.7284, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.6931985254177139, "eval_loss": 1.5560609102249146, "eval_runtime": 37.6763, "eval_samples_per_second": 46.979, "eval_steps_per_second": 0.159, "step": 212 }, { "epoch": 3.0, "learning_rate": 1.7090770826327895e-05, "loss": 1.5997, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.7099605218744038, "eval_loss": 1.4386627674102783, "eval_runtime": 36.094, "eval_samples_per_second": 49.039, "eval_steps_per_second": 0.166, "step": 318 }, { "epoch": 4.0, "learning_rate": 1.7944062136053104e-05, "loss": 1.5195, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.7235852020285128, "eval_loss": 1.3606762886047363, "eval_runtime": 37.6742, "eval_samples_per_second": 46.982, "eval_steps_per_second": 0.159, "step": 424 }, { "epoch": 5.0, "learning_rate": 1.860592629580032e-05, "loss": 1.4706, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.7306072855931103, "eval_loss": 1.3053652048110962, "eval_runtime": 36.8913, "eval_samples_per_second": 47.979, "eval_steps_per_second": 0.163, "step": 530 }, { "epoch": 6.0, "learning_rate": 1.9146708690274792e-05, "loss": 1.4153, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.7296665866066945, "eval_loss": 1.3213350772857666, "eval_runtime": 37.6502, "eval_samples_per_second": 47.012, "eval_steps_per_second": 0.159, "step": 636 }, { "epoch": 7.0, "learning_rate": 1.9603933689955228e-05, "loss": 1.3838, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.7412067658165602, "eval_loss": 1.2423616647720337, "eval_runtime": 36.7828, "eval_samples_per_second": 48.12, "eval_steps_per_second": 0.163, "step": 742 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.3512, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.7385993664885682, "eval_loss": 1.2403146028518677, "eval_runtime": 37.6141, "eval_samples_per_second": 47.057, "eval_steps_per_second": 0.16, "step": 848 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.3188, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.7413660954138545, "eval_loss": 1.2292009592056274, "eval_runtime": 36.8608, "eval_samples_per_second": 48.019, "eval_steps_per_second": 0.163, "step": 954 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.3098, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.7530054891006436, "eval_loss": 1.1540861129760742, "eval_runtime": 36.7518, "eval_samples_per_second": 48.161, "eval_steps_per_second": 0.163, "step": 1060 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.2827, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.7532555843965927, "eval_loss": 1.1605820655822754, "eval_runtime": 37.6556, "eval_samples_per_second": 47.005, "eval_steps_per_second": 0.159, "step": 1166 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.2693, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.7515547286193142, "eval_loss": 1.167082667350769, "eval_runtime": 36.7311, "eval_samples_per_second": 48.188, "eval_steps_per_second": 0.163, "step": 1272 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.2521, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.7581785009494731, "eval_loss": 1.134334921836853, "eval_runtime": 37.5802, "eval_samples_per_second": 47.099, "eval_steps_per_second": 0.16, "step": 1378 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.2421, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.7592175982523275, "eval_loss": 1.1171754598617554, "eval_runtime": 36.9701, "eval_samples_per_second": 47.877, "eval_steps_per_second": 0.162, "step": 1484 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.2308, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.7617716097984533, "eval_loss": 1.1091045141220093, "eval_runtime": 37.8672, "eval_samples_per_second": 46.742, "eval_steps_per_second": 0.158, "step": 1590 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.2132, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.7612774124438361, "eval_loss": 1.1064728498458862, "eval_runtime": 36.7747, "eval_samples_per_second": 48.131, "eval_steps_per_second": 0.163, "step": 1696 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.2055, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.761372646367941, "eval_loss": 1.1087865829467773, "eval_runtime": 36.8483, "eval_samples_per_second": 48.035, "eval_steps_per_second": 0.163, "step": 1802 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.1931, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.7599678162601031, "eval_loss": 1.1089411973953247, "eval_runtime": 35.9217, "eval_samples_per_second": 49.274, "eval_steps_per_second": 0.167, "step": 1908 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.1815, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.7659135481142874, "eval_loss": 1.0751391649246216, "eval_runtime": 36.8871, "eval_samples_per_second": 47.984, "eval_steps_per_second": 0.163, "step": 2014 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.1728, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.768600214617861, "eval_loss": 1.069868803024292, "eval_runtime": 36.7387, "eval_samples_per_second": 48.178, "eval_steps_per_second": 0.163, "step": 2120 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.164, "step": 2226 }, { "epoch": 21.0, "eval_accuracy": 0.7675022113058035, "eval_loss": 1.065330147743225, "eval_runtime": 36.8047, "eval_samples_per_second": 48.092, "eval_steps_per_second": 0.163, "step": 2226 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.1524, "step": 2332 }, { "epoch": 22.0, "eval_accuracy": 0.7698299863752147, "eval_loss": 1.0548479557037354, "eval_runtime": 36.7052, "eval_samples_per_second": 48.222, "eval_steps_per_second": 0.163, "step": 2332 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.1425, "step": 2438 }, { "epoch": 23.0, "eval_accuracy": 0.769697828632807, "eval_loss": 1.047703504562378, "eval_runtime": 36.8466, "eval_samples_per_second": 48.037, "eval_steps_per_second": 0.163, "step": 2438 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.143, "step": 2544 }, { "epoch": 24.0, "eval_accuracy": 0.7772214047626197, "eval_loss": 1.0133404731750488, "eval_runtime": 36.8371, "eval_samples_per_second": 48.049, "eval_steps_per_second": 0.163, "step": 2544 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.1308, "step": 2650 }, { "epoch": 25.0, "eval_accuracy": 0.7741821838279008, "eval_loss": 1.0260401964187622, "eval_runtime": 36.8437, "eval_samples_per_second": 48.041, "eval_steps_per_second": 0.163, "step": 2650 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.1271, "step": 2756 }, { "epoch": 26.0, "eval_accuracy": 0.773661286574258, "eval_loss": 1.0230038166046143, "eval_runtime": 36.8925, "eval_samples_per_second": 47.977, "eval_steps_per_second": 0.163, "step": 2756 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.1202, "step": 2862 }, { "epoch": 27.0, "eval_accuracy": 0.7754215166511026, "eval_loss": 1.0241199731826782, "eval_runtime": 36.7992, "eval_samples_per_second": 48.099, "eval_steps_per_second": 0.163, "step": 2862 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.1168, "step": 2968 }, { "epoch": 28.0, "eval_accuracy": 0.7775761513243871, "eval_loss": 1.0062930583953857, "eval_runtime": 38.045, "eval_samples_per_second": 46.524, "eval_steps_per_second": 0.158, "step": 2968 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.1019, "step": 3074 }, { "epoch": 29.0, "eval_accuracy": 0.7796242613030875, "eval_loss": 0.9990780353546143, "eval_runtime": 37.599, "eval_samples_per_second": 47.076, "eval_steps_per_second": 0.16, "step": 3074 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.1071, "step": 3180 }, { "epoch": 30.0, "eval_accuracy": 0.7805559249900252, "eval_loss": 0.991283655166626, "eval_runtime": 36.7892, "eval_samples_per_second": 48.112, "eval_steps_per_second": 0.163, "step": 3180 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 1.0963, "step": 3286 }, { "epoch": 31.0, "eval_accuracy": 0.7864944315242504, "eval_loss": 0.9553370475769043, "eval_runtime": 36.7173, "eval_samples_per_second": 48.206, "eval_steps_per_second": 0.163, "step": 3286 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 1.089, "step": 3392 }, { "epoch": 32.0, "eval_accuracy": 0.783273055389144, "eval_loss": 0.9850459098815918, "eval_runtime": 36.7845, "eval_samples_per_second": 48.118, "eval_steps_per_second": 0.163, "step": 3392 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 1.0807, "step": 3498 }, { "epoch": 33.0, "eval_accuracy": 0.7774915196015286, "eval_loss": 0.9987505674362183, "eval_runtime": 36.8264, "eval_samples_per_second": 48.063, "eval_steps_per_second": 0.163, "step": 3498 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 1.0735, "step": 3604 }, { "epoch": 34.0, "eval_accuracy": 0.7789523242042387, "eval_loss": 0.9969209432601929, "eval_runtime": 37.6346, "eval_samples_per_second": 47.031, "eval_steps_per_second": 0.159, "step": 3604 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 1.0766, "step": 3710 }, { "epoch": 35.0, "eval_accuracy": 0.7880373776195184, "eval_loss": 0.9506202936172485, "eval_runtime": 36.9744, "eval_samples_per_second": 47.871, "eval_steps_per_second": 0.162, "step": 3710 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 1.0698, "step": 3816 }, { "epoch": 36.0, "eval_accuracy": 0.7880023166711217, "eval_loss": 0.9565942883491516, "eval_runtime": 38.7958, "eval_samples_per_second": 45.623, "eval_steps_per_second": 0.155, "step": 3816 }, { "epoch": 37.0, "learning_rate": 2e-05, "loss": 1.0608, "step": 3922 }, { "epoch": 37.0, "eval_accuracy": 0.7855791137596568, "eval_loss": 0.9620457291603088, "eval_runtime": 37.7795, "eval_samples_per_second": 46.851, "eval_steps_per_second": 0.159, "step": 3922 }, { "epoch": 38.0, "learning_rate": 2e-05, "loss": 1.0543, "step": 4028 }, { "epoch": 38.0, "eval_accuracy": 0.7822399039183305, "eval_loss": 0.9812787175178528, "eval_runtime": 36.8483, "eval_samples_per_second": 48.035, "eval_steps_per_second": 0.163, "step": 4028 }, { "epoch": 39.0, "learning_rate": 2e-05, "loss": 1.0521, "step": 4134 }, { "epoch": 39.0, "eval_accuracy": 0.7884438270952546, "eval_loss": 0.9478756785392761, "eval_runtime": 36.7861, "eval_samples_per_second": 48.116, "eval_steps_per_second": 0.163, "step": 4134 } ], "max_steps": 4240, "num_train_epochs": 40, "total_flos": 498219970723840.0, "trial_name": null, "trial_params": null }