{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 13500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "eval_f1": 0.9838200292792664, "eval_loss": 0.3369638919830322, "eval_runtime": 9.8875, "eval_samples_per_second": 363.287, "eval_steps_per_second": 11.429, "step": 225 }, { "epoch": 1.0, "learning_rate": 1.933777777777778e-05, "loss": 0.7525, "step": 449 }, { "epoch": 1.0, "eval_f1": 0.9947156879317626, "eval_loss": 0.06897161155939102, "eval_runtime": 9.8175, "eval_samples_per_second": 365.878, "eval_steps_per_second": 11.51, "step": 450 }, { "epoch": 1.5, "eval_f1": 0.9963811589436764, "eval_loss": 0.02461547963321209, "eval_runtime": 9.8704, "eval_samples_per_second": 363.916, "eval_steps_per_second": 11.448, "step": 675 }, { "epoch": 2.0, "learning_rate": 1.867555555555556e-05, "loss": 0.0421, "step": 898 }, { "epoch": 2.0, "eval_f1": 0.9944299040377919, "eval_loss": 0.0366971492767334, "eval_runtime": 9.8419, "eval_samples_per_second": 364.97, "eval_steps_per_second": 11.482, "step": 900 }, { "epoch": 2.5, "eval_f1": 0.9952689302306932, "eval_loss": 0.028725871816277504, "eval_runtime": 9.8602, "eval_samples_per_second": 364.292, "eval_steps_per_second": 11.46, "step": 1125 }, { "epoch": 2.99, "learning_rate": 1.801037037037037e-05, "loss": 0.0171, "step": 1347 }, { "epoch": 3.0, "eval_f1": 0.9974916981915504, "eval_loss": 0.02087334357202053, "eval_runtime": 9.8419, "eval_samples_per_second": 364.971, "eval_steps_per_second": 11.482, "step": 1350 }, { "epoch": 3.5, "eval_f1": 0.9958079825309512, "eval_loss": 0.027463000267744064, "eval_runtime": 9.8899, "eval_samples_per_second": 363.198, "eval_steps_per_second": 11.426, "step": 1575 }, { "epoch": 3.99, "learning_rate": 1.7345185185185187e-05, "loss": 0.0101, "step": 1796 }, { "epoch": 4.0, "eval_f1": 0.9923778241572203, "eval_loss": 0.04307250306010246, "eval_runtime": 9.8651, "eval_samples_per_second": 364.112, "eval_steps_per_second": 11.455, "step": 1800 }, { "epoch": 4.5, "eval_f1": 0.9894604343019209, "eval_loss": 0.06550969928503036, "eval_runtime": 9.8831, "eval_samples_per_second": 363.448, "eval_steps_per_second": 11.434, "step": 2025 }, { "epoch": 4.99, "learning_rate": 1.668e-05, "loss": 0.0079, "step": 2245 }, { "epoch": 5.0, "eval_f1": 0.9966504962899037, "eval_loss": 0.024906333535909653, "eval_runtime": 9.8822, "eval_samples_per_second": 363.481, "eval_steps_per_second": 11.435, "step": 2250 }, { "epoch": 5.5, "eval_f1": 0.9947052626407842, "eval_loss": 0.032973531633615494, "eval_runtime": 9.8456, "eval_samples_per_second": 364.831, "eval_steps_per_second": 11.477, "step": 2475 }, { "epoch": 5.99, "learning_rate": 1.6014814814814815e-05, "loss": 0.014, "step": 2694 }, { "epoch": 6.0, "eval_f1": 0.9958134976155982, "eval_loss": 0.025115273892879486, "eval_runtime": 9.8838, "eval_samples_per_second": 363.422, "eval_steps_per_second": 11.433, "step": 2700 }, { "epoch": 6.5, "eval_f1": 0.9963697167771403, "eval_loss": 0.014389083720743656, "eval_runtime": 9.8519, "eval_samples_per_second": 364.599, "eval_steps_per_second": 11.47, "step": 2925 }, { "epoch": 6.98, "learning_rate": 1.534962962962963e-05, "loss": 0.0068, "step": 3143 }, { "epoch": 7.0, "eval_f1": 0.9974888410650935, "eval_loss": 0.01635473594069481, "eval_runtime": 9.8836, "eval_samples_per_second": 363.43, "eval_steps_per_second": 11.433, "step": 3150 }, { "epoch": 7.5, "eval_f1": 0.9980480978911765, "eval_loss": 0.01281055435538292, "eval_runtime": 9.8626, "eval_samples_per_second": 364.203, "eval_steps_per_second": 11.457, "step": 3375 }, { "epoch": 7.98, "learning_rate": 1.4684444444444445e-05, "loss": 0.0018, "step": 3592 }, { "epoch": 8.0, "eval_f1": 0.9966572157308153, "eval_loss": 0.021354708820581436, "eval_runtime": 9.8914, "eval_samples_per_second": 363.144, "eval_steps_per_second": 11.424, "step": 3600 }, { "epoch": 8.5, "eval_f1": 0.9969361037754005, "eval_loss": 0.019004661589860916, "eval_runtime": 9.8255, "eval_samples_per_second": 365.578, "eval_steps_per_second": 11.501, "step": 3825 }, { "epoch": 8.98, "learning_rate": 1.4020740740740742e-05, "loss": 0.0018, "step": 4041 }, { "epoch": 9.0, "eval_f1": 0.9977696305095403, "eval_loss": 0.01915433257818222, "eval_runtime": 9.847, "eval_samples_per_second": 364.78, "eval_steps_per_second": 11.476, "step": 4050 }, { "epoch": 9.5, "eval_f1": 0.9963730398525849, "eval_loss": 0.02228739485144615, "eval_runtime": 9.8515, "eval_samples_per_second": 364.615, "eval_steps_per_second": 11.47, "step": 4275 }, { "epoch": 9.98, "learning_rate": 1.3355555555555557e-05, "loss": 0.0049, "step": 4490 }, { "epoch": 10.0, "eval_f1": 0.9958423076694589, "eval_loss": 0.035607218742370605, "eval_runtime": 9.8349, "eval_samples_per_second": 365.229, "eval_steps_per_second": 11.49, "step": 4500 }, { "epoch": 10.5, "eval_f1": 0.9977709997223315, "eval_loss": 0.014773285947740078, "eval_runtime": 9.859, "eval_samples_per_second": 364.335, "eval_steps_per_second": 11.462, "step": 4725 }, { "epoch": 10.98, "learning_rate": 1.2690370370370371e-05, "loss": 0.0058, "step": 4939 }, { "epoch": 11.0, "eval_f1": 0.9960966553810828, "eval_loss": 0.027824491262435913, "eval_runtime": 9.8473, "eval_samples_per_second": 364.77, "eval_steps_per_second": 11.475, "step": 4950 }, { "epoch": 11.5, "eval_f1": 0.9958285144673014, "eval_loss": 0.034932490438222885, "eval_runtime": 9.8384, "eval_samples_per_second": 365.101, "eval_steps_per_second": 11.486, "step": 5175 }, { "epoch": 11.97, "learning_rate": 1.2025185185185186e-05, "loss": 0.004, "step": 5388 }, { "epoch": 12.0, "eval_f1": 0.9958167666486853, "eval_loss": 0.03234181925654411, "eval_runtime": 9.83, "eval_samples_per_second": 365.412, "eval_steps_per_second": 11.495, "step": 5400 }, { "epoch": 12.5, "eval_f1": 0.9960979327465159, "eval_loss": 0.03011472336947918, "eval_runtime": 9.9437, "eval_samples_per_second": 361.234, "eval_steps_per_second": 11.364, "step": 5625 }, { "epoch": 12.97, "learning_rate": 1.136e-05, "loss": 0.0002, "step": 5837 }, { "epoch": 13.0, "eval_f1": 0.9966535238124058, "eval_loss": 0.028114166110754013, "eval_runtime": 9.9673, "eval_samples_per_second": 360.379, "eval_steps_per_second": 11.337, "step": 5850 }, { "epoch": 13.5, "eval_f1": 0.9963785130348743, "eval_loss": 0.031257264316082, "eval_runtime": 9.9968, "eval_samples_per_second": 359.313, "eval_steps_per_second": 11.304, "step": 6075 }, { "epoch": 13.97, "learning_rate": 1.0694814814814815e-05, "loss": 0.0001, "step": 6286 }, { "epoch": 14.0, "eval_f1": 0.9963822843309736, "eval_loss": 0.026260120794177055, "eval_runtime": 9.944, "eval_samples_per_second": 361.222, "eval_steps_per_second": 11.364, "step": 6300 }, { "epoch": 14.5, "eval_f1": 0.9963822843309736, "eval_loss": 0.026301411911845207, "eval_runtime": 9.9178, "eval_samples_per_second": 362.178, "eval_steps_per_second": 11.394, "step": 6525 }, { "epoch": 14.97, "learning_rate": 1.0031111111111112e-05, "loss": 0.002, "step": 6735 }, { "epoch": 15.0, "eval_f1": 0.9949833769458274, "eval_loss": 0.045748304575681686, "eval_runtime": 9.9794, "eval_samples_per_second": 359.942, "eval_steps_per_second": 11.323, "step": 6750 }, { "epoch": 15.5, "eval_f1": 0.995825155358709, "eval_loss": 0.03245578706264496, "eval_runtime": 9.9495, "eval_samples_per_second": 361.023, "eval_steps_per_second": 11.357, "step": 6975 }, { "epoch": 15.96, "learning_rate": 9.365925925925928e-06, "loss": 0.0041, "step": 7184 }, { "epoch": 16.0, "eval_f1": 0.9963795479966517, "eval_loss": 0.02910827286541462, "eval_runtime": 9.9694, "eval_samples_per_second": 360.303, "eval_steps_per_second": 11.335, "step": 7200 }, { "epoch": 16.5, "eval_f1": 0.9952742268022056, "eval_loss": 0.04952191188931465, "eval_runtime": 9.9492, "eval_samples_per_second": 361.033, "eval_steps_per_second": 11.358, "step": 7425 }, { "epoch": 16.96, "learning_rate": 8.700740740740742e-06, "loss": 0.0047, "step": 7633 }, { "epoch": 17.0, "eval_f1": 0.9949872709487171, "eval_loss": 0.03917469084262848, "eval_runtime": 9.9497, "eval_samples_per_second": 361.015, "eval_steps_per_second": 11.357, "step": 7650 }, { "epoch": 17.5, "eval_f1": 0.9944269801435699, "eval_loss": 0.03998776897788048, "eval_runtime": 10.0031, "eval_samples_per_second": 359.089, "eval_steps_per_second": 11.297, "step": 7875 }, { "epoch": 17.96, "learning_rate": 8.035555555555556e-06, "loss": 0.0043, "step": 8082 }, { "epoch": 18.0, "eval_f1": 0.9947222403458923, "eval_loss": 0.049297936260700226, "eval_runtime": 9.9403, "eval_samples_per_second": 361.359, "eval_steps_per_second": 11.368, "step": 8100 }, { "epoch": 18.5, "eval_f1": 0.9966600284318996, "eval_loss": 0.03158922120928764, "eval_runtime": 10.0056, "eval_samples_per_second": 358.999, "eval_steps_per_second": 11.294, "step": 8325 }, { "epoch": 18.96, "learning_rate": 7.370370370370371e-06, "loss": 0.0001, "step": 8531 }, { "epoch": 19.0, "eval_f1": 0.9977691843456208, "eval_loss": 0.020285602658987045, "eval_runtime": 9.9628, "eval_samples_per_second": 360.543, "eval_steps_per_second": 11.342, "step": 8550 }, { "epoch": 19.5, "eval_f1": 0.9977691843456208, "eval_loss": 0.020556360483169556, "eval_runtime": 9.9808, "eval_samples_per_second": 359.89, "eval_steps_per_second": 11.322, "step": 8775 }, { "epoch": 19.96, "learning_rate": 6.705185185185186e-06, "loss": 0.0, "step": 8980 }, { "epoch": 20.0, "eval_f1": 0.99749170867917, "eval_loss": 0.018523868173360825, "eval_runtime": 9.9504, "eval_samples_per_second": 360.989, "eval_steps_per_second": 11.356, "step": 9000 }, { "epoch": 20.5, "eval_f1": 0.99749170867917, "eval_loss": 0.01606236957013607, "eval_runtime": 9.9683, "eval_samples_per_second": 360.341, "eval_steps_per_second": 11.336, "step": 9225 }, { "epoch": 20.95, "learning_rate": 6.040000000000001e-06, "loss": 0.0008, "step": 9429 }, { "epoch": 21.0, "eval_f1": 0.9961331807425458, "eval_loss": 0.027556220069527626, "eval_runtime": 9.9439, "eval_samples_per_second": 361.228, "eval_steps_per_second": 11.364, "step": 9450 }, { "epoch": 21.5, "eval_f1": 0.9974890406034552, "eval_loss": 0.022856123745441437, "eval_runtime": 9.9718, "eval_samples_per_second": 360.218, "eval_steps_per_second": 11.332, "step": 9675 }, { "epoch": 21.95, "learning_rate": 5.376296296296297e-06, "loss": 0.0015, "step": 9878 }, { "epoch": 22.0, "eval_f1": 0.996655754253198, "eval_loss": 0.02840564213693142, "eval_runtime": 9.9909, "eval_samples_per_second": 359.528, "eval_steps_per_second": 11.31, "step": 9900 }, { "epoch": 22.5, "eval_f1": 0.9969386569418872, "eval_loss": 0.02319738268852234, "eval_runtime": 9.9719, "eval_samples_per_second": 360.213, "eval_steps_per_second": 11.332, "step": 10125 }, { "epoch": 22.95, "learning_rate": 4.711111111111111e-06, "loss": 0.0009, "step": 10327 }, { "epoch": 23.0, "eval_f1": 0.9977709091258657, "eval_loss": 0.016745826229453087, "eval_runtime": 9.9669, "eval_samples_per_second": 360.394, "eval_steps_per_second": 11.338, "step": 10350 }, { "epoch": 23.5, "eval_f1": 0.9977709091258657, "eval_loss": 0.016933374106884003, "eval_runtime": 9.967, "eval_samples_per_second": 360.39, "eval_steps_per_second": 11.337, "step": 10575 }, { "epoch": 23.95, "learning_rate": 4.045925925925927e-06, "loss": 0.0, "step": 10776 }, { "epoch": 24.0, "eval_f1": 0.997211752501557, "eval_loss": 0.02103780210018158, "eval_runtime": 9.9446, "eval_samples_per_second": 361.2, "eval_steps_per_second": 11.363, "step": 10800 }, { "epoch": 24.5, "eval_f1": 0.997211752501557, "eval_loss": 0.02101019211113453, "eval_runtime": 9.9889, "eval_samples_per_second": 359.6, "eval_steps_per_second": 11.313, "step": 11025 }, { "epoch": 24.94, "learning_rate": 3.380740740740741e-06, "loss": 0.0, "step": 11225 }, { "epoch": 25.0, "eval_f1": 0.997211752501557, "eval_loss": 0.021039091050624847, "eval_runtime": 9.9809, "eval_samples_per_second": 359.889, "eval_steps_per_second": 11.322, "step": 11250 }, { "epoch": 25.5, "eval_f1": 0.997211752501557, "eval_loss": 0.021075071766972542, "eval_runtime": 9.9783, "eval_samples_per_second": 359.981, "eval_steps_per_second": 11.325, "step": 11475 }, { "epoch": 25.94, "learning_rate": 2.715555555555556e-06, "loss": 0.0, "step": 11674 }, { "epoch": 26.0, "eval_f1": 0.997211752501557, "eval_loss": 0.021231742575764656, "eval_runtime": 9.9681, "eval_samples_per_second": 360.349, "eval_steps_per_second": 11.336, "step": 11700 }, { "epoch": 26.5, "eval_f1": 0.997211752501557, "eval_loss": 0.021246111020445824, "eval_runtime": 9.9808, "eval_samples_per_second": 359.893, "eval_steps_per_second": 11.322, "step": 11925 }, { "epoch": 26.94, "learning_rate": 2.0503703703703704e-06, "loss": 0.0, "step": 12123 }, { "epoch": 27.0, "eval_f1": 0.997211752501557, "eval_loss": 0.021242721006274223, "eval_runtime": 9.9704, "eval_samples_per_second": 360.265, "eval_steps_per_second": 11.333, "step": 12150 }, { "epoch": 27.5, "eval_f1": 0.997211752501557, "eval_loss": 0.021284321323037148, "eval_runtime": 9.9704, "eval_samples_per_second": 360.267, "eval_steps_per_second": 11.334, "step": 12375 }, { "epoch": 27.94, "learning_rate": 1.3851851851851852e-06, "loss": 0.0, "step": 12572 }, { "epoch": 28.0, "eval_f1": 0.9974898648408176, "eval_loss": 0.02062426507472992, "eval_runtime": 9.9866, "eval_samples_per_second": 359.68, "eval_steps_per_second": 11.315, "step": 12600 }, { "epoch": 28.5, "eval_f1": 0.9974925455517304, "eval_loss": 0.014707241207361221, "eval_runtime": 10.0028, "eval_samples_per_second": 359.098, "eval_steps_per_second": 11.297, "step": 12825 }, { "epoch": 28.94, "learning_rate": 7.214814814814814e-07, "loss": 0.0004, "step": 13021 }, { "epoch": 29.0, "eval_f1": 0.9974925455517304, "eval_loss": 0.015917900949716568, "eval_runtime": 10.0109, "eval_samples_per_second": 358.81, "eval_steps_per_second": 11.288, "step": 13050 }, { "epoch": 29.5, "eval_f1": 0.9974925455517304, "eval_loss": 0.015875495970249176, "eval_runtime": 10.0428, "eval_samples_per_second": 357.669, "eval_steps_per_second": 11.252, "step": 13275 }, { "epoch": 29.93, "learning_rate": 5.62962962962963e-08, "loss": 0.0, "step": 13470 }, { "epoch": 30.0, "eval_f1": 0.9974925455517304, "eval_loss": 0.01589413359761238, "eval_runtime": 10.0081, "eval_samples_per_second": 358.909, "eval_steps_per_second": 11.291, "step": 13500 } ], "max_steps": 13500, "num_train_epochs": 30, "total_flos": 1.1347301109694464e+17, "trial_name": null, "trial_params": null }