{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.997714808043876e-05, "loss": 5.4892, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9017367458866546e-05, "loss": 3.0607, "step": 43 }, { "epoch": 0.02, "eval_accuracy": 0.8749333333333333, "eval_f1": 0.8729341641831482, "eval_loss": 2.1355409622192383, "eval_precision": 0.887114537444934, "eval_recall": 0.8592, "eval_runtime": 581.4545, "eval_samples_per_second": 6.449, "eval_steps_per_second": 0.807, "step": 43 }, { "epoch": 0.04, "learning_rate": 4.8034734917733096e-05, "loss": 1.6864, "step": 86 }, { "epoch": 0.04, "eval_accuracy": 0.8901333333333333, "eval_f1": 0.8857459789240155, "eval_loss": 1.632243275642395, "eval_precision": 0.9225880993645291, "eval_recall": 0.8517333333333333, "eval_runtime": 581.1642, "eval_samples_per_second": 6.453, "eval_steps_per_second": 0.807, "step": 86 }, { "epoch": 0.06, "learning_rate": 4.705210237659964e-05, "loss": 1.0658, "step": 129 }, { "epoch": 0.06, "eval_accuracy": 0.904, "eval_f1": 0.9061032863849765, "eval_loss": 0.3131539821624756, "eval_precision": 0.886676875957121, "eval_recall": 0.9264, "eval_runtime": 580.6614, "eval_samples_per_second": 6.458, "eval_steps_per_second": 0.808, "step": 129 }, { "epoch": 0.08, "learning_rate": 4.606946983546618e-05, "loss": 0.4275, "step": 172 }, { "epoch": 0.08, "eval_accuracy": 0.8874666666666666, "eval_f1": 0.8765359859566997, "eval_loss": 1.6054295301437378, "eval_precision": 0.9708360337005832, "eval_recall": 0.7989333333333334, "eval_runtime": 580.8161, "eval_samples_per_second": 6.456, "eval_steps_per_second": 0.807, "step": 172 }, { "epoch": 0.1, "learning_rate": 4.5086837294332726e-05, "loss": 0.5456, "step": 215 }, { "epoch": 0.1, "eval_accuracy": 0.9170666666666667, "eval_f1": 0.9218789248932429, "eval_loss": 0.41803765296936035, "eval_precision": 0.8713200379867047, "eval_recall": 0.9786666666666667, "eval_runtime": 580.4549, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.808, "step": 215 }, { "epoch": 0.12, "learning_rate": 4.410420475319927e-05, "loss": 0.6111, "step": 258 }, { "epoch": 0.12, "eval_accuracy": 0.8786666666666667, "eval_f1": 0.8626622396619378, "eval_loss": 0.32219210267066956, "eval_precision": 0.9937413073713491, "eval_recall": 0.7621333333333333, "eval_runtime": 584.0808, "eval_samples_per_second": 6.42, "eval_steps_per_second": 0.803, "step": 258 }, { "epoch": 0.14, "learning_rate": 4.312157221206581e-05, "loss": 0.7106, "step": 301 }, { "epoch": 0.14, "eval_accuracy": 0.9274666666666667, "eval_f1": 0.9221967963386728, "eval_loss": 0.5022754073143005, "eval_precision": 0.9944478716841456, "eval_recall": 0.8597333333333333, "eval_runtime": 581.6225, "eval_samples_per_second": 6.447, "eval_steps_per_second": 0.806, "step": 301 }, { "epoch": 0.16, "learning_rate": 4.213893967093236e-05, "loss": 0.2426, "step": 344 }, { "epoch": 0.16, "eval_accuracy": 0.9586666666666667, "eval_f1": 0.9586997069011457, "eval_loss": 0.16574496030807495, "eval_precision": 0.9579339723109691, "eval_recall": 0.9594666666666667, "eval_runtime": 580.7975, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.808, "step": 344 }, { "epoch": 0.18, "learning_rate": 4.1156307129798905e-05, "loss": 0.3958, "step": 387 }, { "epoch": 0.18, "eval_accuracy": 0.9554666666666667, "eval_f1": 0.9535724214623298, "eval_loss": 0.24226774275302887, "eval_precision": 0.9959349593495935, "eval_recall": 0.9146666666666666, "eval_runtime": 581.1794, "eval_samples_per_second": 6.452, "eval_steps_per_second": 0.807, "step": 387 }, { "epoch": 0.2, "learning_rate": 4.017367458866545e-05, "loss": 0.4259, "step": 430 }, { "epoch": 0.2, "eval_accuracy": 0.9448, "eval_f1": 0.9449614464238233, "eval_loss": 0.3525341749191284, "eval_precision": 0.9422057264050901, "eval_recall": 0.9477333333333333, "eval_runtime": 580.7809, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.808, "step": 430 }, { "epoch": 0.22, "learning_rate": 3.919104204753199e-05, "loss": 0.2217, "step": 473 }, { "epoch": 0.22, "eval_accuracy": 0.9549333333333333, "eval_f1": 0.9531466592736346, "eval_loss": 0.22800709307193756, "eval_precision": 0.9924942263279446, "eval_recall": 0.9168, "eval_runtime": 580.8019, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.808, "step": 473 }, { "epoch": 0.24, "learning_rate": 3.820840950639854e-05, "loss": 0.1741, "step": 516 }, { "epoch": 0.24, "eval_accuracy": 0.9682666666666667, "eval_f1": 0.9673525377229081, "eval_loss": 0.15350370109081268, "eval_precision": 0.996045197740113, "eval_recall": 0.9402666666666667, "eval_runtime": 584.2385, "eval_samples_per_second": 6.419, "eval_steps_per_second": 0.803, "step": 516 }, { "epoch": 0.26, "learning_rate": 3.7225776965265085e-05, "loss": 0.1731, "step": 559 }, { "epoch": 0.26, "eval_accuracy": 0.9648, "eval_f1": 0.9636763896532745, "eval_loss": 0.29772499203681946, "eval_precision": 0.9954519613416714, "eval_recall": 0.9338666666666666, "eval_runtime": 580.9188, "eval_samples_per_second": 6.455, "eval_steps_per_second": 0.807, "step": 559 }, { "epoch": 0.28, "learning_rate": 3.624314442413163e-05, "loss": 0.1857, "step": 602 }, { "epoch": 0.28, "eval_accuracy": 0.9826666666666667, "eval_f1": 0.9826527888977848, "eval_loss": 0.16285915672779083, "eval_precision": 0.9834401709401709, "eval_recall": 0.9818666666666667, "eval_runtime": 581.4605, "eval_samples_per_second": 6.449, "eval_steps_per_second": 0.807, "step": 602 }, { "epoch": 0.29, "learning_rate": 3.526051188299818e-05, "loss": 0.241, "step": 645 }, { "epoch": 0.29, "eval_accuracy": 0.9677333333333333, "eval_f1": 0.9681997371879106, "eval_loss": 0.30683064460754395, "eval_precision": 0.9544041450777202, "eval_recall": 0.9824, "eval_runtime": 580.3999, "eval_samples_per_second": 6.461, "eval_steps_per_second": 0.808, "step": 645 }, { "epoch": 0.31, "learning_rate": 3.4277879341864715e-05, "loss": 0.1674, "step": 688 }, { "epoch": 0.31, "eval_accuracy": 0.9050666666666667, "eval_f1": 0.9130434782608695, "eval_loss": 0.5339795351028442, "eval_precision": 0.8422712933753943, "eval_recall": 0.9968, "eval_runtime": 580.4617, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.808, "step": 688 }, { "epoch": 0.33, "learning_rate": 3.329524680073126e-05, "loss": 0.1452, "step": 731 }, { "epoch": 0.33, "eval_accuracy": 0.9754666666666667, "eval_f1": 0.9756871035940803, "eval_loss": 0.1770327091217041, "eval_precision": 0.9669984284965951, "eval_recall": 0.9845333333333334, "eval_runtime": 581.5384, "eval_samples_per_second": 6.448, "eval_steps_per_second": 0.806, "step": 731 }, { "epoch": 0.35, "learning_rate": 3.231261425959781e-05, "loss": 0.2261, "step": 774 }, { "epoch": 0.35, "eval_accuracy": 0.9714666666666667, "eval_f1": 0.97192337969037, "eval_loss": 0.2083805948495865, "eval_precision": 0.9566115702479339, "eval_recall": 0.9877333333333334, "eval_runtime": 582.4902, "eval_samples_per_second": 6.438, "eval_steps_per_second": 0.805, "step": 774 }, { "epoch": 0.37, "learning_rate": 3.132998171846435e-05, "loss": 0.1849, "step": 817 }, { "epoch": 0.37, "eval_accuracy": 0.9765333333333334, "eval_f1": 0.9763186221743809, "eval_loss": 0.12196581810712814, "eval_precision": 0.9853340575774036, "eval_recall": 0.9674666666666667, "eval_runtime": 578.7143, "eval_samples_per_second": 6.48, "eval_steps_per_second": 0.81, "step": 817 }, { "epoch": 0.39, "learning_rate": 3.03473491773309e-05, "loss": 0.1237, "step": 860 }, { "epoch": 0.39, "eval_accuracy": 0.9693333333333334, "eval_f1": 0.9700286682303884, "eval_loss": 0.2138151079416275, "eval_precision": 0.9485219164118247, "eval_recall": 0.9925333333333334, "eval_runtime": 578.8904, "eval_samples_per_second": 6.478, "eval_steps_per_second": 0.81, "step": 860 }, { "epoch": 0.41, "learning_rate": 2.9364716636197442e-05, "loss": 0.1627, "step": 903 }, { "epoch": 0.41, "eval_accuracy": 0.9584, "eval_f1": 0.9566425792106726, "eval_loss": 0.254962295293808, "eval_precision": 0.9988392338943702, "eval_recall": 0.9178666666666667, "eval_runtime": 578.6033, "eval_samples_per_second": 6.481, "eval_steps_per_second": 0.811, "step": 903 }, { "epoch": 0.43, "learning_rate": 2.838208409506399e-05, "loss": 0.2477, "step": 946 }, { "epoch": 0.43, "eval_accuracy": 0.9765333333333334, "eval_f1": 0.9761258817145958, "eval_loss": 0.13448475301265717, "eval_precision": 0.9933738266151297, "eval_recall": 0.9594666666666667, "eval_runtime": 578.5049, "eval_samples_per_second": 6.482, "eval_steps_per_second": 0.811, "step": 946 }, { "epoch": 0.45, "learning_rate": 2.739945155393053e-05, "loss": 0.0943, "step": 989 }, { "epoch": 0.45, "eval_accuracy": 0.9688, "eval_f1": 0.9695391825045562, "eval_loss": 0.18509739637374878, "eval_precision": 0.9471007121057986, "eval_recall": 0.9930666666666667, "eval_runtime": 578.7851, "eval_samples_per_second": 6.479, "eval_steps_per_second": 0.81, "step": 989 }, { "epoch": 0.47, "learning_rate": 2.641681901279708e-05, "loss": 0.2131, "step": 1032 }, { "epoch": 0.47, "eval_accuracy": 0.9826666666666667, "eval_f1": 0.9825970548862116, "eval_loss": 0.10872189700603485, "eval_precision": 0.9865591397849462, "eval_recall": 0.9786666666666667, "eval_runtime": 580.8924, "eval_samples_per_second": 6.456, "eval_steps_per_second": 0.807, "step": 1032 }, { "epoch": 0.49, "learning_rate": 2.5434186471663625e-05, "loss": 0.0829, "step": 1075 }, { "epoch": 0.49, "eval_accuracy": 0.9661333333333333, "eval_f1": 0.9651577503429356, "eval_loss": 0.2479323148727417, "eval_precision": 0.9937853107344633, "eval_recall": 0.9381333333333334, "eval_runtime": 578.7358, "eval_samples_per_second": 6.48, "eval_steps_per_second": 0.81, "step": 1075 }, { "epoch": 0.51, "learning_rate": 2.4451553930530165e-05, "loss": 0.1392, "step": 1118 }, { "epoch": 0.51, "eval_accuracy": 0.9613333333333334, "eval_f1": 0.959867146415721, "eval_loss": 0.20837165415287018, "eval_precision": 0.997698504027618, "eval_recall": 0.9248, "eval_runtime": 579.0845, "eval_samples_per_second": 6.476, "eval_steps_per_second": 0.81, "step": 1118 }, { "epoch": 0.53, "learning_rate": 2.346892138939671e-05, "loss": 0.0563, "step": 1161 }, { "epoch": 0.53, "eval_accuracy": 0.9874666666666667, "eval_f1": 0.9874833555259653, "eval_loss": 0.08351419121026993, "eval_precision": 0.9861702127659574, "eval_recall": 0.9888, "eval_runtime": 578.9179, "eval_samples_per_second": 6.478, "eval_steps_per_second": 0.81, "step": 1161 }, { "epoch": 0.55, "learning_rate": 2.2486288848263255e-05, "loss": 0.107, "step": 1204 }, { "epoch": 0.55, "eval_accuracy": 0.9826666666666667, "eval_f1": 0.9828269484808455, "eval_loss": 0.11463689059019089, "eval_precision": 0.9738219895287958, "eval_recall": 0.992, "eval_runtime": 579.4593, "eval_samples_per_second": 6.472, "eval_steps_per_second": 0.809, "step": 1204 }, { "epoch": 0.57, "learning_rate": 2.1503656307129798e-05, "loss": 0.0822, "step": 1247 }, { "epoch": 0.57, "eval_accuracy": 0.9816, "eval_f1": 0.9817218543046358, "eval_loss": 0.1307775229215622, "eval_precision": 0.9752631578947368, "eval_recall": 0.9882666666666666, "eval_runtime": 580.0354, "eval_samples_per_second": 6.465, "eval_steps_per_second": 0.809, "step": 1247 }, { "epoch": 0.59, "learning_rate": 2.0521023765996345e-05, "loss": 0.1165, "step": 1290 }, { "epoch": 0.59, "eval_accuracy": 0.9856, "eval_f1": 0.9855923159018143, "eval_loss": 0.09052357822656631, "eval_precision": 0.9861185264281901, "eval_recall": 0.9850666666666666, "eval_runtime": 581.231, "eval_samples_per_second": 6.452, "eval_steps_per_second": 0.807, "step": 1290 }, { "epoch": 0.61, "learning_rate": 1.953839122486289e-05, "loss": 0.1156, "step": 1333 }, { "epoch": 0.61, "eval_accuracy": 0.9864, "eval_f1": 0.9864972200158858, "eval_loss": 0.09110942482948303, "eval_precision": 0.9794952681388013, "eval_recall": 0.9936, "eval_runtime": 579.1835, "eval_samples_per_second": 6.475, "eval_steps_per_second": 0.81, "step": 1333 }, { "epoch": 0.63, "learning_rate": 1.8555758683729435e-05, "loss": 0.1172, "step": 1376 }, { "epoch": 0.63, "eval_accuracy": 0.984, "eval_f1": 0.9841772151898733, "eval_loss": 0.09306684881448746, "eval_precision": 0.97339593114241, "eval_recall": 0.9952, "eval_runtime": 579.0887, "eval_samples_per_second": 6.476, "eval_steps_per_second": 0.81, "step": 1376 }, { "epoch": 0.65, "learning_rate": 1.7573126142595978e-05, "loss": 0.1447, "step": 1419 }, { "epoch": 0.65, "eval_accuracy": 0.9848, "eval_f1": 0.9849086576648134, "eval_loss": 0.08812595903873444, "eval_precision": 0.9779179810725552, "eval_recall": 0.992, "eval_runtime": 579.3815, "eval_samples_per_second": 6.472, "eval_steps_per_second": 0.809, "step": 1419 }, { "epoch": 0.67, "learning_rate": 1.659049360146252e-05, "loss": 0.087, "step": 1462 }, { "epoch": 0.67, "eval_accuracy": 0.9872, "eval_f1": 0.9872272485364556, "eval_loss": 0.07127923518419266, "eval_precision": 0.9851301115241635, "eval_recall": 0.9893333333333333, "eval_runtime": 578.9808, "eval_samples_per_second": 6.477, "eval_steps_per_second": 0.81, "step": 1462 }, { "epoch": 0.69, "learning_rate": 1.5607861060329068e-05, "loss": 0.0313, "step": 1505 }, { "epoch": 0.69, "eval_accuracy": 0.9861333333333333, "eval_f1": 0.9862142099681867, "eval_loss": 0.1003463938832283, "eval_precision": 0.9804955192409067, "eval_recall": 0.992, "eval_runtime": 579.6786, "eval_samples_per_second": 6.469, "eval_steps_per_second": 0.809, "step": 1505 }, { "epoch": 0.71, "learning_rate": 1.4625228519195613e-05, "loss": 0.0729, "step": 1548 }, { "epoch": 0.71, "eval_accuracy": 0.9869333333333333, "eval_f1": 0.986859747921695, "eval_loss": 0.0750027745962143, "eval_precision": 0.9924487594390508, "eval_recall": 0.9813333333333333, "eval_runtime": 581.8591, "eval_samples_per_second": 6.445, "eval_steps_per_second": 0.806, "step": 1548 }, { "epoch": 0.73, "learning_rate": 1.3642595978062158e-05, "loss": 0.106, "step": 1591 }, { "epoch": 0.73, "eval_accuracy": 0.9874666666666667, "eval_f1": 0.9874499332443257, "eval_loss": 0.06067837029695511, "eval_precision": 0.9887700534759358, "eval_recall": 0.9861333333333333, "eval_runtime": 579.0471, "eval_samples_per_second": 6.476, "eval_steps_per_second": 0.81, "step": 1591 }, { "epoch": 0.75, "learning_rate": 1.2659963436928701e-05, "loss": 0.0823, "step": 1634 }, { "epoch": 0.75, "eval_accuracy": 0.9877333333333334, "eval_f1": 0.9877005347593582, "eval_loss": 0.07156907767057419, "eval_precision": 0.9903485254691688, "eval_recall": 0.9850666666666666, "eval_runtime": 578.809, "eval_samples_per_second": 6.479, "eval_steps_per_second": 0.81, "step": 1634 }, { "epoch": 0.77, "learning_rate": 1.1677330895795248e-05, "loss": 0.0817, "step": 1677 }, { "epoch": 0.77, "eval_accuracy": 0.9845333333333334, "eval_f1": 0.9846804014791337, "eval_loss": 0.09290226548910141, "eval_precision": 0.9754055468341183, "eval_recall": 0.9941333333333333, "eval_runtime": 578.5452, "eval_samples_per_second": 6.482, "eval_steps_per_second": 0.811, "step": 1677 }, { "epoch": 0.79, "learning_rate": 1.0694698354661791e-05, "loss": 0.1279, "step": 1720 }, { "epoch": 0.79, "eval_accuracy": 0.988, "eval_f1": 0.9879453522635949, "eval_loss": 0.07037492841482162, "eval_precision": 0.9924650161463939, "eval_recall": 0.9834666666666667, "eval_runtime": 579.5947, "eval_samples_per_second": 6.47, "eval_steps_per_second": 0.809, "step": 1720 }, { "epoch": 0.81, "learning_rate": 9.712065813528338e-06, "loss": 0.075, "step": 1763 }, { "epoch": 0.81, "eval_accuracy": 0.9890666666666666, "eval_f1": 0.9890462196099385, "eval_loss": 0.06325065344572067, "eval_precision": 0.9908993576017131, "eval_recall": 0.9872, "eval_runtime": 579.6298, "eval_samples_per_second": 6.47, "eval_steps_per_second": 0.809, "step": 1763 }, { "epoch": 0.83, "learning_rate": 8.729433272394881e-06, "loss": 0.0909, "step": 1806 }, { "epoch": 0.83, "eval_accuracy": 0.9842666666666666, "eval_f1": 0.9840841650930671, "eval_loss": 0.09650667011737823, "eval_precision": 0.9956331877729258, "eval_recall": 0.9728, "eval_runtime": 579.8501, "eval_samples_per_second": 6.467, "eval_steps_per_second": 0.809, "step": 1806 }, { "epoch": 0.85, "learning_rate": 7.746800731261426e-06, "loss": 0.0583, "step": 1849 }, { "epoch": 0.85, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.9893276414087514, "eval_loss": 0.07162317633628845, "eval_precision": 0.9898558462359851, "eval_recall": 0.9888, "eval_runtime": 579.4799, "eval_samples_per_second": 6.471, "eval_steps_per_second": 0.809, "step": 1849 }, { "epoch": 0.86, "learning_rate": 6.764168190127972e-06, "loss": 0.0961, "step": 1892 }, { "epoch": 0.86, "eval_accuracy": 0.988, "eval_f1": 0.9879518072289156, "eval_loss": 0.05759565904736519, "eval_precision": 0.9919354838709677, "eval_recall": 0.984, "eval_runtime": 580.3503, "eval_samples_per_second": 6.462, "eval_steps_per_second": 0.808, "step": 1892 }, { "epoch": 0.88, "learning_rate": 5.781535648994515e-06, "loss": 0.0356, "step": 1935 }, { "epoch": 0.88, "eval_accuracy": 0.9882666666666666, "eval_f1": 0.9883103081827843, "eval_loss": 0.06381053477525711, "eval_precision": 0.984647961884595, "eval_recall": 0.992, "eval_runtime": 581.544, "eval_samples_per_second": 6.448, "eval_steps_per_second": 0.806, "step": 1935 }, { "epoch": 0.9, "learning_rate": 4.798903107861061e-06, "loss": 0.0848, "step": 1978 }, { "epoch": 0.9, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.9893162393162394, "eval_loss": 0.05824807286262512, "eval_precision": 0.990904226859283, "eval_recall": 0.9877333333333334, "eval_runtime": 578.359, "eval_samples_per_second": 6.484, "eval_steps_per_second": 0.811, "step": 1978 }, { "epoch": 0.92, "learning_rate": 3.816270566727605e-06, "loss": 0.1019, "step": 2021 }, { "epoch": 0.92, "eval_accuracy": 0.9890666666666666, "eval_f1": 0.9890579129970644, "eval_loss": 0.055534329265356064, "eval_precision": 0.9898504273504274, "eval_recall": 0.9882666666666666, "eval_runtime": 579.2162, "eval_samples_per_second": 6.474, "eval_steps_per_second": 0.81, "step": 2021 }, { "epoch": 0.94, "learning_rate": 2.83363802559415e-06, "loss": 0.0274, "step": 2064 }, { "epoch": 0.94, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.9892990904226859, "eval_loss": 0.05893222615122795, "eval_precision": 0.9924852388620504, "eval_recall": 0.9861333333333333, "eval_runtime": 581.1057, "eval_samples_per_second": 6.453, "eval_steps_per_second": 0.807, "step": 2064 }, { "epoch": 0.96, "learning_rate": 1.851005484460695e-06, "loss": 0.0313, "step": 2107 }, { "epoch": 0.96, "eval_accuracy": 0.9893333333333333, "eval_f1": 0.9892933618843683, "eval_loss": 0.0617961585521698, "eval_precision": 0.9930145083288554, "eval_recall": 0.9856, "eval_runtime": 580.2597, "eval_samples_per_second": 6.463, "eval_steps_per_second": 0.808, "step": 2107 }, { "epoch": 0.98, "learning_rate": 8.683729433272396e-07, "loss": 0.0513, "step": 2150 }, { "epoch": 0.98, "eval_accuracy": 0.9898666666666667, "eval_f1": 0.9898286937901497, "eval_loss": 0.05942407250404358, "eval_precision": 0.9935518538420204, "eval_recall": 0.9861333333333333, "eval_runtime": 580.5968, "eval_samples_per_second": 6.459, "eval_steps_per_second": 0.808, "step": 2150 } ], "max_steps": 2188, "num_train_epochs": 1, "total_flos": 1.268902526976e+17, "trial_name": null, "trial_params": null }