{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.997714808043876e-05, "loss": 6.9866, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9017367458866546e-05, "loss": 3.9852, "step": 43 }, { "epoch": 0.02, "eval_accuracy": 0.5429333333333334, "eval_f1": 0.6712696586114307, "eval_loss": 1.4847679138183594, "eval_precision": 0.5241090146750524, "eval_recall": 0.9333333333333333, "eval_runtime": 305.136, "eval_samples_per_second": 12.29, "eval_steps_per_second": 1.537, "step": 43 }, { "epoch": 0.04, "learning_rate": 4.8034734917733096e-05, "loss": 2.6079, "step": 86 }, { "epoch": 0.04, "eval_accuracy": 0.5717333333333333, "eval_f1": 0.6556603773584906, "eval_loss": 0.8047523498535156, "eval_precision": 0.5482251703119397, "eval_recall": 0.8154666666666667, "eval_runtime": 305.1856, "eval_samples_per_second": 12.288, "eval_steps_per_second": 1.537, "step": 86 }, { "epoch": 0.06, "learning_rate": 4.705210237659964e-05, "loss": 1.0141, "step": 129 }, { "epoch": 0.06, "eval_accuracy": 0.5896, "eval_f1": 0.6892792247122956, "eval_loss": 0.7188379764556885, "eval_precision": 0.5545808966861598, "eval_recall": 0.9104, "eval_runtime": 305.1209, "eval_samples_per_second": 12.29, "eval_steps_per_second": 1.537, "step": 129 }, { "epoch": 0.08, "learning_rate": 4.606946983546618e-05, "loss": 0.8023, "step": 172 }, { "epoch": 0.08, "eval_accuracy": 0.49733333333333335, "eval_f1": 0.11874707807386631, "eval_loss": 2.954312801361084, "eval_precision": 0.4810606060606061, "eval_recall": 0.06773333333333334, "eval_runtime": 305.0816, "eval_samples_per_second": 12.292, "eval_steps_per_second": 1.537, "step": 172 }, { "epoch": 0.1, "learning_rate": 4.5086837294332726e-05, "loss": 2.4517, "step": 215 }, { "epoch": 0.1, "eval_accuracy": 0.6850666666666667, "eval_f1": 0.7327449649241911, "eval_loss": 0.6679351925849915, "eval_precision": 0.6363993710691824, "eval_recall": 0.8634666666666667, "eval_runtime": 304.8754, "eval_samples_per_second": 12.3, "eval_steps_per_second": 1.538, "step": 215 }, { "epoch": 0.12, "learning_rate": 4.410420475319927e-05, "loss": 1.6579, "step": 258 }, { "epoch": 0.12, "eval_accuracy": 0.8109333333333333, "eval_f1": 0.7899259259259258, "eval_loss": 0.7148910164833069, "eval_precision": 0.8886666666666667, "eval_recall": 0.7109333333333333, "eval_runtime": 305.6713, "eval_samples_per_second": 12.268, "eval_steps_per_second": 1.534, "step": 258 }, { "epoch": 0.14, "learning_rate": 4.312157221206581e-05, "loss": 1.3412, "step": 301 }, { "epoch": 0.14, "eval_accuracy": 0.4938666666666667, "eval_f1": 0.6546579330422125, "eval_loss": 3.249486207962036, "eval_precision": 0.4968240817453742, "eval_recall": 0.9594666666666667, "eval_runtime": 304.7083, "eval_samples_per_second": 12.307, "eval_steps_per_second": 1.539, "step": 301 }, { "epoch": 0.16, "learning_rate": 4.213893967093236e-05, "loss": 1.6235, "step": 344 }, { "epoch": 0.16, "eval_accuracy": 0.8013333333333333, "eval_f1": 0.797719250610915, "eval_loss": 0.4983011782169342, "eval_precision": 0.8125, "eval_recall": 0.7834666666666666, "eval_runtime": 304.8657, "eval_samples_per_second": 12.3, "eval_steps_per_second": 1.538, "step": 344 }, { "epoch": 0.18, "learning_rate": 4.1156307129798905e-05, "loss": 0.7822, "step": 387 }, { "epoch": 0.18, "eval_accuracy": 0.856, "eval_f1": 0.8429319371727749, "eval_loss": 0.8706910014152527, "eval_precision": 0.927063339731286, "eval_recall": 0.7728, "eval_runtime": 305.1415, "eval_samples_per_second": 12.289, "eval_steps_per_second": 1.537, "step": 387 }, { "epoch": 0.2, "learning_rate": 4.017367458866545e-05, "loss": 1.0922, "step": 430 }, { "epoch": 0.2, "eval_accuracy": 0.8034666666666667, "eval_f1": 0.7738570113531757, "eval_loss": 0.667508602142334, "eval_precision": 0.9111271676300579, "eval_recall": 0.6725333333333333, "eval_runtime": 305.5394, "eval_samples_per_second": 12.273, "eval_steps_per_second": 1.535, "step": 430 }, { "epoch": 0.22, "learning_rate": 3.919104204753199e-05, "loss": 0.5747, "step": 473 }, { "epoch": 0.22, "eval_accuracy": 0.88, "eval_f1": 0.8822605965463108, "eval_loss": 0.4237947165966034, "eval_precision": 0.8659476117103235, "eval_recall": 0.8992, "eval_runtime": 305.7145, "eval_samples_per_second": 12.266, "eval_steps_per_second": 1.534, "step": 473 }, { "epoch": 0.24, "learning_rate": 3.820840950639854e-05, "loss": 0.5248, "step": 516 }, { "epoch": 0.24, "eval_accuracy": 0.8424, "eval_f1": 0.8570737605804112, "eval_loss": 1.081692099571228, "eval_precision": 0.784070796460177, "eval_recall": 0.9450666666666667, "eval_runtime": 305.0927, "eval_samples_per_second": 12.291, "eval_steps_per_second": 1.537, "step": 516 }, { "epoch": 0.26, "learning_rate": 3.7225776965265085e-05, "loss": 0.6488, "step": 559 }, { "epoch": 0.26, "eval_accuracy": 0.8365333333333334, "eval_f1": 0.8115585613280049, "eval_loss": 0.5578557252883911, "eval_precision": 0.9579100145137881, "eval_recall": 0.704, "eval_runtime": 305.0779, "eval_samples_per_second": 12.292, "eval_steps_per_second": 1.537, "step": 559 }, { "epoch": 0.28, "learning_rate": 3.624314442413163e-05, "loss": 0.9679, "step": 602 }, { "epoch": 0.28, "eval_accuracy": 0.6829333333333333, "eval_f1": 0.5987175160310497, "eval_loss": 0.9434438943862915, "eval_precision": 0.8152573529411765, "eval_recall": 0.4730666666666667, "eval_runtime": 304.9674, "eval_samples_per_second": 12.296, "eval_steps_per_second": 1.538, "step": 602 }, { "epoch": 0.29, "learning_rate": 3.526051188299818e-05, "loss": 0.4584, "step": 645 }, { "epoch": 0.29, "eval_accuracy": 0.8458666666666667, "eval_f1": 0.8547008547008548, "eval_loss": 0.44859734177589417, "eval_precision": 0.8083689966714218, "eval_recall": 0.9066666666666666, "eval_runtime": 304.9696, "eval_samples_per_second": 12.296, "eval_steps_per_second": 1.538, "step": 645 }, { "epoch": 0.31, "learning_rate": 3.4277879341864715e-05, "loss": 0.418, "step": 688 }, { "epoch": 0.31, "eval_accuracy": 0.8901333333333333, "eval_f1": 0.892931392931393, "eval_loss": 0.5646004676818848, "eval_precision": 0.8707551951343132, "eval_recall": 0.9162666666666667, "eval_runtime": 304.779, "eval_samples_per_second": 12.304, "eval_steps_per_second": 1.539, "step": 688 }, { "epoch": 0.33, "learning_rate": 3.329524680073126e-05, "loss": 0.5809, "step": 731 }, { "epoch": 0.33, "eval_accuracy": 0.8773333333333333, "eval_f1": 0.8847117794486216, "eval_loss": 0.4200115501880646, "eval_precision": 0.83451536643026, "eval_recall": 0.9413333333333334, "eval_runtime": 304.7969, "eval_samples_per_second": 12.303, "eval_steps_per_second": 1.539, "step": 731 }, { "epoch": 0.35, "learning_rate": 3.231261425959781e-05, "loss": 0.4702, "step": 774 }, { "epoch": 0.35, "eval_accuracy": 0.7117333333333333, "eval_f1": 0.7615265828369733, "eval_loss": 0.5946761965751648, "eval_precision": 0.6493604213694507, "eval_recall": 0.9205333333333333, "eval_runtime": 304.8632, "eval_samples_per_second": 12.301, "eval_steps_per_second": 1.538, "step": 774 }, { "epoch": 0.37, "learning_rate": 3.132998171846435e-05, "loss": 0.3684, "step": 817 }, { "epoch": 0.37, "eval_accuracy": 0.8965333333333333, "eval_f1": 0.8914381645215446, "eval_loss": 0.30674976110458374, "eval_precision": 0.9376103590347263, "eval_recall": 0.8496, "eval_runtime": 304.8468, "eval_samples_per_second": 12.301, "eval_steps_per_second": 1.538, "step": 817 }, { "epoch": 0.39, "learning_rate": 3.03473491773309e-05, "loss": 0.5431, "step": 860 }, { "epoch": 0.39, "eval_accuracy": 0.8762666666666666, "eval_f1": 0.8831234256926952, "eval_loss": 0.8089134693145752, "eval_precision": 0.8367541766109785, "eval_recall": 0.9349333333333333, "eval_runtime": 304.976, "eval_samples_per_second": 12.296, "eval_steps_per_second": 1.538, "step": 860 }, { "epoch": 0.41, "learning_rate": 2.9364716636197442e-05, "loss": 0.4287, "step": 903 }, { "epoch": 0.41, "eval_accuracy": 0.8466666666666667, "eval_f1": 0.822366388631449, "eval_loss": 0.49679452180862427, "eval_precision": 0.9772393538913363, "eval_recall": 0.7098666666666666, "eval_runtime": 306.5019, "eval_samples_per_second": 12.235, "eval_steps_per_second": 1.53, "step": 903 }, { "epoch": 0.43, "learning_rate": 2.838208409506399e-05, "loss": 0.4942, "step": 946 }, { "epoch": 0.43, "eval_accuracy": 0.9157333333333333, "eval_f1": 0.9138025095471904, "eval_loss": 0.23615875840187073, "eval_precision": 0.9352317141261864, "eval_recall": 0.8933333333333333, "eval_runtime": 305.2275, "eval_samples_per_second": 12.286, "eval_steps_per_second": 1.537, "step": 946 }, { "epoch": 0.45, "learning_rate": 2.739945155393053e-05, "loss": 0.3997, "step": 989 }, { "epoch": 0.45, "eval_accuracy": 0.8874666666666666, "eval_f1": 0.8867418142780461, "eval_loss": 0.6193059086799622, "eval_precision": 0.8924905456509995, "eval_recall": 0.8810666666666667, "eval_runtime": 304.7962, "eval_samples_per_second": 12.303, "eval_steps_per_second": 1.539, "step": 989 }, { "epoch": 0.47, "learning_rate": 2.641681901279708e-05, "loss": 0.3862, "step": 1032 }, { "epoch": 0.47, "eval_accuracy": 0.8712, "eval_f1": 0.8817625458996328, "eval_loss": 0.6541677713394165, "eval_precision": 0.8149321266968326, "eval_recall": 0.9605333333333334, "eval_runtime": 305.7101, "eval_samples_per_second": 12.267, "eval_steps_per_second": 1.534, "step": 1032 }, { "epoch": 0.49, "learning_rate": 2.5434186471663625e-05, "loss": 0.4199, "step": 1075 }, { "epoch": 0.49, "eval_accuracy": 0.8778666666666667, "eval_f1": 0.8674768518518517, "eval_loss": 0.5205650329589844, "eval_precision": 0.948134092346616, "eval_recall": 0.7994666666666667, "eval_runtime": 306.2355, "eval_samples_per_second": 12.245, "eval_steps_per_second": 1.532, "step": 1075 }, { "epoch": 0.51, "learning_rate": 2.4451553930530165e-05, "loss": 0.4922, "step": 1118 }, { "epoch": 0.51, "eval_accuracy": 0.8664, "eval_f1": 0.8587538765153652, "eval_loss": 0.3552885353565216, "eval_precision": 0.9108851674641149, "eval_recall": 0.8122666666666667, "eval_runtime": 305.1994, "eval_samples_per_second": 12.287, "eval_steps_per_second": 1.537, "step": 1118 }, { "epoch": 0.53, "learning_rate": 2.346892138939671e-05, "loss": 0.3303, "step": 1161 }, { "epoch": 0.53, "eval_accuracy": 0.9082666666666667, "eval_f1": 0.9038569032979319, "eval_loss": 0.38709646463394165, "eval_precision": 0.9495008807985907, "eval_recall": 0.8624, "eval_runtime": 305.1281, "eval_samples_per_second": 12.29, "eval_steps_per_second": 1.537, "step": 1161 }, { "epoch": 0.55, "learning_rate": 2.2486288848263255e-05, "loss": 0.2829, "step": 1204 }, { "epoch": 0.55, "eval_accuracy": 0.9104, "eval_f1": 0.9114391143911439, "eval_loss": 0.3015682101249695, "eval_precision": 0.900990099009901, "eval_recall": 0.9221333333333334, "eval_runtime": 305.3174, "eval_samples_per_second": 12.282, "eval_steps_per_second": 1.536, "step": 1204 }, { "epoch": 0.57, "learning_rate": 2.1503656307129798e-05, "loss": 0.5477, "step": 1247 }, { "epoch": 0.57, "eval_accuracy": 0.8509333333333333, "eval_f1": 0.8479738917595866, "eval_loss": 0.5632778406143188, "eval_precision": 0.865149833518313, "eval_recall": 0.8314666666666667, "eval_runtime": 305.058, "eval_samples_per_second": 12.293, "eval_steps_per_second": 1.537, "step": 1247 }, { "epoch": 0.59, "learning_rate": 2.0521023765996345e-05, "loss": 0.4431, "step": 1290 }, { "epoch": 0.59, "eval_accuracy": 0.9082666666666667, "eval_f1": 0.9050248481501934, "eval_loss": 0.42895591259002686, "eval_precision": 0.9381797366914711, "eval_recall": 0.8741333333333333, "eval_runtime": 305.0347, "eval_samples_per_second": 12.294, "eval_steps_per_second": 1.538, "step": 1290 }, { "epoch": 0.61, "learning_rate": 1.953839122486289e-05, "loss": 0.3335, "step": 1333 }, { "epoch": 0.61, "eval_accuracy": 0.9069333333333334, "eval_f1": 0.90008588605783, "eval_loss": 0.32379406690597534, "eval_precision": 0.9715698393077874, "eval_recall": 0.8384, "eval_runtime": 306.6476, "eval_samples_per_second": 12.229, "eval_steps_per_second": 1.529, "step": 1333 }, { "epoch": 0.63, "learning_rate": 1.8555758683729435e-05, "loss": 0.3255, "step": 1376 }, { "epoch": 0.63, "eval_accuracy": 0.9152, "eval_f1": 0.9159619450317126, "eval_loss": 0.3898433446884155, "eval_precision": 0.9078051335777895, "eval_recall": 0.9242666666666667, "eval_runtime": 305.4671, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.535, "step": 1376 }, { "epoch": 0.65, "learning_rate": 1.7573126142595978e-05, "loss": 0.3103, "step": 1419 }, { "epoch": 0.65, "eval_accuracy": 0.9189333333333334, "eval_f1": 0.9140271493212669, "eval_loss": 0.25054827332496643, "eval_precision": 0.9729078868151716, "eval_recall": 0.8618666666666667, "eval_runtime": 304.9946, "eval_samples_per_second": 12.295, "eval_steps_per_second": 1.538, "step": 1419 }, { "epoch": 0.67, "learning_rate": 1.659049360146252e-05, "loss": 0.2878, "step": 1462 }, { "epoch": 0.67, "eval_accuracy": 0.9208, "eval_f1": 0.9207788743664976, "eval_loss": 0.29952123761177063, "eval_precision": 0.9210245464247598, "eval_recall": 0.9205333333333333, "eval_runtime": 304.6679, "eval_samples_per_second": 12.308, "eval_steps_per_second": 1.539, "step": 1462 }, { "epoch": 0.69, "learning_rate": 1.5607861060329068e-05, "loss": 0.3426, "step": 1505 }, { "epoch": 0.69, "eval_accuracy": 0.9248, "eval_f1": 0.9255936675461741, "eval_loss": 0.3102300763130188, "eval_precision": 0.9159268929503916, "eval_recall": 0.9354666666666667, "eval_runtime": 305.1368, "eval_samples_per_second": 12.29, "eval_steps_per_second": 1.537, "step": 1505 }, { "epoch": 0.71, "learning_rate": 1.4625228519195613e-05, "loss": 0.2799, "step": 1548 }, { "epoch": 0.71, "eval_accuracy": 0.9226666666666666, "eval_f1": 0.9236440231700894, "eval_loss": 0.32980650663375854, "eval_precision": 0.9121164846593863, "eval_recall": 0.9354666666666667, "eval_runtime": 305.2653, "eval_samples_per_second": 12.284, "eval_steps_per_second": 1.536, "step": 1548 }, { "epoch": 0.73, "learning_rate": 1.3642595978062158e-05, "loss": 0.4076, "step": 1591 }, { "epoch": 0.73, "eval_accuracy": 0.9277333333333333, "eval_f1": 0.9256515775034294, "eval_loss": 0.29825451970100403, "eval_precision": 0.9531073446327684, "eval_recall": 0.8997333333333334, "eval_runtime": 304.9271, "eval_samples_per_second": 12.298, "eval_steps_per_second": 1.538, "step": 1591 }, { "epoch": 0.75, "learning_rate": 1.2659963436928701e-05, "loss": 0.2303, "step": 1634 }, { "epoch": 0.75, "eval_accuracy": 0.9168, "eval_f1": 0.9147540983606557, "eval_loss": 0.27176761627197266, "eval_precision": 0.9378151260504202, "eval_recall": 0.8928, "eval_runtime": 305.6751, "eval_samples_per_second": 12.268, "eval_steps_per_second": 1.534, "step": 1634 }, { "epoch": 0.77, "learning_rate": 1.1677330895795248e-05, "loss": 0.2793, "step": 1677 }, { "epoch": 0.77, "eval_accuracy": 0.9229333333333334, "eval_f1": 0.9229949373834265, "eval_loss": 0.34073972702026367, "eval_precision": 0.9222577209797657, "eval_recall": 0.9237333333333333, "eval_runtime": 305.4396, "eval_samples_per_second": 12.277, "eval_steps_per_second": 1.535, "step": 1677 }, { "epoch": 0.79, "learning_rate": 1.0694698354661791e-05, "loss": 0.226, "step": 1720 }, { "epoch": 0.79, "eval_accuracy": 0.9290666666666667, "eval_f1": 0.9279132791327913, "eval_loss": 0.34017863869667053, "eval_precision": 0.9432506887052342, "eval_recall": 0.9130666666666667, "eval_runtime": 304.4994, "eval_samples_per_second": 12.315, "eval_steps_per_second": 1.54, "step": 1720 }, { "epoch": 0.81, "learning_rate": 9.712065813528338e-06, "loss": 0.5583, "step": 1763 }, { "epoch": 0.81, "eval_accuracy": 0.92, "eval_f1": 0.9220779220779222, "eval_loss": 0.26789960265159607, "eval_precision": 0.8987341772151899, "eval_recall": 0.9466666666666667, "eval_runtime": 304.5833, "eval_samples_per_second": 12.312, "eval_steps_per_second": 1.54, "step": 1763 }, { "epoch": 0.83, "learning_rate": 8.729433272394881e-06, "loss": 0.2609, "step": 1806 }, { "epoch": 0.83, "eval_accuracy": 0.9293333333333333, "eval_f1": 0.928436402916554, "eval_loss": 0.3069748878479004, "eval_precision": 0.9403719912472648, "eval_recall": 0.9168, "eval_runtime": 305.3926, "eval_samples_per_second": 12.279, "eval_steps_per_second": 1.536, "step": 1806 }, { "epoch": 0.85, "learning_rate": 7.746800731261426e-06, "loss": 0.3392, "step": 1849 }, { "epoch": 0.85, "eval_accuracy": 0.9309333333333333, "eval_f1": 0.928748280605227, "eval_loss": 0.24878720939159393, "eval_precision": 0.9590909090909091, "eval_recall": 0.9002666666666667, "eval_runtime": 305.0861, "eval_samples_per_second": 12.292, "eval_steps_per_second": 1.537, "step": 1849 }, { "epoch": 0.86, "learning_rate": 6.764168190127972e-06, "loss": 0.3378, "step": 1892 }, { "epoch": 0.86, "eval_accuracy": 0.9272, "eval_f1": 0.9248554913294799, "eval_loss": 0.2940199077129364, "eval_precision": 0.9556313993174061, "eval_recall": 0.896, "eval_runtime": 304.8472, "eval_samples_per_second": 12.301, "eval_steps_per_second": 1.538, "step": 1892 }, { "epoch": 0.88, "learning_rate": 5.781535648994515e-06, "loss": 0.2271, "step": 1935 }, { "epoch": 0.88, "eval_accuracy": 0.9349333333333333, "eval_f1": 0.934654525977504, "eval_loss": 0.23414117097854614, "eval_precision": 0.9386767079074771, "eval_recall": 0.9306666666666666, "eval_runtime": 304.8432, "eval_samples_per_second": 12.301, "eval_steps_per_second": 1.538, "step": 1935 }, { "epoch": 0.9, "learning_rate": 4.798903107861061e-06, "loss": 0.4284, "step": 1978 }, { "epoch": 0.9, "eval_accuracy": 0.9325333333333333, "eval_f1": 0.9310438811665304, "eval_loss": 0.30585160851478577, "eval_precision": 0.9520624303232998, "eval_recall": 0.9109333333333334, "eval_runtime": 304.8244, "eval_samples_per_second": 12.302, "eval_steps_per_second": 1.539, "step": 1978 }, { "epoch": 0.92, "learning_rate": 3.816270566727605e-06, "loss": 0.2887, "step": 2021 }, { "epoch": 0.92, "eval_accuracy": 0.9341333333333334, "eval_f1": 0.9327158812312721, "eval_loss": 0.27199041843414307, "eval_precision": 0.9532293986636972, "eval_recall": 0.9130666666666667, "eval_runtime": 305.2967, "eval_samples_per_second": 12.283, "eval_steps_per_second": 1.536, "step": 2021 }, { "epoch": 0.94, "learning_rate": 2.83363802559415e-06, "loss": 0.2287, "step": 2064 }, { "epoch": 0.94, "eval_accuracy": 0.9362666666666667, "eval_f1": 0.9355273806312382, "eval_loss": 0.3032676577568054, "eval_precision": 0.9465065502183406, "eval_recall": 0.9248, "eval_runtime": 305.4791, "eval_samples_per_second": 12.276, "eval_steps_per_second": 1.535, "step": 2064 }, { "epoch": 0.96, "learning_rate": 1.851005484460695e-06, "loss": 0.3379, "step": 2107 }, { "epoch": 0.96, "eval_accuracy": 0.9362666666666667, "eval_f1": 0.9354577369700243, "eval_loss": 0.25949496030807495, "eval_precision": 0.9474835886214442, "eval_recall": 0.9237333333333333, "eval_runtime": 305.1987, "eval_samples_per_second": 12.287, "eval_steps_per_second": 1.537, "step": 2107 }, { "epoch": 0.98, "learning_rate": 8.683729433272396e-07, "loss": 0.2405, "step": 2150 }, { "epoch": 0.98, "eval_accuracy": 0.9354666666666667, "eval_f1": 0.9345238095238096, "eval_loss": 0.2737696170806885, "eval_precision": 0.9483800109829764, "eval_recall": 0.9210666666666667, "eval_runtime": 304.727, "eval_samples_per_second": 12.306, "eval_steps_per_second": 1.539, "step": 2150 } ], "max_steps": 2188, "num_train_epochs": 1, "total_flos": 6.497491746816e+16, "trial_name": null, "trial_params": null }