{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0, "eval_accuracy": 0.6288470372071658, "eval_f1": 0.43084004602991943, "eval_loss": 1.2868107557296753, "eval_precision": 0.3991471215351812, "eval_recall": 0.468, "eval_runtime": 1.4977, "eval_samples_per_second": 36.054, "eval_steps_per_second": 0.668, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.7672255397335783, "eval_f1": 0.7671957671957673, "eval_loss": 0.8454405665397644, "eval_precision": 0.7391102873030584, "eval_recall": 0.7975, "eval_runtime": 2.3492, "eval_samples_per_second": 22.986, "eval_steps_per_second": 0.426, "step": 40 }, { "epoch": 6.0, "eval_accuracy": 0.7731970601745521, "eval_f1": 0.8189655172413792, "eval_loss": 0.6648961305618286, "eval_precision": 0.7858455882352942, "eval_recall": 0.855, "eval_runtime": 2.4309, "eval_samples_per_second": 22.214, "eval_steps_per_second": 0.411, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.8240698208543867, "eval_f1": 0.8516630250060694, "eval_loss": 0.6003230810165405, "eval_precision": 0.827748938178386, "eval_recall": 0.877, "eval_runtime": 1.946, "eval_samples_per_second": 27.749, "eval_steps_per_second": 0.514, "step": 80 }, { "epoch": 10.0, "eval_accuracy": 0.8000689021589343, "eval_f1": 0.8560497369679579, "eval_loss": 0.5940394401550293, "eval_precision": 0.8203483043079743, "eval_recall": 0.895, "eval_runtime": 1.5738, "eval_samples_per_second": 34.312, "eval_steps_per_second": 0.635, "step": 100 }, { "epoch": 12.0, "eval_accuracy": 0.8124712907671107, "eval_f1": 0.8579682233991335, "eval_loss": 0.6028273701667786, "eval_precision": 0.8272980501392758, "eval_recall": 0.891, "eval_runtime": 2.3361, "eval_samples_per_second": 23.116, "eval_steps_per_second": 0.428, "step": 120 }, { "epoch": 14.0, "eval_accuracy": 0.8300413412953606, "eval_f1": 0.8839373163565133, "eval_loss": 0.5925479531288147, "eval_precision": 0.8661228406909789, "eval_recall": 0.9025, "eval_runtime": 1.549, "eval_samples_per_second": 34.86, "eval_steps_per_second": 0.646, "step": 140 }, { "epoch": 16.0, "eval_accuracy": 0.8147680293982544, "eval_f1": 0.864694471387003, "eval_loss": 0.6319019198417664, "eval_precision": 0.8394538606403014, "eval_recall": 0.8915, "eval_runtime": 1.7343, "eval_samples_per_second": 31.136, "eval_steps_per_second": 0.577, "step": 160 }, { "epoch": 18.0, "eval_accuracy": 0.8295819935691319, "eval_f1": 0.8697776691913022, "eval_loss": 0.660470187664032, "eval_precision": 0.8504538939321548, "eval_recall": 0.89, "eval_runtime": 1.7198, "eval_samples_per_second": 31.398, "eval_steps_per_second": 0.581, "step": 180 }, { "epoch": 20.0, "eval_accuracy": 0.8275149288011024, "eval_f1": 0.8871046228710462, "eval_loss": 0.6778170466423035, "eval_precision": 0.8639810426540284, "eval_recall": 0.9115, "eval_runtime": 1.5366, "eval_samples_per_second": 35.142, "eval_steps_per_second": 0.651, "step": 200 }, { "epoch": 22.0, "eval_accuracy": 0.815686724850712, "eval_f1": 0.8839615668883962, "eval_loss": 0.7490019798278809, "eval_precision": 0.8712967459932006, "eval_recall": 0.897, "eval_runtime": 2.3087, "eval_samples_per_second": 23.39, "eval_steps_per_second": 0.433, "step": 220 }, { "epoch": 24.0, "eval_accuracy": 0.8222324299494718, "eval_f1": 0.8948667324777887, "eval_loss": 0.7746959924697876, "eval_precision": 0.8835282651072125, "eval_recall": 0.9065, "eval_runtime": 1.9661, "eval_samples_per_second": 27.465, "eval_steps_per_second": 0.509, "step": 240 }, { "epoch": 26.0, "eval_accuracy": 0.8058107487367937, "eval_f1": 0.8912883435582821, "eval_loss": 0.8429032564163208, "eval_precision": 0.8751807228915662, "eval_recall": 0.908, "eval_runtime": 1.9643, "eval_samples_per_second": 27.491, "eval_steps_per_second": 0.509, "step": 260 }, { "epoch": 28.0, "eval_accuracy": 0.8129306384933395, "eval_f1": 0.8899308983218163, "eval_loss": 0.8374262452125549, "eval_precision": 0.8786549707602339, "eval_recall": 0.9015, "eval_runtime": 1.7097, "eval_samples_per_second": 31.584, "eval_steps_per_second": 0.585, "step": 280 }, { "epoch": 30.0, "eval_accuracy": 0.8112080845199816, "eval_f1": 0.8747830399206545, "eval_loss": 0.9091736078262329, "eval_precision": 0.867683226758485, "eval_recall": 0.882, "eval_runtime": 2.3751, "eval_samples_per_second": 22.736, "eval_steps_per_second": 0.421, "step": 300 }, { "epoch": 32.0, "eval_accuracy": 0.8288929719797887, "eval_f1": 0.8963474827245804, "eval_loss": 0.8785933256149292, "eval_precision": 0.884990253411306, "eval_recall": 0.908, "eval_runtime": 2.0542, "eval_samples_per_second": 26.288, "eval_steps_per_second": 0.487, "step": 320 }, { "epoch": 34.0, "eval_accuracy": 0.8307303628847037, "eval_f1": 0.8931750741839762, "eval_loss": 0.91584312915802, "eval_precision": 0.8835616438356164, "eval_recall": 0.903, "eval_runtime": 2.4582, "eval_samples_per_second": 21.967, "eval_steps_per_second": 0.407, "step": 340 }, { "epoch": 36.0, "eval_accuracy": 0.8259072117593018, "eval_f1": 0.8948534843634572, "eval_loss": 0.9160082340240479, "eval_precision": 0.8816108685104318, "eval_recall": 0.9085, "eval_runtime": 2.3705, "eval_samples_per_second": 22.78, "eval_steps_per_second": 0.422, "step": 360 }, { "epoch": 38.0, "eval_accuracy": 0.8171796049609554, "eval_f1": 0.8942850134903116, "eval_loss": 0.9378513693809509, "eval_precision": 0.8777082330284064, "eval_recall": 0.9115, "eval_runtime": 2.2021, "eval_samples_per_second": 24.522, "eval_steps_per_second": 0.454, "step": 380 }, { "epoch": 40.0, "eval_accuracy": 0.8079926504363804, "eval_f1": 0.8961134197017844, "eval_loss": 0.9751215577125549, "eval_precision": 0.8766140602582496, "eval_recall": 0.9165, "eval_runtime": 1.9472, "eval_samples_per_second": 27.733, "eval_steps_per_second": 0.514, "step": 400 }, { "epoch": 42.0, "eval_accuracy": 0.8140790078089113, "eval_f1": 0.8976067110782137, "eval_loss": 0.922682523727417, "eval_precision": 0.8860204578665368, "eval_recall": 0.9095, "eval_runtime": 1.9496, "eval_samples_per_second": 27.699, "eval_steps_per_second": 0.513, "step": 420 }, { "epoch": 44.0, "eval_accuracy": 0.8154570509875976, "eval_f1": 0.8868017795353437, "eval_loss": 1.0090957880020142, "eval_precision": 0.8768328445747801, "eval_recall": 0.897, "eval_runtime": 1.9314, "eval_samples_per_second": 27.96, "eval_steps_per_second": 0.518, "step": 440 }, { "epoch": 46.0, "eval_accuracy": 0.8182131373449701, "eval_f1": 0.8956933034602937, "eval_loss": 0.9963611960411072, "eval_precision": 0.8919186911254338, "eval_recall": 0.8995, "eval_runtime": 2.4248, "eval_samples_per_second": 22.27, "eval_steps_per_second": 0.412, "step": 460 }, { "epoch": 48.0, "eval_accuracy": 0.8203950390445567, "eval_f1": 0.896329365079365, "eval_loss": 1.0006074905395508, "eval_precision": 0.8892716535433071, "eval_recall": 0.9035, "eval_runtime": 2.4269, "eval_samples_per_second": 22.25, "eval_steps_per_second": 0.412, "step": 480 }, { "epoch": 50.0, "learning_rate": 5e-06, "loss": 0.235, "step": 500 }, { "epoch": 50.0, "eval_accuracy": 0.8178686265502986, "eval_f1": 0.8960199004975123, "eval_loss": 1.0283308029174805, "eval_precision": 0.8915841584158416, "eval_recall": 0.9005, "eval_runtime": 2.2977, "eval_samples_per_second": 23.502, "eval_steps_per_second": 0.435, "step": 500 }, { "epoch": 52.0, "eval_accuracy": 0.827859439595774, "eval_f1": 0.8951014349332014, "eval_loss": 0.9926251769065857, "eval_precision": 0.885896180215475, "eval_recall": 0.9045, "eval_runtime": 1.6933, "eval_samples_per_second": 31.891, "eval_steps_per_second": 0.591, "step": 520 }, { "epoch": 54.0, "eval_accuracy": 0.8228066146072577, "eval_f1": 0.8948148148148148, "eval_loss": 1.0112966299057007, "eval_precision": 0.8839024390243903, "eval_recall": 0.906, "eval_runtime": 2.4054, "eval_samples_per_second": 22.449, "eval_steps_per_second": 0.416, "step": 540 }, { "epoch": 56.0, "eval_accuracy": 0.8290078089113458, "eval_f1": 0.8993819530284302, "eval_loss": 1.004191279411316, "eval_precision": 0.8894865525672372, "eval_recall": 0.9095, "eval_runtime": 1.6522, "eval_samples_per_second": 32.684, "eval_steps_per_second": 0.605, "step": 560 }, { "epoch": 58.0, "eval_accuracy": 0.8225769407441433, "eval_f1": 0.8922392486406326, "eval_loss": 1.0357481241226196, "eval_precision": 0.8822091886608016, "eval_recall": 0.9025, "eval_runtime": 2.1102, "eval_samples_per_second": 25.59, "eval_steps_per_second": 0.474, "step": 580 }, { "epoch": 60.0, "eval_accuracy": 0.8178686265502986, "eval_f1": 0.901213171577123, "eval_loss": 1.0394996404647827, "eval_precision": 0.8925944090240314, "eval_recall": 0.91, "eval_runtime": 2.4208, "eval_samples_per_second": 22.307, "eval_steps_per_second": 0.413, "step": 600 }, { "epoch": 62.0, "eval_accuracy": 0.8201653651814423, "eval_f1": 0.8971962616822431, "eval_loss": 1.004025936126709, "eval_precision": 0.882865440464666, "eval_recall": 0.912, "eval_runtime": 2.4098, "eval_samples_per_second": 22.408, "eval_steps_per_second": 0.415, "step": 620 }, { "epoch": 64.0, "eval_accuracy": 0.8195911805236564, "eval_f1": 0.8941929133858268, "eval_loss": 1.029054880142212, "eval_precision": 0.8803294573643411, "eval_recall": 0.9085, "eval_runtime": 2.4017, "eval_samples_per_second": 22.484, "eval_steps_per_second": 0.416, "step": 640 }, { "epoch": 66.0, "eval_accuracy": 0.8279742765273312, "eval_f1": 0.8993055555555556, "eval_loss": 1.075648546218872, "eval_precision": 0.8922244094488189, "eval_recall": 0.9065, "eval_runtime": 2.4111, "eval_samples_per_second": 22.396, "eval_steps_per_second": 0.415, "step": 660 }, { "epoch": 68.0, "eval_accuracy": 0.8234956361966008, "eval_f1": 0.8966716343765524, "eval_loss": 1.1056932210922241, "eval_precision": 0.8909180651530109, "eval_recall": 0.9025, "eval_runtime": 2.4045, "eval_samples_per_second": 22.458, "eval_steps_per_second": 0.416, "step": 680 }, { "epoch": 70.0, "eval_accuracy": 0.8268259072117593, "eval_f1": 0.9007407407407408, "eval_loss": 1.1429905891418457, "eval_precision": 0.8897560975609756, "eval_recall": 0.912, "eval_runtime": 2.3943, "eval_samples_per_second": 22.553, "eval_steps_per_second": 0.418, "step": 700 }, { "epoch": 72.0, "eval_accuracy": 0.8221175930179145, "eval_f1": 0.9012012748222604, "eval_loss": 1.0474272966384888, "eval_precision": 0.8840788840788841, "eval_recall": 0.919, "eval_runtime": 2.6325, "eval_samples_per_second": 20.512, "eval_steps_per_second": 0.38, "step": 720 }, { "epoch": 74.0, "eval_accuracy": 0.8191318327974276, "eval_f1": 0.9079694053787319, "eval_loss": 1.1182180643081665, "eval_precision": 0.8962493911349245, "eval_recall": 0.92, "eval_runtime": 2.4698, "eval_samples_per_second": 21.864, "eval_steps_per_second": 0.405, "step": 740 }, { "epoch": 76.0, "eval_accuracy": 0.8267110702802021, "eval_f1": 0.9056324110671936, "eval_loss": 1.1421239376068115, "eval_precision": 0.89501953125, "eval_recall": 0.9165, "eval_runtime": 2.0412, "eval_samples_per_second": 26.455, "eval_steps_per_second": 0.49, "step": 760 }, { "epoch": 78.0, "eval_accuracy": 0.825447864033073, "eval_f1": 0.9112103174603173, "eval_loss": 1.1723241806030273, "eval_precision": 0.9040354330708661, "eval_recall": 0.9185, "eval_runtime": 2.0338, "eval_samples_per_second": 26.551, "eval_steps_per_second": 0.492, "step": 780 }, { "epoch": 80.0, "eval_accuracy": 0.8314193844740468, "eval_f1": 0.9091358024691357, "eval_loss": 1.0977429151535034, "eval_precision": 0.8980487804878049, "eval_recall": 0.9205, "eval_runtime": 2.708, "eval_samples_per_second": 19.941, "eval_steps_per_second": 0.369, "step": 800 }, { "epoch": 82.0, "eval_accuracy": 0.826596233348645, "eval_f1": 0.9020771513353116, "eval_loss": 1.1165635585784912, "eval_precision": 0.8923679060665362, "eval_recall": 0.912, "eval_runtime": 2.3964, "eval_samples_per_second": 22.533, "eval_steps_per_second": 0.417, "step": 820 }, { "epoch": 84.0, "eval_accuracy": 0.83210840606339, "eval_f1": 0.9058561897702001, "eval_loss": 1.1296281814575195, "eval_precision": 0.8954567659990229, "eval_recall": 0.9165, "eval_runtime": 1.5143, "eval_samples_per_second": 35.659, "eval_steps_per_second": 0.66, "step": 840 }, { "epoch": 86.0, "eval_accuracy": 0.8313045475424896, "eval_f1": 0.9070687098368759, "eval_loss": 1.122943639755249, "eval_precision": 0.896871945259042, "eval_recall": 0.9175, "eval_runtime": 2.2961, "eval_samples_per_second": 23.518, "eval_steps_per_second": 0.436, "step": 860 }, { "epoch": 88.0, "eval_accuracy": 0.8325677537896188, "eval_f1": 0.9080573405832921, "eval_loss": 1.1123418807983398, "eval_precision": 0.8978494623655914, "eval_recall": 0.9185, "eval_runtime": 2.1658, "eval_samples_per_second": 24.933, "eval_steps_per_second": 0.462, "step": 880 }, { "epoch": 90.0, "eval_accuracy": 0.8325677537896188, "eval_f1": 0.9037843185753152, "eval_loss": 1.1032230854034424, "eval_precision": 0.8942731277533039, "eval_recall": 0.9135, "eval_runtime": 1.6625, "eval_samples_per_second": 32.481, "eval_steps_per_second": 0.601, "step": 900 }, { "epoch": 92.0, "eval_accuracy": 0.83578318787322, "eval_f1": 0.9086407526615499, "eval_loss": 1.0933949947357178, "eval_precision": 0.8999509563511525, "eval_recall": 0.9175, "eval_runtime": 2.2261, "eval_samples_per_second": 24.258, "eval_steps_per_second": 0.449, "step": 920 }, { "epoch": 94.0, "eval_accuracy": 0.8360128617363344, "eval_f1": 0.9090909090909091, "eval_loss": 1.0976922512054443, "eval_precision": 0.9008345606283751, "eval_recall": 0.9175, "eval_runtime": 2.4337, "eval_samples_per_second": 22.189, "eval_steps_per_second": 0.411, "step": 940 }, { "epoch": 96.0, "eval_accuracy": 0.833945796968305, "eval_f1": 0.9066205533596838, "eval_loss": 1.1002885103225708, "eval_precision": 0.89599609375, "eval_recall": 0.9175, "eval_runtime": 2.3183, "eval_samples_per_second": 23.293, "eval_steps_per_second": 0.431, "step": 960 }, { "epoch": 98.0, "eval_accuracy": 0.8329122645842904, "eval_f1": 0.9057164068299927, "eval_loss": 1.1073625087738037, "eval_precision": 0.8966193042626164, "eval_recall": 0.915, "eval_runtime": 2.4241, "eval_samples_per_second": 22.276, "eval_steps_per_second": 0.413, "step": 980 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.0088, "step": 1000 }, { "epoch": 100.0, "eval_accuracy": 0.8317638952687184, "eval_f1": 0.9058561897702001, "eval_loss": 1.1073323488235474, "eval_precision": 0.8954567659990229, "eval_recall": 0.9165, "eval_runtime": 1.5045, "eval_samples_per_second": 35.892, "eval_steps_per_second": 0.665, "step": 1000 }, { "epoch": 100.0, "step": 1000, "total_flos": 4006801297113088.0, "train_loss": 0.12190062952041626, "train_runtime": 303.1848, "train_samples_per_second": 52.773, "train_steps_per_second": 3.298 } ], "max_steps": 1000, "num_train_epochs": 100, "total_flos": 4006801297113088.0, "trial_name": null, "trial_params": null }