{ "best_metric": 0.978172514732208, "best_model_checkpoint": "models/pos_final_xlm_nl/checkpoint-2415", "epoch": 39.99638989169675, "global_step": 2760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.3200084762438131, "eval_f1": 0.2160928249139116, "eval_loss": 3.483713388442993, "eval_precision": 0.2936494317356812, "eval_recall": 0.17094424294584126, "eval_runtime": 10.7415, "eval_samples_per_second": 732.58, "eval_steps_per_second": 2.886, "step": 69 }, { "epoch": 2.0, "eval_accuracy": 0.8496904657393253, "eval_f1": 0.8458663165617639, "eval_loss": 0.8298526406288147, "eval_precision": 0.8501404908642128, "eval_recall": 0.8416349050224381, "eval_runtime": 11.2158, "eval_samples_per_second": 701.6, "eval_steps_per_second": 2.764, "step": 138 }, { "epoch": 3.0, "eval_accuracy": 0.9429140115337461, "eval_f1": 0.9413686917810061, "eval_loss": 0.27647557854652405, "eval_precision": 0.941929974380871, "eval_recall": 0.9408080777033258, "eval_runtime": 10.9493, "eval_samples_per_second": 718.679, "eval_steps_per_second": 2.831, "step": 207 }, { "epoch": 4.0, "eval_accuracy": 0.961107663432576, "eval_f1": 0.9598816317903192, "eval_loss": 0.17041535675525665, "eval_precision": 0.9601288546848211, "eval_recall": 0.9596345361775374, "eval_runtime": 10.8629, "eval_samples_per_second": 724.391, "eval_steps_per_second": 2.854, "step": 276 }, { "epoch": 5.0, "eval_accuracy": 0.969288752327183, "eval_f1": 0.9685681024447033, "eval_loss": 0.1259436309337616, "eval_precision": 0.9685494963155347, "eval_recall": 0.9685867092887441, "eval_runtime": 11.2662, "eval_samples_per_second": 698.46, "eval_steps_per_second": 2.752, "step": 345 }, { "epoch": 6.0, "eval_accuracy": 0.9719451465936095, "eval_f1": 0.9712171621320507, "eval_loss": 0.10845372825860977, "eval_precision": 0.9711350819772891, "eval_recall": 0.9712992561627836, "eval_runtime": 10.8884, "eval_samples_per_second": 722.693, "eval_steps_per_second": 2.847, "step": 414 }, { "epoch": 7.0, "eval_accuracy": 0.9737614845535593, "eval_f1": 0.9729360106642491, "eval_loss": 0.09838376194238663, "eval_precision": 0.9727977383942906, "eval_recall": 0.9730743222474949, "eval_runtime": 11.171, "eval_samples_per_second": 704.411, "eval_steps_per_second": 2.775, "step": 483 }, { "epoch": 7.25, "learning_rate": 4.99e-05, "loss": 1.1448, "step": 500 }, { "epoch": 8.0, "eval_accuracy": 0.9751691464725203, "eval_f1": 0.9743453807855432, "eval_loss": 0.09059575200080872, "eval_precision": 0.9742256161268514, "eval_recall": 0.9744651748939571, "eval_runtime": 10.9149, "eval_samples_per_second": 720.942, "eval_steps_per_second": 2.84, "step": 552 }, { "epoch": 9.0, "eval_accuracy": 0.9758427051326684, "eval_f1": 0.9750796169168182, "eval_loss": 0.08883357048034668, "eval_precision": 0.9749410400006145, "eval_recall": 0.9752182332329256, "eval_runtime": 10.9703, "eval_samples_per_second": 717.298, "eval_steps_per_second": 2.826, "step": 621 }, { "epoch": 10.0, "eval_accuracy": 0.9765313999424826, "eval_f1": 0.975770544327188, "eval_loss": 0.08642476052045822, "eval_precision": 0.9756543517174092, "eval_recall": 0.9758867646154792, "eval_runtime": 10.8323, "eval_samples_per_second": 726.436, "eval_steps_per_second": 2.862, "step": 690 }, { "epoch": 11.0, "eval_accuracy": 0.9772427989767963, "eval_f1": 0.9765426312513927, "eval_loss": 0.08421829342842102, "eval_precision": 0.9764300969531214, "eval_recall": 0.9766551914919777, "eval_runtime": 11.0199, "eval_samples_per_second": 714.071, "eval_steps_per_second": 2.813, "step": 759 }, { "epoch": 12.0, "eval_accuracy": 0.9773260477999607, "eval_f1": 0.9765903503380455, "eval_loss": 0.08395781368017197, "eval_precision": 0.9764103115590241, "eval_recall": 0.9767704555234524, "eval_runtime": 10.9053, "eval_samples_per_second": 721.579, "eval_steps_per_second": 2.843, "step": 828 }, { "epoch": 13.0, "eval_accuracy": 0.9775076815959556, "eval_f1": 0.976759194523621, "eval_loss": 0.08459737151861191, "eval_precision": 0.9765866248790155, "eval_recall": 0.9769318251675171, "eval_runtime": 11.552, "eval_samples_per_second": 681.178, "eval_steps_per_second": 2.684, "step": 897 }, { "epoch": 14.0, "eval_accuracy": 0.9776363388681187, "eval_f1": 0.9769464516897355, "eval_loss": 0.0853676050901413, "eval_precision": 0.9768151124290356, "eval_recall": 0.9770778262740517, "eval_runtime": 11.6095, "eval_samples_per_second": 677.81, "eval_steps_per_second": 2.67, "step": 966 }, { "epoch": 14.49, "learning_rate": 3.896017699115044e-05, "loss": 0.0668, "step": 1000 }, { "epoch": 15.0, "eval_accuracy": 0.9775909304191199, "eval_f1": 0.976843172808015, "eval_loss": 0.08673886954784393, "eval_precision": 0.9766930924287119, "eval_recall": 0.9769932993176369, "eval_runtime": 11.3435, "eval_samples_per_second": 693.7, "eval_steps_per_second": 2.733, "step": 1035 }, { "epoch": 16.0, "eval_accuracy": 0.9778028365144474, "eval_f1": 0.9770540169876339, "eval_loss": 0.0859028622508049, "eval_precision": 0.9769226632660116, "eval_recall": 0.9771854060367615, "eval_runtime": 11.0228, "eval_samples_per_second": 713.886, "eval_steps_per_second": 2.812, "step": 1104 }, { "epoch": 17.0, "eval_accuracy": 0.9780979914329393, "eval_f1": 0.9773386449285661, "eval_loss": 0.08584524691104889, "eval_precision": 0.9771922412137507, "eval_recall": 0.9774850925185959, "eval_runtime": 11.0063, "eval_samples_per_second": 714.952, "eval_steps_per_second": 2.817, "step": 1173 }, { "epoch": 18.0, "eval_accuracy": 0.9784990993990949, "eval_f1": 0.9777723141226096, "eval_loss": 0.08779104799032211, "eval_precision": 0.9776446185757087, "eval_recall": 0.977900043031905, "eval_runtime": 11.0526, "eval_samples_per_second": 711.958, "eval_steps_per_second": 2.805, "step": 1242 }, { "epoch": 19.0, "eval_accuracy": 0.9784839632494287, "eval_f1": 0.9777277546442126, "eval_loss": 0.08868438750505447, "eval_precision": 0.9775324914738686, "eval_recall": 0.9779230958382, "eval_runtime": 10.952, "eval_samples_per_second": 718.498, "eval_steps_per_second": 2.831, "step": 1311 }, { "epoch": 20.0, "eval_accuracy": 0.9782796252289343, "eval_f1": 0.977526622308957, "eval_loss": 0.09024880826473236, "eval_precision": 0.9773914513105737, "eval_recall": 0.9776618307001905, "eval_runtime": 10.9428, "eval_samples_per_second": 719.1, "eval_steps_per_second": 2.833, "step": 1380 }, { "epoch": 21.0, "eval_accuracy": 0.9782115125554361, "eval_f1": 0.9773772343294419, "eval_loss": 0.09100791066884995, "eval_precision": 0.9772233190194889, "eval_recall": 0.9775311981311858, "eval_runtime": 10.9089, "eval_samples_per_second": 721.337, "eval_steps_per_second": 2.842, "step": 1449 }, { "epoch": 21.74, "learning_rate": 2.7898230088495575e-05, "loss": 0.0375, "step": 1500 }, { "epoch": 22.0, "eval_accuracy": 0.9783098975282668, "eval_f1": 0.9775235578160474, "eval_loss": 0.09260567277669907, "eval_precision": 0.9773546062789501, "eval_recall": 0.9776925677752505, "eval_runtime": 10.9627, "eval_samples_per_second": 717.797, "eval_steps_per_second": 2.828, "step": 1518 }, { "epoch": 23.0, "eval_accuracy": 0.9786731651202567, "eval_f1": 0.9778607567218708, "eval_loss": 0.09297080338001251, "eval_precision": 0.9777292945433315, "eval_recall": 0.9779922542570849, "eval_runtime": 11.0584, "eval_samples_per_second": 711.584, "eval_steps_per_second": 2.803, "step": 1587 }, { "epoch": 24.0, "eval_accuracy": 0.9787034374195892, "eval_f1": 0.9779114614545398, "eval_loss": 0.09545727074146271, "eval_precision": 0.9777461975725918, "eval_recall": 0.9780767812134997, "eval_runtime": 12.1178, "eval_samples_per_second": 649.374, "eval_steps_per_second": 2.558, "step": 1656 }, { "epoch": 25.0, "eval_accuracy": 0.9787488458685879, "eval_f1": 0.9779918790071952, "eval_loss": 0.09549739956855774, "eval_precision": 0.9778378669042919, "eval_recall": 0.9781459396323846, "eval_runtime": 11.1672, "eval_samples_per_second": 704.655, "eval_steps_per_second": 2.776, "step": 1725 }, { "epoch": 26.0, "eval_accuracy": 0.9785445078480935, "eval_f1": 0.977742949116863, "eval_loss": 0.09780567139387131, "eval_precision": 0.9775551902662345, "eval_recall": 0.977930780106965, "eval_runtime": 10.9619, "eval_samples_per_second": 717.851, "eval_steps_per_second": 2.828, "step": 1794 }, { "epoch": 27.0, "eval_accuracy": 0.9782115125554361, "eval_f1": 0.9773690296457643, "eval_loss": 0.09968989342451096, "eval_precision": 0.9772376335742984, "eval_recall": 0.9775004610561259, "eval_runtime": 10.8805, "eval_samples_per_second": 723.22, "eval_steps_per_second": 2.849, "step": 1863 }, { "epoch": 28.0, "eval_accuracy": 0.9785596439977599, "eval_f1": 0.9777683870843819, "eval_loss": 0.10001282393932343, "eval_precision": 0.9776444468344998, "eval_recall": 0.9778923587631401, "eval_runtime": 11.0278, "eval_samples_per_second": 713.561, "eval_steps_per_second": 2.811, "step": 1932 }, { "epoch": 28.98, "learning_rate": 1.683628318584071e-05, "loss": 0.0238, "step": 2000 }, { "epoch": 29.0, "eval_accuracy": 0.9784612590249292, "eval_f1": 0.9776150651725449, "eval_loss": 0.10220629721879959, "eval_precision": 0.977476127922073, "eval_recall": 0.9777540419253704, "eval_runtime": 11.5178, "eval_samples_per_second": 683.205, "eval_steps_per_second": 2.691, "step": 2001 }, { "epoch": 30.0, "eval_accuracy": 0.9787034374195892, "eval_f1": 0.9778532436450527, "eval_loss": 0.10299359261989594, "eval_precision": 0.9777142725449978, "eval_recall": 0.9779922542570849, "eval_runtime": 11.5247, "eval_samples_per_second": 682.796, "eval_steps_per_second": 2.69, "step": 2070 }, { "epoch": 31.0, "eval_accuracy": 0.9786504608957574, "eval_f1": 0.9778916595277151, "eval_loss": 0.10408657044172287, "eval_precision": 0.9777526829680502, "eval_recall": 0.9780306756009098, "eval_runtime": 11.971, "eval_samples_per_second": 657.341, "eval_steps_per_second": 2.59, "step": 2139 }, { "epoch": 32.0, "eval_accuracy": 0.9787185735692554, "eval_f1": 0.9779299058419483, "eval_loss": 0.10540538281202316, "eval_precision": 0.9777984343671018, "eval_recall": 0.9780614126759698, "eval_runtime": 11.5819, "eval_samples_per_second": 679.422, "eval_steps_per_second": 2.677, "step": 2208 }, { "epoch": 33.0, "eval_accuracy": 0.978635324746091, "eval_f1": 0.9777916076017933, "eval_loss": 0.10549841076135635, "eval_precision": 0.9776601547195612, "eval_recall": 0.9779230958382, "eval_runtime": 12.6843, "eval_samples_per_second": 620.372, "eval_steps_per_second": 2.444, "step": 2277 }, { "epoch": 34.0, "eval_accuracy": 0.9787488458685879, "eval_f1": 0.9778990030925261, "eval_loss": 0.10634943097829819, "eval_precision": 0.9777750462859821, "eval_recall": 0.9780229913321449, "eval_runtime": 11.6157, "eval_samples_per_second": 677.447, "eval_steps_per_second": 2.669, "step": 2346 }, { "epoch": 35.0, "eval_accuracy": 0.9789456158142492, "eval_f1": 0.978172514732208, "eval_loss": 0.10656328499317169, "eval_precision": 0.9780147183087772, "eval_recall": 0.9783303620827442, "eval_runtime": 11.2324, "eval_samples_per_second": 700.56, "eval_steps_per_second": 2.76, "step": 2415 }, { "epoch": 36.0, "eval_accuracy": 0.978756413943421, "eval_f1": 0.9780024740493733, "eval_loss": 0.10749900341033936, "eval_precision": 0.9778897715225174, "eval_recall": 0.9781152025573246, "eval_runtime": 11.1336, "eval_samples_per_second": 706.779, "eval_steps_per_second": 2.784, "step": 2484 }, { "epoch": 36.23, "learning_rate": 5.774336283185841e-06, "loss": 0.0167, "step": 2500 }, { "epoch": 37.0, "eval_accuracy": 0.9789153435149167, "eval_f1": 0.9781257443163047, "eval_loss": 0.10826310515403748, "eval_precision": 0.977998002611969, "eval_recall": 0.9782535193950944, "eval_runtime": 11.1448, "eval_samples_per_second": 706.071, "eval_steps_per_second": 2.782, "step": 2553 }, { "epoch": 38.0, "eval_accuracy": 0.9789002073652504, "eval_f1": 0.9781295019304278, "eval_loss": 0.1082502156496048, "eval_precision": 0.978005515906245, "eval_recall": 0.9782535193950944, "eval_runtime": 11.2628, "eval_samples_per_second": 698.669, "eval_steps_per_second": 2.752, "step": 2622 }, { "epoch": 39.0, "eval_accuracy": 0.9788547989162517, "eval_f1": 0.9780682040821029, "eval_loss": 0.10867351293563843, "eval_precision": 0.9779367140146423, "eval_recall": 0.9781997295137395, "eval_runtime": 11.7323, "eval_samples_per_second": 670.712, "eval_steps_per_second": 2.642, "step": 2691 }, { "epoch": 40.0, "eval_accuracy": 0.9788926392904173, "eval_f1": 0.978110377786144, "eval_loss": 0.1087782010436058, "eval_precision": 0.9779826380886533, "eval_recall": 0.9782381508575644, "eval_runtime": 11.2634, "eval_samples_per_second": 698.633, "eval_steps_per_second": 2.752, "step": 2760 }, { "epoch": 40.0, "step": 2760, "total_flos": 1.673403126150724e+17, "train_loss": 0.23496506378270576, "train_runtime": 2350.6684, "train_samples_per_second": 1204.968, "train_steps_per_second": 1.174 } ], "max_steps": 2760, "num_train_epochs": 40, "total_flos": 1.673403126150724e+17, "trial_name": null, "trial_params": null }