{ "best_metric": 0.9153581572237279, "best_model_checkpoint": "vivit-surf-analytics-runpod/checkpoint-10374", "epoch": 29.033333333333335, "eval_steps": 500, "global_step": 22230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000449842555105713, "grad_norm": 7.417573942802846e-05, "learning_rate": 2.249212775528565e-07, "loss": 0.0, "step": 10 }, { "epoch": 0.000899685110211426, "grad_norm": 9.81834891717881e-05, "learning_rate": 4.49842555105713e-07, "loss": 0.0, "step": 20 }, { "epoch": 0.001349527665317139, "grad_norm": 0.0005594987305812538, "learning_rate": 6.747638326585696e-07, "loss": 0.0, "step": 30 }, { "epoch": 0.001799370220422852, "grad_norm": 0.0003800458216574043, "learning_rate": 8.99685110211426e-07, "loss": 0.0, "step": 40 }, { "epoch": 0.002249212775528565, "grad_norm": 30.6152400970459, "learning_rate": 1.1246063877642827e-06, "loss": 0.2603, "step": 50 }, { "epoch": 0.002699055330634278, "grad_norm": 8.36638209875673e-05, "learning_rate": 1.3495276653171391e-06, "loss": 0.0, "step": 60 }, { "epoch": 0.003148897885739991, "grad_norm": 3.176023164996877e-05, "learning_rate": 1.5744489428699956e-06, "loss": 0.0, "step": 70 }, { "epoch": 0.003598740440845704, "grad_norm": 9.201840293826535e-05, "learning_rate": 1.799370220422852e-06, "loss": 0.2447, "step": 80 }, { "epoch": 0.004048582995951417, "grad_norm": 8.87176938704215e-05, "learning_rate": 2.0242914979757085e-06, "loss": 0.0, "step": 90 }, { "epoch": 0.00449842555105713, "grad_norm": 0.00012092135875718668, "learning_rate": 2.2492127755285654e-06, "loss": 0.0, "step": 100 }, { "epoch": 0.004948268106162843, "grad_norm": 0.00022134266328066587, "learning_rate": 2.474134053081422e-06, "loss": 0.0, "step": 110 }, { "epoch": 0.005398110661268556, "grad_norm": 7.128831202862784e-05, "learning_rate": 2.6990553306342783e-06, "loss": 0.0, "step": 120 }, { "epoch": 0.005847953216374269, "grad_norm": 8.539327245671302e-05, "learning_rate": 2.9239766081871343e-06, "loss": 0.0, "step": 130 }, { "epoch": 0.006297795771479982, "grad_norm": 4.0259514207718894e-05, "learning_rate": 3.148897885739991e-06, "loss": 0.0, "step": 140 }, { "epoch": 0.006747638326585695, "grad_norm": 9.067689097719267e-05, "learning_rate": 3.3738191632928476e-06, "loss": 0.0, "step": 150 }, { "epoch": 0.007197480881691408, "grad_norm": 0.00022617603826802224, "learning_rate": 3.598740440845704e-06, "loss": 0.0, "step": 160 }, { "epoch": 0.007647323436797121, "grad_norm": 7.794013799866661e-05, "learning_rate": 3.823661718398561e-06, "loss": 0.0, "step": 170 }, { "epoch": 0.008097165991902834, "grad_norm": 0.0003456588601693511, "learning_rate": 4.048582995951417e-06, "loss": 0.0, "step": 180 }, { "epoch": 0.008547008547008548, "grad_norm": 5.0067126721842214e-05, "learning_rate": 4.273504273504274e-06, "loss": 0.0, "step": 190 }, { "epoch": 0.00899685110211426, "grad_norm": 0.00012503593461588025, "learning_rate": 4.498425551057131e-06, "loss": 0.0, "step": 200 }, { "epoch": 0.009446693657219974, "grad_norm": 7.580386591143906e-05, "learning_rate": 4.723346828609987e-06, "loss": 0.0, "step": 210 }, { "epoch": 0.009896536212325686, "grad_norm": 5.437539948616177e-05, "learning_rate": 4.948268106162844e-06, "loss": 0.0, "step": 220 }, { "epoch": 0.0103463787674314, "grad_norm": 0.0003216730838175863, "learning_rate": 5.1731893837157e-06, "loss": 0.0, "step": 230 }, { "epoch": 0.010796221322537112, "grad_norm": 7.252412615343928e-05, "learning_rate": 5.3981106612685565e-06, "loss": 0.0, "step": 240 }, { "epoch": 0.011246063877642825, "grad_norm": 6.925016350578517e-05, "learning_rate": 5.623031938821413e-06, "loss": 0.0, "step": 250 }, { "epoch": 0.011695906432748537, "grad_norm": 7.834763528080657e-05, "learning_rate": 5.8479532163742686e-06, "loss": 0.0, "step": 260 }, { "epoch": 0.012145748987854251, "grad_norm": 0.00012906326446682215, "learning_rate": 6.0728744939271254e-06, "loss": 0.0, "step": 270 }, { "epoch": 0.012595591542959963, "grad_norm": 6.956917059142143e-05, "learning_rate": 6.297795771479982e-06, "loss": 0.0, "step": 280 }, { "epoch": 0.013045434098065677, "grad_norm": 9.101720934268087e-05, "learning_rate": 6.522717049032839e-06, "loss": 0.0, "step": 290 }, { "epoch": 0.01349527665317139, "grad_norm": 0.00039032549830153584, "learning_rate": 6.747638326585695e-06, "loss": 0.0, "step": 300 }, { "epoch": 0.013945119208277103, "grad_norm": 0.0003243921382818371, "learning_rate": 6.972559604138552e-06, "loss": 0.0, "step": 310 }, { "epoch": 0.014394961763382817, "grad_norm": 6.757626397302374e-05, "learning_rate": 7.197480881691408e-06, "loss": 0.0, "step": 320 }, { "epoch": 0.014844804318488529, "grad_norm": 8.017542859306559e-05, "learning_rate": 7.422402159244266e-06, "loss": 0.0, "step": 330 }, { "epoch": 0.015294646873594242, "grad_norm": 0.0002272631973028183, "learning_rate": 7.647323436797122e-06, "loss": 0.0, "step": 340 }, { "epoch": 0.015744489428699954, "grad_norm": 3.770879629882984e-05, "learning_rate": 7.872244714349977e-06, "loss": 0.0, "step": 350 }, { "epoch": 0.016194331983805668, "grad_norm": 3.7754245568066835e-05, "learning_rate": 8.097165991902834e-06, "loss": 0.0, "step": 360 }, { "epoch": 0.016644174538911382, "grad_norm": 3.0348071959451772e-05, "learning_rate": 8.32208726945569e-06, "loss": 0.0, "step": 370 }, { "epoch": 0.017094017094017096, "grad_norm": 7.795915735187009e-05, "learning_rate": 8.547008547008548e-06, "loss": 0.0, "step": 380 }, { "epoch": 0.017543859649122806, "grad_norm": 7.10234817233868e-05, "learning_rate": 8.771929824561403e-06, "loss": 0.0, "step": 390 }, { "epoch": 0.01799370220422852, "grad_norm": 6.444968312280253e-05, "learning_rate": 8.996851102114261e-06, "loss": 0.0, "step": 400 }, { "epoch": 0.018443544759334234, "grad_norm": 5.1695096772164106e-05, "learning_rate": 9.221772379667117e-06, "loss": 0.0, "step": 410 }, { "epoch": 0.018893387314439947, "grad_norm": 5.471765689435415e-05, "learning_rate": 9.446693657219973e-06, "loss": 0.0, "step": 420 }, { "epoch": 0.019343229869545658, "grad_norm": 0.00010291999933542684, "learning_rate": 9.67161493477283e-06, "loss": 0.0, "step": 430 }, { "epoch": 0.01979307242465137, "grad_norm": 0.00021923838357906789, "learning_rate": 9.896536212325687e-06, "loss": 0.0, "step": 440 }, { "epoch": 0.020242914979757085, "grad_norm": 5.563345257542096e-05, "learning_rate": 1.0121457489878542e-05, "loss": 0.0, "step": 450 }, { "epoch": 0.0206927575348628, "grad_norm": 0.00021992930851411074, "learning_rate": 1.03463787674314e-05, "loss": 0.0, "step": 460 }, { "epoch": 0.02114260008996851, "grad_norm": 6.939850572962314e-05, "learning_rate": 1.0571300044984256e-05, "loss": 0.0, "step": 470 }, { "epoch": 0.021592442645074223, "grad_norm": 0.00010088242561323568, "learning_rate": 1.0796221322537113e-05, "loss": 0.2466, "step": 480 }, { "epoch": 0.022042285200179937, "grad_norm": 0.0003670174046419561, "learning_rate": 1.1021142600089968e-05, "loss": 0.0, "step": 490 }, { "epoch": 0.02249212775528565, "grad_norm": 5.601453813142143e-05, "learning_rate": 1.1246063877642827e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.022941970310391364, "grad_norm": 8.571091166231781e-05, "learning_rate": 1.1470985155195682e-05, "loss": 0.0, "step": 510 }, { "epoch": 0.023391812865497075, "grad_norm": 5.0970513257198036e-05, "learning_rate": 1.1695906432748537e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.02384165542060279, "grad_norm": 3.0476896426989697e-05, "learning_rate": 1.1920827710301396e-05, "loss": 0.0, "step": 530 }, { "epoch": 0.024291497975708502, "grad_norm": 9.978284651879221e-05, "learning_rate": 1.2145748987854251e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.024741340530814216, "grad_norm": 7.297137926798314e-05, "learning_rate": 1.2370670265407108e-05, "loss": 0.0108, "step": 550 }, { "epoch": 0.025191183085919926, "grad_norm": 3.328905222588219e-05, "learning_rate": 1.2595591542959965e-05, "loss": 0.0, "step": 560 }, { "epoch": 0.02564102564102564, "grad_norm": 7.152635225793347e-05, "learning_rate": 1.282051282051282e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.026090868196131354, "grad_norm": 7.024151273071766e-05, "learning_rate": 1.3045434098065678e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.026540710751237068, "grad_norm": 0.0001316197740379721, "learning_rate": 1.3270355375618535e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.02699055330634278, "grad_norm": 0.0006974252173677087, "learning_rate": 1.349527665317139e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.027440395861448492, "grad_norm": 0.00015963209443725646, "learning_rate": 1.3720197930724246e-05, "loss": 0.0, "step": 610 }, { "epoch": 0.027890238416554206, "grad_norm": 2.9086962967994623e-05, "learning_rate": 1.3945119208277104e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.02834008097165992, "grad_norm": 0.00011626291961874813, "learning_rate": 1.4170040485829961e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.028789923526765633, "grad_norm": 0.00013193227641750127, "learning_rate": 1.4394961763382816e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.029239766081871343, "grad_norm": 2.149128522432875e-05, "learning_rate": 1.4619883040935673e-05, "loss": 0.0, "step": 650 }, { "epoch": 0.029689608636977057, "grad_norm": 7.958362402860075e-05, "learning_rate": 1.4844804318488532e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.03013945119208277, "grad_norm": 2.2097266992204823e-05, "learning_rate": 1.5069725596041387e-05, "loss": 0.004, "step": 670 }, { "epoch": 0.030589293747188485, "grad_norm": 4.667111352318898e-05, "learning_rate": 1.5294646873594244e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.0310391363022942, "grad_norm": 0.00012508452346082777, "learning_rate": 1.55195681511471e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.03148897885739991, "grad_norm": 2.324238266737666e-05, "learning_rate": 1.5744489428699954e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.031938821412505626, "grad_norm": 0.00011243636254221201, "learning_rate": 1.5969410706252813e-05, "loss": 0.0, "step": 710 }, { "epoch": 0.032388663967611336, "grad_norm": 6.449875945691019e-05, "learning_rate": 1.6194331983805668e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.03283850652271705, "grad_norm": 8.980243728728965e-05, "learning_rate": 1.6419253261358526e-05, "loss": 0.0, "step": 730 }, { "epoch": 0.033288349077822764, "grad_norm": 9.912453242577612e-05, "learning_rate": 1.664417453891138e-05, "loss": 0.0, "step": 740 }, { "epoch": 0.03333333333333333, "eval_accuracy": 0.9069767441860465, "eval_f1": 0.9067730631247484, "eval_loss": 0.9082330465316772, "eval_runtime": 137.2887, "eval_samples_per_second": 1.566, "eval_steps_per_second": 1.566, "step": 741 }, { "epoch": 1.0004048582995952, "grad_norm": 7.618666859343648e-05, "learning_rate": 1.686909581646424e-05, "loss": 0.0, "step": 750 }, { "epoch": 1.0008547008547009, "grad_norm": 7.477263716282323e-05, "learning_rate": 1.7094017094017095e-05, "loss": 0.0, "step": 760 }, { "epoch": 1.0013045434098065, "grad_norm": 5.700959081877954e-05, "learning_rate": 1.731893837156995e-05, "loss": 0.0, "step": 770 }, { "epoch": 1.0017543859649123, "grad_norm": 0.00011909577006008476, "learning_rate": 1.7543859649122806e-05, "loss": 0.502, "step": 780 }, { "epoch": 1.002204228520018, "grad_norm": 6.308112642727792e-05, "learning_rate": 1.7768780926675664e-05, "loss": 0.0, "step": 790 }, { "epoch": 1.0026540710751237, "grad_norm": 4.7723224270157516e-05, "learning_rate": 1.7993702204228523e-05, "loss": 0.0, "step": 800 }, { "epoch": 1.0031039136302293, "grad_norm": 6.018473141011782e-05, "learning_rate": 1.8218623481781378e-05, "loss": 0.0, "step": 810 }, { "epoch": 1.0035537561853352, "grad_norm": 5.2601750212488696e-05, "learning_rate": 1.8443544759334233e-05, "loss": 0.0, "step": 820 }, { "epoch": 1.0040035987404408, "grad_norm": 4.043688022647984e-05, "learning_rate": 1.8668466036887092e-05, "loss": 0.0, "step": 830 }, { "epoch": 1.0044534412955466, "grad_norm": 6.112233677413315e-05, "learning_rate": 1.8893387314439947e-05, "loss": 0.0056, "step": 840 }, { "epoch": 1.0049032838506522, "grad_norm": 0.00011569919297471642, "learning_rate": 1.9118308591992802e-05, "loss": 0.0, "step": 850 }, { "epoch": 1.005353126405758, "grad_norm": 2.1692625523428433e-05, "learning_rate": 1.934322986954566e-05, "loss": 0.0, "step": 860 }, { "epoch": 1.0058029689608636, "grad_norm": 1.5330178939620964e-05, "learning_rate": 1.9568151147098516e-05, "loss": 0.0, "step": 870 }, { "epoch": 1.0062528115159695, "grad_norm": 0.00015317053475882858, "learning_rate": 1.9793072424651374e-05, "loss": 0.0, "step": 880 }, { "epoch": 1.006702654071075, "grad_norm": 4.9907925131265074e-05, "learning_rate": 2.001799370220423e-05, "loss": 0.0, "step": 890 }, { "epoch": 1.007152496626181, "grad_norm": 3.518067751429044e-05, "learning_rate": 2.0242914979757085e-05, "loss": 0.0, "step": 900 }, { "epoch": 1.0076023391812865, "grad_norm": 0.00011799114145105705, "learning_rate": 2.046783625730994e-05, "loss": 0.0, "step": 910 }, { "epoch": 1.0080521817363923, "grad_norm": 4.76725836051628e-05, "learning_rate": 2.06927575348628e-05, "loss": 0.0, "step": 920 }, { "epoch": 1.008502024291498, "grad_norm": 5.4757492762291804e-05, "learning_rate": 2.0917678812415657e-05, "loss": 0.0, "step": 930 }, { "epoch": 1.0089518668466038, "grad_norm": 2.9341641493374482e-05, "learning_rate": 2.1142600089968512e-05, "loss": 0.0, "step": 940 }, { "epoch": 1.0094017094017094, "grad_norm": 3.880517397192307e-05, "learning_rate": 2.1367521367521368e-05, "loss": 0.0, "step": 950 }, { "epoch": 1.0098515519568152, "grad_norm": 4.96865322929807e-05, "learning_rate": 2.1592442645074226e-05, "loss": 0.4444, "step": 960 }, { "epoch": 1.0103013945119208, "grad_norm": 3.4303524444112554e-05, "learning_rate": 2.181736392262708e-05, "loss": 0.0, "step": 970 }, { "epoch": 1.0107512370670266, "grad_norm": 0.00013209233293309808, "learning_rate": 2.2042285200179936e-05, "loss": 0.0, "step": 980 }, { "epoch": 1.0112010796221322, "grad_norm": 4.904479646938853e-05, "learning_rate": 2.2267206477732795e-05, "loss": 0.0, "step": 990 }, { "epoch": 1.011650922177238, "grad_norm": 6.844414019724354e-05, "learning_rate": 2.2492127755285654e-05, "loss": 0.0, "step": 1000 }, { "epoch": 1.0121007647323437, "grad_norm": 0.00032382344943471253, "learning_rate": 2.271704903283851e-05, "loss": 0.0, "step": 1010 }, { "epoch": 1.0125506072874495, "grad_norm": 0.00010385856876382604, "learning_rate": 2.2941970310391364e-05, "loss": 0.0, "step": 1020 }, { "epoch": 1.013000449842555, "grad_norm": 3.321765689179301e-05, "learning_rate": 2.316689158794422e-05, "loss": 0.0, "step": 1030 }, { "epoch": 1.0134502923976607, "grad_norm": 2.2119387722341344e-05, "learning_rate": 2.3391812865497074e-05, "loss": 0.0, "step": 1040 }, { "epoch": 1.0139001349527665, "grad_norm": 0.00013485200179275125, "learning_rate": 2.3616734143049933e-05, "loss": 0.0, "step": 1050 }, { "epoch": 1.0143499775078721, "grad_norm": 2.8853814001195133e-05, "learning_rate": 2.384165542060279e-05, "loss": 0.0, "step": 1060 }, { "epoch": 1.014799820062978, "grad_norm": 3.1664647394791245e-05, "learning_rate": 2.4066576698155647e-05, "loss": 0.0, "step": 1070 }, { "epoch": 1.0152496626180836, "grad_norm": 0.00011457729851827025, "learning_rate": 2.4291497975708502e-05, "loss": 0.0, "step": 1080 }, { "epoch": 1.0156995051731894, "grad_norm": 2.342585321457591e-05, "learning_rate": 2.451641925326136e-05, "loss": 0.0, "step": 1090 }, { "epoch": 1.016149347728295, "grad_norm": 0.00015773675113450736, "learning_rate": 2.4741340530814216e-05, "loss": 0.0, "step": 1100 }, { "epoch": 1.0165991902834008, "grad_norm": 3.402721995371394e-05, "learning_rate": 2.496626180836707e-05, "loss": 0.2988, "step": 1110 }, { "epoch": 1.0170490328385065, "grad_norm": 0.00018580129835754633, "learning_rate": 2.519118308591993e-05, "loss": 1.055, "step": 1120 }, { "epoch": 1.0174988753936123, "grad_norm": 0.00022666795121040195, "learning_rate": 2.5416104363472788e-05, "loss": 0.0003, "step": 1130 }, { "epoch": 1.0179487179487179, "grad_norm": 2.3005819457466714e-05, "learning_rate": 2.564102564102564e-05, "loss": 1.1488, "step": 1140 }, { "epoch": 1.0183985605038237, "grad_norm": 0.0027620664332062006, "learning_rate": 2.5865946918578498e-05, "loss": 0.9971, "step": 1150 }, { "epoch": 1.0188484030589293, "grad_norm": 2.9667680792044848e-05, "learning_rate": 2.6090868196131357e-05, "loss": 0.0001, "step": 1160 }, { "epoch": 1.0192982456140351, "grad_norm": 0.0030947765335440636, "learning_rate": 2.6315789473684212e-05, "loss": 1.1161, "step": 1170 }, { "epoch": 1.0197480881691408, "grad_norm": 0.034431636333465576, "learning_rate": 2.654071075123707e-05, "loss": 0.1076, "step": 1180 }, { "epoch": 1.0201979307242466, "grad_norm": 0.0028040127363055944, "learning_rate": 2.6765632028789922e-05, "loss": 0.0289, "step": 1190 }, { "epoch": 1.0206477732793522, "grad_norm": 0.004220534581691027, "learning_rate": 2.699055330634278e-05, "loss": 0.0001, "step": 1200 }, { "epoch": 1.021097615834458, "grad_norm": 3.05945795844309e-05, "learning_rate": 2.721547458389564e-05, "loss": 0.0328, "step": 1210 }, { "epoch": 1.0215474583895636, "grad_norm": 0.16082721948623657, "learning_rate": 2.744039586144849e-05, "loss": 0.0001, "step": 1220 }, { "epoch": 1.0219973009446695, "grad_norm": 0.053755827248096466, "learning_rate": 2.766531713900135e-05, "loss": 0.0044, "step": 1230 }, { "epoch": 1.022447143499775, "grad_norm": 0.0007409699610434473, "learning_rate": 2.789023841655421e-05, "loss": 0.892, "step": 1240 }, { "epoch": 1.0228969860548809, "grad_norm": 4.1867995605571195e-05, "learning_rate": 2.8115159694107064e-05, "loss": 0.0001, "step": 1250 }, { "epoch": 1.0233468286099865, "grad_norm": 0.0011809540446847677, "learning_rate": 2.8340080971659922e-05, "loss": 0.9006, "step": 1260 }, { "epoch": 1.0237966711650923, "grad_norm": 0.0012361080152913928, "learning_rate": 2.8565002249212774e-05, "loss": 0.0, "step": 1270 }, { "epoch": 1.024246513720198, "grad_norm": 0.0003413816448301077, "learning_rate": 2.8789923526765633e-05, "loss": 0.0001, "step": 1280 }, { "epoch": 1.0246963562753035, "grad_norm": 0.0015310284215956926, "learning_rate": 2.901484480431849e-05, "loss": 0.0001, "step": 1290 }, { "epoch": 1.0251461988304094, "grad_norm": 0.0007668680627830327, "learning_rate": 2.9239766081871346e-05, "loss": 0.0001, "step": 1300 }, { "epoch": 1.025596041385515, "grad_norm": 0.009191703051328659, "learning_rate": 2.9464687359424205e-05, "loss": 0.0, "step": 1310 }, { "epoch": 1.0260458839406208, "grad_norm": 0.00036078941775485873, "learning_rate": 2.9689608636977063e-05, "loss": 0.0002, "step": 1320 }, { "epoch": 1.0264957264957264, "grad_norm": 0.18627865612506866, "learning_rate": 2.9914529914529915e-05, "loss": 0.0003, "step": 1330 }, { "epoch": 1.0269455690508322, "grad_norm": 0.0007894066511653364, "learning_rate": 3.0139451192082774e-05, "loss": 0.0001, "step": 1340 }, { "epoch": 1.0273954116059378, "grad_norm": 0.00030962034361436963, "learning_rate": 3.0364372469635626e-05, "loss": 0.0004, "step": 1350 }, { "epoch": 1.0278452541610437, "grad_norm": 0.001865900936536491, "learning_rate": 3.058929374718849e-05, "loss": 0.2306, "step": 1360 }, { "epoch": 1.0282950967161493, "grad_norm": 0.0008512283093295991, "learning_rate": 3.0814215024741346e-05, "loss": 0.0, "step": 1370 }, { "epoch": 1.028744939271255, "grad_norm": 0.000559425912797451, "learning_rate": 3.10391363022942e-05, "loss": 0.021, "step": 1380 }, { "epoch": 1.0291947818263607, "grad_norm": 0.00035032653249800205, "learning_rate": 3.1264057579847056e-05, "loss": 0.0045, "step": 1390 }, { "epoch": 1.0296446243814665, "grad_norm": 0.08264846354722977, "learning_rate": 3.148897885739991e-05, "loss": 0.0001, "step": 1400 }, { "epoch": 1.0300944669365721, "grad_norm": 0.00041260552825406194, "learning_rate": 3.171390013495277e-05, "loss": 1.0298, "step": 1410 }, { "epoch": 1.030544309491678, "grad_norm": 0.0008482683333568275, "learning_rate": 3.1938821412505625e-05, "loss": 0.0005, "step": 1420 }, { "epoch": 1.0309941520467836, "grad_norm": 5.556947871809825e-05, "learning_rate": 3.216374269005848e-05, "loss": 1.2372, "step": 1430 }, { "epoch": 1.0314439946018894, "grad_norm": 0.004429028835147619, "learning_rate": 3.2388663967611336e-05, "loss": 0.0001, "step": 1440 }, { "epoch": 1.031893837156995, "grad_norm": 0.015903722494840622, "learning_rate": 3.2613585245164194e-05, "loss": 1.1106, "step": 1450 }, { "epoch": 1.0323436797121008, "grad_norm": 0.010684983804821968, "learning_rate": 3.283850652271705e-05, "loss": 0.0006, "step": 1460 }, { "epoch": 1.0327935222672064, "grad_norm": 257.4872741699219, "learning_rate": 3.306342780026991e-05, "loss": 0.6423, "step": 1470 }, { "epoch": 1.0332433648223123, "grad_norm": 4.5786131522618234e-05, "learning_rate": 3.328834907782276e-05, "loss": 0.5976, "step": 1480 }, { "epoch": 1.0333333333333334, "eval_accuracy": 0.7302325581395349, "eval_f1": 0.7285814720040579, "eval_loss": 2.547083854675293, "eval_runtime": 137.8064, "eval_samples_per_second": 1.56, "eval_steps_per_second": 1.56, "step": 1482 }, { "epoch": 2.0003598740440847, "grad_norm": 9.406705794390291e-05, "learning_rate": 3.351327035537562e-05, "loss": 0.9992, "step": 1490 }, { "epoch": 2.0008097165991905, "grad_norm": 0.0036620991304516792, "learning_rate": 3.373819163292848e-05, "loss": 0.0001, "step": 1500 }, { "epoch": 2.001259559154296, "grad_norm": 0.00037059831083752215, "learning_rate": 3.396311291048133e-05, "loss": 0.0001, "step": 1510 }, { "epoch": 2.0017094017094017, "grad_norm": 6.804233271395788e-05, "learning_rate": 3.418803418803419e-05, "loss": 0.0781, "step": 1520 }, { "epoch": 2.0021592442645075, "grad_norm": 0.0010862837079912424, "learning_rate": 3.441295546558704e-05, "loss": 0.0012, "step": 1530 }, { "epoch": 2.002609086819613, "grad_norm": 0.0012227609986439347, "learning_rate": 3.46378767431399e-05, "loss": 0.0, "step": 1540 }, { "epoch": 2.0030589293747187, "grad_norm": 7.27791921235621e-05, "learning_rate": 3.486279802069276e-05, "loss": 0.0, "step": 1550 }, { "epoch": 2.0035087719298246, "grad_norm": 0.001692951307632029, "learning_rate": 3.508771929824561e-05, "loss": 0.0, "step": 1560 }, { "epoch": 2.0039586144849304, "grad_norm": 3.618938684463501, "learning_rate": 3.531264057579847e-05, "loss": 0.045, "step": 1570 }, { "epoch": 2.004408457040036, "grad_norm": 0.0011820023646578193, "learning_rate": 3.553756185335133e-05, "loss": 0.0, "step": 1580 }, { "epoch": 2.0048582995951416, "grad_norm": 0.0007727951160632074, "learning_rate": 3.576248313090419e-05, "loss": 0.0037, "step": 1590 }, { "epoch": 2.0053081421502474, "grad_norm": 0.000959408120252192, "learning_rate": 3.5987404408457046e-05, "loss": 0.0, "step": 1600 }, { "epoch": 2.0057579847053533, "grad_norm": 0.003274928079918027, "learning_rate": 3.62123256860099e-05, "loss": 0.0009, "step": 1610 }, { "epoch": 2.0062078272604587, "grad_norm": 0.0007496404577977955, "learning_rate": 3.6437246963562756e-05, "loss": 0.0003, "step": 1620 }, { "epoch": 2.0066576698155645, "grad_norm": 6.212611333467066e-05, "learning_rate": 3.6662168241115615e-05, "loss": 0.0, "step": 1630 }, { "epoch": 2.0071075123706703, "grad_norm": 0.00011305105726933107, "learning_rate": 3.6887089518668466e-05, "loss": 0.0, "step": 1640 }, { "epoch": 2.007557354925776, "grad_norm": 3.263484904891811e-05, "learning_rate": 3.7112010796221325e-05, "loss": 0.0, "step": 1650 }, { "epoch": 2.0080071974808815, "grad_norm": 0.0006018219282850623, "learning_rate": 3.7336932073774184e-05, "loss": 0.0, "step": 1660 }, { "epoch": 2.0084570400359874, "grad_norm": 5.7446377468295395e-05, "learning_rate": 3.7561853351327035e-05, "loss": 0.0, "step": 1670 }, { "epoch": 2.008906882591093, "grad_norm": 0.0022095597814768553, "learning_rate": 3.7786774628879894e-05, "loss": 0.0, "step": 1680 }, { "epoch": 2.009356725146199, "grad_norm": 0.0004947467823512852, "learning_rate": 3.8011695906432746e-05, "loss": 0.0, "step": 1690 }, { "epoch": 2.0098065677013044, "grad_norm": 0.00028225936694070697, "learning_rate": 3.8236617183985604e-05, "loss": 0.0, "step": 1700 }, { "epoch": 2.01025641025641, "grad_norm": 0.0004905525129288435, "learning_rate": 3.846153846153846e-05, "loss": 0.0, "step": 1710 }, { "epoch": 2.010706252811516, "grad_norm": 9.621172904968262, "learning_rate": 3.868645973909132e-05, "loss": 0.0058, "step": 1720 }, { "epoch": 2.011156095366622, "grad_norm": 0.0007397395675070584, "learning_rate": 3.891138101664418e-05, "loss": 0.0, "step": 1730 }, { "epoch": 2.0116059379217273, "grad_norm": 4.029439514852129e-05, "learning_rate": 3.913630229419703e-05, "loss": 0.8889, "step": 1740 }, { "epoch": 2.012055780476833, "grad_norm": 0.0006796045927330852, "learning_rate": 3.936122357174989e-05, "loss": 0.7755, "step": 1750 }, { "epoch": 2.012505623031939, "grad_norm": 0.0005175217520445585, "learning_rate": 3.958614484930275e-05, "loss": 0.007, "step": 1760 }, { "epoch": 2.0129554655870447, "grad_norm": 0.013559658080339432, "learning_rate": 3.98110661268556e-05, "loss": 0.7176, "step": 1770 }, { "epoch": 2.01340530814215, "grad_norm": 6.632544682361186e-05, "learning_rate": 4.003598740440846e-05, "loss": 0.0002, "step": 1780 }, { "epoch": 2.013855150697256, "grad_norm": 0.0004375911958049983, "learning_rate": 4.026090868196132e-05, "loss": 1.3084, "step": 1790 }, { "epoch": 2.014304993252362, "grad_norm": 0.0004137333307880908, "learning_rate": 4.048582995951417e-05, "loss": 0.0003, "step": 1800 }, { "epoch": 2.014754835807467, "grad_norm": 0.00034907320514321327, "learning_rate": 4.071075123706703e-05, "loss": 0.0006, "step": 1810 }, { "epoch": 2.015204678362573, "grad_norm": 9.423385199625045e-05, "learning_rate": 4.093567251461988e-05, "loss": 0.0, "step": 1820 }, { "epoch": 2.015654520917679, "grad_norm": 0.00043919807649217546, "learning_rate": 4.116059379217274e-05, "loss": 0.0026, "step": 1830 }, { "epoch": 2.0161043634727847, "grad_norm": 0.0002354329189984128, "learning_rate": 4.13855150697256e-05, "loss": 0.0001, "step": 1840 }, { "epoch": 2.01655420602789, "grad_norm": 0.0004224287695251405, "learning_rate": 4.1610436347278456e-05, "loss": 0.0, "step": 1850 }, { "epoch": 2.017004048582996, "grad_norm": 0.0014064997667446733, "learning_rate": 4.1835357624831314e-05, "loss": 0.0, "step": 1860 }, { "epoch": 2.0174538911381017, "grad_norm": 0.0004488792910706252, "learning_rate": 4.2060278902384166e-05, "loss": 0.0001, "step": 1870 }, { "epoch": 2.0179037336932075, "grad_norm": 0.0011938769603148103, "learning_rate": 4.2285200179937025e-05, "loss": 0.0053, "step": 1880 }, { "epoch": 2.018353576248313, "grad_norm": 0.0003374482912477106, "learning_rate": 4.251012145748988e-05, "loss": 0.0, "step": 1890 }, { "epoch": 2.0188034188034187, "grad_norm": 4.417824311531149e-05, "learning_rate": 4.2735042735042735e-05, "loss": 0.0, "step": 1900 }, { "epoch": 2.0192532613585246, "grad_norm": 0.0008707281667739153, "learning_rate": 4.2959964012595594e-05, "loss": 0.0, "step": 1910 }, { "epoch": 2.0197031039136304, "grad_norm": 0.00010934803140116856, "learning_rate": 4.318488529014845e-05, "loss": 0.0, "step": 1920 }, { "epoch": 2.0201529464687358, "grad_norm": 2.9817039830959402e-05, "learning_rate": 4.3409806567701304e-05, "loss": 0.0, "step": 1930 }, { "epoch": 2.0206027890238416, "grad_norm": 0.0002529581543058157, "learning_rate": 4.363472784525416e-05, "loss": 0.0592, "step": 1940 }, { "epoch": 2.0210526315789474, "grad_norm": 0.00231338432058692, "learning_rate": 4.3859649122807014e-05, "loss": 0.0, "step": 1950 }, { "epoch": 2.0215024741340533, "grad_norm": 0.00036462812568061054, "learning_rate": 4.408457040035987e-05, "loss": 0.0, "step": 1960 }, { "epoch": 2.0219523166891586, "grad_norm": 0.0008055507205426693, "learning_rate": 4.430949167791273e-05, "loss": 0.0002, "step": 1970 }, { "epoch": 2.0224021592442645, "grad_norm": 0.00010392279364168644, "learning_rate": 4.453441295546559e-05, "loss": 0.0, "step": 1980 }, { "epoch": 2.0228520017993703, "grad_norm": 2.892052725655958e-05, "learning_rate": 4.475933423301845e-05, "loss": 0.0108, "step": 1990 }, { "epoch": 2.023301844354476, "grad_norm": 0.00043569228728301823, "learning_rate": 4.498425551057131e-05, "loss": 0.0, "step": 2000 }, { "epoch": 2.0237516869095815, "grad_norm": 0.0005211837706156075, "learning_rate": 4.520917678812416e-05, "loss": 0.0, "step": 2010 }, { "epoch": 2.0242015294646873, "grad_norm": 0.0003914069675374776, "learning_rate": 4.543409806567702e-05, "loss": 0.0, "step": 2020 }, { "epoch": 2.024651372019793, "grad_norm": 0.0003151560085825622, "learning_rate": 4.565901934322987e-05, "loss": 1.1083, "step": 2030 }, { "epoch": 2.025101214574899, "grad_norm": 0.014305598102509975, "learning_rate": 4.588394062078273e-05, "loss": 0.0021, "step": 2040 }, { "epoch": 2.0255510571300044, "grad_norm": 3.23637056350708, "learning_rate": 4.6108861898335586e-05, "loss": 0.7582, "step": 2050 }, { "epoch": 2.02600089968511, "grad_norm": 0.01840856671333313, "learning_rate": 4.633378317588844e-05, "loss": 0.0002, "step": 2060 }, { "epoch": 2.026450742240216, "grad_norm": 0.09801971912384033, "learning_rate": 4.65587044534413e-05, "loss": 1.2679, "step": 2070 }, { "epoch": 2.0269005847953214, "grad_norm": 0.0868731439113617, "learning_rate": 4.678362573099415e-05, "loss": 0.0092, "step": 2080 }, { "epoch": 2.0273504273504273, "grad_norm": 6.335088255582377e-05, "learning_rate": 4.700854700854701e-05, "loss": 0.0007, "step": 2090 }, { "epoch": 2.027800269905533, "grad_norm": 9.68923486652784e-05, "learning_rate": 4.7233468286099866e-05, "loss": 0.0022, "step": 2100 }, { "epoch": 2.028250112460639, "grad_norm": 0.0012198828626424074, "learning_rate": 4.7458389563652724e-05, "loss": 0.0125, "step": 2110 }, { "epoch": 2.0286999550157443, "grad_norm": 7.24686833564192e-05, "learning_rate": 4.768331084120558e-05, "loss": 0.0, "step": 2120 }, { "epoch": 2.02914979757085, "grad_norm": 61.91375732421875, "learning_rate": 4.790823211875844e-05, "loss": 2.5111, "step": 2130 }, { "epoch": 2.029599640125956, "grad_norm": 5.499041799339466e-05, "learning_rate": 4.813315339631129e-05, "loss": 0.0001, "step": 2140 }, { "epoch": 2.030049482681062, "grad_norm": 0.016495248302817345, "learning_rate": 4.835807467386415e-05, "loss": 0.0004, "step": 2150 }, { "epoch": 2.030499325236167, "grad_norm": 0.002960088662803173, "learning_rate": 4.8582995951417004e-05, "loss": 0.0002, "step": 2160 }, { "epoch": 2.030949167791273, "grad_norm": 0.000482844072394073, "learning_rate": 4.880791722896986e-05, "loss": 0.0001, "step": 2170 }, { "epoch": 2.031399010346379, "grad_norm": 4.5023582060821354e-05, "learning_rate": 4.903283850652272e-05, "loss": 0.0002, "step": 2180 }, { "epoch": 2.0318488529014846, "grad_norm": 0.000393081660149619, "learning_rate": 4.925775978407557e-05, "loss": 0.0002, "step": 2190 }, { "epoch": 2.03229869545659, "grad_norm": 7.064462261041626e-05, "learning_rate": 4.948268106162843e-05, "loss": 0.0002, "step": 2200 }, { "epoch": 2.032748538011696, "grad_norm": 0.00036134099354967475, "learning_rate": 4.970760233918128e-05, "loss": 0.359, "step": 2210 }, { "epoch": 2.0331983805668017, "grad_norm": 1004.1809692382812, "learning_rate": 4.993252361673414e-05, "loss": 0.1188, "step": 2220 }, { "epoch": 2.033333333333333, "eval_accuracy": 0.8697674418604651, "eval_f1": 0.8695301141326226, "eval_loss": 1.01445472240448, "eval_runtime": 137.7698, "eval_samples_per_second": 1.561, "eval_steps_per_second": 1.561, "step": 2223 }, { "epoch": 3.000314889788574, "grad_norm": 0.007830711081624031, "learning_rate": 4.9982506122857e-05, "loss": 0.0, "step": 2230 }, { "epoch": 3.0007647323436797, "grad_norm": 0.0007166070281527936, "learning_rate": 4.995751486979557e-05, "loss": 0.0, "step": 2240 }, { "epoch": 3.0012145748987855, "grad_norm": 0.0016394773265346885, "learning_rate": 4.993252361673414e-05, "loss": 0.9807, "step": 2250 }, { "epoch": 3.0016644174538913, "grad_norm": 5.172581222723238e-05, "learning_rate": 4.990753236367272e-05, "loss": 0.0001, "step": 2260 }, { "epoch": 3.0021142600089967, "grad_norm": 0.0009276026976294816, "learning_rate": 4.988254111061129e-05, "loss": 0.0002, "step": 2270 }, { "epoch": 3.0025641025641026, "grad_norm": 0.0011872582836076617, "learning_rate": 4.985754985754986e-05, "loss": 0.0, "step": 2280 }, { "epoch": 3.0030139451192084, "grad_norm": 0.00016443050117231905, "learning_rate": 4.983255860448843e-05, "loss": 0.0004, "step": 2290 }, { "epoch": 3.003463787674314, "grad_norm": 0.0002790816652122885, "learning_rate": 4.9807567351427e-05, "loss": 0.0308, "step": 2300 }, { "epoch": 3.0039136302294196, "grad_norm": 0.0010080060455948114, "learning_rate": 4.978257609836557e-05, "loss": 0.0001, "step": 2310 }, { "epoch": 3.0043634727845254, "grad_norm": 3.3183954656124115e-05, "learning_rate": 4.975758484530414e-05, "loss": 0.1412, "step": 2320 }, { "epoch": 3.0048133153396313, "grad_norm": 0.0015999029856175184, "learning_rate": 4.973259359224271e-05, "loss": 0.0005, "step": 2330 }, { "epoch": 3.0052631578947366, "grad_norm": 0.0009712292812764645, "learning_rate": 4.970760233918128e-05, "loss": 0.0001, "step": 2340 }, { "epoch": 3.0057130004498425, "grad_norm": 0.002008584327995777, "learning_rate": 4.968261108611986e-05, "loss": 0.8829, "step": 2350 }, { "epoch": 3.0061628430049483, "grad_norm": 0.001346872071735561, "learning_rate": 4.965761983305843e-05, "loss": 0.2219, "step": 2360 }, { "epoch": 3.006612685560054, "grad_norm": 0.002629069611430168, "learning_rate": 4.9632628579997e-05, "loss": 0.0001, "step": 2370 }, { "epoch": 3.0070625281151595, "grad_norm": 0.010934717021882534, "learning_rate": 4.960763732693557e-05, "loss": 0.7642, "step": 2380 }, { "epoch": 3.0075123706702653, "grad_norm": 0.0014673244440928102, "learning_rate": 4.958264607387414e-05, "loss": 0.0001, "step": 2390 }, { "epoch": 3.007962213225371, "grad_norm": 0.07875771820545197, "learning_rate": 4.955765482081271e-05, "loss": 0.7864, "step": 2400 }, { "epoch": 3.008412055780477, "grad_norm": 0.023119984194636345, "learning_rate": 4.953266356775129e-05, "loss": 0.0023, "step": 2410 }, { "epoch": 3.0088618983355824, "grad_norm": 0.007800516672432423, "learning_rate": 4.950767231468986e-05, "loss": 0.0207, "step": 2420 }, { "epoch": 3.009311740890688, "grad_norm": 0.002547218929976225, "learning_rate": 4.948268106162843e-05, "loss": 0.0, "step": 2430 }, { "epoch": 3.009761583445794, "grad_norm": 4.934496246278286e-05, "learning_rate": 4.9457689808567e-05, "loss": 0.0014, "step": 2440 }, { "epoch": 3.0102114260009, "grad_norm": 73.35270690917969, "learning_rate": 4.943269855550557e-05, "loss": 0.8348, "step": 2450 }, { "epoch": 3.0106612685560052, "grad_norm": 5.409602454164997e-05, "learning_rate": 4.940770730244414e-05, "loss": 0.0, "step": 2460 }, { "epoch": 3.011111111111111, "grad_norm": 0.016356101259589195, "learning_rate": 4.938271604938271e-05, "loss": 0.0002, "step": 2470 }, { "epoch": 3.011560953666217, "grad_norm": 0.020031297579407692, "learning_rate": 4.935772479632129e-05, "loss": 0.0007, "step": 2480 }, { "epoch": 3.0120107962213227, "grad_norm": 0.001010871957987547, "learning_rate": 4.933273354325986e-05, "loss": 0.0001, "step": 2490 }, { "epoch": 3.012460638776428, "grad_norm": 0.0009384601144120097, "learning_rate": 4.930774229019843e-05, "loss": 0.0, "step": 2500 }, { "epoch": 3.012910481331534, "grad_norm": 0.001430662116035819, "learning_rate": 4.9282751037137e-05, "loss": 0.0001, "step": 2510 }, { "epoch": 3.0133603238866398, "grad_norm": 0.00010839509195648134, "learning_rate": 4.925775978407557e-05, "loss": 0.7495, "step": 2520 }, { "epoch": 3.0138101664417456, "grad_norm": 0.13529542088508606, "learning_rate": 4.923276853101414e-05, "loss": 0.5714, "step": 2530 }, { "epoch": 3.014260008996851, "grad_norm": 0.0008542468422092497, "learning_rate": 4.920777727795272e-05, "loss": 0.0, "step": 2540 }, { "epoch": 3.014709851551957, "grad_norm": 0.00197191396728158, "learning_rate": 4.918278602489129e-05, "loss": 0.0187, "step": 2550 }, { "epoch": 3.0151596941070626, "grad_norm": 0.0036207581870257854, "learning_rate": 4.915779477182986e-05, "loss": 0.2981, "step": 2560 }, { "epoch": 3.015609536662168, "grad_norm": 0.0008531498024240136, "learning_rate": 4.913280351876843e-05, "loss": 0.0, "step": 2570 }, { "epoch": 3.016059379217274, "grad_norm": 6.706573185510933e-05, "learning_rate": 4.9107812265707e-05, "loss": 1.2627, "step": 2580 }, { "epoch": 3.0165092217723797, "grad_norm": 0.0011423870455473661, "learning_rate": 4.908282101264557e-05, "loss": 0.0001, "step": 2590 }, { "epoch": 3.0169590643274855, "grad_norm": 0.001493514864705503, "learning_rate": 4.9057829759584143e-05, "loss": 0.1326, "step": 2600 }, { "epoch": 3.017408906882591, "grad_norm": 0.06147133186459541, "learning_rate": 4.903283850652272e-05, "loss": 0.0003, "step": 2610 }, { "epoch": 3.0178587494376967, "grad_norm": 0.0017068564193323255, "learning_rate": 4.900784725346129e-05, "loss": 1.2241, "step": 2620 }, { "epoch": 3.0183085919928025, "grad_norm": 0.0021575004793703556, "learning_rate": 4.898285600039986e-05, "loss": 0.0, "step": 2630 }, { "epoch": 3.0187584345479084, "grad_norm": 0.006344155874103308, "learning_rate": 4.895786474733843e-05, "loss": 0.0005, "step": 2640 }, { "epoch": 3.0192082771030138, "grad_norm": 0.0009713447070680559, "learning_rate": 4.8932873494277e-05, "loss": 0.0, "step": 2650 }, { "epoch": 3.0196581196581196, "grad_norm": 0.001861833268776536, "learning_rate": 4.890788224121557e-05, "loss": 1.1243, "step": 2660 }, { "epoch": 3.0201079622132254, "grad_norm": 4.98304557800293, "learning_rate": 4.888289098815415e-05, "loss": 2.2231, "step": 2670 }, { "epoch": 3.0205578047683312, "grad_norm": 0.022887537255883217, "learning_rate": 4.885789973509272e-05, "loss": 1.426, "step": 2680 }, { "epoch": 3.0210076473234366, "grad_norm": 0.0629364475607872, "learning_rate": 4.883290848203129e-05, "loss": 0.7458, "step": 2690 }, { "epoch": 3.0214574898785425, "grad_norm": 0.03189025819301605, "learning_rate": 4.880791722896986e-05, "loss": 1.1516, "step": 2700 }, { "epoch": 3.0219073324336483, "grad_norm": 0.08581995218992233, "learning_rate": 4.878292597590843e-05, "loss": 0.2467, "step": 2710 }, { "epoch": 3.022357174988754, "grad_norm": 0.00025509484112262726, "learning_rate": 4.8757934722847e-05, "loss": 0.0014, "step": 2720 }, { "epoch": 3.0228070175438595, "grad_norm": 0.015680013224482536, "learning_rate": 4.8732943469785574e-05, "loss": 0.0014, "step": 2730 }, { "epoch": 3.0232568600989653, "grad_norm": 0.19162459671497345, "learning_rate": 4.870795221672415e-05, "loss": 0.0005, "step": 2740 }, { "epoch": 3.023706702654071, "grad_norm": 0.024777309969067574, "learning_rate": 4.868296096366272e-05, "loss": 0.0005, "step": 2750 }, { "epoch": 3.024156545209177, "grad_norm": 0.2723006308078766, "learning_rate": 4.865796971060129e-05, "loss": 0.0003, "step": 2760 }, { "epoch": 3.0246063877642824, "grad_norm": 0.00536182289943099, "learning_rate": 4.863297845753986e-05, "loss": 0.015, "step": 2770 }, { "epoch": 3.025056230319388, "grad_norm": 0.0022712110076099634, "learning_rate": 4.860798720447843e-05, "loss": 1.0397, "step": 2780 }, { "epoch": 3.025506072874494, "grad_norm": 0.0021477430127561092, "learning_rate": 4.8582995951417004e-05, "loss": 0.0006, "step": 2790 }, { "epoch": 3.0259559154296, "grad_norm": 0.0011462434194982052, "learning_rate": 4.855800469835558e-05, "loss": 0.0006, "step": 2800 }, { "epoch": 3.0264057579847052, "grad_norm": 0.006229563616216183, "learning_rate": 4.853301344529415e-05, "loss": 0.9134, "step": 2810 }, { "epoch": 3.026855600539811, "grad_norm": 0.007051269989460707, "learning_rate": 4.850802219223272e-05, "loss": 1.181, "step": 2820 }, { "epoch": 3.027305443094917, "grad_norm": 0.00026233578682877123, "learning_rate": 4.848303093917129e-05, "loss": 0.0003, "step": 2830 }, { "epoch": 3.0277552856500223, "grad_norm": 0.004520133603364229, "learning_rate": 4.845803968610986e-05, "loss": 0.0003, "step": 2840 }, { "epoch": 3.028205128205128, "grad_norm": 0.01766774244606495, "learning_rate": 4.8433048433048433e-05, "loss": 0.6139, "step": 2850 }, { "epoch": 3.028654970760234, "grad_norm": 0.017159637063741684, "learning_rate": 4.8408057179987004e-05, "loss": 0.0004, "step": 2860 }, { "epoch": 3.0291048133153398, "grad_norm": 0.0022382515016943216, "learning_rate": 4.838306592692558e-05, "loss": 0.0002, "step": 2870 }, { "epoch": 3.029554655870445, "grad_norm": 0.1522994339466095, "learning_rate": 4.835807467386415e-05, "loss": 0.0004, "step": 2880 }, { "epoch": 3.030004498425551, "grad_norm": 0.005515269935131073, "learning_rate": 4.833308342080272e-05, "loss": 0.0002, "step": 2890 }, { "epoch": 3.030454340980657, "grad_norm": 0.002576042665168643, "learning_rate": 4.830809216774129e-05, "loss": 0.0001, "step": 2900 }, { "epoch": 3.0309041835357626, "grad_norm": 0.0002456254733260721, "learning_rate": 4.828310091467986e-05, "loss": 0.0001, "step": 2910 }, { "epoch": 3.031354026090868, "grad_norm": 0.0018946458585560322, "learning_rate": 4.8258109661618434e-05, "loss": 0.0001, "step": 2920 }, { "epoch": 3.031803868645974, "grad_norm": 0.005287510808557272, "learning_rate": 4.823311840855701e-05, "loss": 0.0001, "step": 2930 }, { "epoch": 3.0322537112010797, "grad_norm": 0.0011081949342042208, "learning_rate": 4.820812715549558e-05, "loss": 0.0001, "step": 2940 }, { "epoch": 3.0327035537561855, "grad_norm": 0.0008467049337923527, "learning_rate": 4.818313590243415e-05, "loss": 0.0001, "step": 2950 }, { "epoch": 3.033153396311291, "grad_norm": 0.0021170161198824644, "learning_rate": 4.815814464937272e-05, "loss": 0.0001, "step": 2960 }, { "epoch": 3.033333333333333, "eval_accuracy": 0.8465116279069768, "eval_f1": 0.8384448967153867, "eval_loss": 1.1956254243850708, "eval_runtime": 137.7799, "eval_samples_per_second": 1.56, "eval_steps_per_second": 1.56, "step": 2964 }, { "epoch": 4.000269905533063, "grad_norm": 0.003063289448618889, "learning_rate": 4.813315339631129e-05, "loss": 0.5843, "step": 2970 }, { "epoch": 4.000719748088169, "grad_norm": 0.037047144025564194, "learning_rate": 4.8108162143249864e-05, "loss": 0.0001, "step": 2980 }, { "epoch": 4.001169590643275, "grad_norm": 0.00304495170712471, "learning_rate": 4.8083170890188434e-05, "loss": 0.1957, "step": 2990 }, { "epoch": 4.001619433198381, "grad_norm": 0.0018491466762498021, "learning_rate": 4.805817963712701e-05, "loss": 0.007, "step": 3000 }, { "epoch": 4.002069275753486, "grad_norm": 0.002826802432537079, "learning_rate": 4.803318838406558e-05, "loss": 0.6421, "step": 3010 }, { "epoch": 4.002519118308592, "grad_norm": 0.0018951667007058859, "learning_rate": 4.800819713100415e-05, "loss": 0.027, "step": 3020 }, { "epoch": 4.002968960863698, "grad_norm": 0.0007585145067423582, "learning_rate": 4.798320587794272e-05, "loss": 0.0012, "step": 3030 }, { "epoch": 4.003418803418803, "grad_norm": 0.002254917286336422, "learning_rate": 4.7958214624881294e-05, "loss": 0.8764, "step": 3040 }, { "epoch": 4.003868645973909, "grad_norm": 0.0017444049008190632, "learning_rate": 4.7933223371819864e-05, "loss": 0.0002, "step": 3050 }, { "epoch": 4.004318488529015, "grad_norm": 0.0016239539254456758, "learning_rate": 4.790823211875844e-05, "loss": 0.0005, "step": 3060 }, { "epoch": 4.0047683310841204, "grad_norm": 0.0019840076565742493, "learning_rate": 4.788324086569701e-05, "loss": 0.0004, "step": 3070 }, { "epoch": 4.005218173639226, "grad_norm": 0.004472987726330757, "learning_rate": 4.785824961263558e-05, "loss": 0.0002, "step": 3080 }, { "epoch": 4.005668016194332, "grad_norm": 0.005834953859448433, "learning_rate": 4.783325835957415e-05, "loss": 0.0001, "step": 3090 }, { "epoch": 4.0061178587494375, "grad_norm": 0.0027514523826539516, "learning_rate": 4.7808267106512723e-05, "loss": 0.0002, "step": 3100 }, { "epoch": 4.006567701304544, "grad_norm": 0.0023958778474479914, "learning_rate": 4.7783275853451294e-05, "loss": 0.5031, "step": 3110 }, { "epoch": 4.007017543859649, "grad_norm": 0.0011505301808938384, "learning_rate": 4.7758284600389865e-05, "loss": 0.6866, "step": 3120 }, { "epoch": 4.0074673864147545, "grad_norm": 0.008492215536534786, "learning_rate": 4.773329334732844e-05, "loss": 0.1066, "step": 3130 }, { "epoch": 4.007917228969861, "grad_norm": 0.0033910947386175394, "learning_rate": 4.770830209426701e-05, "loss": 0.0001, "step": 3140 }, { "epoch": 4.008367071524966, "grad_norm": 0.013192187063395977, "learning_rate": 4.768331084120558e-05, "loss": 0.8179, "step": 3150 }, { "epoch": 4.008816914080072, "grad_norm": 64.09408569335938, "learning_rate": 4.765831958814415e-05, "loss": 1.8657, "step": 3160 }, { "epoch": 4.009266756635178, "grad_norm": 0.18163689970970154, "learning_rate": 4.7633328335082724e-05, "loss": 0.0007, "step": 3170 }, { "epoch": 4.009716599190283, "grad_norm": 0.0034807687625288963, "learning_rate": 4.7608337082021294e-05, "loss": 0.8601, "step": 3180 }, { "epoch": 4.0101664417453895, "grad_norm": 0.30424579977989197, "learning_rate": 4.758334582895987e-05, "loss": 0.0011, "step": 3190 }, { "epoch": 4.010616284300495, "grad_norm": 0.011330274865031242, "learning_rate": 4.755835457589844e-05, "loss": 0.2305, "step": 3200 }, { "epoch": 4.0110661268556, "grad_norm": 0.015486850403249264, "learning_rate": 4.753336332283701e-05, "loss": 0.0032, "step": 3210 }, { "epoch": 4.0115159694107065, "grad_norm": 49.222476959228516, "learning_rate": 4.750837206977558e-05, "loss": 0.9411, "step": 3220 }, { "epoch": 4.011965811965812, "grad_norm": 0.009367048740386963, "learning_rate": 4.7483380816714154e-05, "loss": 0.0001, "step": 3230 }, { "epoch": 4.012415654520917, "grad_norm": 0.0017500806134194136, "learning_rate": 4.7458389563652724e-05, "loss": 0.2867, "step": 3240 }, { "epoch": 4.012865497076024, "grad_norm": 0.002059083664789796, "learning_rate": 4.7433398310591295e-05, "loss": 0.0032, "step": 3250 }, { "epoch": 4.013315339631129, "grad_norm": 0.0030120774172246456, "learning_rate": 4.740840705752987e-05, "loss": 0.0002, "step": 3260 }, { "epoch": 4.013765182186235, "grad_norm": 0.014846677891910076, "learning_rate": 4.738341580446844e-05, "loss": 0.0001, "step": 3270 }, { "epoch": 4.014215024741341, "grad_norm": 0.0009176618768833578, "learning_rate": 4.735842455140701e-05, "loss": 0.0001, "step": 3280 }, { "epoch": 4.014664867296446, "grad_norm": 0.003282545367255807, "learning_rate": 4.7333433298345584e-05, "loss": 0.0001, "step": 3290 }, { "epoch": 4.015114709851552, "grad_norm": 0.0032239246647804976, "learning_rate": 4.7308442045284154e-05, "loss": 0.6583, "step": 3300 }, { "epoch": 4.015564552406658, "grad_norm": 0.010072896257042885, "learning_rate": 4.7283450792222725e-05, "loss": 0.0001, "step": 3310 }, { "epoch": 4.016014394961763, "grad_norm": 0.004343103617429733, "learning_rate": 4.7258459539161295e-05, "loss": 0.0003, "step": 3320 }, { "epoch": 4.016464237516869, "grad_norm": 0.000928006018511951, "learning_rate": 4.7233468286099866e-05, "loss": 0.0002, "step": 3330 }, { "epoch": 4.016914080071975, "grad_norm": 511.0812683105469, "learning_rate": 4.7208477033038436e-05, "loss": 0.6173, "step": 3340 }, { "epoch": 4.01736392262708, "grad_norm": 0.030410442501306534, "learning_rate": 4.7183485779977014e-05, "loss": 0.0001, "step": 3350 }, { "epoch": 4.017813765182186, "grad_norm": 0.0011354315793141723, "learning_rate": 4.7158494526915584e-05, "loss": 0.0001, "step": 3360 }, { "epoch": 4.018263607737292, "grad_norm": 0.001497347024269402, "learning_rate": 4.7133503273854155e-05, "loss": 0.0001, "step": 3370 }, { "epoch": 4.018713450292398, "grad_norm": 1.775836706161499, "learning_rate": 4.7108512020792725e-05, "loss": 0.3524, "step": 3380 }, { "epoch": 4.019163292847503, "grad_norm": 0.0009653024608269334, "learning_rate": 4.7083520767731296e-05, "loss": 0.0005, "step": 3390 }, { "epoch": 4.019613135402609, "grad_norm": 0.0010982422390952706, "learning_rate": 4.7058529514669866e-05, "loss": 0.0001, "step": 3400 }, { "epoch": 4.020062977957715, "grad_norm": 0.0017036672215908766, "learning_rate": 4.703353826160844e-05, "loss": 1.3314, "step": 3410 }, { "epoch": 4.02051282051282, "grad_norm": 0.004006888717412949, "learning_rate": 4.700854700854701e-05, "loss": 0.0001, "step": 3420 }, { "epoch": 4.020962663067926, "grad_norm": 0.004073445685207844, "learning_rate": 4.698355575548558e-05, "loss": 0.7027, "step": 3430 }, { "epoch": 4.021412505623032, "grad_norm": 0.016773417592048645, "learning_rate": 4.6958564502424155e-05, "loss": 0.0071, "step": 3440 }, { "epoch": 4.0218623481781375, "grad_norm": 0.006877032108604908, "learning_rate": 4.6933573249362725e-05, "loss": 0.8031, "step": 3450 }, { "epoch": 4.022312190733244, "grad_norm": 0.0015978328883647919, "learning_rate": 4.6908581996301296e-05, "loss": 1.5776, "step": 3460 }, { "epoch": 4.022762033288349, "grad_norm": 0.002496246714144945, "learning_rate": 4.6883590743239867e-05, "loss": 0.001, "step": 3470 }, { "epoch": 4.0232118758434545, "grad_norm": 0.0062454696744680405, "learning_rate": 4.685859949017844e-05, "loss": 0.0006, "step": 3480 }, { "epoch": 4.023661718398561, "grad_norm": 0.030108321458101273, "learning_rate": 4.683360823711701e-05, "loss": 0.0009, "step": 3490 }, { "epoch": 4.024111560953666, "grad_norm": 0.005653415806591511, "learning_rate": 4.680861698405558e-05, "loss": 0.0008, "step": 3500 }, { "epoch": 4.024561403508772, "grad_norm": 0.002025309018790722, "learning_rate": 4.678362573099415e-05, "loss": 0.0005, "step": 3510 }, { "epoch": 4.025011246063878, "grad_norm": 0.003334170440211892, "learning_rate": 4.6758634477932726e-05, "loss": 0.2423, "step": 3520 }, { "epoch": 4.025461088618983, "grad_norm": 0.00606648251414299, "learning_rate": 4.6733643224871296e-05, "loss": 0.0002, "step": 3530 }, { "epoch": 4.0259109311740895, "grad_norm": 0.004713176283985376, "learning_rate": 4.670865197180987e-05, "loss": 0.3454, "step": 3540 }, { "epoch": 4.026360773729195, "grad_norm": 0.007831880822777748, "learning_rate": 4.668366071874844e-05, "loss": 0.2343, "step": 3550 }, { "epoch": 4.0268106162843, "grad_norm": 0.000614212651271373, "learning_rate": 4.665866946568701e-05, "loss": 0.9849, "step": 3560 }, { "epoch": 4.0272604588394065, "grad_norm": 0.07048475742340088, "learning_rate": 4.663367821262558e-05, "loss": 0.8629, "step": 3570 }, { "epoch": 4.027710301394512, "grad_norm": 0.10688374191522598, "learning_rate": 4.6608686959564156e-05, "loss": 0.0416, "step": 3580 }, { "epoch": 4.028160143949617, "grad_norm": 3.1646370887756348, "learning_rate": 4.6583695706502726e-05, "loss": 0.8795, "step": 3590 }, { "epoch": 4.028609986504724, "grad_norm": 6.063758373260498, "learning_rate": 4.65587044534413e-05, "loss": 0.0032, "step": 3600 }, { "epoch": 4.029059829059829, "grad_norm": 0.006781219504773617, "learning_rate": 4.653371320037987e-05, "loss": 0.3759, "step": 3610 }, { "epoch": 4.029509671614934, "grad_norm": 0.0013880052138119936, "learning_rate": 4.650872194731844e-05, "loss": 0.0003, "step": 3620 }, { "epoch": 4.029959514170041, "grad_norm": 0.0025116559118032455, "learning_rate": 4.648373069425701e-05, "loss": 0.8795, "step": 3630 }, { "epoch": 4.030409356725146, "grad_norm": 0.04905643314123154, "learning_rate": 4.645873944119558e-05, "loss": 0.9151, "step": 3640 }, { "epoch": 4.030859199280252, "grad_norm": 0.016656950116157532, "learning_rate": 4.6433748188134156e-05, "loss": 0.0008, "step": 3650 }, { "epoch": 4.031309041835358, "grad_norm": 0.009726610966026783, "learning_rate": 4.640875693507273e-05, "loss": 0.3804, "step": 3660 }, { "epoch": 4.031758884390463, "grad_norm": 0.022569816559553146, "learning_rate": 4.63837656820113e-05, "loss": 0.4053, "step": 3670 }, { "epoch": 4.032208726945569, "grad_norm": 0.015635719522833824, "learning_rate": 4.635877442894987e-05, "loss": 0.0003, "step": 3680 }, { "epoch": 4.032658569500675, "grad_norm": 0.002108023501932621, "learning_rate": 4.633378317588844e-05, "loss": 1.1057, "step": 3690 }, { "epoch": 4.03310841205578, "grad_norm": 0.002323881722986698, "learning_rate": 4.630879192282701e-05, "loss": 0.5026, "step": 3700 }, { "epoch": 4.033333333333333, "eval_accuracy": 0.8651162790697674, "eval_f1": 0.8608297784324714, "eval_loss": 0.819013774394989, "eval_runtime": 138.7203, "eval_samples_per_second": 1.55, "eval_steps_per_second": 1.55, "step": 3705 }, { "epoch": 5.000224921277553, "grad_norm": 0.004128398839384317, "learning_rate": 4.6283800669765586e-05, "loss": 0.0005, "step": 3710 }, { "epoch": 5.0006747638326585, "grad_norm": 0.002114119939506054, "learning_rate": 4.6258809416704157e-05, "loss": 0.5857, "step": 3720 }, { "epoch": 5.001124606387764, "grad_norm": 0.004332846030592918, "learning_rate": 4.623381816364273e-05, "loss": 0.0005, "step": 3730 }, { "epoch": 5.00157444894287, "grad_norm": 0.0049261488020420074, "learning_rate": 4.62088269105813e-05, "loss": 0.0002, "step": 3740 }, { "epoch": 5.002024291497976, "grad_norm": 0.0026781943161040545, "learning_rate": 4.618383565751987e-05, "loss": 0.0003, "step": 3750 }, { "epoch": 5.002474134053082, "grad_norm": 0.0003315515932627022, "learning_rate": 4.615884440445844e-05, "loss": 0.0001, "step": 3760 }, { "epoch": 5.002923976608187, "grad_norm": 0.0022097595501691103, "learning_rate": 4.613385315139701e-05, "loss": 0.0022, "step": 3770 }, { "epoch": 5.003373819163293, "grad_norm": 0.001770248869433999, "learning_rate": 4.6108861898335586e-05, "loss": 0.0001, "step": 3780 }, { "epoch": 5.003823661718399, "grad_norm": 0.0018920463044196367, "learning_rate": 4.608387064527416e-05, "loss": 0.0001, "step": 3790 }, { "epoch": 5.004273504273504, "grad_norm": 0.00018927233759313822, "learning_rate": 4.605887939221273e-05, "loss": 0.0001, "step": 3800 }, { "epoch": 5.00472334682861, "grad_norm": 0.0017554572550579906, "learning_rate": 4.60338881391513e-05, "loss": 0.3947, "step": 3810 }, { "epoch": 5.005173189383716, "grad_norm": 0.002000226406380534, "learning_rate": 4.600889688608987e-05, "loss": 0.0001, "step": 3820 }, { "epoch": 5.005623031938821, "grad_norm": 0.0010823224438354373, "learning_rate": 4.598390563302844e-05, "loss": 0.0001, "step": 3830 }, { "epoch": 5.006072874493927, "grad_norm": 0.0068075708113610744, "learning_rate": 4.5958914379967016e-05, "loss": 0.0001, "step": 3840 }, { "epoch": 5.006522717049033, "grad_norm": 0.0013595325872302055, "learning_rate": 4.593392312690559e-05, "loss": 0.0001, "step": 3850 }, { "epoch": 5.006972559604138, "grad_norm": 0.01718098111450672, "learning_rate": 4.590893187384416e-05, "loss": 0.0004, "step": 3860 }, { "epoch": 5.007422402159245, "grad_norm": 0.004415044095367193, "learning_rate": 4.588394062078273e-05, "loss": 0.0001, "step": 3870 }, { "epoch": 5.00787224471435, "grad_norm": 39.481815338134766, "learning_rate": 4.58589493677213e-05, "loss": 0.8108, "step": 3880 }, { "epoch": 5.008322087269455, "grad_norm": 0.0009131532860919833, "learning_rate": 4.583395811465987e-05, "loss": 0.0001, "step": 3890 }, { "epoch": 5.008771929824562, "grad_norm": 0.011264754459261894, "learning_rate": 4.580896686159844e-05, "loss": 0.7913, "step": 3900 }, { "epoch": 5.009221772379667, "grad_norm": 0.0064471992664039135, "learning_rate": 4.578397560853702e-05, "loss": 0.0009, "step": 3910 }, { "epoch": 5.009671614934772, "grad_norm": 0.003504559164866805, "learning_rate": 4.575898435547559e-05, "loss": 0.0009, "step": 3920 }, { "epoch": 5.010121457489879, "grad_norm": 0.00609386945143342, "learning_rate": 4.573399310241416e-05, "loss": 0.0002, "step": 3930 }, { "epoch": 5.010571300044984, "grad_norm": 0.0018323254771530628, "learning_rate": 4.570900184935273e-05, "loss": 0.0002, "step": 3940 }, { "epoch": 5.01102114260009, "grad_norm": 0.001413513207808137, "learning_rate": 4.56840105962913e-05, "loss": 0.0001, "step": 3950 }, { "epoch": 5.011470985155196, "grad_norm": 0.011085361242294312, "learning_rate": 4.565901934322987e-05, "loss": 0.0002, "step": 3960 }, { "epoch": 5.011920827710301, "grad_norm": 0.001412344048731029, "learning_rate": 4.5634028090168447e-05, "loss": 0.0001, "step": 3970 }, { "epoch": 5.012370670265407, "grad_norm": 0.004214429762214422, "learning_rate": 4.560903683710702e-05, "loss": 0.8244, "step": 3980 }, { "epoch": 5.012820512820513, "grad_norm": 0.7035354971885681, "learning_rate": 4.558404558404559e-05, "loss": 0.0012, "step": 3990 }, { "epoch": 5.013270355375618, "grad_norm": 0.00011023220577044412, "learning_rate": 4.555905433098416e-05, "loss": 0.0536, "step": 4000 }, { "epoch": 5.013720197930724, "grad_norm": 0.005476919002830982, "learning_rate": 4.553406307792273e-05, "loss": 0.0001, "step": 4010 }, { "epoch": 5.01417004048583, "grad_norm": 0.0474027618765831, "learning_rate": 4.55090718248613e-05, "loss": 0.2201, "step": 4020 }, { "epoch": 5.014619883040936, "grad_norm": 0.0035341158509254456, "learning_rate": 4.548408057179987e-05, "loss": 0.5272, "step": 4030 }, { "epoch": 5.0150697255960415, "grad_norm": 59.552127838134766, "learning_rate": 4.545908931873845e-05, "loss": 1.233, "step": 4040 }, { "epoch": 5.015519568151147, "grad_norm": 0.002133291680365801, "learning_rate": 4.543409806567702e-05, "loss": 0.0183, "step": 4050 }, { "epoch": 5.015969410706253, "grad_norm": 0.003412761492654681, "learning_rate": 4.540910681261559e-05, "loss": 0.6646, "step": 4060 }, { "epoch": 5.0164192532613585, "grad_norm": 0.009170116856694221, "learning_rate": 4.538411555955416e-05, "loss": 0.0004, "step": 4070 }, { "epoch": 5.016869095816464, "grad_norm": 0.012778709642589092, "learning_rate": 4.535912430649273e-05, "loss": 0.0004, "step": 4080 }, { "epoch": 5.01731893837157, "grad_norm": 0.004085094202309847, "learning_rate": 4.53341330534313e-05, "loss": 0.0003, "step": 4090 }, { "epoch": 5.0177687809266756, "grad_norm": 0.006893644109368324, "learning_rate": 4.530914180036988e-05, "loss": 0.4111, "step": 4100 }, { "epoch": 5.018218623481781, "grad_norm": 0.005604883655905724, "learning_rate": 4.528415054730845e-05, "loss": 0.6267, "step": 4110 }, { "epoch": 5.018668466036887, "grad_norm": 0.002571532968431711, "learning_rate": 4.525915929424702e-05, "loss": 0.0002, "step": 4120 }, { "epoch": 5.019118308591993, "grad_norm": 0.06922221928834915, "learning_rate": 4.523416804118559e-05, "loss": 0.0024, "step": 4130 }, { "epoch": 5.019568151147099, "grad_norm": 0.009377999231219292, "learning_rate": 4.520917678812416e-05, "loss": 0.0005, "step": 4140 }, { "epoch": 5.020017993702204, "grad_norm": 0.00175398588180542, "learning_rate": 4.518418553506273e-05, "loss": 0.0001, "step": 4150 }, { "epoch": 5.02046783625731, "grad_norm": 0.0014136598911136389, "learning_rate": 4.51591942820013e-05, "loss": 0.0001, "step": 4160 }, { "epoch": 5.020917678812416, "grad_norm": 0.001172757358290255, "learning_rate": 4.513420302893988e-05, "loss": 0.0001, "step": 4170 }, { "epoch": 5.021367521367521, "grad_norm": 0.0012657820479944348, "learning_rate": 4.510921177587845e-05, "loss": 0.0002, "step": 4180 }, { "epoch": 5.021817363922627, "grad_norm": 0.002966586034744978, "learning_rate": 4.508422052281702e-05, "loss": 0.0002, "step": 4190 }, { "epoch": 5.022267206477733, "grad_norm": 0.001086425269022584, "learning_rate": 4.505922926975559e-05, "loss": 0.0004, "step": 4200 }, { "epoch": 5.022717049032838, "grad_norm": 0.0016399910673499107, "learning_rate": 4.503423801669416e-05, "loss": 0.0001, "step": 4210 }, { "epoch": 5.023166891587945, "grad_norm": 0.001130513264797628, "learning_rate": 4.500924676363273e-05, "loss": 0.0001, "step": 4220 }, { "epoch": 5.02361673414305, "grad_norm": 0.001406257157213986, "learning_rate": 4.498425551057131e-05, "loss": 0.0001, "step": 4230 }, { "epoch": 5.024066576698155, "grad_norm": 0.0009919152362272143, "learning_rate": 4.495926425750988e-05, "loss": 0.0, "step": 4240 }, { "epoch": 5.024516419253262, "grad_norm": 0.0011825012043118477, "learning_rate": 4.493427300444845e-05, "loss": 0.0, "step": 4250 }, { "epoch": 5.024966261808367, "grad_norm": 0.004908334463834763, "learning_rate": 4.490928175138702e-05, "loss": 0.0001, "step": 4260 }, { "epoch": 5.025416104363472, "grad_norm": 0.0013756196713075042, "learning_rate": 4.488429049832559e-05, "loss": 0.0001, "step": 4270 }, { "epoch": 5.025865946918579, "grad_norm": 0.0023463487159460783, "learning_rate": 4.485929924526416e-05, "loss": 0.0, "step": 4280 }, { "epoch": 5.026315789473684, "grad_norm": 0.0009425626485608518, "learning_rate": 4.483430799220273e-05, "loss": 0.0002, "step": 4290 }, { "epoch": 5.02676563202879, "grad_norm": 0.00013434803986456245, "learning_rate": 4.480931673914131e-05, "loss": 0.0, "step": 4300 }, { "epoch": 5.027215474583896, "grad_norm": 0.0008213858236558735, "learning_rate": 4.478432548607988e-05, "loss": 0.0, "step": 4310 }, { "epoch": 5.027665317139001, "grad_norm": 0.0010580273810774088, "learning_rate": 4.475933423301845e-05, "loss": 0.0, "step": 4320 }, { "epoch": 5.028115159694107, "grad_norm": 0.0009096212452277541, "learning_rate": 4.473434297995702e-05, "loss": 0.0, "step": 4330 }, { "epoch": 5.028565002249213, "grad_norm": 0.0007809678791090846, "learning_rate": 4.470935172689559e-05, "loss": 0.0, "step": 4340 }, { "epoch": 5.029014844804318, "grad_norm": 0.0007799643790349364, "learning_rate": 4.468436047383416e-05, "loss": 0.0, "step": 4350 }, { "epoch": 5.029464687359424, "grad_norm": 0.0010519314091652632, "learning_rate": 4.465936922077274e-05, "loss": 0.0001, "step": 4360 }, { "epoch": 5.02991452991453, "grad_norm": 0.0023833918385207653, "learning_rate": 4.463437796771131e-05, "loss": 0.0001, "step": 4370 }, { "epoch": 5.030364372469635, "grad_norm": 0.017371298745274544, "learning_rate": 4.460938671464988e-05, "loss": 0.0001, "step": 4380 }, { "epoch": 5.0308142150247415, "grad_norm": 0.000934561132453382, "learning_rate": 4.458439546158845e-05, "loss": 0.0, "step": 4390 }, { "epoch": 5.031264057579847, "grad_norm": 0.0007338744471780956, "learning_rate": 4.455940420852702e-05, "loss": 0.0001, "step": 4400 }, { "epoch": 5.031713900134953, "grad_norm": 0.0006507931393571198, "learning_rate": 4.453441295546559e-05, "loss": 0.0, "step": 4410 }, { "epoch": 5.0321637426900585, "grad_norm": 0.0010418130550533533, "learning_rate": 4.450942170240416e-05, "loss": 0.0, "step": 4420 }, { "epoch": 5.032613585245164, "grad_norm": 0.00015877016994636506, "learning_rate": 4.448443044934273e-05, "loss": 0.3057, "step": 4430 }, { "epoch": 5.03306342780027, "grad_norm": 0.000679882476106286, "learning_rate": 4.445943919628131e-05, "loss": 0.0, "step": 4440 }, { "epoch": 5.033333333333333, "eval_accuracy": 0.8372093023255814, "eval_f1": 0.837673020554866, "eval_loss": 1.146551489830017, "eval_runtime": 137.0873, "eval_samples_per_second": 1.568, "eval_steps_per_second": 1.568, "step": 4446 }, { "epoch": 6.000179937022042, "grad_norm": 0.0035112162586301565, "learning_rate": 4.443444794321988e-05, "loss": 0.0, "step": 4450 }, { "epoch": 6.000629779577148, "grad_norm": 0.0006130553083494306, "learning_rate": 4.440945669015845e-05, "loss": 0.3816, "step": 4460 }, { "epoch": 6.001079622132254, "grad_norm": 0.0001103510512621142, "learning_rate": 4.438446543709702e-05, "loss": 0.0, "step": 4470 }, { "epoch": 6.001529464687359, "grad_norm": 0.00022545779938809574, "learning_rate": 4.435947418403559e-05, "loss": 0.0001, "step": 4480 }, { "epoch": 6.001979307242465, "grad_norm": 0.000643337145447731, "learning_rate": 4.433448293097416e-05, "loss": 0.0002, "step": 4490 }, { "epoch": 6.002429149797571, "grad_norm": 0.004245143383741379, "learning_rate": 4.430949167791273e-05, "loss": 0.0001, "step": 4500 }, { "epoch": 6.002878992352676, "grad_norm": 0.0002646330976858735, "learning_rate": 4.42845004248513e-05, "loss": 0.0, "step": 4510 }, { "epoch": 6.003328834907783, "grad_norm": 0.2916773557662964, "learning_rate": 4.425950917178987e-05, "loss": 0.2991, "step": 4520 }, { "epoch": 6.003778677462888, "grad_norm": 0.0009541168692521751, "learning_rate": 4.423451791872845e-05, "loss": 0.0001, "step": 4530 }, { "epoch": 6.0042285200179935, "grad_norm": 0.0005155953112989664, "learning_rate": 4.420952666566702e-05, "loss": 0.0, "step": 4540 }, { "epoch": 6.0046783625731, "grad_norm": 0.004492150619626045, "learning_rate": 4.418453541260559e-05, "loss": 0.0, "step": 4550 }, { "epoch": 6.005128205128205, "grad_norm": 0.009931230917572975, "learning_rate": 4.415954415954416e-05, "loss": 0.0001, "step": 4560 }, { "epoch": 6.0055780476833105, "grad_norm": 0.01787605881690979, "learning_rate": 4.413455290648273e-05, "loss": 0.0001, "step": 4570 }, { "epoch": 6.006027890238417, "grad_norm": 0.0008124601445160806, "learning_rate": 4.41095616534213e-05, "loss": 0.0, "step": 4580 }, { "epoch": 6.006477732793522, "grad_norm": 0.0014590932987630367, "learning_rate": 4.408457040035987e-05, "loss": 0.0001, "step": 4590 }, { "epoch": 6.006927575348628, "grad_norm": 0.0008858141954988241, "learning_rate": 4.405957914729844e-05, "loss": 0.0125, "step": 4600 }, { "epoch": 6.007377417903734, "grad_norm": 0.0005775902536697686, "learning_rate": 4.4034587894237014e-05, "loss": 0.0, "step": 4610 }, { "epoch": 6.007827260458839, "grad_norm": 0.0005155576509423554, "learning_rate": 4.400959664117559e-05, "loss": 0.0, "step": 4620 }, { "epoch": 6.0082771030139455, "grad_norm": 0.0006603579968214035, "learning_rate": 4.398460538811416e-05, "loss": 0.0, "step": 4630 }, { "epoch": 6.008726945569051, "grad_norm": 0.0012030662037432194, "learning_rate": 4.395961413505273e-05, "loss": 0.0, "step": 4640 }, { "epoch": 6.009176788124156, "grad_norm": 0.000455207860795781, "learning_rate": 4.39346228819913e-05, "loss": 0.0, "step": 4650 }, { "epoch": 6.0096266306792625, "grad_norm": 0.0006669253343716264, "learning_rate": 4.390963162892987e-05, "loss": 0.0, "step": 4660 }, { "epoch": 6.010076473234368, "grad_norm": 0.0008864286937750876, "learning_rate": 4.3884640375868444e-05, "loss": 0.0, "step": 4670 }, { "epoch": 6.010526315789473, "grad_norm": 0.0014433111064136028, "learning_rate": 4.3859649122807014e-05, "loss": 0.0, "step": 4680 }, { "epoch": 6.0109761583445795, "grad_norm": 0.0008397703059017658, "learning_rate": 4.383465786974559e-05, "loss": 0.0005, "step": 4690 }, { "epoch": 6.011426000899685, "grad_norm": 0.00044810696272179484, "learning_rate": 4.380966661668416e-05, "loss": 0.0, "step": 4700 }, { "epoch": 6.011875843454791, "grad_norm": 0.00043014311813749373, "learning_rate": 4.378467536362273e-05, "loss": 0.9891, "step": 4710 }, { "epoch": 6.012325686009897, "grad_norm": 0.0023797762114554644, "learning_rate": 4.37596841105613e-05, "loss": 0.0001, "step": 4720 }, { "epoch": 6.012775528565002, "grad_norm": 0.00011278155579930171, "learning_rate": 4.3734692857499874e-05, "loss": 0.0001, "step": 4730 }, { "epoch": 6.013225371120108, "grad_norm": 0.0008991266367956996, "learning_rate": 4.3709701604438444e-05, "loss": 0.4351, "step": 4740 }, { "epoch": 6.013675213675214, "grad_norm": 0.001712612109258771, "learning_rate": 4.368471035137702e-05, "loss": 0.0113, "step": 4750 }, { "epoch": 6.014125056230319, "grad_norm": 0.0010883569484576583, "learning_rate": 4.365971909831559e-05, "loss": 0.0, "step": 4760 }, { "epoch": 6.014574898785425, "grad_norm": 0.0004519332724157721, "learning_rate": 4.363472784525416e-05, "loss": 0.0001, "step": 4770 }, { "epoch": 6.015024741340531, "grad_norm": 0.0019816658459603786, "learning_rate": 4.360973659219273e-05, "loss": 0.0001, "step": 4780 }, { "epoch": 6.015474583895637, "grad_norm": 0.0005195261328481138, "learning_rate": 4.3584745339131304e-05, "loss": 0.0001, "step": 4790 }, { "epoch": 6.015924426450742, "grad_norm": 0.0058244881220161915, "learning_rate": 4.3559754086069874e-05, "loss": 0.0, "step": 4800 }, { "epoch": 6.016374269005848, "grad_norm": 0.0006286347052082419, "learning_rate": 4.3534762833008445e-05, "loss": 0.0, "step": 4810 }, { "epoch": 6.016824111560954, "grad_norm": 0.0005231102113611996, "learning_rate": 4.350977157994702e-05, "loss": 0.0126, "step": 4820 }, { "epoch": 6.017273954116059, "grad_norm": 0.022809773683547974, "learning_rate": 4.348478032688559e-05, "loss": 0.4197, "step": 4830 }, { "epoch": 6.017723796671165, "grad_norm": 0.00436067022383213, "learning_rate": 4.345978907382416e-05, "loss": 0.465, "step": 4840 }, { "epoch": 6.018173639226271, "grad_norm": 0.00015913942479528487, "learning_rate": 4.3434797820762733e-05, "loss": 0.0041, "step": 4850 }, { "epoch": 6.018623481781376, "grad_norm": 0.0005738962790928781, "learning_rate": 4.3409806567701304e-05, "loss": 0.0, "step": 4860 }, { "epoch": 6.019073324336482, "grad_norm": 0.0009546607616357505, "learning_rate": 4.3384815314639874e-05, "loss": 0.0, "step": 4870 }, { "epoch": 6.019523166891588, "grad_norm": 0.00024159648455679417, "learning_rate": 4.335982406157845e-05, "loss": 0.0018, "step": 4880 }, { "epoch": 6.0199730094466934, "grad_norm": 0.0014075442450121045, "learning_rate": 4.333483280851702e-05, "loss": 0.1457, "step": 4890 }, { "epoch": 6.0204228520018, "grad_norm": 0.00039532541995868087, "learning_rate": 4.330984155545559e-05, "loss": 0.9377, "step": 4900 }, { "epoch": 6.020872694556905, "grad_norm": 0.00044293890823610127, "learning_rate": 4.328485030239416e-05, "loss": 0.0, "step": 4910 }, { "epoch": 6.0213225371120105, "grad_norm": 0.21310268342494965, "learning_rate": 4.3259859049332734e-05, "loss": 1.5572, "step": 4920 }, { "epoch": 6.021772379667117, "grad_norm": 0.005545054562389851, "learning_rate": 4.3234867796271304e-05, "loss": 0.0001, "step": 4930 }, { "epoch": 6.022222222222222, "grad_norm": 0.0006077567813917994, "learning_rate": 4.3209876543209875e-05, "loss": 0.036, "step": 4940 }, { "epoch": 6.0226720647773275, "grad_norm": 0.004134323913604021, "learning_rate": 4.318488529014845e-05, "loss": 0.5602, "step": 4950 }, { "epoch": 6.023121907332434, "grad_norm": 0.001166753238067031, "learning_rate": 4.315989403708702e-05, "loss": 0.0001, "step": 4960 }, { "epoch": 6.023571749887539, "grad_norm": 0.0007378680165857077, "learning_rate": 4.313490278402559e-05, "loss": 0.0002, "step": 4970 }, { "epoch": 6.0240215924426455, "grad_norm": 0.0011038266820833087, "learning_rate": 4.3109911530964164e-05, "loss": 0.3239, "step": 4980 }, { "epoch": 6.024471434997751, "grad_norm": 0.0035847104154527187, "learning_rate": 4.3084920277902734e-05, "loss": 0.737, "step": 4990 }, { "epoch": 6.024921277552856, "grad_norm": 0.03575243055820465, "learning_rate": 4.3059929024841305e-05, "loss": 0.0002, "step": 5000 }, { "epoch": 6.0253711201079625, "grad_norm": 0.0008320559863932431, "learning_rate": 4.303493777177988e-05, "loss": 0.0, "step": 5010 }, { "epoch": 6.025820962663068, "grad_norm": 0.018821338191628456, "learning_rate": 4.300994651871845e-05, "loss": 0.0285, "step": 5020 }, { "epoch": 6.026270805218173, "grad_norm": 0.0036437322851270437, "learning_rate": 4.298495526565702e-05, "loss": 0.9183, "step": 5030 }, { "epoch": 6.0267206477732795, "grad_norm": 0.0027983973268419504, "learning_rate": 4.2959964012595594e-05, "loss": 0.1324, "step": 5040 }, { "epoch": 6.027170490328385, "grad_norm": 0.0007390477112494409, "learning_rate": 4.2934972759534164e-05, "loss": 0.0001, "step": 5050 }, { "epoch": 6.027620332883491, "grad_norm": 0.007952100597321987, "learning_rate": 4.2909981506472735e-05, "loss": 0.0001, "step": 5060 }, { "epoch": 6.028070175438597, "grad_norm": 0.0006814224761910737, "learning_rate": 4.2884990253411305e-05, "loss": 0.0002, "step": 5070 }, { "epoch": 6.028520017993702, "grad_norm": 0.006477903109043837, "learning_rate": 4.285999900034988e-05, "loss": 0.8966, "step": 5080 }, { "epoch": 6.028969860548808, "grad_norm": 0.002131652319803834, "learning_rate": 4.283500774728845e-05, "loss": 0.0002, "step": 5090 }, { "epoch": 6.029419703103914, "grad_norm": 0.0029353944119066, "learning_rate": 4.2810016494227023e-05, "loss": 0.0001, "step": 5100 }, { "epoch": 6.029869545659019, "grad_norm": 0.00022575960610993207, "learning_rate": 4.2785025241165594e-05, "loss": 0.0002, "step": 5110 }, { "epoch": 6.030319388214125, "grad_norm": 0.0023172213695943356, "learning_rate": 4.2760033988104164e-05, "loss": 0.0214, "step": 5120 }, { "epoch": 6.030769230769231, "grad_norm": 0.0022363965399563313, "learning_rate": 4.2735042735042735e-05, "loss": 0.0001, "step": 5130 }, { "epoch": 6.031219073324336, "grad_norm": 0.0010555433109402657, "learning_rate": 4.271005148198131e-05, "loss": 0.0001, "step": 5140 }, { "epoch": 6.031668915879442, "grad_norm": 0.0002139418647857383, "learning_rate": 4.268506022891988e-05, "loss": 0.0005, "step": 5150 }, { "epoch": 6.032118758434548, "grad_norm": 0.0002499015536159277, "learning_rate": 4.266006897585845e-05, "loss": 0.0001, "step": 5160 }, { "epoch": 6.032568600989654, "grad_norm": 0.001529783010482788, "learning_rate": 4.2635077722797024e-05, "loss": 0.0003, "step": 5170 }, { "epoch": 6.033018443544759, "grad_norm": 0.0026151693891733885, "learning_rate": 4.2610086469735594e-05, "loss": 0.0, "step": 5180 }, { "epoch": 6.033333333333333, "eval_accuracy": 0.8418604651162791, "eval_f1": 0.8357891859942043, "eval_loss": 1.080440878868103, "eval_runtime": 138.3666, "eval_samples_per_second": 1.554, "eval_steps_per_second": 1.554, "step": 5187 }, { "epoch": 7.0001349527665315, "grad_norm": 0.0007138159708119929, "learning_rate": 4.2585095216674165e-05, "loss": 0.0, "step": 5190 }, { "epoch": 7.000584795321638, "grad_norm": 0.001677382504567504, "learning_rate": 4.2560103963612735e-05, "loss": 0.4043, "step": 5200 }, { "epoch": 7.001034637876743, "grad_norm": 0.0011122095165774226, "learning_rate": 4.253511271055131e-05, "loss": 0.0001, "step": 5210 }, { "epoch": 7.001484480431849, "grad_norm": 0.0009910761145874858, "learning_rate": 4.251012145748988e-05, "loss": 0.0, "step": 5220 }, { "epoch": 7.001934322986955, "grad_norm": 0.003112983424216509, "learning_rate": 4.2485130204428454e-05, "loss": 0.0, "step": 5230 }, { "epoch": 7.00238416554206, "grad_norm": 1.3718911409378052, "learning_rate": 4.2460138951367024e-05, "loss": 0.0042, "step": 5240 }, { "epoch": 7.002834008097166, "grad_norm": 0.0015905714826658368, "learning_rate": 4.2435147698305595e-05, "loss": 0.0, "step": 5250 }, { "epoch": 7.003283850652272, "grad_norm": 0.001323228352703154, "learning_rate": 4.2410156445244165e-05, "loss": 0.0, "step": 5260 }, { "epoch": 7.003733693207377, "grad_norm": 0.0002908277092501521, "learning_rate": 4.238516519218274e-05, "loss": 0.3795, "step": 5270 }, { "epoch": 7.0041835357624835, "grad_norm": 0.00028200825909152627, "learning_rate": 4.236017393912131e-05, "loss": 0.0, "step": 5280 }, { "epoch": 7.004633378317589, "grad_norm": 0.0002451376640237868, "learning_rate": 4.2335182686059884e-05, "loss": 0.0, "step": 5290 }, { "epoch": 7.005083220872694, "grad_norm": 0.0014049593592062593, "learning_rate": 4.2310191432998454e-05, "loss": 0.0, "step": 5300 }, { "epoch": 7.005533063427801, "grad_norm": 0.0009300464298576117, "learning_rate": 4.2285200179937025e-05, "loss": 0.0, "step": 5310 }, { "epoch": 7.005982905982906, "grad_norm": 0.002207084558904171, "learning_rate": 4.2260208926875595e-05, "loss": 0.0, "step": 5320 }, { "epoch": 7.006432748538011, "grad_norm": 0.0008811824372969568, "learning_rate": 4.2235217673814166e-05, "loss": 0.0, "step": 5330 }, { "epoch": 7.006882591093118, "grad_norm": 0.0002765203535091132, "learning_rate": 4.221022642075274e-05, "loss": 0.5528, "step": 5340 }, { "epoch": 7.007332433648223, "grad_norm": 1.118844747543335, "learning_rate": 4.2185235167691313e-05, "loss": 0.0033, "step": 5350 }, { "epoch": 7.007782276203329, "grad_norm": 0.021484030410647392, "learning_rate": 4.2160243914629884e-05, "loss": 0.0001, "step": 5360 }, { "epoch": 7.008232118758435, "grad_norm": 0.00025987334083765745, "learning_rate": 4.2135252661568455e-05, "loss": 0.0, "step": 5370 }, { "epoch": 7.00868196131354, "grad_norm": 0.0003007091290783137, "learning_rate": 4.2110261408507025e-05, "loss": 0.3022, "step": 5380 }, { "epoch": 7.009131803868646, "grad_norm": 0.0006417080876417458, "learning_rate": 4.2085270155445596e-05, "loss": 0.0, "step": 5390 }, { "epoch": 7.009581646423752, "grad_norm": 0.005425110924988985, "learning_rate": 4.2060278902384166e-05, "loss": 0.0002, "step": 5400 }, { "epoch": 7.010031488978857, "grad_norm": 0.004679904319345951, "learning_rate": 4.203528764932274e-05, "loss": 0.0002, "step": 5410 }, { "epoch": 7.010481331533963, "grad_norm": 4.300858927308582e-05, "learning_rate": 4.2010296396261314e-05, "loss": 0.0004, "step": 5420 }, { "epoch": 7.010931174089069, "grad_norm": 0.03579103574156761, "learning_rate": 4.1985305143199884e-05, "loss": 0.0, "step": 5430 }, { "epoch": 7.011381016644174, "grad_norm": 0.0010514215100556612, "learning_rate": 4.1960313890138455e-05, "loss": 0.0001, "step": 5440 }, { "epoch": 7.01183085919928, "grad_norm": 0.0015332564944401383, "learning_rate": 4.1935322637077025e-05, "loss": 0.0, "step": 5450 }, { "epoch": 7.012280701754386, "grad_norm": 0.0022514229640364647, "learning_rate": 4.1910331384015596e-05, "loss": 0.0, "step": 5460 }, { "epoch": 7.012730544309492, "grad_norm": 6.670637230854481e-05, "learning_rate": 4.188534013095417e-05, "loss": 0.0, "step": 5470 }, { "epoch": 7.013180386864597, "grad_norm": 0.00018955659470520914, "learning_rate": 4.1860348877892744e-05, "loss": 0.0, "step": 5480 }, { "epoch": 7.013630229419703, "grad_norm": 0.0002056826197076589, "learning_rate": 4.1835357624831314e-05, "loss": 0.0, "step": 5490 }, { "epoch": 7.014080071974809, "grad_norm": 0.06601982563734055, "learning_rate": 4.1810366371769885e-05, "loss": 0.1744, "step": 5500 }, { "epoch": 7.0145299145299145, "grad_norm": 127.92781829833984, "learning_rate": 4.1785375118708455e-05, "loss": 0.6116, "step": 5510 }, { "epoch": 7.01497975708502, "grad_norm": 0.0017621285514906049, "learning_rate": 4.1760383865647026e-05, "loss": 0.14, "step": 5520 }, { "epoch": 7.015429599640126, "grad_norm": 0.0035515788476914167, "learning_rate": 4.1735392612585596e-05, "loss": 0.0, "step": 5530 }, { "epoch": 7.0158794421952315, "grad_norm": 501.9407958984375, "learning_rate": 4.1710401359524174e-05, "loss": 0.264, "step": 5540 }, { "epoch": 7.016329284750338, "grad_norm": 0.00041791191324591637, "learning_rate": 4.1685410106462744e-05, "loss": 0.0, "step": 5550 }, { "epoch": 7.016779127305443, "grad_norm": 0.0007269049528986216, "learning_rate": 4.1660418853401315e-05, "loss": 0.1762, "step": 5560 }, { "epoch": 7.017228969860549, "grad_norm": 0.003260241821408272, "learning_rate": 4.1635427600339885e-05, "loss": 0.0002, "step": 5570 }, { "epoch": 7.017678812415655, "grad_norm": 0.001824333448894322, "learning_rate": 4.1610436347278456e-05, "loss": 0.0, "step": 5580 }, { "epoch": 7.01812865497076, "grad_norm": 0.0004864515794906765, "learning_rate": 4.1585445094217026e-05, "loss": 0.8556, "step": 5590 }, { "epoch": 7.018578497525866, "grad_norm": 0.0010554906912147999, "learning_rate": 4.15604538411556e-05, "loss": 0.0, "step": 5600 }, { "epoch": 7.019028340080972, "grad_norm": 0.0006288652657531202, "learning_rate": 4.153546258809417e-05, "loss": 0.0, "step": 5610 }, { "epoch": 7.019478182636077, "grad_norm": 0.007150471210479736, "learning_rate": 4.1510471335032745e-05, "loss": 0.0, "step": 5620 }, { "epoch": 7.019928025191183, "grad_norm": 0.0005253459676168859, "learning_rate": 4.1485480081971315e-05, "loss": 0.0, "step": 5630 }, { "epoch": 7.020377867746289, "grad_norm": 0.0003124882059637457, "learning_rate": 4.1460488828909886e-05, "loss": 0.0, "step": 5640 }, { "epoch": 7.020827710301394, "grad_norm": 0.0003111250407528132, "learning_rate": 4.1435497575848456e-05, "loss": 0.0, "step": 5650 }, { "epoch": 7.021277552856501, "grad_norm": 0.0049888682551681995, "learning_rate": 4.141050632278703e-05, "loss": 0.0, "step": 5660 }, { "epoch": 7.021727395411606, "grad_norm": 0.0003852382651530206, "learning_rate": 4.13855150697256e-05, "loss": 0.0, "step": 5670 }, { "epoch": 7.022177237966711, "grad_norm": 0.0005623734323307872, "learning_rate": 4.136052381666417e-05, "loss": 0.0, "step": 5680 }, { "epoch": 7.022627080521818, "grad_norm": 0.005746079608798027, "learning_rate": 4.133553256360274e-05, "loss": 0.0004, "step": 5690 }, { "epoch": 7.023076923076923, "grad_norm": 0.001581136486493051, "learning_rate": 4.131054131054131e-05, "loss": 0.0, "step": 5700 }, { "epoch": 7.023526765632028, "grad_norm": 0.008480487391352654, "learning_rate": 4.128555005747988e-05, "loss": 0.0001, "step": 5710 }, { "epoch": 7.023976608187135, "grad_norm": 0.002177335089072585, "learning_rate": 4.1260558804418457e-05, "loss": 0.9871, "step": 5720 }, { "epoch": 7.02442645074224, "grad_norm": 0.0005311229615472257, "learning_rate": 4.123556755135703e-05, "loss": 0.0025, "step": 5730 }, { "epoch": 7.024876293297346, "grad_norm": 0.0005979858106002212, "learning_rate": 4.12105762982956e-05, "loss": 0.0, "step": 5740 }, { "epoch": 7.025326135852452, "grad_norm": 0.0006952813128009439, "learning_rate": 4.118558504523417e-05, "loss": 0.0, "step": 5750 }, { "epoch": 7.025775978407557, "grad_norm": 0.00034614797914400697, "learning_rate": 4.116059379217274e-05, "loss": 0.0, "step": 5760 }, { "epoch": 7.026225820962663, "grad_norm": 0.022977815940976143, "learning_rate": 4.113560253911131e-05, "loss": 0.0001, "step": 5770 }, { "epoch": 7.026675663517769, "grad_norm": 0.0005210953531786799, "learning_rate": 4.111061128604988e-05, "loss": 0.0, "step": 5780 }, { "epoch": 7.027125506072874, "grad_norm": 0.0012725845444947481, "learning_rate": 4.108562003298845e-05, "loss": 0.0, "step": 5790 }, { "epoch": 7.02757534862798, "grad_norm": 0.0016080683562904596, "learning_rate": 4.106062877992703e-05, "loss": 0.0, "step": 5800 }, { "epoch": 7.028025191183086, "grad_norm": 0.00036041668499819934, "learning_rate": 4.10356375268656e-05, "loss": 0.0001, "step": 5810 }, { "epoch": 7.028475033738192, "grad_norm": 0.000628514913842082, "learning_rate": 4.101064627380417e-05, "loss": 0.0004, "step": 5820 }, { "epoch": 7.028924876293297, "grad_norm": 0.0010594029445201159, "learning_rate": 4.098565502074274e-05, "loss": 0.0, "step": 5830 }, { "epoch": 7.029374718848403, "grad_norm": 0.0003191715804859996, "learning_rate": 4.096066376768131e-05, "loss": 0.0001, "step": 5840 }, { "epoch": 7.029824561403509, "grad_norm": 0.0013078658375889063, "learning_rate": 4.093567251461988e-05, "loss": 0.0, "step": 5850 }, { "epoch": 7.0302744039586145, "grad_norm": 0.0002901960688177496, "learning_rate": 4.091068126155846e-05, "loss": 0.0, "step": 5860 }, { "epoch": 7.03072424651372, "grad_norm": 0.000685417209751904, "learning_rate": 4.088569000849703e-05, "loss": 0.0, "step": 5870 }, { "epoch": 7.031174089068826, "grad_norm": 0.00033471034839749336, "learning_rate": 4.08606987554356e-05, "loss": 0.0, "step": 5880 }, { "epoch": 7.0316239316239315, "grad_norm": 0.0003678982029668987, "learning_rate": 4.083570750237417e-05, "loss": 0.0, "step": 5890 }, { "epoch": 7.032073774179038, "grad_norm": 0.0009671682491898537, "learning_rate": 4.081071624931274e-05, "loss": 0.0, "step": 5900 }, { "epoch": 7.032523616734143, "grad_norm": 0.00025281679700128734, "learning_rate": 4.078572499625131e-05, "loss": 0.0, "step": 5910 }, { "epoch": 7.0329734592892486, "grad_norm": 0.0006937732105143368, "learning_rate": 4.076073374318988e-05, "loss": 0.0, "step": 5920 }, { "epoch": 7.033333333333333, "eval_accuracy": 0.8930232558139535, "eval_f1": 0.8908988909837458, "eval_loss": 0.8534913659095764, "eval_runtime": 139.1847, "eval_samples_per_second": 1.545, "eval_steps_per_second": 1.545, "step": 5928 }, { "epoch": 8.000089968511022, "grad_norm": 0.0003428373602218926, "learning_rate": 4.073574249012846e-05, "loss": 0.0052, "step": 5930 }, { "epoch": 8.000539811066126, "grad_norm": 0.0004002130008302629, "learning_rate": 4.071075123706703e-05, "loss": 0.0, "step": 5940 }, { "epoch": 8.000989653621232, "grad_norm": 0.00027047377079725266, "learning_rate": 4.06857599840056e-05, "loss": 0.0, "step": 5950 }, { "epoch": 8.001439496176339, "grad_norm": 0.0011688901577144861, "learning_rate": 4.066076873094417e-05, "loss": 0.0, "step": 5960 }, { "epoch": 8.001889338731443, "grad_norm": 0.001081600203178823, "learning_rate": 4.063577747788274e-05, "loss": 0.3357, "step": 5970 }, { "epoch": 8.00233918128655, "grad_norm": 0.00041262098238803446, "learning_rate": 4.061078622482131e-05, "loss": 0.0, "step": 5980 }, { "epoch": 8.002789023841656, "grad_norm": 0.0004393083800096065, "learning_rate": 4.058579497175989e-05, "loss": 0.9892, "step": 5990 }, { "epoch": 8.003238866396762, "grad_norm": 0.00042270583799108863, "learning_rate": 4.056080371869846e-05, "loss": 0.0, "step": 6000 }, { "epoch": 8.003688708951866, "grad_norm": 0.0010285059688612819, "learning_rate": 4.053581246563703e-05, "loss": 0.0, "step": 6010 }, { "epoch": 8.004138551506973, "grad_norm": 0.00025913130957633257, "learning_rate": 4.05108212125756e-05, "loss": 0.5312, "step": 6020 }, { "epoch": 8.004588394062079, "grad_norm": 0.0002647424698807299, "learning_rate": 4.048582995951417e-05, "loss": 0.5985, "step": 6030 }, { "epoch": 8.005038236617184, "grad_norm": 0.00035898041096515954, "learning_rate": 4.046083870645274e-05, "loss": 0.0, "step": 6040 }, { "epoch": 8.00548807917229, "grad_norm": 0.0011684470809996128, "learning_rate": 4.043584745339131e-05, "loss": 0.0, "step": 6050 }, { "epoch": 8.005937921727396, "grad_norm": 0.000999482232145965, "learning_rate": 4.041085620032989e-05, "loss": 0.0019, "step": 6060 }, { "epoch": 8.0063877642825, "grad_norm": 0.0031640531960874796, "learning_rate": 4.038586494726846e-05, "loss": 0.0003, "step": 6070 }, { "epoch": 8.006837606837607, "grad_norm": 0.0010574915213510394, "learning_rate": 4.036087369420703e-05, "loss": 0.0001, "step": 6080 }, { "epoch": 8.007287449392713, "grad_norm": 0.002111629117280245, "learning_rate": 4.03358824411456e-05, "loss": 0.0022, "step": 6090 }, { "epoch": 8.007737291947818, "grad_norm": 0.0005225425702519715, "learning_rate": 4.031089118808417e-05, "loss": 0.0, "step": 6100 }, { "epoch": 8.008187134502924, "grad_norm": 0.00040085570071823895, "learning_rate": 4.028589993502274e-05, "loss": 0.0001, "step": 6110 }, { "epoch": 8.00863697705803, "grad_norm": 0.00045553798554465175, "learning_rate": 4.026090868196132e-05, "loss": 0.0001, "step": 6120 }, { "epoch": 8.009086819613135, "grad_norm": 0.01064038835465908, "learning_rate": 4.023591742889989e-05, "loss": 0.0, "step": 6130 }, { "epoch": 8.009536662168241, "grad_norm": 0.0005399421788752079, "learning_rate": 4.021092617583846e-05, "loss": 0.0838, "step": 6140 }, { "epoch": 8.009986504723347, "grad_norm": 0.001106308656744659, "learning_rate": 4.018593492277703e-05, "loss": 0.0, "step": 6150 }, { "epoch": 8.010436347278452, "grad_norm": 0.0004213255597278476, "learning_rate": 4.01609436697156e-05, "loss": 1.3147, "step": 6160 }, { "epoch": 8.010886189833558, "grad_norm": 0.005709492601454258, "learning_rate": 4.013595241665417e-05, "loss": 0.0104, "step": 6170 }, { "epoch": 8.011336032388664, "grad_norm": 0.0003802681458182633, "learning_rate": 4.011096116359274e-05, "loss": 0.0007, "step": 6180 }, { "epoch": 8.01178587494377, "grad_norm": 0.00036927941255271435, "learning_rate": 4.008596991053132e-05, "loss": 0.0, "step": 6190 }, { "epoch": 8.012235717498875, "grad_norm": 0.03572312742471695, "learning_rate": 4.006097865746989e-05, "loss": 0.0, "step": 6200 }, { "epoch": 8.012685560053981, "grad_norm": 0.0003122588386759162, "learning_rate": 4.003598740440846e-05, "loss": 0.0075, "step": 6210 }, { "epoch": 8.013135402609088, "grad_norm": 0.00024281599326059222, "learning_rate": 4.001099615134703e-05, "loss": 0.0, "step": 6220 }, { "epoch": 8.013585245164192, "grad_norm": 0.0008627267670817673, "learning_rate": 3.99860048982856e-05, "loss": 0.0, "step": 6230 }, { "epoch": 8.014035087719298, "grad_norm": 4.582615852355957, "learning_rate": 3.996101364522417e-05, "loss": 0.0009, "step": 6240 }, { "epoch": 8.014484930274405, "grad_norm": 0.0007836317527107894, "learning_rate": 3.993602239216275e-05, "loss": 0.0, "step": 6250 }, { "epoch": 8.014934772829509, "grad_norm": 0.00019490346312522888, "learning_rate": 3.991103113910132e-05, "loss": 0.0, "step": 6260 }, { "epoch": 8.015384615384615, "grad_norm": 0.00038138151285238564, "learning_rate": 3.988603988603989e-05, "loss": 0.0, "step": 6270 }, { "epoch": 8.015834457939722, "grad_norm": 0.0008199354051612318, "learning_rate": 3.986104863297846e-05, "loss": 0.0, "step": 6280 }, { "epoch": 8.016284300494826, "grad_norm": 0.003807654371485114, "learning_rate": 3.983605737991703e-05, "loss": 0.237, "step": 6290 }, { "epoch": 8.016734143049932, "grad_norm": 0.0003830906643997878, "learning_rate": 3.98110661268556e-05, "loss": 0.0, "step": 6300 }, { "epoch": 8.017183985605039, "grad_norm": 0.00023018223873805255, "learning_rate": 3.978607487379417e-05, "loss": 0.0029, "step": 6310 }, { "epoch": 8.017633828160143, "grad_norm": 0.00029084240668453276, "learning_rate": 3.976108362073275e-05, "loss": 0.0, "step": 6320 }, { "epoch": 8.01808367071525, "grad_norm": 7.912725413916633e-05, "learning_rate": 3.973609236767132e-05, "loss": 0.0001, "step": 6330 }, { "epoch": 8.018533513270356, "grad_norm": 0.0013584913685917854, "learning_rate": 3.971110111460989e-05, "loss": 0.0, "step": 6340 }, { "epoch": 8.018983355825462, "grad_norm": 0.00025199123774655163, "learning_rate": 3.968610986154846e-05, "loss": 0.0001, "step": 6350 }, { "epoch": 8.019433198380566, "grad_norm": 0.00044878857443109155, "learning_rate": 3.966111860848703e-05, "loss": 0.0, "step": 6360 }, { "epoch": 8.019883040935673, "grad_norm": 0.0002705784572754055, "learning_rate": 3.96361273554256e-05, "loss": 0.0, "step": 6370 }, { "epoch": 8.020332883490779, "grad_norm": 0.0015248061390593648, "learning_rate": 3.961113610236418e-05, "loss": 0.0, "step": 6380 }, { "epoch": 8.020782726045883, "grad_norm": 0.0004708465712610632, "learning_rate": 3.958614484930275e-05, "loss": 0.0, "step": 6390 }, { "epoch": 8.02123256860099, "grad_norm": 8.554902160540223e-05, "learning_rate": 3.956115359624132e-05, "loss": 0.0001, "step": 6400 }, { "epoch": 8.021682411156096, "grad_norm": 0.00029393951990641654, "learning_rate": 3.953616234317989e-05, "loss": 0.0, "step": 6410 }, { "epoch": 8.0221322537112, "grad_norm": 0.0004791915125679225, "learning_rate": 3.951117109011846e-05, "loss": 0.0, "step": 6420 }, { "epoch": 8.022582096266307, "grad_norm": 0.0002918030950240791, "learning_rate": 3.948617983705703e-05, "loss": 0.0008, "step": 6430 }, { "epoch": 8.023031938821413, "grad_norm": 0.00026181110297329724, "learning_rate": 3.94611885839956e-05, "loss": 0.0, "step": 6440 }, { "epoch": 8.023481781376518, "grad_norm": 0.0002527247997932136, "learning_rate": 3.943619733093418e-05, "loss": 0.5606, "step": 6450 }, { "epoch": 8.023931623931624, "grad_norm": 0.000233857732382603, "learning_rate": 3.941120607787275e-05, "loss": 0.5414, "step": 6460 }, { "epoch": 8.02438146648673, "grad_norm": 0.0006365039735101163, "learning_rate": 3.938621482481132e-05, "loss": 0.9496, "step": 6470 }, { "epoch": 8.024831309041835, "grad_norm": 0.0006839362904429436, "learning_rate": 3.936122357174989e-05, "loss": 0.0, "step": 6480 }, { "epoch": 8.02528115159694, "grad_norm": 0.001829691231250763, "learning_rate": 3.933623231868846e-05, "loss": 0.0001, "step": 6490 }, { "epoch": 8.025730994152047, "grad_norm": 0.0031682876870036125, "learning_rate": 3.931124106562703e-05, "loss": 0.0, "step": 6500 }, { "epoch": 8.026180836707152, "grad_norm": 0.0012081661261618137, "learning_rate": 3.928624981256561e-05, "loss": 0.0001, "step": 6510 }, { "epoch": 8.026630679262258, "grad_norm": 0.0017490022582933307, "learning_rate": 3.926125855950418e-05, "loss": 0.0002, "step": 6520 }, { "epoch": 8.027080521817364, "grad_norm": 0.0003666267148219049, "learning_rate": 3.923626730644275e-05, "loss": 0.0002, "step": 6530 }, { "epoch": 8.02753036437247, "grad_norm": 0.002081898972392082, "learning_rate": 3.921127605338132e-05, "loss": 0.0001, "step": 6540 }, { "epoch": 8.027980206927575, "grad_norm": 0.0007806915673427284, "learning_rate": 3.918628480031989e-05, "loss": 0.0, "step": 6550 }, { "epoch": 8.028430049482681, "grad_norm": 0.00017435807967558503, "learning_rate": 3.916129354725846e-05, "loss": 0.0, "step": 6560 }, { "epoch": 8.028879892037788, "grad_norm": 0.9364543557167053, "learning_rate": 3.913630229419703e-05, "loss": 0.003, "step": 6570 }, { "epoch": 8.029329734592892, "grad_norm": 0.000479085196275264, "learning_rate": 3.911131104113561e-05, "loss": 0.0, "step": 6580 }, { "epoch": 8.029779577147998, "grad_norm": 0.0007905447273515165, "learning_rate": 3.908631978807418e-05, "loss": 0.0001, "step": 6590 }, { "epoch": 8.030229419703105, "grad_norm": 0.0011296223383396864, "learning_rate": 3.906132853501275e-05, "loss": 0.0, "step": 6600 }, { "epoch": 8.030679262258209, "grad_norm": 0.0010889263357967138, "learning_rate": 3.903633728195132e-05, "loss": 0.0, "step": 6610 }, { "epoch": 8.031129104813315, "grad_norm": 0.00032578996615484357, "learning_rate": 3.901134602888989e-05, "loss": 0.0, "step": 6620 }, { "epoch": 8.031578947368422, "grad_norm": 0.00020901062816847116, "learning_rate": 3.898635477582846e-05, "loss": 0.0, "step": 6630 }, { "epoch": 8.032028789923526, "grad_norm": 0.00017999886767938733, "learning_rate": 3.896136352276704e-05, "loss": 0.0, "step": 6640 }, { "epoch": 8.032478632478632, "grad_norm": 0.0010392559925094247, "learning_rate": 3.893637226970561e-05, "loss": 0.5313, "step": 6650 }, { "epoch": 8.032928475033739, "grad_norm": 0.0005207455833442509, "learning_rate": 3.891138101664418e-05, "loss": 0.0, "step": 6660 }, { "epoch": 8.033333333333333, "eval_accuracy": 0.9069767441860465, "eval_f1": 0.907019761720607, "eval_loss": 0.6512415409088135, "eval_runtime": 139.7852, "eval_samples_per_second": 1.538, "eval_steps_per_second": 1.538, "step": 6669 }, { "epoch": 9.00004498425551, "grad_norm": 0.0008128684712573886, "learning_rate": 3.888638976358275e-05, "loss": 0.0, "step": 6670 }, { "epoch": 9.000494826810616, "grad_norm": 0.0012383826542645693, "learning_rate": 3.886139851052132e-05, "loss": 0.0, "step": 6680 }, { "epoch": 9.000944669365722, "grad_norm": 0.00025606638519093394, "learning_rate": 3.883640725745989e-05, "loss": 0.3597, "step": 6690 }, { "epoch": 9.001394511920827, "grad_norm": 0.00884920358657837, "learning_rate": 3.881141600439846e-05, "loss": 0.0001, "step": 6700 }, { "epoch": 9.001844354475933, "grad_norm": 0.000444287434220314, "learning_rate": 3.878642475133704e-05, "loss": 0.0, "step": 6710 }, { "epoch": 9.00229419703104, "grad_norm": 2.3916077613830566, "learning_rate": 3.876143349827561e-05, "loss": 0.0077, "step": 6720 }, { "epoch": 9.002744039586144, "grad_norm": 0.0007892856956459582, "learning_rate": 3.873644224521418e-05, "loss": 0.0, "step": 6730 }, { "epoch": 9.00319388214125, "grad_norm": 0.00018545080092735589, "learning_rate": 3.871145099215275e-05, "loss": 0.0038, "step": 6740 }, { "epoch": 9.003643724696357, "grad_norm": 0.00045868937741033733, "learning_rate": 3.868645973909132e-05, "loss": 0.0, "step": 6750 }, { "epoch": 9.004093567251463, "grad_norm": 0.0007905120146460831, "learning_rate": 3.866146848602989e-05, "loss": 0.0, "step": 6760 }, { "epoch": 9.004543409806567, "grad_norm": 6.415927782654762e-05, "learning_rate": 3.863647723296846e-05, "loss": 0.0001, "step": 6770 }, { "epoch": 9.004993252361674, "grad_norm": 0.0003376628446858376, "learning_rate": 3.861148597990703e-05, "loss": 0.0, "step": 6780 }, { "epoch": 9.00544309491678, "grad_norm": 0.0013937164330855012, "learning_rate": 3.8586494726845603e-05, "loss": 0.0, "step": 6790 }, { "epoch": 9.005892937471884, "grad_norm": 0.3749317526817322, "learning_rate": 3.8561503473784174e-05, "loss": 0.0012, "step": 6800 }, { "epoch": 9.00634278002699, "grad_norm": 4.6343757276190445e-05, "learning_rate": 3.853651222072275e-05, "loss": 0.0, "step": 6810 }, { "epoch": 9.006792622582097, "grad_norm": 0.00031851307721808553, "learning_rate": 3.851152096766132e-05, "loss": 0.0, "step": 6820 }, { "epoch": 9.007242465137201, "grad_norm": 0.0007985978736542165, "learning_rate": 3.848652971459989e-05, "loss": 0.0001, "step": 6830 }, { "epoch": 9.007692307692308, "grad_norm": 0.0035900601651519537, "learning_rate": 3.846153846153846e-05, "loss": 0.0, "step": 6840 }, { "epoch": 9.008142150247414, "grad_norm": 0.0001069472127710469, "learning_rate": 3.843654720847703e-05, "loss": 0.0, "step": 6850 }, { "epoch": 9.008591992802518, "grad_norm": 0.0004973487230017781, "learning_rate": 3.8411555955415604e-05, "loss": 0.0, "step": 6860 }, { "epoch": 9.009041835357625, "grad_norm": 25.509986877441406, "learning_rate": 3.8386564702354174e-05, "loss": 0.0054, "step": 6870 }, { "epoch": 9.009491677912731, "grad_norm": 0.00025141955120489, "learning_rate": 3.8361573449292745e-05, "loss": 0.0, "step": 6880 }, { "epoch": 9.009941520467835, "grad_norm": 0.00046081244363449514, "learning_rate": 3.8336582196231315e-05, "loss": 0.7702, "step": 6890 }, { "epoch": 9.010391363022942, "grad_norm": 0.0018445062451064587, "learning_rate": 3.831159094316989e-05, "loss": 0.0, "step": 6900 }, { "epoch": 9.010841205578048, "grad_norm": 0.0030081935692578554, "learning_rate": 3.828659969010846e-05, "loss": 0.0, "step": 6910 }, { "epoch": 9.011291048133153, "grad_norm": 0.00038244519964791834, "learning_rate": 3.8261608437047034e-05, "loss": 0.0, "step": 6920 }, { "epoch": 9.011740890688259, "grad_norm": 0.000907717680092901, "learning_rate": 3.8236617183985604e-05, "loss": 0.0, "step": 6930 }, { "epoch": 9.012190733243365, "grad_norm": 0.001138004707172513, "learning_rate": 3.8211625930924175e-05, "loss": 0.0, "step": 6940 }, { "epoch": 9.012640575798471, "grad_norm": 0.00086601497605443, "learning_rate": 3.8186634677862745e-05, "loss": 0.0, "step": 6950 }, { "epoch": 9.013090418353576, "grad_norm": 0.00015775859355926514, "learning_rate": 3.8161643424801316e-05, "loss": 0.0, "step": 6960 }, { "epoch": 9.013540260908682, "grad_norm": 8.768979023443535e-05, "learning_rate": 3.813665217173989e-05, "loss": 0.0001, "step": 6970 }, { "epoch": 9.013990103463788, "grad_norm": 0.0007052143337205052, "learning_rate": 3.8111660918678464e-05, "loss": 0.0, "step": 6980 }, { "epoch": 9.014439946018893, "grad_norm": 0.0004125000268686563, "learning_rate": 3.8086669665617034e-05, "loss": 0.0, "step": 6990 }, { "epoch": 9.014889788574, "grad_norm": 0.00040519636240787804, "learning_rate": 3.8061678412555605e-05, "loss": 0.0, "step": 7000 }, { "epoch": 9.015339631129105, "grad_norm": 0.0005299519398249686, "learning_rate": 3.8036687159494175e-05, "loss": 0.0, "step": 7010 }, { "epoch": 9.01578947368421, "grad_norm": 5.4071446356829256e-05, "learning_rate": 3.8011695906432746e-05, "loss": 0.0, "step": 7020 }, { "epoch": 9.016239316239316, "grad_norm": 0.000518677057698369, "learning_rate": 3.798670465337132e-05, "loss": 0.5424, "step": 7030 }, { "epoch": 9.016689158794422, "grad_norm": 0.0003273050533607602, "learning_rate": 3.7961713400309894e-05, "loss": 0.0, "step": 7040 }, { "epoch": 9.017139001349527, "grad_norm": 0.00316060334444046, "learning_rate": 3.7936722147248464e-05, "loss": 1.9054, "step": 7050 }, { "epoch": 9.017588843904633, "grad_norm": 0.9441694021224976, "learning_rate": 3.7911730894187035e-05, "loss": 0.0009, "step": 7060 }, { "epoch": 9.01803868645974, "grad_norm": 0.0186273492872715, "learning_rate": 3.7886739641125605e-05, "loss": 0.0003, "step": 7070 }, { "epoch": 9.018488529014844, "grad_norm": 0.00016842443437781185, "learning_rate": 3.7861748388064176e-05, "loss": 0.0004, "step": 7080 }, { "epoch": 9.01893837156995, "grad_norm": 0.006554078310728073, "learning_rate": 3.7836757135002746e-05, "loss": 0.0001, "step": 7090 }, { "epoch": 9.019388214125057, "grad_norm": 0.0008052074117586017, "learning_rate": 3.7811765881941323e-05, "loss": 0.0, "step": 7100 }, { "epoch": 9.019838056680163, "grad_norm": 0.000344007188687101, "learning_rate": 3.7786774628879894e-05, "loss": 0.0001, "step": 7110 }, { "epoch": 9.020287899235267, "grad_norm": 0.0040848939679563046, "learning_rate": 3.7761783375818464e-05, "loss": 0.0001, "step": 7120 }, { "epoch": 9.020737741790374, "grad_norm": 0.0001725117617752403, "learning_rate": 3.7736792122757035e-05, "loss": 0.0001, "step": 7130 }, { "epoch": 9.02118758434548, "grad_norm": 0.0005373347667045891, "learning_rate": 3.7711800869695605e-05, "loss": 0.0, "step": 7140 }, { "epoch": 9.021637426900584, "grad_norm": 0.0004129679873585701, "learning_rate": 3.7686809616634176e-05, "loss": 0.0001, "step": 7150 }, { "epoch": 9.02208726945569, "grad_norm": 0.00029074549092911184, "learning_rate": 3.766181836357275e-05, "loss": 0.0, "step": 7160 }, { "epoch": 9.022537112010797, "grad_norm": 0.00554650230333209, "learning_rate": 3.7636827110511324e-05, "loss": 0.0, "step": 7170 }, { "epoch": 9.022986954565901, "grad_norm": 0.0001639636029722169, "learning_rate": 3.7611835857449894e-05, "loss": 0.0001, "step": 7180 }, { "epoch": 9.023436797121008, "grad_norm": 0.00014451149036176503, "learning_rate": 3.7586844604388465e-05, "loss": 0.0001, "step": 7190 }, { "epoch": 9.023886639676114, "grad_norm": 0.01696661114692688, "learning_rate": 3.7561853351327035e-05, "loss": 0.0001, "step": 7200 }, { "epoch": 9.024336482231218, "grad_norm": 0.00015327459550462663, "learning_rate": 3.7536862098265606e-05, "loss": 0.0002, "step": 7210 }, { "epoch": 9.024786324786325, "grad_norm": 0.0009352820343337953, "learning_rate": 3.7511870845204176e-05, "loss": 0.0, "step": 7220 }, { "epoch": 9.025236167341431, "grad_norm": 7.278043631231412e-05, "learning_rate": 3.7486879592142754e-05, "loss": 0.0, "step": 7230 }, { "epoch": 9.025686009896535, "grad_norm": 0.0002747975813690573, "learning_rate": 3.7461888339081324e-05, "loss": 0.0, "step": 7240 }, { "epoch": 9.026135852451642, "grad_norm": 0.0026837526820600033, "learning_rate": 3.7436897086019895e-05, "loss": 0.0, "step": 7250 }, { "epoch": 9.026585695006748, "grad_norm": 7.603276026202366e-05, "learning_rate": 3.7411905832958465e-05, "loss": 0.0, "step": 7260 }, { "epoch": 9.027035537561853, "grad_norm": 0.0011712366249412298, "learning_rate": 3.7386914579897036e-05, "loss": 0.0, "step": 7270 }, { "epoch": 9.027485380116959, "grad_norm": 0.002048843540251255, "learning_rate": 3.7361923326835606e-05, "loss": 0.0001, "step": 7280 }, { "epoch": 9.027935222672065, "grad_norm": 0.00020485164714045823, "learning_rate": 3.7336932073774184e-05, "loss": 0.0001, "step": 7290 }, { "epoch": 9.028385065227171, "grad_norm": 0.00042100687278434634, "learning_rate": 3.7311940820712754e-05, "loss": 0.0001, "step": 7300 }, { "epoch": 9.028834907782276, "grad_norm": 0.001057146117091179, "learning_rate": 3.7286949567651325e-05, "loss": 0.0, "step": 7310 }, { "epoch": 9.029284750337382, "grad_norm": 0.000271879427600652, "learning_rate": 3.7261958314589895e-05, "loss": 0.0, "step": 7320 }, { "epoch": 9.029734592892488, "grad_norm": 0.0002733645960688591, "learning_rate": 3.7236967061528466e-05, "loss": 0.0001, "step": 7330 }, { "epoch": 9.030184435447593, "grad_norm": 0.00014798995107412338, "learning_rate": 3.7211975808467036e-05, "loss": 0.0, "step": 7340 }, { "epoch": 9.0306342780027, "grad_norm": 0.00019370263908058405, "learning_rate": 3.718698455540561e-05, "loss": 0.4263, "step": 7350 }, { "epoch": 9.031084120557805, "grad_norm": 0.004500905517488718, "learning_rate": 3.7161993302344184e-05, "loss": 0.0, "step": 7360 }, { "epoch": 9.03153396311291, "grad_norm": 0.00010842781193787232, "learning_rate": 3.7137002049282754e-05, "loss": 0.0, "step": 7370 }, { "epoch": 9.031983805668016, "grad_norm": 0.00021446951723191887, "learning_rate": 3.7112010796221325e-05, "loss": 0.0, "step": 7380 }, { "epoch": 9.032433648223122, "grad_norm": 0.00016653561033308506, "learning_rate": 3.7087019543159896e-05, "loss": 0.0, "step": 7390 }, { "epoch": 9.032883490778227, "grad_norm": 0.0003031170053873211, "learning_rate": 3.7062028290098466e-05, "loss": 0.0001, "step": 7400 }, { "epoch": 9.033333333333333, "grad_norm": 0.018410811200737953, "learning_rate": 3.7037037037037037e-05, "loss": 0.0001, "step": 7410 }, { "epoch": 9.033333333333333, "eval_accuracy": 0.8883720930232558, "eval_f1": 0.8887358249124389, "eval_loss": 0.8474938869476318, "eval_runtime": 139.8731, "eval_samples_per_second": 1.537, "eval_steps_per_second": 1.537, "step": 7410 }, { "epoch": 10.000449842555106, "grad_norm": 0.00024528594803996384, "learning_rate": 3.7012045783975614e-05, "loss": 0.0055, "step": 7420 }, { "epoch": 10.00089968511021, "grad_norm": 0.0016245536971837282, "learning_rate": 3.6987054530914184e-05, "loss": 0.0, "step": 7430 }, { "epoch": 10.001349527665317, "grad_norm": 0.00041075420449487865, "learning_rate": 3.6962063277852755e-05, "loss": 0.0, "step": 7440 }, { "epoch": 10.001799370220423, "grad_norm": 0.0009351651533506811, "learning_rate": 3.6937072024791325e-05, "loss": 0.0001, "step": 7450 }, { "epoch": 10.002249212775528, "grad_norm": 0.0002798937784973532, "learning_rate": 3.6912080771729896e-05, "loss": 0.0, "step": 7460 }, { "epoch": 10.002699055330634, "grad_norm": 0.00011713150888681412, "learning_rate": 3.6887089518668466e-05, "loss": 0.0306, "step": 7470 }, { "epoch": 10.00314889788574, "grad_norm": 32.94483184814453, "learning_rate": 3.686209826560704e-05, "loss": 0.4351, "step": 7480 }, { "epoch": 10.003598740440845, "grad_norm": 0.0012271259911358356, "learning_rate": 3.6837107012545614e-05, "loss": 0.0001, "step": 7490 }, { "epoch": 10.004048582995951, "grad_norm": 0.0008923054556362331, "learning_rate": 3.6812115759484185e-05, "loss": 0.0001, "step": 7500 }, { "epoch": 10.004498425551057, "grad_norm": 0.00240414054132998, "learning_rate": 3.6787124506422755e-05, "loss": 0.8894, "step": 7510 }, { "epoch": 10.004948268106164, "grad_norm": 0.00023968762252479792, "learning_rate": 3.6762133253361326e-05, "loss": 0.0001, "step": 7520 }, { "epoch": 10.005398110661268, "grad_norm": 0.00025512039428576827, "learning_rate": 3.6737142000299896e-05, "loss": 0.0004, "step": 7530 }, { "epoch": 10.005847953216374, "grad_norm": 0.012755263596773148, "learning_rate": 3.671215074723847e-05, "loss": 0.0001, "step": 7540 }, { "epoch": 10.00629779577148, "grad_norm": 0.0004144888080190867, "learning_rate": 3.6687159494177044e-05, "loss": 0.0002, "step": 7550 }, { "epoch": 10.006747638326585, "grad_norm": 0.0001776112912921235, "learning_rate": 3.6662168241115615e-05, "loss": 0.0002, "step": 7560 }, { "epoch": 10.007197480881691, "grad_norm": 0.0007238044054247439, "learning_rate": 3.6637176988054185e-05, "loss": 0.0001, "step": 7570 }, { "epoch": 10.007647323436798, "grad_norm": 0.00015240258653648198, "learning_rate": 3.6612185734992756e-05, "loss": 0.0001, "step": 7580 }, { "epoch": 10.008097165991902, "grad_norm": 0.008551902137696743, "learning_rate": 3.6587194481931326e-05, "loss": 0.0002, "step": 7590 }, { "epoch": 10.008547008547009, "grad_norm": 0.00021076900884509087, "learning_rate": 3.65622032288699e-05, "loss": 0.0001, "step": 7600 }, { "epoch": 10.008996851102115, "grad_norm": 0.0001914712047437206, "learning_rate": 3.653721197580847e-05, "loss": 0.0, "step": 7610 }, { "epoch": 10.00944669365722, "grad_norm": 0.009119285270571709, "learning_rate": 3.6512220722747045e-05, "loss": 0.0, "step": 7620 }, { "epoch": 10.009896536212326, "grad_norm": 0.00012121932377340272, "learning_rate": 3.6487229469685615e-05, "loss": 0.0, "step": 7630 }, { "epoch": 10.010346378767432, "grad_norm": 6.134089198894799e-05, "learning_rate": 3.6462238216624186e-05, "loss": 0.0, "step": 7640 }, { "epoch": 10.010796221322536, "grad_norm": 0.0002513824438210577, "learning_rate": 3.6437246963562756e-05, "loss": 0.0001, "step": 7650 }, { "epoch": 10.011246063877643, "grad_norm": 0.00018178249592892826, "learning_rate": 3.6412255710501327e-05, "loss": 0.0001, "step": 7660 }, { "epoch": 10.011695906432749, "grad_norm": 0.00022319064009934664, "learning_rate": 3.63872644574399e-05, "loss": 0.0001, "step": 7670 }, { "epoch": 10.012145748987853, "grad_norm": 0.0003128175449091941, "learning_rate": 3.6362273204378474e-05, "loss": 0.0, "step": 7680 }, { "epoch": 10.01259559154296, "grad_norm": 0.00024288838903885335, "learning_rate": 3.6337281951317045e-05, "loss": 0.0, "step": 7690 }, { "epoch": 10.013045434098066, "grad_norm": 1.4908808469772339, "learning_rate": 3.6312290698255615e-05, "loss": 0.0047, "step": 7700 }, { "epoch": 10.013495276653172, "grad_norm": 0.00025356997502967715, "learning_rate": 3.6287299445194186e-05, "loss": 0.0, "step": 7710 }, { "epoch": 10.013945119208277, "grad_norm": 0.0025148168206214905, "learning_rate": 3.6262308192132756e-05, "loss": 0.0, "step": 7720 }, { "epoch": 10.014394961763383, "grad_norm": 0.00045464435243047774, "learning_rate": 3.623731693907133e-05, "loss": 0.0, "step": 7730 }, { "epoch": 10.01484480431849, "grad_norm": 0.0001578824158059433, "learning_rate": 3.62123256860099e-05, "loss": 0.0002, "step": 7740 }, { "epoch": 10.015294646873594, "grad_norm": 0.0001619454415049404, "learning_rate": 3.6187334432948475e-05, "loss": 0.0, "step": 7750 }, { "epoch": 10.0157444894287, "grad_norm": 0.0025609820149838924, "learning_rate": 3.6162343179887045e-05, "loss": 0.0, "step": 7760 }, { "epoch": 10.016194331983806, "grad_norm": 0.00015278044156730175, "learning_rate": 3.6137351926825616e-05, "loss": 0.0, "step": 7770 }, { "epoch": 10.01664417453891, "grad_norm": 0.00010679446131689474, "learning_rate": 3.6112360673764186e-05, "loss": 0.0, "step": 7780 }, { "epoch": 10.017094017094017, "grad_norm": 0.00017766524979379028, "learning_rate": 3.608736942070276e-05, "loss": 0.0, "step": 7790 }, { "epoch": 10.017543859649123, "grad_norm": 0.001718810759484768, "learning_rate": 3.606237816764133e-05, "loss": 0.0, "step": 7800 }, { "epoch": 10.017993702204228, "grad_norm": 0.045913103967905045, "learning_rate": 3.6037386914579905e-05, "loss": 0.0001, "step": 7810 }, { "epoch": 10.018443544759334, "grad_norm": 0.00011542309948708862, "learning_rate": 3.6012395661518475e-05, "loss": 0.0, "step": 7820 }, { "epoch": 10.01889338731444, "grad_norm": 0.00016304092423524708, "learning_rate": 3.5987404408457046e-05, "loss": 0.402, "step": 7830 }, { "epoch": 10.019343229869545, "grad_norm": 0.00015203592192847282, "learning_rate": 3.5962413155395616e-05, "loss": 0.6809, "step": 7840 }, { "epoch": 10.019793072424651, "grad_norm": 0.001909156795591116, "learning_rate": 3.593742190233419e-05, "loss": 0.0, "step": 7850 }, { "epoch": 10.020242914979757, "grad_norm": 0.00022746258764527738, "learning_rate": 3.591243064927276e-05, "loss": 0.0, "step": 7860 }, { "epoch": 10.020692757534864, "grad_norm": 0.0001504740648670122, "learning_rate": 3.588743939621133e-05, "loss": 0.0, "step": 7870 }, { "epoch": 10.021142600089968, "grad_norm": 0.00018601583724375814, "learning_rate": 3.58624481431499e-05, "loss": 0.7792, "step": 7880 }, { "epoch": 10.021592442645074, "grad_norm": 31.400115966796875, "learning_rate": 3.583745689008847e-05, "loss": 0.6299, "step": 7890 }, { "epoch": 10.02204228520018, "grad_norm": 0.0003739451931323856, "learning_rate": 3.5812465637027046e-05, "loss": 0.0005, "step": 7900 }, { "epoch": 10.022492127755285, "grad_norm": 0.00020355303422547877, "learning_rate": 3.578747438396562e-05, "loss": 0.0, "step": 7910 }, { "epoch": 10.022941970310391, "grad_norm": 0.0003386130556464195, "learning_rate": 3.576248313090419e-05, "loss": 0.0001, "step": 7920 }, { "epoch": 10.023391812865498, "grad_norm": 0.0002632398682180792, "learning_rate": 3.573749187784276e-05, "loss": 0.0002, "step": 7930 }, { "epoch": 10.023841655420602, "grad_norm": 0.00021201920753810555, "learning_rate": 3.571250062478133e-05, "loss": 0.552, "step": 7940 }, { "epoch": 10.024291497975709, "grad_norm": 0.000274560006801039, "learning_rate": 3.56875093717199e-05, "loss": 0.0, "step": 7950 }, { "epoch": 10.024741340530815, "grad_norm": 0.00022992574668023735, "learning_rate": 3.566251811865847e-05, "loss": 0.0001, "step": 7960 }, { "epoch": 10.02519118308592, "grad_norm": 8.076251833699644e-05, "learning_rate": 3.563752686559704e-05, "loss": 0.0, "step": 7970 }, { "epoch": 10.025641025641026, "grad_norm": 3.499783992767334, "learning_rate": 3.561253561253561e-05, "loss": 0.0116, "step": 7980 }, { "epoch": 10.026090868196132, "grad_norm": 0.00017006031703203917, "learning_rate": 3.558754435947419e-05, "loss": 0.0, "step": 7990 }, { "epoch": 10.026540710751236, "grad_norm": 0.003741120221093297, "learning_rate": 3.556255310641276e-05, "loss": 0.0, "step": 8000 }, { "epoch": 10.026990553306343, "grad_norm": 0.00023381954815704376, "learning_rate": 3.553756185335133e-05, "loss": 0.0, "step": 8010 }, { "epoch": 10.027440395861449, "grad_norm": 0.025399984791874886, "learning_rate": 3.55125706002899e-05, "loss": 0.0, "step": 8020 }, { "epoch": 10.027890238416553, "grad_norm": 0.00022456375882029533, "learning_rate": 3.548757934722847e-05, "loss": 0.0, "step": 8030 }, { "epoch": 10.02834008097166, "grad_norm": 0.0019360154401510954, "learning_rate": 3.546258809416704e-05, "loss": 0.7898, "step": 8040 }, { "epoch": 10.028789923526766, "grad_norm": 0.004126640502363443, "learning_rate": 3.543759684110561e-05, "loss": 1.1777, "step": 8050 }, { "epoch": 10.029239766081872, "grad_norm": 0.00023276350111700594, "learning_rate": 3.541260558804418e-05, "loss": 0.0, "step": 8060 }, { "epoch": 10.029689608636977, "grad_norm": 0.011403778567910194, "learning_rate": 3.538761433498276e-05, "loss": 0.0001, "step": 8070 }, { "epoch": 10.030139451192083, "grad_norm": 0.001898506423458457, "learning_rate": 3.536262308192133e-05, "loss": 0.5942, "step": 8080 }, { "epoch": 10.03058929374719, "grad_norm": 0.0008307425887323916, "learning_rate": 3.53376318288599e-05, "loss": 0.0001, "step": 8090 }, { "epoch": 10.031039136302294, "grad_norm": 0.0002182068128604442, "learning_rate": 3.531264057579847e-05, "loss": 0.7595, "step": 8100 }, { "epoch": 10.0314889788574, "grad_norm": 0.00015726529818493873, "learning_rate": 3.528764932273704e-05, "loss": 0.0004, "step": 8110 }, { "epoch": 10.031938821412506, "grad_norm": 0.0001907955011120066, "learning_rate": 3.526265806967561e-05, "loss": 0.0003, "step": 8120 }, { "epoch": 10.03238866396761, "grad_norm": 0.006571646314114332, "learning_rate": 3.523766681661418e-05, "loss": 0.0002, "step": 8130 }, { "epoch": 10.032838506522717, "grad_norm": 0.0750848576426506, "learning_rate": 3.521267556355276e-05, "loss": 0.0002, "step": 8140 }, { "epoch": 10.033288349077823, "grad_norm": 0.0016188470181077719, "learning_rate": 3.518768431049133e-05, "loss": 0.0001, "step": 8150 }, { "epoch": 10.033333333333333, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.897161269054231, "eval_loss": 0.7335283160209656, "eval_runtime": 135.8702, "eval_samples_per_second": 1.582, "eval_steps_per_second": 1.582, "step": 8151 }, { "epoch": 11.000404858299595, "grad_norm": 0.001428902498446405, "learning_rate": 3.51626930574299e-05, "loss": 0.0, "step": 8160 }, { "epoch": 11.0008547008547, "grad_norm": 0.002165209036320448, "learning_rate": 3.513770180436847e-05, "loss": 0.0, "step": 8170 }, { "epoch": 11.001304543409807, "grad_norm": 0.0007204430876299739, "learning_rate": 3.511271055130704e-05, "loss": 0.0, "step": 8180 }, { "epoch": 11.001754385964912, "grad_norm": 0.009247183799743652, "learning_rate": 3.508771929824561e-05, "loss": 0.0, "step": 8190 }, { "epoch": 11.002204228520018, "grad_norm": 0.004571564961224794, "learning_rate": 3.506272804518419e-05, "loss": 0.0, "step": 8200 }, { "epoch": 11.002654071075124, "grad_norm": 0.0008249584934674203, "learning_rate": 3.503773679212276e-05, "loss": 0.0001, "step": 8210 }, { "epoch": 11.003103913630229, "grad_norm": 0.0015224753879010677, "learning_rate": 3.501274553906133e-05, "loss": 0.0, "step": 8220 }, { "epoch": 11.003553756185335, "grad_norm": 0.001436219783499837, "learning_rate": 3.49877542859999e-05, "loss": 0.0, "step": 8230 }, { "epoch": 11.004003598740441, "grad_norm": 0.00047727636410854757, "learning_rate": 3.496276303293847e-05, "loss": 0.0, "step": 8240 }, { "epoch": 11.004453441295546, "grad_norm": 0.0011544268345460296, "learning_rate": 3.493777177987704e-05, "loss": 0.0, "step": 8250 }, { "epoch": 11.004903283850652, "grad_norm": 3.234441464883275e-05, "learning_rate": 3.491278052681561e-05, "loss": 0.0, "step": 8260 }, { "epoch": 11.005353126405758, "grad_norm": 0.00013633703929372132, "learning_rate": 3.488778927375419e-05, "loss": 0.0, "step": 8270 }, { "epoch": 11.005802968960865, "grad_norm": 0.0005703929928131402, "learning_rate": 3.486279802069276e-05, "loss": 0.0, "step": 8280 }, { "epoch": 11.006252811515969, "grad_norm": 0.00037035878631286323, "learning_rate": 3.483780676763133e-05, "loss": 0.0, "step": 8290 }, { "epoch": 11.006702654071075, "grad_norm": 0.0004477359470911324, "learning_rate": 3.48128155145699e-05, "loss": 0.0, "step": 8300 }, { "epoch": 11.007152496626182, "grad_norm": 0.0006062974571250379, "learning_rate": 3.478782426150847e-05, "loss": 0.0, "step": 8310 }, { "epoch": 11.007602339181286, "grad_norm": 0.000724676763638854, "learning_rate": 3.476283300844704e-05, "loss": 0.0, "step": 8320 }, { "epoch": 11.008052181736392, "grad_norm": 0.0002412635221844539, "learning_rate": 3.473784175538562e-05, "loss": 0.0, "step": 8330 }, { "epoch": 11.008502024291499, "grad_norm": 0.00020055378263350576, "learning_rate": 3.471285050232419e-05, "loss": 0.0, "step": 8340 }, { "epoch": 11.008951866846603, "grad_norm": 0.004657375160604715, "learning_rate": 3.468785924926276e-05, "loss": 0.0, "step": 8350 }, { "epoch": 11.00940170940171, "grad_norm": 0.0004026202659588307, "learning_rate": 3.466286799620133e-05, "loss": 0.0, "step": 8360 }, { "epoch": 11.009851551956816, "grad_norm": 0.0008407204877585173, "learning_rate": 3.46378767431399e-05, "loss": 0.0, "step": 8370 }, { "epoch": 11.01030139451192, "grad_norm": 0.00027741765370592475, "learning_rate": 3.461288549007847e-05, "loss": 0.0, "step": 8380 }, { "epoch": 11.010751237067026, "grad_norm": 0.00032141339033842087, "learning_rate": 3.458789423701704e-05, "loss": 0.0, "step": 8390 }, { "epoch": 11.011201079622133, "grad_norm": 0.0013355346163734794, "learning_rate": 3.456290298395562e-05, "loss": 0.0, "step": 8400 }, { "epoch": 11.011650922177237, "grad_norm": 0.00030394369969144464, "learning_rate": 3.453791173089419e-05, "loss": 0.0, "step": 8410 }, { "epoch": 11.012100764732343, "grad_norm": 0.0005493278731592, "learning_rate": 3.451292047783276e-05, "loss": 0.0, "step": 8420 }, { "epoch": 11.01255060728745, "grad_norm": 0.0014805661048740149, "learning_rate": 3.448792922477133e-05, "loss": 0.0, "step": 8430 }, { "epoch": 11.013000449842554, "grad_norm": 5.258614328340627e-05, "learning_rate": 3.44629379717099e-05, "loss": 0.0, "step": 8440 }, { "epoch": 11.01345029239766, "grad_norm": 0.0007955559412948787, "learning_rate": 3.443794671864847e-05, "loss": 0.0001, "step": 8450 }, { "epoch": 11.013900134952767, "grad_norm": 0.0003474180994089693, "learning_rate": 3.441295546558704e-05, "loss": 0.0, "step": 8460 }, { "epoch": 11.014349977507873, "grad_norm": 0.000871289346832782, "learning_rate": 3.438796421252562e-05, "loss": 0.0, "step": 8470 }, { "epoch": 11.014799820062978, "grad_norm": 0.07600659132003784, "learning_rate": 3.436297295946419e-05, "loss": 0.0001, "step": 8480 }, { "epoch": 11.015249662618084, "grad_norm": 0.00020997344108764082, "learning_rate": 3.433798170640276e-05, "loss": 0.0, "step": 8490 }, { "epoch": 11.01569950517319, "grad_norm": 0.0003163398359902203, "learning_rate": 3.431299045334133e-05, "loss": 0.0, "step": 8500 }, { "epoch": 11.016149347728295, "grad_norm": 0.001323432195931673, "learning_rate": 3.42879992002799e-05, "loss": 0.0, "step": 8510 }, { "epoch": 11.0165991902834, "grad_norm": 0.0006478857831098139, "learning_rate": 3.426300794721847e-05, "loss": 0.0, "step": 8520 }, { "epoch": 11.017049032838507, "grad_norm": 0.000311774667352438, "learning_rate": 3.423801669415705e-05, "loss": 0.0, "step": 8530 }, { "epoch": 11.017498875393612, "grad_norm": 0.0005344708915799856, "learning_rate": 3.421302544109562e-05, "loss": 0.0, "step": 8540 }, { "epoch": 11.017948717948718, "grad_norm": 0.0002655949501786381, "learning_rate": 3.418803418803419e-05, "loss": 0.0, "step": 8550 }, { "epoch": 11.018398560503824, "grad_norm": 0.00027809853781946003, "learning_rate": 3.416304293497276e-05, "loss": 0.0, "step": 8560 }, { "epoch": 11.018848403058929, "grad_norm": 0.0005219580489210784, "learning_rate": 3.413805168191133e-05, "loss": 0.0, "step": 8570 }, { "epoch": 11.019298245614035, "grad_norm": 0.00022840422752778977, "learning_rate": 3.41130604288499e-05, "loss": 0.0, "step": 8580 }, { "epoch": 11.019748088169141, "grad_norm": 0.0001896558969747275, "learning_rate": 3.408806917578847e-05, "loss": 0.0, "step": 8590 }, { "epoch": 11.020197930724246, "grad_norm": 0.0002398019569227472, "learning_rate": 3.406307792272705e-05, "loss": 0.0, "step": 8600 }, { "epoch": 11.020647773279352, "grad_norm": 0.0006122702034190297, "learning_rate": 3.403808666966562e-05, "loss": 0.0, "step": 8610 }, { "epoch": 11.021097615834458, "grad_norm": 0.00022049447579775006, "learning_rate": 3.401309541660419e-05, "loss": 0.0, "step": 8620 }, { "epoch": 11.021547458389565, "grad_norm": 0.000534306513145566, "learning_rate": 3.398810416354276e-05, "loss": 0.0, "step": 8630 }, { "epoch": 11.021997300944669, "grad_norm": 0.00017685074999462813, "learning_rate": 3.396311291048133e-05, "loss": 0.0, "step": 8640 }, { "epoch": 11.022447143499775, "grad_norm": 0.00019588108989410102, "learning_rate": 3.39381216574199e-05, "loss": 0.0, "step": 8650 }, { "epoch": 11.022896986054882, "grad_norm": 0.00020436478371266276, "learning_rate": 3.391313040435848e-05, "loss": 0.0, "step": 8660 }, { "epoch": 11.023346828609986, "grad_norm": 0.0005555666284635663, "learning_rate": 3.388813915129705e-05, "loss": 0.0, "step": 8670 }, { "epoch": 11.023796671165092, "grad_norm": 0.00168550421949476, "learning_rate": 3.386314789823562e-05, "loss": 0.0, "step": 8680 }, { "epoch": 11.024246513720199, "grad_norm": 0.0005679897731170058, "learning_rate": 3.383815664517419e-05, "loss": 0.0, "step": 8690 }, { "epoch": 11.024696356275303, "grad_norm": 0.00011237384023843333, "learning_rate": 3.381316539211276e-05, "loss": 0.0, "step": 8700 }, { "epoch": 11.02514619883041, "grad_norm": 0.00018332702165935189, "learning_rate": 3.378817413905133e-05, "loss": 0.0033, "step": 8710 }, { "epoch": 11.025596041385516, "grad_norm": 0.00036435856600292027, "learning_rate": 3.37631828859899e-05, "loss": 0.0, "step": 8720 }, { "epoch": 11.02604588394062, "grad_norm": 0.0001401156187057495, "learning_rate": 3.373819163292848e-05, "loss": 0.0013, "step": 8730 }, { "epoch": 11.026495726495726, "grad_norm": 0.0003301562392152846, "learning_rate": 3.371320037986705e-05, "loss": 0.0, "step": 8740 }, { "epoch": 11.026945569050833, "grad_norm": 0.00021467800252139568, "learning_rate": 3.368820912680562e-05, "loss": 0.5155, "step": 8750 }, { "epoch": 11.027395411605937, "grad_norm": 0.00021535850828513503, "learning_rate": 3.366321787374419e-05, "loss": 0.0, "step": 8760 }, { "epoch": 11.027845254161043, "grad_norm": 0.0002004179114010185, "learning_rate": 3.363822662068276e-05, "loss": 0.0, "step": 8770 }, { "epoch": 11.02829509671615, "grad_norm": 0.00015145773068070412, "learning_rate": 3.361323536762133e-05, "loss": 0.4842, "step": 8780 }, { "epoch": 11.028744939271254, "grad_norm": 0.0002250123507110402, "learning_rate": 3.358824411455991e-05, "loss": 0.0, "step": 8790 }, { "epoch": 11.02919478182636, "grad_norm": 0.00022337694827001542, "learning_rate": 3.356325286149848e-05, "loss": 0.0, "step": 8800 }, { "epoch": 11.029644624381467, "grad_norm": 0.001533370348624885, "learning_rate": 3.353826160843705e-05, "loss": 0.0, "step": 8810 }, { "epoch": 11.030094466936573, "grad_norm": 0.0010389601811766624, "learning_rate": 3.351327035537562e-05, "loss": 0.0, "step": 8820 }, { "epoch": 11.030544309491678, "grad_norm": 0.002386595821008086, "learning_rate": 3.348827910231419e-05, "loss": 0.0, "step": 8830 }, { "epoch": 11.030994152046784, "grad_norm": 0.0001584437268320471, "learning_rate": 3.346328784925276e-05, "loss": 0.0, "step": 8840 }, { "epoch": 11.03144399460189, "grad_norm": 0.0001821911137085408, "learning_rate": 3.343829659619133e-05, "loss": 0.0, "step": 8850 }, { "epoch": 11.031893837156995, "grad_norm": 5.665854041581042e-05, "learning_rate": 3.341330534312991e-05, "loss": 0.0, "step": 8860 }, { "epoch": 11.0323436797121, "grad_norm": 0.0030433007050305605, "learning_rate": 3.338831409006848e-05, "loss": 0.0001, "step": 8870 }, { "epoch": 11.032793522267207, "grad_norm": 0.0015938675496727228, "learning_rate": 3.336332283700705e-05, "loss": 0.3367, "step": 8880 }, { "epoch": 11.033243364822312, "grad_norm": 4.122028985875659e-05, "learning_rate": 3.333833158394562e-05, "loss": 0.0, "step": 8890 }, { "epoch": 11.033333333333333, "eval_accuracy": 0.9069767441860465, "eval_f1": 0.9053870531009628, "eval_loss": 0.7774099111557007, "eval_runtime": 141.4542, "eval_samples_per_second": 1.52, "eval_steps_per_second": 1.52, "step": 8892 }, { "epoch": 12.000359874044085, "grad_norm": 0.00020151936041656882, "learning_rate": 3.331334033088419e-05, "loss": 0.0, "step": 8900 }, { "epoch": 12.000809716599191, "grad_norm": 0.00013850448885932565, "learning_rate": 3.328834907782276e-05, "loss": 0.0, "step": 8910 }, { "epoch": 12.001259559154295, "grad_norm": 0.00021233613369986415, "learning_rate": 3.326335782476134e-05, "loss": 0.0, "step": 8920 }, { "epoch": 12.001709401709402, "grad_norm": 0.00012982673069927841, "learning_rate": 3.323836657169991e-05, "loss": 0.0, "step": 8930 }, { "epoch": 12.002159244264508, "grad_norm": 3.26802921295166, "learning_rate": 3.321337531863848e-05, "loss": 0.0107, "step": 8940 }, { "epoch": 12.002609086819612, "grad_norm": 0.00024683051742613316, "learning_rate": 3.318838406557705e-05, "loss": 0.0, "step": 8950 }, { "epoch": 12.003058929374719, "grad_norm": 5.0081136578228325e-05, "learning_rate": 3.316339281251562e-05, "loss": 0.0, "step": 8960 }, { "epoch": 12.003508771929825, "grad_norm": 0.00014993154036346823, "learning_rate": 3.313840155945419e-05, "loss": 0.0, "step": 8970 }, { "epoch": 12.00395861448493, "grad_norm": 0.0003631724393926561, "learning_rate": 3.3113410306392764e-05, "loss": 0.0, "step": 8980 }, { "epoch": 12.004408457040036, "grad_norm": 5.246075670584105e-05, "learning_rate": 3.308841905333134e-05, "loss": 0.0, "step": 8990 }, { "epoch": 12.004858299595142, "grad_norm": 3.658925925265066e-05, "learning_rate": 3.306342780026991e-05, "loss": 0.0, "step": 9000 }, { "epoch": 12.005308142150247, "grad_norm": 0.00019986837287433445, "learning_rate": 3.303843654720848e-05, "loss": 0.0, "step": 9010 }, { "epoch": 12.005757984705353, "grad_norm": 0.0001759282749844715, "learning_rate": 3.301344529414705e-05, "loss": 0.0, "step": 9020 }, { "epoch": 12.006207827260459, "grad_norm": 0.0005781189538538456, "learning_rate": 3.298845404108562e-05, "loss": 0.0, "step": 9030 }, { "epoch": 12.006657669815565, "grad_norm": 0.00017990671040024608, "learning_rate": 3.2963462788024193e-05, "loss": 0.0, "step": 9040 }, { "epoch": 12.00710751237067, "grad_norm": 0.00021019112318754196, "learning_rate": 3.2938471534962764e-05, "loss": 0.0, "step": 9050 }, { "epoch": 12.007557354925776, "grad_norm": 0.00022014744172338396, "learning_rate": 3.2913480281901335e-05, "loss": 0.0, "step": 9060 }, { "epoch": 12.008007197480882, "grad_norm": 0.00015751954924780875, "learning_rate": 3.2888489028839905e-05, "loss": 0.0, "step": 9070 }, { "epoch": 12.008457040035987, "grad_norm": 0.00013810979726258665, "learning_rate": 3.286349777577848e-05, "loss": 0.0, "step": 9080 }, { "epoch": 12.008906882591093, "grad_norm": 0.00017571424541529268, "learning_rate": 3.283850652271705e-05, "loss": 0.0, "step": 9090 }, { "epoch": 12.0093567251462, "grad_norm": 0.00015500276640523225, "learning_rate": 3.281351526965562e-05, "loss": 0.0, "step": 9100 }, { "epoch": 12.009806567701304, "grad_norm": 0.0001064740281435661, "learning_rate": 3.2788524016594194e-05, "loss": 0.0, "step": 9110 }, { "epoch": 12.01025641025641, "grad_norm": 0.00011470354365883395, "learning_rate": 3.2763532763532764e-05, "loss": 0.0, "step": 9120 }, { "epoch": 12.010706252811516, "grad_norm": 0.0006126867956481874, "learning_rate": 3.2738541510471335e-05, "loss": 0.0, "step": 9130 }, { "epoch": 12.011156095366621, "grad_norm": 0.0006744645652361214, "learning_rate": 3.2713550257409905e-05, "loss": 0.0, "step": 9140 }, { "epoch": 12.011605937921727, "grad_norm": 0.0001367456716252491, "learning_rate": 3.2688559004348476e-05, "loss": 0.0, "step": 9150 }, { "epoch": 12.012055780476834, "grad_norm": 8.94782569957897e-05, "learning_rate": 3.2663567751287046e-05, "loss": 0.0, "step": 9160 }, { "epoch": 12.012505623031938, "grad_norm": 0.00015804458234924823, "learning_rate": 3.263857649822562e-05, "loss": 0.0, "step": 9170 }, { "epoch": 12.012955465587044, "grad_norm": 0.00017785992531571537, "learning_rate": 3.2613585245164194e-05, "loss": 0.0, "step": 9180 }, { "epoch": 12.01340530814215, "grad_norm": 0.0001567164290463552, "learning_rate": 3.2588593992102765e-05, "loss": 0.0, "step": 9190 }, { "epoch": 12.013855150697257, "grad_norm": 0.00015910508227534592, "learning_rate": 3.2563602739041335e-05, "loss": 0.0, "step": 9200 }, { "epoch": 12.014304993252361, "grad_norm": 0.0006528611411340535, "learning_rate": 3.2538611485979906e-05, "loss": 0.0, "step": 9210 }, { "epoch": 12.014754835807468, "grad_norm": 0.00017753789143171161, "learning_rate": 3.2513620232918476e-05, "loss": 0.0, "step": 9220 }, { "epoch": 12.015204678362574, "grad_norm": 0.0005326801910996437, "learning_rate": 3.248862897985705e-05, "loss": 0.0, "step": 9230 }, { "epoch": 12.015654520917678, "grad_norm": 0.0004457755421753973, "learning_rate": 3.246363772679562e-05, "loss": 0.0, "step": 9240 }, { "epoch": 12.016104363472785, "grad_norm": 0.0001971879682969302, "learning_rate": 3.2438646473734195e-05, "loss": 0.0, "step": 9250 }, { "epoch": 12.016554206027891, "grad_norm": 0.0001983325491892174, "learning_rate": 3.2413655220672765e-05, "loss": 0.0, "step": 9260 }, { "epoch": 12.017004048582995, "grad_norm": 0.0005349721759557724, "learning_rate": 3.2388663967611336e-05, "loss": 0.0, "step": 9270 }, { "epoch": 12.017453891138102, "grad_norm": 0.00017649315122980624, "learning_rate": 3.2363672714549906e-05, "loss": 0.0, "step": 9280 }, { "epoch": 12.017903733693208, "grad_norm": 0.000954252784140408, "learning_rate": 3.233868146148848e-05, "loss": 0.0, "step": 9290 }, { "epoch": 12.018353576248312, "grad_norm": 0.00014862856187392026, "learning_rate": 3.231369020842705e-05, "loss": 0.0, "step": 9300 }, { "epoch": 12.018803418803419, "grad_norm": 0.00023731630062684417, "learning_rate": 3.2288698955365625e-05, "loss": 0.0, "step": 9310 }, { "epoch": 12.019253261358525, "grad_norm": 7.314868707908317e-05, "learning_rate": 3.2263707702304195e-05, "loss": 0.0, "step": 9320 }, { "epoch": 12.01970310391363, "grad_norm": 0.00014190156070981175, "learning_rate": 3.2238716449242766e-05, "loss": 0.0, "step": 9330 }, { "epoch": 12.020152946468736, "grad_norm": 0.00014419823128264397, "learning_rate": 3.2213725196181336e-05, "loss": 0.3148, "step": 9340 }, { "epoch": 12.020602789023842, "grad_norm": 7.464476948371157e-05, "learning_rate": 3.218873394311991e-05, "loss": 0.0, "step": 9350 }, { "epoch": 12.021052631578947, "grad_norm": 7.09200685378164e-05, "learning_rate": 3.216374269005848e-05, "loss": 0.0, "step": 9360 }, { "epoch": 12.021502474134053, "grad_norm": 0.0005207779468037188, "learning_rate": 3.213875143699705e-05, "loss": 0.0, "step": 9370 }, { "epoch": 12.021952316689159, "grad_norm": 0.00021795896464027464, "learning_rate": 3.2113760183935625e-05, "loss": 0.0, "step": 9380 }, { "epoch": 12.022402159244265, "grad_norm": 0.00011653057299554348, "learning_rate": 3.2088768930874195e-05, "loss": 0.0, "step": 9390 }, { "epoch": 12.02285200179937, "grad_norm": 7.740705041214824e-05, "learning_rate": 3.2063777677812766e-05, "loss": 0.0, "step": 9400 }, { "epoch": 12.023301844354476, "grad_norm": 2.158115603378974e-05, "learning_rate": 3.2038786424751337e-05, "loss": 0.0, "step": 9410 }, { "epoch": 12.023751686909582, "grad_norm": 0.00024682231014594436, "learning_rate": 3.201379517168991e-05, "loss": 0.0, "step": 9420 }, { "epoch": 12.024201529464687, "grad_norm": 0.00012609024997800589, "learning_rate": 3.198880391862848e-05, "loss": 0.0, "step": 9430 }, { "epoch": 12.024651372019793, "grad_norm": 3.3463482395745814e-05, "learning_rate": 3.1963812665567055e-05, "loss": 0.0, "step": 9440 }, { "epoch": 12.0251012145749, "grad_norm": 8.899492240743712e-05, "learning_rate": 3.1938821412505625e-05, "loss": 0.0, "step": 9450 }, { "epoch": 12.025551057130004, "grad_norm": 0.00047481912770308554, "learning_rate": 3.1913830159444196e-05, "loss": 0.0229, "step": 9460 }, { "epoch": 12.02600089968511, "grad_norm": 0.00015337904915213585, "learning_rate": 3.1888838906382766e-05, "loss": 0.0, "step": 9470 }, { "epoch": 12.026450742240216, "grad_norm": 0.0001666390453465283, "learning_rate": 3.186384765332134e-05, "loss": 0.0, "step": 9480 }, { "epoch": 12.026900584795321, "grad_norm": 0.00015255472681019455, "learning_rate": 3.183885640025991e-05, "loss": 0.0, "step": 9490 }, { "epoch": 12.027350427350427, "grad_norm": 3.052962303161621, "learning_rate": 3.181386514719848e-05, "loss": 0.3157, "step": 9500 }, { "epoch": 12.027800269905534, "grad_norm": 0.00015363984857685864, "learning_rate": 3.1788873894137055e-05, "loss": 0.0, "step": 9510 }, { "epoch": 12.028250112460638, "grad_norm": 0.0002119541313732043, "learning_rate": 3.1763882641075626e-05, "loss": 0.3137, "step": 9520 }, { "epoch": 12.028699955015744, "grad_norm": 0.002833995735272765, "learning_rate": 3.1738891388014196e-05, "loss": 0.0, "step": 9530 }, { "epoch": 12.02914979757085, "grad_norm": 0.0011035765055567026, "learning_rate": 3.171390013495277e-05, "loss": 0.0, "step": 9540 }, { "epoch": 12.029599640125955, "grad_norm": 0.0014688618248328567, "learning_rate": 3.168890888189134e-05, "loss": 0.0, "step": 9550 }, { "epoch": 12.030049482681061, "grad_norm": 0.0011424071853980422, "learning_rate": 3.166391762882991e-05, "loss": 0.0, "step": 9560 }, { "epoch": 12.030499325236168, "grad_norm": 0.0001472189906053245, "learning_rate": 3.1638926375768485e-05, "loss": 0.0, "step": 9570 }, { "epoch": 12.030949167791274, "grad_norm": 0.0003368165052961558, "learning_rate": 3.1613935122707056e-05, "loss": 0.0, "step": 9580 }, { "epoch": 12.031399010346378, "grad_norm": 2.6690780941862613e-05, "learning_rate": 3.1588943869645626e-05, "loss": 0.0, "step": 9590 }, { "epoch": 12.031848852901485, "grad_norm": 0.00013679896073881537, "learning_rate": 3.15639526165842e-05, "loss": 0.0, "step": 9600 }, { "epoch": 12.032298695456591, "grad_norm": 0.00015291971794795245, "learning_rate": 3.153896136352277e-05, "loss": 0.0, "step": 9610 }, { "epoch": 12.032748538011695, "grad_norm": 6.685734842903912e-05, "learning_rate": 3.151397011046134e-05, "loss": 0.0, "step": 9620 }, { "epoch": 12.033198380566802, "grad_norm": 0.0008128658519126475, "learning_rate": 3.148897885739991e-05, "loss": 0.0, "step": 9630 }, { "epoch": 12.033333333333333, "eval_accuracy": 0.9116279069767442, "eval_f1": 0.9106815288812778, "eval_loss": 0.7346043586730957, "eval_runtime": 144.5835, "eval_samples_per_second": 1.487, "eval_steps_per_second": 1.487, "step": 9633 }, { "epoch": 13.000314889788575, "grad_norm": 2.2119875211501494e-05, "learning_rate": 3.1463987604338486e-05, "loss": 0.0, "step": 9640 }, { "epoch": 13.00076473234368, "grad_norm": 29.083669662475586, "learning_rate": 3.1438996351277056e-05, "loss": 0.2284, "step": 9650 }, { "epoch": 13.001214574898786, "grad_norm": 0.0006658387719653547, "learning_rate": 3.1414005098215627e-05, "loss": 0.0, "step": 9660 }, { "epoch": 13.001664417453892, "grad_norm": 0.00016840879106894135, "learning_rate": 3.13890138451542e-05, "loss": 0.0, "step": 9670 }, { "epoch": 13.002114260008996, "grad_norm": 0.00011239601008128375, "learning_rate": 3.136402259209277e-05, "loss": 0.0, "step": 9680 }, { "epoch": 13.002564102564103, "grad_norm": 0.00016939439228735864, "learning_rate": 3.133903133903134e-05, "loss": 0.0, "step": 9690 }, { "epoch": 13.003013945119209, "grad_norm": 0.00015041980077512562, "learning_rate": 3.1314040085969915e-05, "loss": 0.0253, "step": 9700 }, { "epoch": 13.003463787674313, "grad_norm": 0.0007081007934175432, "learning_rate": 3.1289048832908486e-05, "loss": 0.0, "step": 9710 }, { "epoch": 13.00391363022942, "grad_norm": 0.00013756906264461577, "learning_rate": 3.1264057579847056e-05, "loss": 0.0, "step": 9720 }, { "epoch": 13.004363472784526, "grad_norm": 0.00012444762978702784, "learning_rate": 3.123906632678563e-05, "loss": 0.0, "step": 9730 }, { "epoch": 13.00481331533963, "grad_norm": 0.00015908994828350842, "learning_rate": 3.12140750737242e-05, "loss": 0.0, "step": 9740 }, { "epoch": 13.005263157894737, "grad_norm": 0.00039560432196594775, "learning_rate": 3.118908382066277e-05, "loss": 0.0, "step": 9750 }, { "epoch": 13.005713000449843, "grad_norm": 0.00016459373000543565, "learning_rate": 3.116409256760134e-05, "loss": 0.0, "step": 9760 }, { "epoch": 13.006162843004947, "grad_norm": 0.00043750248732976615, "learning_rate": 3.1139101314539916e-05, "loss": 0.0, "step": 9770 }, { "epoch": 13.006612685560054, "grad_norm": 0.0001374034909531474, "learning_rate": 3.1114110061478486e-05, "loss": 0.0, "step": 9780 }, { "epoch": 13.00706252811516, "grad_norm": 0.00015766483556944877, "learning_rate": 3.108911880841706e-05, "loss": 0.0, "step": 9790 }, { "epoch": 13.007512370670266, "grad_norm": 0.0001353984116576612, "learning_rate": 3.106412755535563e-05, "loss": 0.2201, "step": 9800 }, { "epoch": 13.00796221322537, "grad_norm": 7.859266770537943e-05, "learning_rate": 3.10391363022942e-05, "loss": 0.0, "step": 9810 }, { "epoch": 13.008412055780477, "grad_norm": 4.097376222489402e-05, "learning_rate": 3.101414504923277e-05, "loss": 0.0, "step": 9820 }, { "epoch": 13.008861898335583, "grad_norm": 0.0001220850390382111, "learning_rate": 3.0989153796171346e-05, "loss": 0.0, "step": 9830 }, { "epoch": 13.009311740890688, "grad_norm": 0.00034916229196824133, "learning_rate": 3.0964162543109916e-05, "loss": 0.0, "step": 9840 }, { "epoch": 13.009761583445794, "grad_norm": 0.00012380116095300764, "learning_rate": 3.093917129004849e-05, "loss": 0.0, "step": 9850 }, { "epoch": 13.0102114260009, "grad_norm": 7.655571243958548e-05, "learning_rate": 3.091418003698706e-05, "loss": 0.0, "step": 9860 }, { "epoch": 13.010661268556005, "grad_norm": 0.00022760548745281994, "learning_rate": 3.088918878392563e-05, "loss": 0.0, "step": 9870 }, { "epoch": 13.011111111111111, "grad_norm": 0.00011886010906891897, "learning_rate": 3.08641975308642e-05, "loss": 0.0, "step": 9880 }, { "epoch": 13.011560953666217, "grad_norm": 6.31094808341004e-05, "learning_rate": 3.083920627780277e-05, "loss": 0.0, "step": 9890 }, { "epoch": 13.012010796221322, "grad_norm": 0.0002199704322265461, "learning_rate": 3.0814215024741346e-05, "loss": 0.0, "step": 9900 }, { "epoch": 13.012460638776428, "grad_norm": 0.00020364417287055403, "learning_rate": 3.0789223771679917e-05, "loss": 0.0, "step": 9910 }, { "epoch": 13.012910481331534, "grad_norm": 0.0001407665404258296, "learning_rate": 3.076423251861849e-05, "loss": 0.0, "step": 9920 }, { "epoch": 13.013360323886639, "grad_norm": 0.0011458994122222066, "learning_rate": 3.073924126555706e-05, "loss": 0.0, "step": 9930 }, { "epoch": 13.013810166441745, "grad_norm": 7.350280793616548e-05, "learning_rate": 3.071425001249563e-05, "loss": 0.0, "step": 9940 }, { "epoch": 13.014260008996851, "grad_norm": 0.00045448917080648243, "learning_rate": 3.06892587594342e-05, "loss": 0.0, "step": 9950 }, { "epoch": 13.014709851551958, "grad_norm": 0.0008066979353316128, "learning_rate": 3.0664267506372776e-05, "loss": 0.0, "step": 9960 }, { "epoch": 13.015159694107062, "grad_norm": 5.5154716392280534e-05, "learning_rate": 3.0639276253311346e-05, "loss": 0.0, "step": 9970 }, { "epoch": 13.015609536662168, "grad_norm": 0.0008176863193511963, "learning_rate": 3.061428500024992e-05, "loss": 0.0, "step": 9980 }, { "epoch": 13.016059379217275, "grad_norm": 0.0009561642073094845, "learning_rate": 3.058929374718849e-05, "loss": 0.0, "step": 9990 }, { "epoch": 13.01650922177238, "grad_norm": 0.0007762933382764459, "learning_rate": 3.056430249412706e-05, "loss": 0.0, "step": 10000 }, { "epoch": 13.016959064327486, "grad_norm": 0.0001092751554097049, "learning_rate": 3.053931124106563e-05, "loss": 0.0, "step": 10010 }, { "epoch": 13.017408906882592, "grad_norm": 0.0001278644340345636, "learning_rate": 3.05143199880042e-05, "loss": 0.0, "step": 10020 }, { "epoch": 13.017858749437696, "grad_norm": 0.0005311881541274488, "learning_rate": 3.0489328734942773e-05, "loss": 0.0, "step": 10030 }, { "epoch": 13.018308591992803, "grad_norm": 5.970336860627867e-05, "learning_rate": 3.0464337481881343e-05, "loss": 0.0, "step": 10040 }, { "epoch": 13.018758434547909, "grad_norm": 8.20477944216691e-05, "learning_rate": 3.0439346228819914e-05, "loss": 0.0, "step": 10050 }, { "epoch": 13.019208277103013, "grad_norm": 4.542940223473124e-05, "learning_rate": 3.0414354975758485e-05, "loss": 0.0, "step": 10060 }, { "epoch": 13.01965811965812, "grad_norm": 0.0001387633674312383, "learning_rate": 3.0389363722697055e-05, "loss": 0.0, "step": 10070 }, { "epoch": 13.020107962213226, "grad_norm": 0.00011543036089278758, "learning_rate": 3.0364372469635626e-05, "loss": 0.0, "step": 10080 }, { "epoch": 13.02055780476833, "grad_norm": 0.0016869920073077083, "learning_rate": 3.0339381216574203e-05, "loss": 0.0, "step": 10090 }, { "epoch": 13.021007647323437, "grad_norm": 0.00011007647844962776, "learning_rate": 3.0314389963512773e-05, "loss": 0.0, "step": 10100 }, { "epoch": 13.021457489878543, "grad_norm": 5.2834442612947896e-05, "learning_rate": 3.0289398710451344e-05, "loss": 0.0, "step": 10110 }, { "epoch": 13.021907332433647, "grad_norm": 7.224770524771884e-05, "learning_rate": 3.0264407457389914e-05, "loss": 0.0, "step": 10120 }, { "epoch": 13.022357174988754, "grad_norm": 0.0006507145590148866, "learning_rate": 3.0239416204328485e-05, "loss": 0.0, "step": 10130 }, { "epoch": 13.02280701754386, "grad_norm": 0.00012081662862328812, "learning_rate": 3.0214424951267055e-05, "loss": 0.0, "step": 10140 }, { "epoch": 13.023256860098966, "grad_norm": 0.0011048165615648031, "learning_rate": 3.0189433698205626e-05, "loss": 0.1674, "step": 10150 }, { "epoch": 13.02370670265407, "grad_norm": 2.869908894354012e-05, "learning_rate": 3.0164442445144203e-05, "loss": 0.0, "step": 10160 }, { "epoch": 13.024156545209177, "grad_norm": 0.0001253574009751901, "learning_rate": 3.0139451192082774e-05, "loss": 0.0, "step": 10170 }, { "epoch": 13.024606387764283, "grad_norm": 9.93120193015784e-05, "learning_rate": 3.0114459939021344e-05, "loss": 0.0655, "step": 10180 }, { "epoch": 13.025056230319388, "grad_norm": 0.00032672417000867426, "learning_rate": 3.0089468685959915e-05, "loss": 0.0, "step": 10190 }, { "epoch": 13.025506072874494, "grad_norm": 0.0005742954672314227, "learning_rate": 3.0064477432898485e-05, "loss": 0.0, "step": 10200 }, { "epoch": 13.0259559154296, "grad_norm": 2.859183405234944e-05, "learning_rate": 3.0039486179837056e-05, "loss": 0.0, "step": 10210 }, { "epoch": 13.026405757984705, "grad_norm": 0.0004644894797820598, "learning_rate": 3.0014494926775633e-05, "loss": 0.0, "step": 10220 }, { "epoch": 13.026855600539811, "grad_norm": 8.646947389934212e-05, "learning_rate": 2.9989503673714204e-05, "loss": 0.0, "step": 10230 }, { "epoch": 13.027305443094917, "grad_norm": 2.897646845667623e-05, "learning_rate": 2.9964512420652774e-05, "loss": 0.0, "step": 10240 }, { "epoch": 13.027755285650022, "grad_norm": 0.00012719782534986734, "learning_rate": 2.9939521167591345e-05, "loss": 0.0, "step": 10250 }, { "epoch": 13.028205128205128, "grad_norm": 3.783694774028845e-05, "learning_rate": 2.9914529914529915e-05, "loss": 0.0, "step": 10260 }, { "epoch": 13.028654970760234, "grad_norm": 0.0001277789706364274, "learning_rate": 2.9889538661468486e-05, "loss": 0.0, "step": 10270 }, { "epoch": 13.029104813315339, "grad_norm": 0.0001792252151062712, "learning_rate": 2.9864547408407056e-05, "loss": 0.0, "step": 10280 }, { "epoch": 13.029554655870445, "grad_norm": 0.0006702026585116982, "learning_rate": 2.9839556155345634e-05, "loss": 0.0, "step": 10290 }, { "epoch": 13.030004498425551, "grad_norm": 4.530565274762921e-05, "learning_rate": 2.9814564902284204e-05, "loss": 0.0, "step": 10300 }, { "epoch": 13.030454340980656, "grad_norm": 0.0005598829593509436, "learning_rate": 2.9789573649222775e-05, "loss": 0.0258, "step": 10310 }, { "epoch": 13.030904183535762, "grad_norm": 0.000537554151378572, "learning_rate": 2.9764582396161345e-05, "loss": 0.0, "step": 10320 }, { "epoch": 13.031354026090868, "grad_norm": 0.00045727810356765985, "learning_rate": 2.9739591143099916e-05, "loss": 0.0, "step": 10330 }, { "epoch": 13.031803868645975, "grad_norm": 9.528025839244947e-05, "learning_rate": 2.9714599890038486e-05, "loss": 0.0, "step": 10340 }, { "epoch": 13.03225371120108, "grad_norm": 0.0003974073042627424, "learning_rate": 2.9689608636977063e-05, "loss": 0.0, "step": 10350 }, { "epoch": 13.032703553756185, "grad_norm": 9.360355761600658e-05, "learning_rate": 2.9664617383915634e-05, "loss": 0.0, "step": 10360 }, { "epoch": 13.033153396311292, "grad_norm": 0.0003298447700217366, "learning_rate": 2.9639626130854204e-05, "loss": 0.0, "step": 10370 }, { "epoch": 13.033333333333333, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9153581572237279, "eval_loss": 0.7608879804611206, "eval_runtime": 143.5695, "eval_samples_per_second": 1.498, "eval_steps_per_second": 1.498, "step": 10374 }, { "epoch": 14.000269905533063, "grad_norm": 6.596490129595622e-05, "learning_rate": 2.9614634877792775e-05, "loss": 0.0, "step": 10380 }, { "epoch": 14.00071974808817, "grad_norm": 7.115936023183167e-05, "learning_rate": 2.9589643624731345e-05, "loss": 0.0, "step": 10390 }, { "epoch": 14.001169590643276, "grad_norm": 0.00047136584180407226, "learning_rate": 2.9564652371669916e-05, "loss": 0.0, "step": 10400 }, { "epoch": 14.00161943319838, "grad_norm": 6.940974708413705e-05, "learning_rate": 2.9539661118608486e-05, "loss": 0.0, "step": 10410 }, { "epoch": 14.002069275753486, "grad_norm": 0.00010945695248665288, "learning_rate": 2.9514669865547064e-05, "loss": 0.0, "step": 10420 }, { "epoch": 14.002519118308593, "grad_norm": 0.00021431237109936774, "learning_rate": 2.9489678612485634e-05, "loss": 0.0, "step": 10430 }, { "epoch": 14.002968960863697, "grad_norm": 0.0001210120681207627, "learning_rate": 2.9464687359424205e-05, "loss": 0.0, "step": 10440 }, { "epoch": 14.003418803418803, "grad_norm": 0.00020459068764466792, "learning_rate": 2.9439696106362775e-05, "loss": 0.0, "step": 10450 }, { "epoch": 14.00386864597391, "grad_norm": 0.00012309110024943948, "learning_rate": 2.9414704853301346e-05, "loss": 0.0, "step": 10460 }, { "epoch": 14.004318488529014, "grad_norm": 0.00011228691437281668, "learning_rate": 2.9389713600239916e-05, "loss": 0.0, "step": 10470 }, { "epoch": 14.00476833108412, "grad_norm": 0.0001071939041139558, "learning_rate": 2.936472234717849e-05, "loss": 0.0102, "step": 10480 }, { "epoch": 14.005218173639227, "grad_norm": 9.234516619471833e-05, "learning_rate": 2.933973109411706e-05, "loss": 0.0, "step": 10490 }, { "epoch": 14.005668016194331, "grad_norm": 0.00014496184303425252, "learning_rate": 2.931473984105563e-05, "loss": 0.0, "step": 10500 }, { "epoch": 14.006117858749437, "grad_norm": 0.00011150368663948029, "learning_rate": 2.9289748587994202e-05, "loss": 0.0, "step": 10510 }, { "epoch": 14.006567701304544, "grad_norm": 0.0003166896931361407, "learning_rate": 2.9264757334932776e-05, "loss": 0.0, "step": 10520 }, { "epoch": 14.007017543859648, "grad_norm": 3.257548451074399e-05, "learning_rate": 2.9239766081871346e-05, "loss": 0.0, "step": 10530 }, { "epoch": 14.007467386414755, "grad_norm": 0.00033311001607216895, "learning_rate": 2.9214774828809917e-05, "loss": 0.0, "step": 10540 }, { "epoch": 14.00791722896986, "grad_norm": 9.789070463739336e-05, "learning_rate": 2.918978357574849e-05, "loss": 0.0, "step": 10550 }, { "epoch": 14.008367071524967, "grad_norm": 5.3120838856557384e-05, "learning_rate": 2.916479232268706e-05, "loss": 0.0, "step": 10560 }, { "epoch": 14.008816914080072, "grad_norm": 5.1176062697777525e-05, "learning_rate": 2.9139801069625632e-05, "loss": 0.0, "step": 10570 }, { "epoch": 14.009266756635178, "grad_norm": 0.0004394018615130335, "learning_rate": 2.9114809816564202e-05, "loss": 0.0, "step": 10580 }, { "epoch": 14.009716599190284, "grad_norm": 9.724710253067315e-05, "learning_rate": 2.9089818563502773e-05, "loss": 0.0, "step": 10590 }, { "epoch": 14.010166441745389, "grad_norm": 9.49447785387747e-05, "learning_rate": 2.9064827310441343e-05, "loss": 0.0, "step": 10600 }, { "epoch": 14.010616284300495, "grad_norm": 9.665967081673443e-05, "learning_rate": 2.903983605737992e-05, "loss": 0.0, "step": 10610 }, { "epoch": 14.011066126855601, "grad_norm": 0.00022347108460962772, "learning_rate": 2.901484480431849e-05, "loss": 0.0, "step": 10620 }, { "epoch": 14.011515969410706, "grad_norm": 6.568954995600507e-05, "learning_rate": 2.898985355125706e-05, "loss": 0.0, "step": 10630 }, { "epoch": 14.011965811965812, "grad_norm": 0.0011864994885399938, "learning_rate": 2.8964862298195632e-05, "loss": 0.0, "step": 10640 }, { "epoch": 14.012415654520918, "grad_norm": 9.329157910542563e-05, "learning_rate": 2.8939871045134203e-05, "loss": 0.0, "step": 10650 }, { "epoch": 14.012865497076023, "grad_norm": 0.0008966324967332184, "learning_rate": 2.8914879792072773e-05, "loss": 0.0, "step": 10660 }, { "epoch": 14.013315339631129, "grad_norm": 2.1319468942238018e-05, "learning_rate": 2.8889888539011344e-05, "loss": 0.0, "step": 10670 }, { "epoch": 14.013765182186235, "grad_norm": 9.586493979441002e-05, "learning_rate": 2.886489728594992e-05, "loss": 0.0, "step": 10680 }, { "epoch": 14.01421502474134, "grad_norm": 4.144087870372459e-05, "learning_rate": 2.883990603288849e-05, "loss": 0.0, "step": 10690 }, { "epoch": 14.014664867296446, "grad_norm": 0.00012447714107111096, "learning_rate": 2.8814914779827062e-05, "loss": 0.0, "step": 10700 }, { "epoch": 14.015114709851552, "grad_norm": 0.0002389039727859199, "learning_rate": 2.8789923526765633e-05, "loss": 0.0, "step": 10710 }, { "epoch": 14.015564552406659, "grad_norm": 9.410877828486264e-05, "learning_rate": 2.8764932273704203e-05, "loss": 0.0, "step": 10720 }, { "epoch": 14.016014394961763, "grad_norm": 9.79511605692096e-05, "learning_rate": 2.8739941020642774e-05, "loss": 0.0, "step": 10730 }, { "epoch": 14.01646423751687, "grad_norm": 0.0001270372304134071, "learning_rate": 2.871494976758135e-05, "loss": 0.0, "step": 10740 }, { "epoch": 14.016914080071976, "grad_norm": 0.00010493004810996354, "learning_rate": 2.868995851451992e-05, "loss": 0.0, "step": 10750 }, { "epoch": 14.01736392262708, "grad_norm": 0.0005750772543251514, "learning_rate": 2.8664967261458492e-05, "loss": 0.0, "step": 10760 }, { "epoch": 14.017813765182186, "grad_norm": 0.00010215446673100814, "learning_rate": 2.8639976008397062e-05, "loss": 0.0, "step": 10770 }, { "epoch": 14.018263607737293, "grad_norm": 2.859937376342714e-05, "learning_rate": 2.8614984755335633e-05, "loss": 0.0, "step": 10780 }, { "epoch": 14.018713450292397, "grad_norm": 8.133342635119334e-05, "learning_rate": 2.8589993502274203e-05, "loss": 0.0, "step": 10790 }, { "epoch": 14.019163292847503, "grad_norm": 2.955237141577527e-05, "learning_rate": 2.8565002249212774e-05, "loss": 0.0, "step": 10800 }, { "epoch": 14.01961313540261, "grad_norm": 8.277234883280471e-05, "learning_rate": 2.854001099615135e-05, "loss": 0.0, "step": 10810 }, { "epoch": 14.020062977957714, "grad_norm": 0.0001320861338172108, "learning_rate": 2.8515019743089922e-05, "loss": 0.3221, "step": 10820 }, { "epoch": 14.02051282051282, "grad_norm": 9.680550283519551e-05, "learning_rate": 2.8490028490028492e-05, "loss": 0.0, "step": 10830 }, { "epoch": 14.020962663067927, "grad_norm": 0.00013269229384604841, "learning_rate": 2.8465037236967063e-05, "loss": 0.0, "step": 10840 }, { "epoch": 14.021412505623031, "grad_norm": 0.00018897015252150595, "learning_rate": 2.8440045983905633e-05, "loss": 0.241, "step": 10850 }, { "epoch": 14.021862348178137, "grad_norm": 7.382709009107202e-05, "learning_rate": 2.8415054730844204e-05, "loss": 0.0, "step": 10860 }, { "epoch": 14.022312190733244, "grad_norm": 0.00012246749247424304, "learning_rate": 2.839006347778278e-05, "loss": 0.0, "step": 10870 }, { "epoch": 14.022762033288348, "grad_norm": 8.214438275899738e-05, "learning_rate": 2.836507222472135e-05, "loss": 0.0, "step": 10880 }, { "epoch": 14.023211875843455, "grad_norm": 0.00019822335161734372, "learning_rate": 2.8340080971659922e-05, "loss": 0.0, "step": 10890 }, { "epoch": 14.02366171839856, "grad_norm": 0.00010232988279312849, "learning_rate": 2.8315089718598493e-05, "loss": 0.1515, "step": 10900 }, { "epoch": 14.024111560953667, "grad_norm": 8.460547542199492e-05, "learning_rate": 2.8290098465537063e-05, "loss": 0.0, "step": 10910 }, { "epoch": 14.024561403508772, "grad_norm": 0.00010293206287315115, "learning_rate": 2.8265107212475634e-05, "loss": 0.0, "step": 10920 }, { "epoch": 14.025011246063878, "grad_norm": 8.671776595292613e-05, "learning_rate": 2.8240115959414204e-05, "loss": 0.0, "step": 10930 }, { "epoch": 14.025461088618984, "grad_norm": 8.597327541792765e-05, "learning_rate": 2.821512470635278e-05, "loss": 0.0, "step": 10940 }, { "epoch": 14.025910931174089, "grad_norm": 9.427647455595434e-05, "learning_rate": 2.8190133453291352e-05, "loss": 0.0, "step": 10950 }, { "epoch": 14.026360773729195, "grad_norm": 7.468530384358019e-05, "learning_rate": 2.8165142200229923e-05, "loss": 0.0, "step": 10960 }, { "epoch": 14.026810616284301, "grad_norm": 8.215215348172933e-05, "learning_rate": 2.8140150947168493e-05, "loss": 0.0, "step": 10970 }, { "epoch": 14.027260458839406, "grad_norm": 7.946249388623983e-05, "learning_rate": 2.8115159694107064e-05, "loss": 0.0598, "step": 10980 }, { "epoch": 14.027710301394512, "grad_norm": 0.00045179069275036454, "learning_rate": 2.8090168441045634e-05, "loss": 0.0, "step": 10990 }, { "epoch": 14.028160143949618, "grad_norm": 3.443227251409553e-05, "learning_rate": 2.8065177187984208e-05, "loss": 0.0, "step": 11000 }, { "epoch": 14.028609986504723, "grad_norm": 9.500068699708208e-05, "learning_rate": 2.8040185934922782e-05, "loss": 0.0, "step": 11010 }, { "epoch": 14.029059829059829, "grad_norm": 0.00043259983067400753, "learning_rate": 2.8015194681861352e-05, "loss": 0.0, "step": 11020 }, { "epoch": 14.029509671614935, "grad_norm": 0.0002676035219337791, "learning_rate": 2.7990203428799923e-05, "loss": 0.0, "step": 11030 }, { "epoch": 14.02995951417004, "grad_norm": 7.962385279824957e-05, "learning_rate": 2.7965212175738493e-05, "loss": 0.0, "step": 11040 }, { "epoch": 14.030409356725146, "grad_norm": 8.924648864194751e-05, "learning_rate": 2.7940220922677064e-05, "loss": 0.0275, "step": 11050 }, { "epoch": 14.030859199280252, "grad_norm": 9.819430124480277e-05, "learning_rate": 2.7915229669615634e-05, "loss": 0.0, "step": 11060 }, { "epoch": 14.031309041835357, "grad_norm": 0.0008734805160202086, "learning_rate": 2.789023841655421e-05, "loss": 0.0, "step": 11070 }, { "epoch": 14.031758884390463, "grad_norm": 0.0001592394692124799, "learning_rate": 2.786524716349278e-05, "loss": 0.0, "step": 11080 }, { "epoch": 14.03220872694557, "grad_norm": 8.526194869773462e-05, "learning_rate": 2.784025591043135e-05, "loss": 0.0, "step": 11090 }, { "epoch": 14.032658569500676, "grad_norm": 9.230792784364894e-05, "learning_rate": 2.781526465736992e-05, "loss": 0.0, "step": 11100 }, { "epoch": 14.03310841205578, "grad_norm": 2.685280742298346e-05, "learning_rate": 2.7790273404308494e-05, "loss": 0.0, "step": 11110 }, { "epoch": 14.033333333333333, "eval_accuracy": 0.9069767441860465, "eval_f1": 0.9073831822788105, "eval_loss": 0.7559973001480103, "eval_runtime": 146.2998, "eval_samples_per_second": 1.47, "eval_steps_per_second": 1.47, "step": 11115 }, { "epoch": 15.000224921277553, "grad_norm": 0.00022097158944234252, "learning_rate": 2.7765282151247064e-05, "loss": 0.0, "step": 11120 }, { "epoch": 15.00067476383266, "grad_norm": 0.00011830481525976211, "learning_rate": 2.7740290898185638e-05, "loss": 0.0, "step": 11130 }, { "epoch": 15.001124606387764, "grad_norm": 0.00012794691429007798, "learning_rate": 2.771529964512421e-05, "loss": 0.0, "step": 11140 }, { "epoch": 15.00157444894287, "grad_norm": 9.617553587304428e-05, "learning_rate": 2.769030839206278e-05, "loss": 0.0, "step": 11150 }, { "epoch": 15.002024291497976, "grad_norm": 0.0002566665061749518, "learning_rate": 2.766531713900135e-05, "loss": 0.0, "step": 11160 }, { "epoch": 15.002474134053081, "grad_norm": 0.0003265255654696375, "learning_rate": 2.764032588593992e-05, "loss": 0.0, "step": 11170 }, { "epoch": 15.002923976608187, "grad_norm": 0.00010424172069178894, "learning_rate": 2.761533463287849e-05, "loss": 0.0, "step": 11180 }, { "epoch": 15.003373819163293, "grad_norm": 9.128820965997875e-05, "learning_rate": 2.759034337981706e-05, "loss": 0.0, "step": 11190 }, { "epoch": 15.003823661718398, "grad_norm": 3.308395389467478e-05, "learning_rate": 2.756535212675564e-05, "loss": 0.0, "step": 11200 }, { "epoch": 15.004273504273504, "grad_norm": 0.00011078887473559007, "learning_rate": 2.754036087369421e-05, "loss": 0.0, "step": 11210 }, { "epoch": 15.00472334682861, "grad_norm": 0.00022016778530087322, "learning_rate": 2.751536962063278e-05, "loss": 0.0, "step": 11220 }, { "epoch": 15.005173189383715, "grad_norm": 9.903871978167444e-05, "learning_rate": 2.749037836757135e-05, "loss": 0.0, "step": 11230 }, { "epoch": 15.005623031938821, "grad_norm": 0.00021584770001936704, "learning_rate": 2.746538711450992e-05, "loss": 0.0, "step": 11240 }, { "epoch": 15.006072874493928, "grad_norm": 0.00015262725355569273, "learning_rate": 2.744039586144849e-05, "loss": 0.0, "step": 11250 }, { "epoch": 15.006522717049032, "grad_norm": 0.000262904679402709, "learning_rate": 2.741540460838707e-05, "loss": 0.0, "step": 11260 }, { "epoch": 15.006972559604138, "grad_norm": 0.00010252036736346781, "learning_rate": 2.739041335532564e-05, "loss": 0.0, "step": 11270 }, { "epoch": 15.007422402159245, "grad_norm": 9.369424515170977e-05, "learning_rate": 2.736542210226421e-05, "loss": 0.0, "step": 11280 }, { "epoch": 15.007872244714349, "grad_norm": 0.00030531795346178114, "learning_rate": 2.734043084920278e-05, "loss": 0.0, "step": 11290 }, { "epoch": 15.008322087269455, "grad_norm": 8.130009518936276e-05, "learning_rate": 2.731543959614135e-05, "loss": 0.0, "step": 11300 }, { "epoch": 15.008771929824562, "grad_norm": 0.004767315462231636, "learning_rate": 2.729044834307992e-05, "loss": 0.0, "step": 11310 }, { "epoch": 15.009221772379668, "grad_norm": 4.4809039536630735e-05, "learning_rate": 2.726545709001849e-05, "loss": 0.0116, "step": 11320 }, { "epoch": 15.009671614934772, "grad_norm": 7.317520066862926e-05, "learning_rate": 2.724046583695707e-05, "loss": 0.0, "step": 11330 }, { "epoch": 15.010121457489879, "grad_norm": 1.954054278030526e-05, "learning_rate": 2.721547458389564e-05, "loss": 0.0, "step": 11340 }, { "epoch": 15.010571300044985, "grad_norm": 8.179496944649145e-05, "learning_rate": 2.719048333083421e-05, "loss": 0.0, "step": 11350 }, { "epoch": 15.01102114260009, "grad_norm": 7.982663373695686e-05, "learning_rate": 2.716549207777278e-05, "loss": 0.0, "step": 11360 }, { "epoch": 15.011470985155196, "grad_norm": 7.641351839993149e-05, "learning_rate": 2.714050082471135e-05, "loss": 0.0, "step": 11370 }, { "epoch": 15.011920827710302, "grad_norm": 5.523673098650761e-05, "learning_rate": 2.711550957164992e-05, "loss": 0.0, "step": 11380 }, { "epoch": 15.012370670265407, "grad_norm": 4.447564424481243e-05, "learning_rate": 2.70905183185885e-05, "loss": 0.0, "step": 11390 }, { "epoch": 15.012820512820513, "grad_norm": 0.0001433419674867764, "learning_rate": 2.706552706552707e-05, "loss": 0.0, "step": 11400 }, { "epoch": 15.013270355375619, "grad_norm": 0.00021968077635392547, "learning_rate": 2.704053581246564e-05, "loss": 0.0, "step": 11410 }, { "epoch": 15.013720197930724, "grad_norm": 8.483795681968331e-05, "learning_rate": 2.701554455940421e-05, "loss": 0.0, "step": 11420 }, { "epoch": 15.01417004048583, "grad_norm": 8.237075962824747e-05, "learning_rate": 2.699055330634278e-05, "loss": 0.0, "step": 11430 }, { "epoch": 15.014619883040936, "grad_norm": 2.0264124032109976e-05, "learning_rate": 2.696556205328135e-05, "loss": 0.0, "step": 11440 }, { "epoch": 15.01506972559604, "grad_norm": 0.0001085235780919902, "learning_rate": 2.6940570800219922e-05, "loss": 0.0, "step": 11450 }, { "epoch": 15.015519568151147, "grad_norm": 0.00016277152462862432, "learning_rate": 2.69155795471585e-05, "loss": 0.0, "step": 11460 }, { "epoch": 15.015969410706253, "grad_norm": 1.4354101949720643e-05, "learning_rate": 2.689058829409707e-05, "loss": 0.3053, "step": 11470 }, { "epoch": 15.01641925326136, "grad_norm": 7.077175541780889e-05, "learning_rate": 2.686559704103564e-05, "loss": 0.0, "step": 11480 }, { "epoch": 15.016869095816464, "grad_norm": 0.00022910711413715035, "learning_rate": 2.684060578797421e-05, "loss": 0.0, "step": 11490 }, { "epoch": 15.01731893837157, "grad_norm": 4.952902236254886e-05, "learning_rate": 2.681561453491278e-05, "loss": 0.0, "step": 11500 }, { "epoch": 15.017768780926676, "grad_norm": 0.00033590139355510473, "learning_rate": 2.6790623281851352e-05, "loss": 0.0, "step": 11510 }, { "epoch": 15.018218623481781, "grad_norm": 8.226411591749638e-05, "learning_rate": 2.6765632028789922e-05, "loss": 0.0, "step": 11520 }, { "epoch": 15.018668466036887, "grad_norm": 7.591211760882288e-05, "learning_rate": 2.67406407757285e-05, "loss": 0.0, "step": 11530 }, { "epoch": 15.019118308591993, "grad_norm": 9.363327990286052e-05, "learning_rate": 2.671564952266707e-05, "loss": 0.0, "step": 11540 }, { "epoch": 15.019568151147098, "grad_norm": 6.927661888767034e-05, "learning_rate": 2.669065826960564e-05, "loss": 0.0, "step": 11550 }, { "epoch": 15.020017993702204, "grad_norm": 5.9434809372760355e-05, "learning_rate": 2.666566701654421e-05, "loss": 0.0, "step": 11560 }, { "epoch": 15.02046783625731, "grad_norm": 8.832826279103756e-05, "learning_rate": 2.664067576348278e-05, "loss": 0.228, "step": 11570 }, { "epoch": 15.020917678812415, "grad_norm": 7.538399222539738e-05, "learning_rate": 2.6615684510421352e-05, "loss": 0.0, "step": 11580 }, { "epoch": 15.021367521367521, "grad_norm": 0.00040816108230501413, "learning_rate": 2.6590693257359926e-05, "loss": 0.0, "step": 11590 }, { "epoch": 15.021817363922628, "grad_norm": 8.975164382718503e-05, "learning_rate": 2.6565702004298497e-05, "loss": 0.0, "step": 11600 }, { "epoch": 15.022267206477732, "grad_norm": 3.374975494807586e-05, "learning_rate": 2.654071075123707e-05, "loss": 0.1537, "step": 11610 }, { "epoch": 15.022717049032838, "grad_norm": 4.336350684752688e-05, "learning_rate": 2.651571949817564e-05, "loss": 0.0, "step": 11620 }, { "epoch": 15.023166891587945, "grad_norm": 0.0005361968651413918, "learning_rate": 2.649072824511421e-05, "loss": 0.0, "step": 11630 }, { "epoch": 15.023616734143049, "grad_norm": 8.934557263273746e-05, "learning_rate": 2.6465736992052782e-05, "loss": 0.0502, "step": 11640 }, { "epoch": 15.024066576698155, "grad_norm": 0.00039845381979830563, "learning_rate": 2.6440745738991353e-05, "loss": 0.0, "step": 11650 }, { "epoch": 15.024516419253262, "grad_norm": 4.3109714169986546e-05, "learning_rate": 2.6415754485929927e-05, "loss": 0.0279, "step": 11660 }, { "epoch": 15.024966261808368, "grad_norm": 0.00045760790817439556, "learning_rate": 2.6390763232868497e-05, "loss": 0.0, "step": 11670 }, { "epoch": 15.025416104363472, "grad_norm": 9.255833720089868e-05, "learning_rate": 2.6365771979807068e-05, "loss": 0.0, "step": 11680 }, { "epoch": 15.025865946918579, "grad_norm": 0.00010978740465361625, "learning_rate": 2.6340780726745638e-05, "loss": 0.0, "step": 11690 }, { "epoch": 15.026315789473685, "grad_norm": 0.0004069530696142465, "learning_rate": 2.6315789473684212e-05, "loss": 0.0, "step": 11700 }, { "epoch": 15.02676563202879, "grad_norm": 0.00012593499559443444, "learning_rate": 2.6290798220622782e-05, "loss": 0.0, "step": 11710 }, { "epoch": 15.027215474583896, "grad_norm": 0.00015261063526850194, "learning_rate": 2.6265806967561356e-05, "loss": 0.0, "step": 11720 }, { "epoch": 15.027665317139002, "grad_norm": 0.00030580663587898016, "learning_rate": 2.6240815714499927e-05, "loss": 0.0, "step": 11730 }, { "epoch": 15.028115159694106, "grad_norm": 0.0003194242308381945, "learning_rate": 2.6215824461438497e-05, "loss": 0.0, "step": 11740 }, { "epoch": 15.028565002249213, "grad_norm": 0.00025326618924736977, "learning_rate": 2.6190833208377068e-05, "loss": 0.0, "step": 11750 }, { "epoch": 15.029014844804319, "grad_norm": 0.00011277001613052562, "learning_rate": 2.616584195531564e-05, "loss": 0.0, "step": 11760 }, { "epoch": 15.029464687359424, "grad_norm": 8.380229701288044e-05, "learning_rate": 2.614085070225421e-05, "loss": 0.0, "step": 11770 }, { "epoch": 15.02991452991453, "grad_norm": 8.622213499620557e-05, "learning_rate": 2.611585944919278e-05, "loss": 0.0, "step": 11780 }, { "epoch": 15.030364372469636, "grad_norm": 1.4006192941451445e-05, "learning_rate": 2.6090868196131357e-05, "loss": 0.0, "step": 11790 }, { "epoch": 15.03081421502474, "grad_norm": 7.735377585049719e-05, "learning_rate": 2.6065876943069927e-05, "loss": 0.0, "step": 11800 }, { "epoch": 15.031264057579847, "grad_norm": 8.729020191822201e-05, "learning_rate": 2.6040885690008498e-05, "loss": 0.0, "step": 11810 }, { "epoch": 15.031713900134953, "grad_norm": 0.0001028189726639539, "learning_rate": 2.601589443694707e-05, "loss": 0.0, "step": 11820 }, { "epoch": 15.032163742690058, "grad_norm": 8.698280726093799e-05, "learning_rate": 2.599090318388564e-05, "loss": 0.0, "step": 11830 }, { "epoch": 15.032613585245164, "grad_norm": 4.151571192778647e-05, "learning_rate": 2.596591193082421e-05, "loss": 0.0, "step": 11840 }, { "epoch": 15.03306342780027, "grad_norm": 0.00010592249600449577, "learning_rate": 2.5940920677762787e-05, "loss": 0.0, "step": 11850 }, { "epoch": 15.033333333333333, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9151483456140399, "eval_loss": 0.8035854697227478, "eval_runtime": 141.498, "eval_samples_per_second": 1.519, "eval_steps_per_second": 1.519, "step": 11856 }, { "epoch": 16.000179937022043, "grad_norm": 1.4883323274261784e-05, "learning_rate": 2.5915929424701357e-05, "loss": 0.0, "step": 11860 }, { "epoch": 16.00062977957715, "grad_norm": 0.00010592549369903281, "learning_rate": 2.5890938171639928e-05, "loss": 0.0, "step": 11870 }, { "epoch": 16.001079622132252, "grad_norm": 9.068657527677715e-05, "learning_rate": 2.5865946918578498e-05, "loss": 0.0, "step": 11880 }, { "epoch": 16.00152946468736, "grad_norm": 0.0001449688134016469, "learning_rate": 2.584095566551707e-05, "loss": 0.0, "step": 11890 }, { "epoch": 16.001979307242465, "grad_norm": 0.0004028608091175556, "learning_rate": 2.581596441245564e-05, "loss": 0.0, "step": 11900 }, { "epoch": 16.00242914979757, "grad_norm": 0.00010957779159070924, "learning_rate": 2.579097315939421e-05, "loss": 0.0, "step": 11910 }, { "epoch": 16.002878992352677, "grad_norm": 8.808275742921978e-05, "learning_rate": 2.5765981906332787e-05, "loss": 0.0, "step": 11920 }, { "epoch": 16.003328834907784, "grad_norm": 8.08186741778627e-05, "learning_rate": 2.5740990653271358e-05, "loss": 0.0, "step": 11930 }, { "epoch": 16.003778677462886, "grad_norm": 3.816796015598811e-05, "learning_rate": 2.5715999400209928e-05, "loss": 0.0, "step": 11940 }, { "epoch": 16.004228520017993, "grad_norm": 7.910098065622151e-05, "learning_rate": 2.56910081471485e-05, "loss": 0.0, "step": 11950 }, { "epoch": 16.0046783625731, "grad_norm": 4.106140841031447e-05, "learning_rate": 2.566601689408707e-05, "loss": 0.0, "step": 11960 }, { "epoch": 16.005128205128205, "grad_norm": 0.00018998274754267186, "learning_rate": 2.564102564102564e-05, "loss": 0.0, "step": 11970 }, { "epoch": 16.00557804768331, "grad_norm": 0.00025499030016362667, "learning_rate": 2.5616034387964217e-05, "loss": 0.0, "step": 11980 }, { "epoch": 16.006027890238418, "grad_norm": 0.00021445259335450828, "learning_rate": 2.5591043134902787e-05, "loss": 0.0, "step": 11990 }, { "epoch": 16.006477732793524, "grad_norm": 4.371452814666554e-05, "learning_rate": 2.5566051881841358e-05, "loss": 0.0, "step": 12000 }, { "epoch": 16.006927575348627, "grad_norm": 6.685888365609571e-05, "learning_rate": 2.554106062877993e-05, "loss": 0.0, "step": 12010 }, { "epoch": 16.007377417903733, "grad_norm": 2.7666777896229178e-05, "learning_rate": 2.55160693757185e-05, "loss": 0.0, "step": 12020 }, { "epoch": 16.00782726045884, "grad_norm": 6.504967313958332e-05, "learning_rate": 2.549107812265707e-05, "loss": 0.0, "step": 12030 }, { "epoch": 16.008277103013945, "grad_norm": 2.7263899028184824e-05, "learning_rate": 2.546608686959564e-05, "loss": 0.0, "step": 12040 }, { "epoch": 16.00872694556905, "grad_norm": 5.743377550970763e-05, "learning_rate": 2.5441095616534217e-05, "loss": 0.0, "step": 12050 }, { "epoch": 16.009176788124158, "grad_norm": 6.459606811404228e-05, "learning_rate": 2.5416104363472788e-05, "loss": 0.0, "step": 12060 }, { "epoch": 16.00962663067926, "grad_norm": 0.00010339605796616524, "learning_rate": 2.539111311041136e-05, "loss": 0.0124, "step": 12070 }, { "epoch": 16.010076473234367, "grad_norm": 0.0001992172037716955, "learning_rate": 2.536612185734993e-05, "loss": 0.0, "step": 12080 }, { "epoch": 16.010526315789473, "grad_norm": 2.163487624784466e-05, "learning_rate": 2.53411306042885e-05, "loss": 0.0, "step": 12090 }, { "epoch": 16.01097615834458, "grad_norm": 0.0001504887914052233, "learning_rate": 2.531613935122707e-05, "loss": 0.0, "step": 12100 }, { "epoch": 16.011426000899686, "grad_norm": 6.518846930703148e-05, "learning_rate": 2.5291148098165647e-05, "loss": 0.0, "step": 12110 }, { "epoch": 16.011875843454792, "grad_norm": 7.297332194866613e-05, "learning_rate": 2.5266156845104218e-05, "loss": 0.3053, "step": 12120 }, { "epoch": 16.012325686009895, "grad_norm": 6.301509711192921e-05, "learning_rate": 2.5241165592042788e-05, "loss": 0.0, "step": 12130 }, { "epoch": 16.012775528565, "grad_norm": 0.00010782436584122479, "learning_rate": 2.521617433898136e-05, "loss": 0.0, "step": 12140 }, { "epoch": 16.013225371120107, "grad_norm": 7.316889241337776e-05, "learning_rate": 2.519118308591993e-05, "loss": 0.2251, "step": 12150 }, { "epoch": 16.013675213675214, "grad_norm": 0.0002546868927311152, "learning_rate": 2.51661918328585e-05, "loss": 0.0, "step": 12160 }, { "epoch": 16.01412505623032, "grad_norm": 0.00021858216496184468, "learning_rate": 2.514120057979707e-05, "loss": 0.0, "step": 12170 }, { "epoch": 16.014574898785426, "grad_norm": 0.00043696112697944045, "learning_rate": 2.5116209326735644e-05, "loss": 0.0, "step": 12180 }, { "epoch": 16.015024741340532, "grad_norm": 0.0003867655177600682, "learning_rate": 2.5091218073674215e-05, "loss": 0.0, "step": 12190 }, { "epoch": 16.015474583895635, "grad_norm": 2.184785989811644e-05, "learning_rate": 2.506622682061279e-05, "loss": 0.0, "step": 12200 }, { "epoch": 16.01592442645074, "grad_norm": 8.181656448869035e-05, "learning_rate": 2.504123556755136e-05, "loss": 0.0, "step": 12210 }, { "epoch": 16.016374269005848, "grad_norm": 3.7737070670118555e-05, "learning_rate": 2.501624431448993e-05, "loss": 0.0, "step": 12220 }, { "epoch": 16.016824111560954, "grad_norm": 0.0003981046611443162, "learning_rate": 2.49912530614285e-05, "loss": 0.0, "step": 12230 }, { "epoch": 16.01727395411606, "grad_norm": 2.9591852580779232e-05, "learning_rate": 2.496626180836707e-05, "loss": 0.0, "step": 12240 }, { "epoch": 16.017723796671167, "grad_norm": 9.059899457497522e-05, "learning_rate": 2.4941270555305645e-05, "loss": 0.0, "step": 12250 }, { "epoch": 16.01817363922627, "grad_norm": 4.5262353523867205e-05, "learning_rate": 2.4916279302244215e-05, "loss": 0.0, "step": 12260 }, { "epoch": 16.018623481781376, "grad_norm": 3.29861904901918e-05, "learning_rate": 2.4891288049182786e-05, "loss": 0.0, "step": 12270 }, { "epoch": 16.019073324336482, "grad_norm": 0.00023501265968661755, "learning_rate": 2.4866296796121356e-05, "loss": 0.0, "step": 12280 }, { "epoch": 16.019523166891588, "grad_norm": 7.902314973762259e-05, "learning_rate": 2.484130554305993e-05, "loss": 0.0, "step": 12290 }, { "epoch": 16.019973009446694, "grad_norm": 6.875683175167069e-05, "learning_rate": 2.48163142899985e-05, "loss": 0.0, "step": 12300 }, { "epoch": 16.0204228520018, "grad_norm": 3.485895649646409e-05, "learning_rate": 2.479132303693707e-05, "loss": 0.1518, "step": 12310 }, { "epoch": 16.020872694556903, "grad_norm": 12.983430862426758, "learning_rate": 2.4766331783875645e-05, "loss": 0.0511, "step": 12320 }, { "epoch": 16.02132253711201, "grad_norm": 3.3444492146372795e-05, "learning_rate": 2.4741340530814216e-05, "loss": 0.0, "step": 12330 }, { "epoch": 16.021772379667116, "grad_norm": 0.0002497132809367031, "learning_rate": 2.4716349277752786e-05, "loss": 0.0, "step": 12340 }, { "epoch": 16.022222222222222, "grad_norm": 7.071927393553779e-05, "learning_rate": 2.4691358024691357e-05, "loss": 0.0265, "step": 12350 }, { "epoch": 16.02267206477733, "grad_norm": 0.00021349349117372185, "learning_rate": 2.466636677162993e-05, "loss": 0.0, "step": 12360 }, { "epoch": 16.023121907332435, "grad_norm": 0.00010089009447256103, "learning_rate": 2.46413755185685e-05, "loss": 0.0, "step": 12370 }, { "epoch": 16.02357174988754, "grad_norm": 7.457892934326082e-05, "learning_rate": 2.461638426550707e-05, "loss": 0.0, "step": 12380 }, { "epoch": 16.024021592442644, "grad_norm": 1.1549284863576759e-05, "learning_rate": 2.4591393012445645e-05, "loss": 0.0, "step": 12390 }, { "epoch": 16.02447143499775, "grad_norm": 7.670086051803082e-05, "learning_rate": 2.4566401759384216e-05, "loss": 0.0, "step": 12400 }, { "epoch": 16.024921277552856, "grad_norm": 0.00026809590053744614, "learning_rate": 2.4541410506322786e-05, "loss": 0.0, "step": 12410 }, { "epoch": 16.025371120107962, "grad_norm": 8.520954725099728e-05, "learning_rate": 2.451641925326136e-05, "loss": 0.0, "step": 12420 }, { "epoch": 16.02582096266307, "grad_norm": 9.638704068493098e-05, "learning_rate": 2.449142800019993e-05, "loss": 0.0, "step": 12430 }, { "epoch": 16.026270805218175, "grad_norm": 2.762759140750859e-05, "learning_rate": 2.44664367471385e-05, "loss": 0.0, "step": 12440 }, { "epoch": 16.026720647773278, "grad_norm": 0.00012492069799918681, "learning_rate": 2.4441445494077075e-05, "loss": 0.0, "step": 12450 }, { "epoch": 16.027170490328384, "grad_norm": 0.00024198577739298344, "learning_rate": 2.4416454241015646e-05, "loss": 0.0, "step": 12460 }, { "epoch": 16.02762033288349, "grad_norm": 0.00020759883045684546, "learning_rate": 2.4391462987954216e-05, "loss": 0.0, "step": 12470 }, { "epoch": 16.028070175438597, "grad_norm": 0.0003441913577262312, "learning_rate": 2.4366471734892787e-05, "loss": 0.0, "step": 12480 }, { "epoch": 16.028520017993703, "grad_norm": 7.506849215133116e-05, "learning_rate": 2.434148048183136e-05, "loss": 0.0, "step": 12490 }, { "epoch": 16.02896986054881, "grad_norm": 0.0005587295745499432, "learning_rate": 2.431648922876993e-05, "loss": 0.0, "step": 12500 }, { "epoch": 16.029419703103915, "grad_norm": 0.00014573961379937828, "learning_rate": 2.4291497975708502e-05, "loss": 0.0, "step": 12510 }, { "epoch": 16.029869545659018, "grad_norm": 0.00018406764138489962, "learning_rate": 2.4266506722647076e-05, "loss": 0.0, "step": 12520 }, { "epoch": 16.030319388214124, "grad_norm": 3.345272853039205e-05, "learning_rate": 2.4241515469585646e-05, "loss": 0.0, "step": 12530 }, { "epoch": 16.03076923076923, "grad_norm": 6.626223330385983e-05, "learning_rate": 2.4216524216524217e-05, "loss": 0.0, "step": 12540 }, { "epoch": 16.031219073324337, "grad_norm": 6.867072806926444e-05, "learning_rate": 2.419153296346279e-05, "loss": 0.0, "step": 12550 }, { "epoch": 16.031668915879443, "grad_norm": 7.72790881455876e-05, "learning_rate": 2.416654171040136e-05, "loss": 0.0, "step": 12560 }, { "epoch": 16.03211875843455, "grad_norm": 7.890671258792281e-05, "learning_rate": 2.414155045733993e-05, "loss": 0.0, "step": 12570 }, { "epoch": 16.032568600989652, "grad_norm": 0.00442487234249711, "learning_rate": 2.4116559204278506e-05, "loss": 0.0, "step": 12580 }, { "epoch": 16.03301844354476, "grad_norm": 2.8482128982432187e-05, "learning_rate": 2.4091567951217076e-05, "loss": 0.0, "step": 12590 }, { "epoch": 16.033333333333335, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9151483456140399, "eval_loss": 0.7961735129356384, "eval_runtime": 140.2935, "eval_samples_per_second": 1.533, "eval_steps_per_second": 1.533, "step": 12597 }, { "epoch": 17.000134952766533, "grad_norm": 0.0001641175040276721, "learning_rate": 2.4066576698155647e-05, "loss": 0.0, "step": 12600 }, { "epoch": 17.000584795321636, "grad_norm": 7.220463885460049e-05, "learning_rate": 2.4041585445094217e-05, "loss": 0.0, "step": 12610 }, { "epoch": 17.001034637876742, "grad_norm": 2.121922625519801e-05, "learning_rate": 2.401659419203279e-05, "loss": 0.0, "step": 12620 }, { "epoch": 17.00148448043185, "grad_norm": 0.0001709010248305276, "learning_rate": 2.399160293897136e-05, "loss": 0.0, "step": 12630 }, { "epoch": 17.001934322986955, "grad_norm": 7.326434570131823e-05, "learning_rate": 2.3966611685909932e-05, "loss": 0.0116, "step": 12640 }, { "epoch": 17.00238416554206, "grad_norm": 7.770003867335618e-05, "learning_rate": 2.3941620432848506e-05, "loss": 0.0, "step": 12650 }, { "epoch": 17.002834008097167, "grad_norm": 5.967661854811013e-05, "learning_rate": 2.3916629179787076e-05, "loss": 0.0, "step": 12660 }, { "epoch": 17.00328385065227, "grad_norm": 2.024409877776634e-05, "learning_rate": 2.3891637926725647e-05, "loss": 0.0, "step": 12670 }, { "epoch": 17.003733693207376, "grad_norm": 2.6715468266047537e-05, "learning_rate": 2.386664667366422e-05, "loss": 0.0, "step": 12680 }, { "epoch": 17.004183535762483, "grad_norm": 0.00016753451200202107, "learning_rate": 2.384165542060279e-05, "loss": 0.0, "step": 12690 }, { "epoch": 17.00463337831759, "grad_norm": 0.00020678855071309954, "learning_rate": 2.3816664167541362e-05, "loss": 0.0, "step": 12700 }, { "epoch": 17.005083220872695, "grad_norm": 0.00019322472508065403, "learning_rate": 2.3791672914479936e-05, "loss": 0.0, "step": 12710 }, { "epoch": 17.0055330634278, "grad_norm": 5.782678636023775e-05, "learning_rate": 2.3766681661418506e-05, "loss": 0.0, "step": 12720 }, { "epoch": 17.005982905982908, "grad_norm": 9.794850484468043e-05, "learning_rate": 2.3741690408357077e-05, "loss": 0.0, "step": 12730 }, { "epoch": 17.00643274853801, "grad_norm": 1.9544906535884365e-05, "learning_rate": 2.3716699155295647e-05, "loss": 0.0, "step": 12740 }, { "epoch": 17.006882591093117, "grad_norm": 6.616911559831351e-05, "learning_rate": 2.369170790223422e-05, "loss": 0.0, "step": 12750 }, { "epoch": 17.007332433648223, "grad_norm": 6.444547761930153e-05, "learning_rate": 2.3666716649172792e-05, "loss": 0.0, "step": 12760 }, { "epoch": 17.00778227620333, "grad_norm": 8.0848069046624e-05, "learning_rate": 2.3641725396111362e-05, "loss": 0.3065, "step": 12770 }, { "epoch": 17.008232118758436, "grad_norm": 7.264141459017992e-05, "learning_rate": 2.3616734143049933e-05, "loss": 0.0, "step": 12780 }, { "epoch": 17.008681961313542, "grad_norm": 0.0003356850938871503, "learning_rate": 2.3591742889988507e-05, "loss": 0.0, "step": 12790 }, { "epoch": 17.009131803868645, "grad_norm": 2.9646009352291003e-05, "learning_rate": 2.3566751636927077e-05, "loss": 0.0, "step": 12800 }, { "epoch": 17.00958164642375, "grad_norm": 0.00021348975133150816, "learning_rate": 2.3541760383865648e-05, "loss": 0.0, "step": 12810 }, { "epoch": 17.010031488978857, "grad_norm": 0.0004335721314419061, "learning_rate": 2.351676913080422e-05, "loss": 0.0, "step": 12820 }, { "epoch": 17.010481331533963, "grad_norm": 0.00017695478163659573, "learning_rate": 2.349177787774279e-05, "loss": 0.0, "step": 12830 }, { "epoch": 17.01093117408907, "grad_norm": 6.28066627541557e-05, "learning_rate": 2.3466786624681363e-05, "loss": 0.0, "step": 12840 }, { "epoch": 17.011381016644176, "grad_norm": 9.240847430191934e-05, "learning_rate": 2.3441795371619933e-05, "loss": 0.0, "step": 12850 }, { "epoch": 17.01183085919928, "grad_norm": 2.2763128072256222e-05, "learning_rate": 2.3416804118558504e-05, "loss": 0.0, "step": 12860 }, { "epoch": 17.012280701754385, "grad_norm": 2.9835151508450508e-05, "learning_rate": 2.3391812865497074e-05, "loss": 0.0, "step": 12870 }, { "epoch": 17.01273054430949, "grad_norm": 0.0001948346325661987, "learning_rate": 2.3366821612435648e-05, "loss": 0.0, "step": 12880 }, { "epoch": 17.013180386864597, "grad_norm": 0.0001966464042197913, "learning_rate": 2.334183035937422e-05, "loss": 0.0, "step": 12890 }, { "epoch": 17.013630229419704, "grad_norm": 1.4977697901485953e-05, "learning_rate": 2.331683910631279e-05, "loss": 0.0, "step": 12900 }, { "epoch": 17.01408007197481, "grad_norm": 0.0002130919456249103, "learning_rate": 2.3291847853251363e-05, "loss": 0.0, "step": 12910 }, { "epoch": 17.014529914529916, "grad_norm": 8.009717566892505e-05, "learning_rate": 2.3266856600189934e-05, "loss": 0.0, "step": 12920 }, { "epoch": 17.01497975708502, "grad_norm": 0.0001520434598205611, "learning_rate": 2.3241865347128504e-05, "loss": 0.0, "step": 12930 }, { "epoch": 17.015429599640125, "grad_norm": 7.912003638921306e-05, "learning_rate": 2.3216874094067078e-05, "loss": 0.0, "step": 12940 }, { "epoch": 17.01587944219523, "grad_norm": 3.507011933834292e-05, "learning_rate": 2.319188284100565e-05, "loss": 0.0, "step": 12950 }, { "epoch": 17.016329284750338, "grad_norm": 0.00019048333342652768, "learning_rate": 2.316689158794422e-05, "loss": 0.0, "step": 12960 }, { "epoch": 17.016779127305444, "grad_norm": 7.044454832794145e-05, "learning_rate": 2.3141900334882793e-05, "loss": 0.0, "step": 12970 }, { "epoch": 17.01722896986055, "grad_norm": 2.70738237304613e-05, "learning_rate": 2.3116909081821364e-05, "loss": 0.0, "step": 12980 }, { "epoch": 17.017678812415653, "grad_norm": 0.00013498218322638422, "learning_rate": 2.3091917828759934e-05, "loss": 0.0, "step": 12990 }, { "epoch": 17.01812865497076, "grad_norm": 6.619872874580324e-05, "learning_rate": 2.3066926575698505e-05, "loss": 0.0, "step": 13000 }, { "epoch": 17.018578497525866, "grad_norm": 0.0001674070517765358, "learning_rate": 2.304193532263708e-05, "loss": 0.0, "step": 13010 }, { "epoch": 17.019028340080972, "grad_norm": 6.599447078770027e-05, "learning_rate": 2.301694406957565e-05, "loss": 0.0, "step": 13020 }, { "epoch": 17.019478182636078, "grad_norm": 6.373401993187144e-05, "learning_rate": 2.299195281651422e-05, "loss": 0.0, "step": 13030 }, { "epoch": 17.019928025191184, "grad_norm": 2.741257230809424e-05, "learning_rate": 2.2966961563452793e-05, "loss": 0.0, "step": 13040 }, { "epoch": 17.020377867746287, "grad_norm": 3.3254778827540576e-05, "learning_rate": 2.2941970310391364e-05, "loss": 0.2261, "step": 13050 }, { "epoch": 17.020827710301393, "grad_norm": 0.00010442626080475748, "learning_rate": 2.2916979057329934e-05, "loss": 0.0, "step": 13060 }, { "epoch": 17.0212775528565, "grad_norm": 9.868868801277131e-05, "learning_rate": 2.289198780426851e-05, "loss": 0.0, "step": 13070 }, { "epoch": 17.021727395411606, "grad_norm": 2.7685786335496232e-05, "learning_rate": 2.286699655120708e-05, "loss": 0.0, "step": 13080 }, { "epoch": 17.022177237966712, "grad_norm": 2.1736814233008772e-05, "learning_rate": 2.284200529814565e-05, "loss": 0.0, "step": 13090 }, { "epoch": 17.02262708052182, "grad_norm": 6.275502528296784e-05, "learning_rate": 2.2817014045084223e-05, "loss": 0.0, "step": 13100 }, { "epoch": 17.023076923076925, "grad_norm": 6.224397657206282e-05, "learning_rate": 2.2792022792022794e-05, "loss": 0.0, "step": 13110 }, { "epoch": 17.023526765632027, "grad_norm": 5.3004965593572706e-05, "learning_rate": 2.2767031538961364e-05, "loss": 0.0, "step": 13120 }, { "epoch": 17.023976608187134, "grad_norm": 0.00020508788293227553, "learning_rate": 2.2742040285899935e-05, "loss": 0.0, "step": 13130 }, { "epoch": 17.02442645074224, "grad_norm": 6.470280641224235e-05, "learning_rate": 2.271704903283851e-05, "loss": 0.0, "step": 13140 }, { "epoch": 17.024876293297346, "grad_norm": 2.009273703151848e-05, "learning_rate": 2.269205777977708e-05, "loss": 0.0, "step": 13150 }, { "epoch": 17.025326135852453, "grad_norm": 0.00015423147124238312, "learning_rate": 2.266706652671565e-05, "loss": 0.0, "step": 13160 }, { "epoch": 17.02577597840756, "grad_norm": 4.953348252456635e-05, "learning_rate": 2.2642075273654224e-05, "loss": 0.0, "step": 13170 }, { "epoch": 17.02622582096266, "grad_norm": 9.755884821061045e-05, "learning_rate": 2.2617084020592794e-05, "loss": 0.0278, "step": 13180 }, { "epoch": 17.026675663517768, "grad_norm": 6.180327909532934e-05, "learning_rate": 2.2592092767531365e-05, "loss": 0.0, "step": 13190 }, { "epoch": 17.027125506072874, "grad_norm": 5.623041215585545e-05, "learning_rate": 2.256710151446994e-05, "loss": 0.0, "step": 13200 }, { "epoch": 17.02757534862798, "grad_norm": 4.467675898922607e-05, "learning_rate": 2.254211026140851e-05, "loss": 0.2083, "step": 13210 }, { "epoch": 17.028025191183087, "grad_norm": 2.24194136535516e-05, "learning_rate": 2.251711900834708e-05, "loss": 0.0, "step": 13220 }, { "epoch": 17.028475033738193, "grad_norm": 6.26306573394686e-05, "learning_rate": 2.2492127755285654e-05, "loss": 0.0, "step": 13230 }, { "epoch": 17.028924876293296, "grad_norm": 3.3596377761568874e-05, "learning_rate": 2.2467136502224224e-05, "loss": 0.0, "step": 13240 }, { "epoch": 17.029374718848402, "grad_norm": 1.0662932254490443e-05, "learning_rate": 2.2442145249162795e-05, "loss": 0.0, "step": 13250 }, { "epoch": 17.029824561403508, "grad_norm": 0.0003233685565646738, "learning_rate": 2.2417153996101365e-05, "loss": 0.0, "step": 13260 }, { "epoch": 17.030274403958614, "grad_norm": 8.707219240022823e-05, "learning_rate": 2.239216274303994e-05, "loss": 0.0, "step": 13270 }, { "epoch": 17.03072424651372, "grad_norm": 5.722818605136126e-05, "learning_rate": 2.236717148997851e-05, "loss": 0.0, "step": 13280 }, { "epoch": 17.031174089068827, "grad_norm": 0.00019058040925301611, "learning_rate": 2.234218023691708e-05, "loss": 0.0, "step": 13290 }, { "epoch": 17.031623931623933, "grad_norm": 5.1392893510637805e-05, "learning_rate": 2.2317188983855654e-05, "loss": 0.0, "step": 13300 }, { "epoch": 17.032073774179036, "grad_norm": 7.419912435580045e-05, "learning_rate": 2.2292197730794224e-05, "loss": 0.0, "step": 13310 }, { "epoch": 17.032523616734142, "grad_norm": 4.607512164511718e-05, "learning_rate": 2.2267206477732795e-05, "loss": 0.0269, "step": 13320 }, { "epoch": 17.03297345928925, "grad_norm": 5.807312481920235e-05, "learning_rate": 2.2242215224671366e-05, "loss": 0.0, "step": 13330 }, { "epoch": 17.033333333333335, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9147392357656249, "eval_loss": 0.7821058630943298, "eval_runtime": 138.5934, "eval_samples_per_second": 1.551, "eval_steps_per_second": 1.551, "step": 13338 }, { "epoch": 18.00008996851102, "grad_norm": 5.483150016516447e-05, "learning_rate": 2.221722397160994e-05, "loss": 0.0, "step": 13340 }, { "epoch": 18.000539811066126, "grad_norm": 0.00015658343909308314, "learning_rate": 2.219223271854851e-05, "loss": 0.0, "step": 13350 }, { "epoch": 18.000989653621232, "grad_norm": 5.4562246077694e-05, "learning_rate": 2.216724146548708e-05, "loss": 0.0, "step": 13360 }, { "epoch": 18.00143949617634, "grad_norm": 7.551629096269608e-05, "learning_rate": 2.214225021242565e-05, "loss": 0.0, "step": 13370 }, { "epoch": 18.001889338731445, "grad_norm": 0.00029881909722462296, "learning_rate": 2.2117258959364225e-05, "loss": 0.0, "step": 13380 }, { "epoch": 18.00233918128655, "grad_norm": 7.842564809834585e-05, "learning_rate": 2.2092267706302795e-05, "loss": 0.0, "step": 13390 }, { "epoch": 18.002789023841654, "grad_norm": 5.530476846615784e-05, "learning_rate": 2.2067276453241366e-05, "loss": 0.0, "step": 13400 }, { "epoch": 18.00323886639676, "grad_norm": 1.0733284398156684e-05, "learning_rate": 2.2042285200179936e-05, "loss": 0.0, "step": 13410 }, { "epoch": 18.003688708951866, "grad_norm": 0.00011710361286532134, "learning_rate": 2.2017293947118507e-05, "loss": 0.0, "step": 13420 }, { "epoch": 18.004138551506973, "grad_norm": 7.415860454784706e-05, "learning_rate": 2.199230269405708e-05, "loss": 0.0, "step": 13430 }, { "epoch": 18.00458839406208, "grad_norm": 0.00035124015994369984, "learning_rate": 2.196731144099565e-05, "loss": 0.0, "step": 13440 }, { "epoch": 18.005038236617185, "grad_norm": 5.070868064649403e-05, "learning_rate": 2.1942320187934222e-05, "loss": 0.0, "step": 13450 }, { "epoch": 18.005488079172288, "grad_norm": 0.00022137133055366576, "learning_rate": 2.1917328934872796e-05, "loss": 0.0, "step": 13460 }, { "epoch": 18.005937921727394, "grad_norm": 7.331067172344774e-05, "learning_rate": 2.1892337681811366e-05, "loss": 0.0133, "step": 13470 }, { "epoch": 18.0063877642825, "grad_norm": 2.320470412087161e-05, "learning_rate": 2.1867346428749937e-05, "loss": 0.0, "step": 13480 }, { "epoch": 18.006837606837607, "grad_norm": 1.4898135304974858e-05, "learning_rate": 2.184235517568851e-05, "loss": 0.0, "step": 13490 }, { "epoch": 18.007287449392713, "grad_norm": 2.944714287878014e-05, "learning_rate": 2.181736392262708e-05, "loss": 0.0, "step": 13500 }, { "epoch": 18.00773729194782, "grad_norm": 5.626433630823158e-05, "learning_rate": 2.1792372669565652e-05, "loss": 0.0, "step": 13510 }, { "epoch": 18.008187134502926, "grad_norm": 0.00017860099615063518, "learning_rate": 2.1767381416504222e-05, "loss": 0.0, "step": 13520 }, { "epoch": 18.00863697705803, "grad_norm": 1.1517636266944464e-05, "learning_rate": 2.1742390163442796e-05, "loss": 0.0, "step": 13530 }, { "epoch": 18.009086819613135, "grad_norm": 4.9624555686023086e-05, "learning_rate": 2.1717398910381367e-05, "loss": 0.0, "step": 13540 }, { "epoch": 18.00953666216824, "grad_norm": 6.268775905482471e-05, "learning_rate": 2.1692407657319937e-05, "loss": 0.0, "step": 13550 }, { "epoch": 18.009986504723347, "grad_norm": 2.0203098756610416e-05, "learning_rate": 2.166741640425851e-05, "loss": 0.0, "step": 13560 }, { "epoch": 18.010436347278453, "grad_norm": 5.930092811468057e-05, "learning_rate": 2.164242515119708e-05, "loss": 0.0066, "step": 13570 }, { "epoch": 18.01088618983356, "grad_norm": 1.07578653114615e-05, "learning_rate": 2.1617433898135652e-05, "loss": 0.0, "step": 13580 }, { "epoch": 18.011336032388662, "grad_norm": 5.7607805501902476e-05, "learning_rate": 2.1592442645074226e-05, "loss": 0.0, "step": 13590 }, { "epoch": 18.01178587494377, "grad_norm": 2.243590097350534e-05, "learning_rate": 2.1567451392012797e-05, "loss": 0.0, "step": 13600 }, { "epoch": 18.012235717498875, "grad_norm": 0.00010859694884857163, "learning_rate": 2.1542460138951367e-05, "loss": 0.0, "step": 13610 }, { "epoch": 18.01268556005398, "grad_norm": 2.3785549274180084e-05, "learning_rate": 2.151746888588994e-05, "loss": 0.0, "step": 13620 }, { "epoch": 18.013135402609088, "grad_norm": 0.00011240597814321518, "learning_rate": 2.149247763282851e-05, "loss": 0.0, "step": 13630 }, { "epoch": 18.013585245164194, "grad_norm": 4.919056664220989e-05, "learning_rate": 2.1467486379767082e-05, "loss": 0.0, "step": 13640 }, { "epoch": 18.014035087719297, "grad_norm": 9.91144625004381e-05, "learning_rate": 2.1442495126705653e-05, "loss": 0.0, "step": 13650 }, { "epoch": 18.014484930274403, "grad_norm": 0.00011071647168137133, "learning_rate": 2.1417503873644226e-05, "loss": 0.0, "step": 13660 }, { "epoch": 18.01493477282951, "grad_norm": 4.320732114138082e-05, "learning_rate": 2.1392512620582797e-05, "loss": 0.0, "step": 13670 }, { "epoch": 18.015384615384615, "grad_norm": 0.0001519931247457862, "learning_rate": 2.1367521367521368e-05, "loss": 0.0, "step": 13680 }, { "epoch": 18.01583445793972, "grad_norm": 0.00012933544348925352, "learning_rate": 2.134253011445994e-05, "loss": 0.0, "step": 13690 }, { "epoch": 18.016284300494828, "grad_norm": 8.911939221434295e-05, "learning_rate": 2.1317538861398512e-05, "loss": 0.0, "step": 13700 }, { "epoch": 18.016734143049934, "grad_norm": 6.675777694908902e-05, "learning_rate": 2.1292547608337082e-05, "loss": 0.6577, "step": 13710 }, { "epoch": 18.017183985605037, "grad_norm": 1.9774244719883427e-05, "learning_rate": 2.1267556355275656e-05, "loss": 0.0, "step": 13720 }, { "epoch": 18.017633828160143, "grad_norm": 1.4986289897933602e-05, "learning_rate": 2.1242565102214227e-05, "loss": 0.0, "step": 13730 }, { "epoch": 18.01808367071525, "grad_norm": 2.202994619437959e-05, "learning_rate": 2.1217573849152797e-05, "loss": 0.0, "step": 13740 }, { "epoch": 18.018533513270356, "grad_norm": 5.6772078096400946e-05, "learning_rate": 2.119258259609137e-05, "loss": 0.0, "step": 13750 }, { "epoch": 18.018983355825462, "grad_norm": 5.254266216070391e-05, "learning_rate": 2.1167591343029942e-05, "loss": 0.0, "step": 13760 }, { "epoch": 18.01943319838057, "grad_norm": 5.1801951485686004e-05, "learning_rate": 2.1142600089968512e-05, "loss": 0.0, "step": 13770 }, { "epoch": 18.01988304093567, "grad_norm": 0.00010733840463217348, "learning_rate": 2.1117608836907083e-05, "loss": 0.0, "step": 13780 }, { "epoch": 18.020332883490777, "grad_norm": 0.00014498195378109813, "learning_rate": 2.1092617583845657e-05, "loss": 0.0, "step": 13790 }, { "epoch": 18.020782726045883, "grad_norm": 5.8233621530234814e-05, "learning_rate": 2.1067626330784227e-05, "loss": 0.0, "step": 13800 }, { "epoch": 18.02123256860099, "grad_norm": 0.0001375300344079733, "learning_rate": 2.1042635077722798e-05, "loss": 0.0, "step": 13810 }, { "epoch": 18.021682411156096, "grad_norm": 0.00010820035822689533, "learning_rate": 2.101764382466137e-05, "loss": 0.0, "step": 13820 }, { "epoch": 18.022132253711202, "grad_norm": 0.00014155653479974717, "learning_rate": 2.0992652571599942e-05, "loss": 0.2145, "step": 13830 }, { "epoch": 18.022582096266305, "grad_norm": 7.766908674966544e-05, "learning_rate": 2.0967661318538513e-05, "loss": 0.0, "step": 13840 }, { "epoch": 18.02303193882141, "grad_norm": 5.034452260588296e-05, "learning_rate": 2.0942670065477087e-05, "loss": 0.0, "step": 13850 }, { "epoch": 18.023481781376518, "grad_norm": 0.00019449973478913307, "learning_rate": 2.0917678812415657e-05, "loss": 0.0, "step": 13860 }, { "epoch": 18.023931623931624, "grad_norm": 0.00017697630391921848, "learning_rate": 2.0892687559354228e-05, "loss": 0.0, "step": 13870 }, { "epoch": 18.02438146648673, "grad_norm": 0.0002889737079385668, "learning_rate": 2.0867696306292798e-05, "loss": 0.0, "step": 13880 }, { "epoch": 18.024831309041836, "grad_norm": 6.000805296935141e-05, "learning_rate": 2.0842705053231372e-05, "loss": 0.0, "step": 13890 }, { "epoch": 18.025281151596943, "grad_norm": 0.00017068527813535184, "learning_rate": 2.0817713800169943e-05, "loss": 0.0, "step": 13900 }, { "epoch": 18.025730994152045, "grad_norm": 4.639056351152249e-05, "learning_rate": 2.0792722547108513e-05, "loss": 0.0, "step": 13910 }, { "epoch": 18.02618083670715, "grad_norm": 0.00021335770725272596, "learning_rate": 2.0767731294047084e-05, "loss": 0.0, "step": 13920 }, { "epoch": 18.026630679262258, "grad_norm": 2.0693731130450033e-05, "learning_rate": 2.0742740040985658e-05, "loss": 0.0, "step": 13930 }, { "epoch": 18.027080521817364, "grad_norm": 0.00016776267148088664, "learning_rate": 2.0717748787924228e-05, "loss": 0.0, "step": 13940 }, { "epoch": 18.02753036437247, "grad_norm": 9.437461267225444e-05, "learning_rate": 2.06927575348628e-05, "loss": 0.0, "step": 13950 }, { "epoch": 18.027980206927577, "grad_norm": 7.0253143310546875, "learning_rate": 2.066776628180137e-05, "loss": 0.0248, "step": 13960 }, { "epoch": 18.02843004948268, "grad_norm": 8.818862988846377e-05, "learning_rate": 2.064277502873994e-05, "loss": 0.0, "step": 13970 }, { "epoch": 18.028879892037786, "grad_norm": 0.00014604382158722728, "learning_rate": 2.0617783775678514e-05, "loss": 0.0, "step": 13980 }, { "epoch": 18.029329734592892, "grad_norm": 5.273849092191085e-05, "learning_rate": 2.0592792522617084e-05, "loss": 0.0, "step": 13990 }, { "epoch": 18.029779577148, "grad_norm": 0.00010060014756163582, "learning_rate": 2.0567801269555655e-05, "loss": 0.0, "step": 14000 }, { "epoch": 18.030229419703105, "grad_norm": 5.0713028031168506e-05, "learning_rate": 2.0542810016494225e-05, "loss": 0.0, "step": 14010 }, { "epoch": 18.03067926225821, "grad_norm": 5.2299645176390186e-05, "learning_rate": 2.05178187634328e-05, "loss": 0.0, "step": 14020 }, { "epoch": 18.031129104813317, "grad_norm": 5.8988702221540734e-05, "learning_rate": 2.049282751037137e-05, "loss": 0.0, "step": 14030 }, { "epoch": 18.03157894736842, "grad_norm": 1.700358734524343e-05, "learning_rate": 2.046783625730994e-05, "loss": 0.0, "step": 14040 }, { "epoch": 18.032028789923526, "grad_norm": 0.00021670041314791888, "learning_rate": 2.0442845004248514e-05, "loss": 0.0, "step": 14050 }, { "epoch": 18.032478632478632, "grad_norm": 6.12562071182765e-05, "learning_rate": 2.0417853751187084e-05, "loss": 0.0, "step": 14060 }, { "epoch": 18.03292847503374, "grad_norm": 4.259039997123182e-05, "learning_rate": 2.0392862498125655e-05, "loss": 0.0, "step": 14070 }, { "epoch": 18.033333333333335, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9148995631629983, "eval_loss": 0.7897689342498779, "eval_runtime": 140.755, "eval_samples_per_second": 1.527, "eval_steps_per_second": 1.527, "step": 14079 }, { "epoch": 19.00004498425551, "grad_norm": 1.1039642231480684e-05, "learning_rate": 2.036787124506423e-05, "loss": 0.0, "step": 14080 }, { "epoch": 19.000494826810616, "grad_norm": 5.7648761867312714e-05, "learning_rate": 2.03428799920028e-05, "loss": 0.0, "step": 14090 }, { "epoch": 19.000944669365722, "grad_norm": 6.124685023678467e-05, "learning_rate": 2.031788873894137e-05, "loss": 0.0, "step": 14100 }, { "epoch": 19.00139451192083, "grad_norm": 7.301846926566213e-05, "learning_rate": 2.0292897485879944e-05, "loss": 0.0, "step": 14110 }, { "epoch": 19.001844354475935, "grad_norm": 1.1434106454544235e-05, "learning_rate": 2.0267906232818514e-05, "loss": 0.0, "step": 14120 }, { "epoch": 19.002294197031038, "grad_norm": 1.5610630725859664e-05, "learning_rate": 2.0242914979757085e-05, "loss": 0.0, "step": 14130 }, { "epoch": 19.002744039586144, "grad_norm": 1.1301989616185892e-05, "learning_rate": 2.0217923726695655e-05, "loss": 0.0, "step": 14140 }, { "epoch": 19.00319388214125, "grad_norm": 0.00017064204439520836, "learning_rate": 2.019293247363423e-05, "loss": 0.0, "step": 14150 }, { "epoch": 19.003643724696357, "grad_norm": 8.471190085401759e-05, "learning_rate": 2.01679412205728e-05, "loss": 0.0, "step": 14160 }, { "epoch": 19.004093567251463, "grad_norm": 1.5257011909852736e-05, "learning_rate": 2.014294996751137e-05, "loss": 0.0, "step": 14170 }, { "epoch": 19.00454340980657, "grad_norm": 5.6844593927962705e-05, "learning_rate": 2.0117958714449944e-05, "loss": 0.0, "step": 14180 }, { "epoch": 19.004993252361672, "grad_norm": 4.6091812691884115e-05, "learning_rate": 2.0092967461388515e-05, "loss": 0.0, "step": 14190 }, { "epoch": 19.005443094916778, "grad_norm": 1.0579492482065689e-05, "learning_rate": 2.0067976208327085e-05, "loss": 0.013, "step": 14200 }, { "epoch": 19.005892937471884, "grad_norm": 1.4395471225725487e-05, "learning_rate": 2.004298495526566e-05, "loss": 0.0, "step": 14210 }, { "epoch": 19.00634278002699, "grad_norm": 4.660670310840942e-05, "learning_rate": 2.001799370220423e-05, "loss": 0.0, "step": 14220 }, { "epoch": 19.006792622582097, "grad_norm": 1.0320351975678932e-05, "learning_rate": 1.99930024491428e-05, "loss": 0.0, "step": 14230 }, { "epoch": 19.007242465137203, "grad_norm": 7.319361611735076e-05, "learning_rate": 1.9968011196081374e-05, "loss": 0.0, "step": 14240 }, { "epoch": 19.00769230769231, "grad_norm": 3.016916525666602e-05, "learning_rate": 1.9943019943019945e-05, "loss": 0.0, "step": 14250 }, { "epoch": 19.008142150247412, "grad_norm": 4.895834717899561e-05, "learning_rate": 1.9918028689958515e-05, "loss": 0.0, "step": 14260 }, { "epoch": 19.00859199280252, "grad_norm": 1.472072926844703e-05, "learning_rate": 1.9893037436897086e-05, "loss": 0.0, "step": 14270 }, { "epoch": 19.009041835357625, "grad_norm": 9.543371561449021e-05, "learning_rate": 1.986804618383566e-05, "loss": 0.0, "step": 14280 }, { "epoch": 19.00949167791273, "grad_norm": 4.663685103878379e-05, "learning_rate": 1.984305493077423e-05, "loss": 0.2747, "step": 14290 }, { "epoch": 19.009941520467837, "grad_norm": 4.8441841499879956e-05, "learning_rate": 1.98180636777128e-05, "loss": 0.0, "step": 14300 }, { "epoch": 19.010391363022944, "grad_norm": 5.71393538848497e-05, "learning_rate": 1.9793072424651374e-05, "loss": 0.0117, "step": 14310 }, { "epoch": 19.010841205578046, "grad_norm": 0.00011387177073629573, "learning_rate": 1.9768081171589945e-05, "loss": 0.0, "step": 14320 }, { "epoch": 19.011291048133153, "grad_norm": 0.00011380530486349016, "learning_rate": 1.9743089918528515e-05, "loss": 0.0, "step": 14330 }, { "epoch": 19.01174089068826, "grad_norm": 2.2790763978264295e-05, "learning_rate": 1.971809866546709e-05, "loss": 0.0, "step": 14340 }, { "epoch": 19.012190733243365, "grad_norm": 9.649181447457522e-05, "learning_rate": 1.969310741240566e-05, "loss": 0.0, "step": 14350 }, { "epoch": 19.01264057579847, "grad_norm": 5.3576703066937625e-05, "learning_rate": 1.966811615934423e-05, "loss": 0.0, "step": 14360 }, { "epoch": 19.013090418353578, "grad_norm": 4.662879655370489e-05, "learning_rate": 1.9643124906282804e-05, "loss": 0.0, "step": 14370 }, { "epoch": 19.01354026090868, "grad_norm": 0.0001452353608328849, "learning_rate": 1.9618133653221375e-05, "loss": 0.0, "step": 14380 }, { "epoch": 19.013990103463787, "grad_norm": 1.0606477189867292e-05, "learning_rate": 1.9593142400159945e-05, "loss": 0.0, "step": 14390 }, { "epoch": 19.014439946018893, "grad_norm": 0.00013969159044791013, "learning_rate": 1.9568151147098516e-05, "loss": 0.0, "step": 14400 }, { "epoch": 19.014889788574, "grad_norm": 1.022082597046392e-05, "learning_rate": 1.954315989403709e-05, "loss": 0.0, "step": 14410 }, { "epoch": 19.015339631129105, "grad_norm": 0.00010005366493714973, "learning_rate": 1.951816864097566e-05, "loss": 0.0, "step": 14420 }, { "epoch": 19.01578947368421, "grad_norm": 0.00014606289914809167, "learning_rate": 1.949317738791423e-05, "loss": 0.0, "step": 14430 }, { "epoch": 19.016239316239318, "grad_norm": 0.00011967158934567124, "learning_rate": 1.9468186134852805e-05, "loss": 0.0, "step": 14440 }, { "epoch": 19.01668915879442, "grad_norm": 4.113120667170733e-05, "learning_rate": 1.9443194881791375e-05, "loss": 0.0, "step": 14450 }, { "epoch": 19.017139001349527, "grad_norm": 3.971313708461821e-05, "learning_rate": 1.9418203628729946e-05, "loss": 0.2763, "step": 14460 }, { "epoch": 19.017588843904633, "grad_norm": 4.3345396989025176e-05, "learning_rate": 1.939321237566852e-05, "loss": 0.0, "step": 14470 }, { "epoch": 19.01803868645974, "grad_norm": 4.4653254008153453e-05, "learning_rate": 1.936822112260709e-05, "loss": 0.0, "step": 14480 }, { "epoch": 19.018488529014846, "grad_norm": 4.632178752217442e-05, "learning_rate": 1.934322986954566e-05, "loss": 0.0, "step": 14490 }, { "epoch": 19.018938371569952, "grad_norm": 1.064251409843564e-05, "learning_rate": 1.931823861648423e-05, "loss": 0.0, "step": 14500 }, { "epoch": 19.019388214125055, "grad_norm": 4.600699321599677e-05, "learning_rate": 1.9293247363422802e-05, "loss": 0.0, "step": 14510 }, { "epoch": 19.01983805668016, "grad_norm": 1.6008989405236207e-05, "learning_rate": 1.9268256110361376e-05, "loss": 0.0, "step": 14520 }, { "epoch": 19.020287899235267, "grad_norm": 4.708122287411243e-05, "learning_rate": 1.9243264857299946e-05, "loss": 1.1088, "step": 14530 }, { "epoch": 19.020737741790374, "grad_norm": 0.0009263704996556044, "learning_rate": 1.9218273604238517e-05, "loss": 0.0, "step": 14540 }, { "epoch": 19.02118758434548, "grad_norm": 614.4429931640625, "learning_rate": 1.9193282351177087e-05, "loss": 0.6211, "step": 14550 }, { "epoch": 19.021637426900586, "grad_norm": 6.410405330825597e-05, "learning_rate": 1.9168291098115658e-05, "loss": 0.0, "step": 14560 }, { "epoch": 19.02208726945569, "grad_norm": 0.0001717218547128141, "learning_rate": 1.914329984505423e-05, "loss": 0.0, "step": 14570 }, { "epoch": 19.022537112010795, "grad_norm": 490.1104431152344, "learning_rate": 1.9118308591992802e-05, "loss": 0.4778, "step": 14580 }, { "epoch": 19.0229869545659, "grad_norm": 0.00038774669519625604, "learning_rate": 1.9093317338931373e-05, "loss": 0.0001, "step": 14590 }, { "epoch": 19.023436797121008, "grad_norm": 2.0246301573934034e-05, "learning_rate": 1.9068326085869947e-05, "loss": 0.0, "step": 14600 }, { "epoch": 19.023886639676114, "grad_norm": 5.87751273997128e-05, "learning_rate": 1.9043334832808517e-05, "loss": 0.0, "step": 14610 }, { "epoch": 19.02433648223122, "grad_norm": 5.758153201895766e-05, "learning_rate": 1.9018343579747088e-05, "loss": 0.0, "step": 14620 }, { "epoch": 19.024786324786326, "grad_norm": 0.0004094851028639823, "learning_rate": 1.899335232668566e-05, "loss": 0.0, "step": 14630 }, { "epoch": 19.02523616734143, "grad_norm": 0.00022166460985317826, "learning_rate": 1.8968361073624232e-05, "loss": 0.0122, "step": 14640 }, { "epoch": 19.025686009896535, "grad_norm": 7.218680548248813e-05, "learning_rate": 1.8943369820562803e-05, "loss": 0.0, "step": 14650 }, { "epoch": 19.02613585245164, "grad_norm": 0.00027647704700939357, "learning_rate": 1.8918378567501373e-05, "loss": 0.0, "step": 14660 }, { "epoch": 19.026585695006748, "grad_norm": 1.4640027984569315e-05, "learning_rate": 1.8893387314439947e-05, "loss": 0.6119, "step": 14670 }, { "epoch": 19.027035537561854, "grad_norm": 1.9758963389904238e-05, "learning_rate": 1.8868396061378517e-05, "loss": 0.0025, "step": 14680 }, { "epoch": 19.02748538011696, "grad_norm": 6.904696783749387e-05, "learning_rate": 1.8843404808317088e-05, "loss": 0.0, "step": 14690 }, { "epoch": 19.027935222672063, "grad_norm": 7.100107904989272e-05, "learning_rate": 1.8818413555255662e-05, "loss": 0.0, "step": 14700 }, { "epoch": 19.02838506522717, "grad_norm": 2.0463559849304147e-05, "learning_rate": 1.8793422302194232e-05, "loss": 0.0, "step": 14710 }, { "epoch": 19.028834907782276, "grad_norm": 5.6653156207175925e-05, "learning_rate": 1.8768431049132803e-05, "loss": 0.0, "step": 14720 }, { "epoch": 19.029284750337382, "grad_norm": 8.942672866396606e-05, "learning_rate": 1.8743439796071377e-05, "loss": 0.0, "step": 14730 }, { "epoch": 19.02973459289249, "grad_norm": 0.00024000680423341691, "learning_rate": 1.8718448543009947e-05, "loss": 0.0, "step": 14740 }, { "epoch": 19.030184435447595, "grad_norm": 0.00029164779698476195, "learning_rate": 1.8693457289948518e-05, "loss": 0.0, "step": 14750 }, { "epoch": 19.030634278002697, "grad_norm": 0.0005781080108135939, "learning_rate": 1.8668466036887092e-05, "loss": 0.0, "step": 14760 }, { "epoch": 19.031084120557804, "grad_norm": 6.452616071328521e-05, "learning_rate": 1.8643474783825662e-05, "loss": 0.0, "step": 14770 }, { "epoch": 19.03153396311291, "grad_norm": 0.0002783963573165238, "learning_rate": 1.8618483530764233e-05, "loss": 0.0, "step": 14780 }, { "epoch": 19.031983805668016, "grad_norm": 6.687716813758016e-05, "learning_rate": 1.8593492277702803e-05, "loss": 0.2626, "step": 14790 }, { "epoch": 19.032433648223122, "grad_norm": 7.202118285931647e-05, "learning_rate": 1.8568501024641377e-05, "loss": 0.0, "step": 14800 }, { "epoch": 19.03288349077823, "grad_norm": 1.5672780136810616e-05, "learning_rate": 1.8543509771579948e-05, "loss": 0.0001, "step": 14810 }, { "epoch": 19.033333333333335, "grad_norm": 7.143709808588028e-05, "learning_rate": 1.8518518518518518e-05, "loss": 0.0, "step": 14820 }, { "epoch": 19.033333333333335, "eval_accuracy": 0.8790697674418605, "eval_f1": 0.874810085703679, "eval_loss": 1.0165752172470093, "eval_runtime": 141.4698, "eval_samples_per_second": 1.52, "eval_steps_per_second": 1.52, "step": 14820 }, { "epoch": 20.000449842555106, "grad_norm": 6.498039147118106e-05, "learning_rate": 1.8493527265457092e-05, "loss": 0.0, "step": 14830 }, { "epoch": 20.000899685110213, "grad_norm": 0.00018787650333251804, "learning_rate": 1.8468536012395663e-05, "loss": 0.0, "step": 14840 }, { "epoch": 20.00134952766532, "grad_norm": 0.00011158216511830688, "learning_rate": 1.8443544759334233e-05, "loss": 0.1834, "step": 14850 }, { "epoch": 20.00179937022042, "grad_norm": 5.381126902648248e-05, "learning_rate": 1.8418553506272807e-05, "loss": 0.0, "step": 14860 }, { "epoch": 20.002249212775528, "grad_norm": 0.0007442950154654682, "learning_rate": 1.8393562253211378e-05, "loss": 0.0, "step": 14870 }, { "epoch": 20.002699055330634, "grad_norm": 4.8793222958920524e-05, "learning_rate": 1.8368571000149948e-05, "loss": 0.2188, "step": 14880 }, { "epoch": 20.00314889788574, "grad_norm": 5.59104882995598e-05, "learning_rate": 1.8343579747088522e-05, "loss": 0.0, "step": 14890 }, { "epoch": 20.003598740440847, "grad_norm": 5.503738429979421e-05, "learning_rate": 1.8318588494027093e-05, "loss": 0.0, "step": 14900 }, { "epoch": 20.004048582995953, "grad_norm": 2.2864727725391276e-05, "learning_rate": 1.8293597240965663e-05, "loss": 0.0, "step": 14910 }, { "epoch": 20.004498425551056, "grad_norm": 6.511223909910768e-05, "learning_rate": 1.8268605987904234e-05, "loss": 0.0, "step": 14920 }, { "epoch": 20.004948268106162, "grad_norm": 7.691395148867741e-05, "learning_rate": 1.8243614734842808e-05, "loss": 0.0, "step": 14930 }, { "epoch": 20.005398110661268, "grad_norm": 6.417711119865999e-05, "learning_rate": 1.8218623481781378e-05, "loss": 0.0, "step": 14940 }, { "epoch": 20.005847953216374, "grad_norm": 0.0010746776824817061, "learning_rate": 1.819363222871995e-05, "loss": 0.0, "step": 14950 }, { "epoch": 20.00629779577148, "grad_norm": 5.210592280491255e-05, "learning_rate": 1.8168640975658522e-05, "loss": 0.0, "step": 14960 }, { "epoch": 20.006747638326587, "grad_norm": 0.002465253695845604, "learning_rate": 1.8143649722597093e-05, "loss": 0.0, "step": 14970 }, { "epoch": 20.00719748088169, "grad_norm": 3.8758007576689124e-05, "learning_rate": 1.8118658469535663e-05, "loss": 0.0, "step": 14980 }, { "epoch": 20.007647323436796, "grad_norm": 1.1401968549762387e-05, "learning_rate": 1.8093667216474237e-05, "loss": 0.0, "step": 14990 }, { "epoch": 20.008097165991902, "grad_norm": 5.483052882482298e-05, "learning_rate": 1.8068675963412808e-05, "loss": 0.0, "step": 15000 }, { "epoch": 20.00854700854701, "grad_norm": 0.00010887756070587784, "learning_rate": 1.804368471035138e-05, "loss": 0.0, "step": 15010 }, { "epoch": 20.008996851102115, "grad_norm": 0.00021547269716393203, "learning_rate": 1.8018693457289952e-05, "loss": 0.0, "step": 15020 }, { "epoch": 20.00944669365722, "grad_norm": 4.9723839765647426e-05, "learning_rate": 1.7993702204228523e-05, "loss": 0.0, "step": 15030 }, { "epoch": 20.009896536212327, "grad_norm": 0.005576699040830135, "learning_rate": 1.7968710951167093e-05, "loss": 0.0, "step": 15040 }, { "epoch": 20.01034637876743, "grad_norm": 0.00015869362687226385, "learning_rate": 1.7943719698105664e-05, "loss": 0.0, "step": 15050 }, { "epoch": 20.010796221322536, "grad_norm": 0.000182233183295466, "learning_rate": 1.7918728445044234e-05, "loss": 0.0, "step": 15060 }, { "epoch": 20.011246063877643, "grad_norm": 1.4285763427324127e-05, "learning_rate": 1.789373719198281e-05, "loss": 0.0, "step": 15070 }, { "epoch": 20.01169590643275, "grad_norm": 7.55722212488763e-05, "learning_rate": 1.786874593892138e-05, "loss": 0.0, "step": 15080 }, { "epoch": 20.012145748987855, "grad_norm": 3.583104262361303e-05, "learning_rate": 1.784375468585995e-05, "loss": 0.0, "step": 15090 }, { "epoch": 20.01259559154296, "grad_norm": 4.920500941807404e-05, "learning_rate": 1.781876343279852e-05, "loss": 0.0, "step": 15100 }, { "epoch": 20.013045434098064, "grad_norm": 149.66781616210938, "learning_rate": 1.7793772179737094e-05, "loss": 0.0183, "step": 15110 }, { "epoch": 20.01349527665317, "grad_norm": 8.793131564743817e-05, "learning_rate": 1.7768780926675664e-05, "loss": 0.0, "step": 15120 }, { "epoch": 20.013945119208277, "grad_norm": 2.0180643332423642e-05, "learning_rate": 1.7743789673614235e-05, "loss": 0.0, "step": 15130 }, { "epoch": 20.014394961763383, "grad_norm": 0.0001428045507054776, "learning_rate": 1.7718798420552805e-05, "loss": 0.0, "step": 15140 }, { "epoch": 20.01484480431849, "grad_norm": 0.0006640001083724201, "learning_rate": 1.769380716749138e-05, "loss": 0.0, "step": 15150 }, { "epoch": 20.015294646873595, "grad_norm": 1.988168151001446e-05, "learning_rate": 1.766881591442995e-05, "loss": 0.3366, "step": 15160 }, { "epoch": 20.015744489428698, "grad_norm": 0.00014983706932980567, "learning_rate": 1.764382466136852e-05, "loss": 0.0, "step": 15170 }, { "epoch": 20.016194331983804, "grad_norm": 6.500460585812107e-05, "learning_rate": 1.761883340830709e-05, "loss": 0.0, "step": 15180 }, { "epoch": 20.01664417453891, "grad_norm": 0.0009453230304643512, "learning_rate": 1.7593842155245665e-05, "loss": 0.0, "step": 15190 }, { "epoch": 20.017094017094017, "grad_norm": 5.695874642697163e-05, "learning_rate": 1.7568850902184235e-05, "loss": 0.0, "step": 15200 }, { "epoch": 20.017543859649123, "grad_norm": 6.160345947137102e-05, "learning_rate": 1.7543859649122806e-05, "loss": 0.0, "step": 15210 }, { "epoch": 20.01799370220423, "grad_norm": 0.00010116116754943505, "learning_rate": 1.751886839606138e-05, "loss": 0.0, "step": 15220 }, { "epoch": 20.018443544759336, "grad_norm": 6.5195607021451e-05, "learning_rate": 1.749387714299995e-05, "loss": 0.0002, "step": 15230 }, { "epoch": 20.01889338731444, "grad_norm": 4.935001561534591e-05, "learning_rate": 1.746888588993852e-05, "loss": 0.0002, "step": 15240 }, { "epoch": 20.019343229869545, "grad_norm": 5.311663699103519e-05, "learning_rate": 1.7443894636877095e-05, "loss": 0.0, "step": 15250 }, { "epoch": 20.01979307242465, "grad_norm": 6.183510413393378e-05, "learning_rate": 1.7418903383815665e-05, "loss": 1.1859, "step": 15260 }, { "epoch": 20.020242914979757, "grad_norm": 2.1965566702419892e-05, "learning_rate": 1.7393912130754236e-05, "loss": 0.0, "step": 15270 }, { "epoch": 20.020692757534864, "grad_norm": 0.0001186119916383177, "learning_rate": 1.736892087769281e-05, "loss": 0.0, "step": 15280 }, { "epoch": 20.02114260008997, "grad_norm": 0.0001228961191372946, "learning_rate": 1.734392962463138e-05, "loss": 0.0, "step": 15290 }, { "epoch": 20.021592442645073, "grad_norm": 3.09136594296433e-05, "learning_rate": 1.731893837156995e-05, "loss": 0.0, "step": 15300 }, { "epoch": 20.02204228520018, "grad_norm": 0.00010722313163569197, "learning_rate": 1.729394711850852e-05, "loss": 0.0, "step": 15310 }, { "epoch": 20.022492127755285, "grad_norm": 6.701361417071894e-05, "learning_rate": 1.7268955865447095e-05, "loss": 0.0, "step": 15320 }, { "epoch": 20.02294197031039, "grad_norm": 0.000139567579026334, "learning_rate": 1.7243964612385665e-05, "loss": 0.0, "step": 15330 }, { "epoch": 20.023391812865498, "grad_norm": 0.00016461194900330156, "learning_rate": 1.7218973359324236e-05, "loss": 0.0, "step": 15340 }, { "epoch": 20.023841655420604, "grad_norm": 6.844043673481792e-05, "learning_rate": 1.719398210626281e-05, "loss": 0.0, "step": 15350 }, { "epoch": 20.024291497975707, "grad_norm": 0.00017817427578847855, "learning_rate": 1.716899085320138e-05, "loss": 0.0, "step": 15360 }, { "epoch": 20.024741340530813, "grad_norm": 2.7063017114414833e-05, "learning_rate": 1.714399960013995e-05, "loss": 0.0178, "step": 15370 }, { "epoch": 20.02519118308592, "grad_norm": 2.6730342142400332e-05, "learning_rate": 1.7119008347078525e-05, "loss": 0.0, "step": 15380 }, { "epoch": 20.025641025641026, "grad_norm": 0.00011669701780192554, "learning_rate": 1.7094017094017095e-05, "loss": 0.0, "step": 15390 }, { "epoch": 20.026090868196132, "grad_norm": 6.649976421613246e-05, "learning_rate": 1.7069025840955666e-05, "loss": 0.0, "step": 15400 }, { "epoch": 20.026540710751238, "grad_norm": 2.661532926140353e-05, "learning_rate": 1.7044034587894236e-05, "loss": 0.0, "step": 15410 }, { "epoch": 20.026990553306344, "grad_norm": 7.47826270526275e-05, "learning_rate": 1.701904333483281e-05, "loss": 0.2223, "step": 15420 }, { "epoch": 20.027440395861447, "grad_norm": 7.067176920827478e-05, "learning_rate": 1.699405208177138e-05, "loss": 0.0, "step": 15430 }, { "epoch": 20.027890238416553, "grad_norm": 0.00010112140444107354, "learning_rate": 1.696906082870995e-05, "loss": 0.0, "step": 15440 }, { "epoch": 20.02834008097166, "grad_norm": 1.5223273294395767e-05, "learning_rate": 1.6944069575648525e-05, "loss": 0.0, "step": 15450 }, { "epoch": 20.028789923526766, "grad_norm": 0.002139465184882283, "learning_rate": 1.6919078322587096e-05, "loss": 0.0, "step": 15460 }, { "epoch": 20.029239766081872, "grad_norm": 3.557377931429073e-05, "learning_rate": 1.6894087069525666e-05, "loss": 0.0, "step": 15470 }, { "epoch": 20.02968960863698, "grad_norm": 0.00017935926734935492, "learning_rate": 1.686909581646424e-05, "loss": 0.0, "step": 15480 }, { "epoch": 20.03013945119208, "grad_norm": 5.909262836212292e-05, "learning_rate": 1.684410456340281e-05, "loss": 0.0, "step": 15490 }, { "epoch": 20.030589293747187, "grad_norm": 0.00017241401656065136, "learning_rate": 1.681911331034138e-05, "loss": 0.0181, "step": 15500 }, { "epoch": 20.031039136302294, "grad_norm": 1.5363435522885993e-05, "learning_rate": 1.6794122057279955e-05, "loss": 0.0, "step": 15510 }, { "epoch": 20.0314889788574, "grad_norm": 8.823603275232017e-05, "learning_rate": 1.6769130804218526e-05, "loss": 0.2129, "step": 15520 }, { "epoch": 20.031938821412506, "grad_norm": 0.00011974719382124022, "learning_rate": 1.6744139551157096e-05, "loss": 0.0, "step": 15530 }, { "epoch": 20.032388663967613, "grad_norm": 4.92661714553833, "learning_rate": 1.6719148298095667e-05, "loss": 0.0171, "step": 15540 }, { "epoch": 20.03283850652272, "grad_norm": 0.0003888284554705024, "learning_rate": 1.669415704503424e-05, "loss": 0.0, "step": 15550 }, { "epoch": 20.03328834907782, "grad_norm": 5.786581095890142e-05, "learning_rate": 1.666916579197281e-05, "loss": 0.0, "step": 15560 }, { "epoch": 20.033333333333335, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.8967709653516572, "eval_loss": 0.8696686625480652, "eval_runtime": 143.734, "eval_samples_per_second": 1.496, "eval_steps_per_second": 1.496, "step": 15561 }, { "epoch": 21.000404858299596, "grad_norm": 0.00013393645349424332, "learning_rate": 1.664417453891138e-05, "loss": 0.0124, "step": 15570 }, { "epoch": 21.000854700854703, "grad_norm": 0.00015060561418067664, "learning_rate": 1.6619183285849956e-05, "loss": 0.0, "step": 15580 }, { "epoch": 21.001304543409805, "grad_norm": 2.7493499146657996e-05, "learning_rate": 1.6594192032788526e-05, "loss": 0.4515, "step": 15590 }, { "epoch": 21.00175438596491, "grad_norm": 0.0002209960948675871, "learning_rate": 1.6569200779727097e-05, "loss": 0.0, "step": 15600 }, { "epoch": 21.002204228520018, "grad_norm": 1.9848734154948033e-05, "learning_rate": 1.654420952666567e-05, "loss": 0.0, "step": 15610 }, { "epoch": 21.002654071075124, "grad_norm": 7.113569154171273e-05, "learning_rate": 1.651921827360424e-05, "loss": 0.021, "step": 15620 }, { "epoch": 21.00310391363023, "grad_norm": 0.00019776188128162175, "learning_rate": 1.649422702054281e-05, "loss": 0.0, "step": 15630 }, { "epoch": 21.003553756185337, "grad_norm": 0.0002914995711762458, "learning_rate": 1.6469235767481382e-05, "loss": 0.0, "step": 15640 }, { "epoch": 21.00400359874044, "grad_norm": 9.022648737300187e-05, "learning_rate": 1.6444244514419953e-05, "loss": 0.0, "step": 15650 }, { "epoch": 21.004453441295546, "grad_norm": 0.0001052138686645776, "learning_rate": 1.6419253261358526e-05, "loss": 0.0, "step": 15660 }, { "epoch": 21.004903283850652, "grad_norm": 0.0002267085073981434, "learning_rate": 1.6394262008297097e-05, "loss": 0.0, "step": 15670 }, { "epoch": 21.00535312640576, "grad_norm": 0.00011711043771356344, "learning_rate": 1.6369270755235667e-05, "loss": 0.0, "step": 15680 }, { "epoch": 21.005802968960865, "grad_norm": 0.00013010339171160012, "learning_rate": 1.6344279502174238e-05, "loss": 0.0, "step": 15690 }, { "epoch": 21.00625281151597, "grad_norm": 0.0001116329658543691, "learning_rate": 1.631928824911281e-05, "loss": 0.0, "step": 15700 }, { "epoch": 21.006702654071074, "grad_norm": 8.411926683038473e-05, "learning_rate": 1.6294296996051382e-05, "loss": 0.0, "step": 15710 }, { "epoch": 21.00715249662618, "grad_norm": 8.1470629083924e-05, "learning_rate": 1.6269305742989953e-05, "loss": 0.0, "step": 15720 }, { "epoch": 21.007602339181286, "grad_norm": 0.00013351053348742425, "learning_rate": 1.6244314489928523e-05, "loss": 0.0, "step": 15730 }, { "epoch": 21.008052181736392, "grad_norm": 0.0004384877975098789, "learning_rate": 1.6219323236867097e-05, "loss": 0.2827, "step": 15740 }, { "epoch": 21.0085020242915, "grad_norm": 3.5664779716171324e-05, "learning_rate": 1.6194331983805668e-05, "loss": 0.0, "step": 15750 }, { "epoch": 21.008951866846605, "grad_norm": 8.512438216712326e-05, "learning_rate": 1.616934073074424e-05, "loss": 0.2554, "step": 15760 }, { "epoch": 21.00940170940171, "grad_norm": 0.000147869301144965, "learning_rate": 1.6144349477682812e-05, "loss": 0.0, "step": 15770 }, { "epoch": 21.009851551956814, "grad_norm": 3.806790118687786e-05, "learning_rate": 1.6119358224621383e-05, "loss": 0.0, "step": 15780 }, { "epoch": 21.01030139451192, "grad_norm": 6.963503255974501e-05, "learning_rate": 1.6094366971559953e-05, "loss": 0.0, "step": 15790 }, { "epoch": 21.010751237067026, "grad_norm": 0.00014841827214695513, "learning_rate": 1.6069375718498524e-05, "loss": 0.0, "step": 15800 }, { "epoch": 21.011201079622133, "grad_norm": 2.9537155569414608e-05, "learning_rate": 1.6044384465437098e-05, "loss": 0.0, "step": 15810 }, { "epoch": 21.01165092217724, "grad_norm": 0.0040170252323150635, "learning_rate": 1.6019393212375668e-05, "loss": 0.0, "step": 15820 }, { "epoch": 21.012100764732345, "grad_norm": 0.0002760277420748025, "learning_rate": 1.599440195931424e-05, "loss": 0.0, "step": 15830 }, { "epoch": 21.012550607287448, "grad_norm": 1.5282937965821475e-05, "learning_rate": 1.5969410706252813e-05, "loss": 0.0, "step": 15840 }, { "epoch": 21.013000449842554, "grad_norm": 6.329569441732019e-05, "learning_rate": 1.5944419453191383e-05, "loss": 0.0, "step": 15850 }, { "epoch": 21.01345029239766, "grad_norm": 6.280637171585113e-05, "learning_rate": 1.5919428200129954e-05, "loss": 0.0, "step": 15860 }, { "epoch": 21.013900134952767, "grad_norm": 8.514901128364727e-05, "learning_rate": 1.5894436947068528e-05, "loss": 0.0, "step": 15870 }, { "epoch": 21.014349977507873, "grad_norm": 0.00015846289170440286, "learning_rate": 1.5869445694007098e-05, "loss": 0.0, "step": 15880 }, { "epoch": 21.01479982006298, "grad_norm": 2.3402262741001323e-05, "learning_rate": 1.584445444094567e-05, "loss": 0.0, "step": 15890 }, { "epoch": 21.015249662618082, "grad_norm": 1.5503257600357756e-05, "learning_rate": 1.5819463187884243e-05, "loss": 0.0, "step": 15900 }, { "epoch": 21.01569950517319, "grad_norm": 5.879286618437618e-05, "learning_rate": 1.5794471934822813e-05, "loss": 0.0, "step": 15910 }, { "epoch": 21.016149347728295, "grad_norm": 5.737386527471244e-05, "learning_rate": 1.5769480681761384e-05, "loss": 0.0, "step": 15920 }, { "epoch": 21.0165991902834, "grad_norm": 3.3086245821323246e-05, "learning_rate": 1.5744489428699954e-05, "loss": 0.1384, "step": 15930 }, { "epoch": 21.017049032838507, "grad_norm": 7.178186933742836e-05, "learning_rate": 1.5719498175638528e-05, "loss": 0.2151, "step": 15940 }, { "epoch": 21.017498875393613, "grad_norm": 1.994244485103991e-05, "learning_rate": 1.56945069225771e-05, "loss": 0.0, "step": 15950 }, { "epoch": 21.01794871794872, "grad_norm": 7.299780554603785e-05, "learning_rate": 1.566951566951567e-05, "loss": 0.0, "step": 15960 }, { "epoch": 21.018398560503822, "grad_norm": 0.00011815952166216448, "learning_rate": 1.5644524416454243e-05, "loss": 0.0, "step": 15970 }, { "epoch": 21.01884840305893, "grad_norm": 0.0007531983428634703, "learning_rate": 1.5619533163392813e-05, "loss": 0.0, "step": 15980 }, { "epoch": 21.019298245614035, "grad_norm": 2.7799307645182125e-05, "learning_rate": 1.5594541910331384e-05, "loss": 0.5148, "step": 15990 }, { "epoch": 21.01974808816914, "grad_norm": 0.00012047717609675601, "learning_rate": 1.5569550657269958e-05, "loss": 0.0, "step": 16000 }, { "epoch": 21.020197930724247, "grad_norm": 1.4655407539976295e-05, "learning_rate": 1.554455940420853e-05, "loss": 0.0, "step": 16010 }, { "epoch": 21.020647773279354, "grad_norm": 0.00030937616247683764, "learning_rate": 1.55195681511471e-05, "loss": 0.0, "step": 16020 }, { "epoch": 21.021097615834456, "grad_norm": 0.00020693485566880554, "learning_rate": 1.5494576898085673e-05, "loss": 0.0, "step": 16030 }, { "epoch": 21.021547458389563, "grad_norm": 0.00015178917965386063, "learning_rate": 1.5469585645024243e-05, "loss": 0.0, "step": 16040 }, { "epoch": 21.02199730094467, "grad_norm": 0.00034165047691203654, "learning_rate": 1.5444594391962814e-05, "loss": 0.0, "step": 16050 }, { "epoch": 21.022447143499775, "grad_norm": 0.00012903207971248776, "learning_rate": 1.5419603138901384e-05, "loss": 0.0, "step": 16060 }, { "epoch": 21.02289698605488, "grad_norm": 0.0001874924637377262, "learning_rate": 1.5394611885839958e-05, "loss": 0.0, "step": 16070 }, { "epoch": 21.023346828609988, "grad_norm": 0.00022235391952563077, "learning_rate": 1.536962063277853e-05, "loss": 0.0, "step": 16080 }, { "epoch": 21.02379667116509, "grad_norm": 0.003367543686181307, "learning_rate": 1.53446293797171e-05, "loss": 0.0001, "step": 16090 }, { "epoch": 21.024246513720197, "grad_norm": 5.840232188347727e-05, "learning_rate": 1.5319638126655673e-05, "loss": 0.0, "step": 16100 }, { "epoch": 21.024696356275303, "grad_norm": 0.00011737307795556262, "learning_rate": 1.5294646873594244e-05, "loss": 0.0, "step": 16110 }, { "epoch": 21.02514619883041, "grad_norm": 0.00011675278801703826, "learning_rate": 1.5269655620532814e-05, "loss": 0.0, "step": 16120 }, { "epoch": 21.025596041385516, "grad_norm": 9.717835200717673e-05, "learning_rate": 1.5244664367471386e-05, "loss": 0.0, "step": 16130 }, { "epoch": 21.026045883940622, "grad_norm": 0.00020236059208400548, "learning_rate": 1.5219673114409957e-05, "loss": 0.0, "step": 16140 }, { "epoch": 21.026495726495728, "grad_norm": 0.00011191396333742887, "learning_rate": 1.5194681861348528e-05, "loss": 0.0, "step": 16150 }, { "epoch": 21.02694556905083, "grad_norm": 8.082162094069645e-05, "learning_rate": 1.5169690608287101e-05, "loss": 0.0, "step": 16160 }, { "epoch": 21.027395411605937, "grad_norm": 1.4630045370722655e-05, "learning_rate": 1.5144699355225672e-05, "loss": 0.0, "step": 16170 }, { "epoch": 21.027845254161043, "grad_norm": 8.45779140945524e-05, "learning_rate": 1.5119708102164242e-05, "loss": 0.017, "step": 16180 }, { "epoch": 21.02829509671615, "grad_norm": 2.756094909273088e-05, "learning_rate": 1.5094716849102813e-05, "loss": 0.0, "step": 16190 }, { "epoch": 21.028744939271256, "grad_norm": 6.739424861734733e-05, "learning_rate": 1.5069725596041387e-05, "loss": 0.0, "step": 16200 }, { "epoch": 21.029194781826362, "grad_norm": 3.043209835595917e-05, "learning_rate": 1.5044734342979957e-05, "loss": 0.0, "step": 16210 }, { "epoch": 21.029644624381465, "grad_norm": 1.4527555322274566e-05, "learning_rate": 1.5019743089918528e-05, "loss": 0.0, "step": 16220 }, { "epoch": 21.03009446693657, "grad_norm": 6.600967753911391e-05, "learning_rate": 1.4994751836857102e-05, "loss": 0.0, "step": 16230 }, { "epoch": 21.030544309491678, "grad_norm": 8.397259807679802e-05, "learning_rate": 1.4969760583795672e-05, "loss": 0.0, "step": 16240 }, { "epoch": 21.030994152046784, "grad_norm": 6.837156979599968e-05, "learning_rate": 1.4944769330734243e-05, "loss": 0.0, "step": 16250 }, { "epoch": 21.03144399460189, "grad_norm": 9.989409591071308e-05, "learning_rate": 1.4919778077672817e-05, "loss": 0.0, "step": 16260 }, { "epoch": 21.031893837156996, "grad_norm": 2.2470172552857548e-05, "learning_rate": 1.4894786824611387e-05, "loss": 0.0, "step": 16270 }, { "epoch": 21.0323436797121, "grad_norm": 8.348335541086271e-05, "learning_rate": 1.4869795571549958e-05, "loss": 0.0, "step": 16280 }, { "epoch": 21.032793522267205, "grad_norm": 0.00015816195809748024, "learning_rate": 1.4844804318488532e-05, "loss": 0.0, "step": 16290 }, { "epoch": 21.03324336482231, "grad_norm": 1.495928518124856e-05, "learning_rate": 1.4819813065427102e-05, "loss": 0.0, "step": 16300 }, { "epoch": 21.033333333333335, "eval_accuracy": 0.9023255813953488, "eval_f1": 0.9017303179743699, "eval_loss": 0.767001211643219, "eval_runtime": 143.498, "eval_samples_per_second": 1.498, "eval_steps_per_second": 1.498, "step": 16302 }, { "epoch": 22.000359874044083, "grad_norm": 0.00018205380183644593, "learning_rate": 1.4794821812365673e-05, "loss": 0.0, "step": 16310 }, { "epoch": 22.00080971659919, "grad_norm": 0.0001140242675319314, "learning_rate": 1.4769830559304243e-05, "loss": 0.0, "step": 16320 }, { "epoch": 22.001259559154295, "grad_norm": 0.00023855858307797462, "learning_rate": 1.4744839306242817e-05, "loss": 0.0, "step": 16330 }, { "epoch": 22.0017094017094, "grad_norm": 0.0001698070700513199, "learning_rate": 1.4719848053181388e-05, "loss": 0.0, "step": 16340 }, { "epoch": 22.002159244264508, "grad_norm": 4.1686416807351634e-05, "learning_rate": 1.4694856800119958e-05, "loss": 0.0, "step": 16350 }, { "epoch": 22.002609086819614, "grad_norm": 1.9726967366295867e-05, "learning_rate": 1.466986554705853e-05, "loss": 0.0, "step": 16360 }, { "epoch": 22.00305892937472, "grad_norm": 1.4606856893806253e-05, "learning_rate": 1.4644874293997101e-05, "loss": 0.0, "step": 16370 }, { "epoch": 22.003508771929823, "grad_norm": 6.480448791990057e-05, "learning_rate": 1.4619883040935673e-05, "loss": 0.0, "step": 16380 }, { "epoch": 22.00395861448493, "grad_norm": 0.010764636099338531, "learning_rate": 1.4594891787874245e-05, "loss": 0.0, "step": 16390 }, { "epoch": 22.004408457040036, "grad_norm": 0.0001633676583878696, "learning_rate": 1.4569900534812816e-05, "loss": 0.0, "step": 16400 }, { "epoch": 22.004858299595142, "grad_norm": 6.247393321245909e-05, "learning_rate": 1.4544909281751386e-05, "loss": 0.0122, "step": 16410 }, { "epoch": 22.00530814215025, "grad_norm": 3.104090501437895e-05, "learning_rate": 1.451991802868996e-05, "loss": 0.0, "step": 16420 }, { "epoch": 22.005757984705355, "grad_norm": 0.000234328254009597, "learning_rate": 1.449492677562853e-05, "loss": 0.0, "step": 16430 }, { "epoch": 22.006207827260457, "grad_norm": 0.0001240622077602893, "learning_rate": 1.4469935522567101e-05, "loss": 0.2439, "step": 16440 }, { "epoch": 22.006657669815564, "grad_norm": 0.0001401822519255802, "learning_rate": 1.4444944269505672e-05, "loss": 0.0, "step": 16450 }, { "epoch": 22.00710751237067, "grad_norm": 6.948616646695882e-05, "learning_rate": 1.4419953016444246e-05, "loss": 0.0, "step": 16460 }, { "epoch": 22.007557354925776, "grad_norm": 6.569693505298346e-05, "learning_rate": 1.4394961763382816e-05, "loss": 0.0, "step": 16470 }, { "epoch": 22.008007197480882, "grad_norm": 2.90517600660678e-05, "learning_rate": 1.4369970510321387e-05, "loss": 0.0, "step": 16480 }, { "epoch": 22.00845704003599, "grad_norm": 1.4559688679582905e-05, "learning_rate": 1.434497925725996e-05, "loss": 0.0, "step": 16490 }, { "epoch": 22.00890688259109, "grad_norm": 5.7797118643065915e-05, "learning_rate": 1.4319988004198531e-05, "loss": 0.0, "step": 16500 }, { "epoch": 22.009356725146198, "grad_norm": 1.4377340448845644e-05, "learning_rate": 1.4294996751137102e-05, "loss": 0.0, "step": 16510 }, { "epoch": 22.009806567701304, "grad_norm": 8.981933206086978e-05, "learning_rate": 1.4270005498075676e-05, "loss": 0.0, "step": 16520 }, { "epoch": 22.01025641025641, "grad_norm": 3.223190287826583e-05, "learning_rate": 1.4245014245014246e-05, "loss": 0.0, "step": 16530 }, { "epoch": 22.010706252811516, "grad_norm": 7.404076313832775e-05, "learning_rate": 1.4220022991952817e-05, "loss": 0.0, "step": 16540 }, { "epoch": 22.011156095366623, "grad_norm": 7.501559593947604e-05, "learning_rate": 1.419503173889139e-05, "loss": 0.0, "step": 16550 }, { "epoch": 22.01160593792173, "grad_norm": 0.00035023593227379024, "learning_rate": 1.4170040485829961e-05, "loss": 0.0, "step": 16560 }, { "epoch": 22.01205578047683, "grad_norm": 3.115064464509487e-05, "learning_rate": 1.4145049232768532e-05, "loss": 0.0, "step": 16570 }, { "epoch": 22.012505623031938, "grad_norm": 0.00011644191545201465, "learning_rate": 1.4120057979707102e-05, "loss": 0.0, "step": 16580 }, { "epoch": 22.012955465587044, "grad_norm": 0.000131905879243277, "learning_rate": 1.4095066726645676e-05, "loss": 0.0, "step": 16590 }, { "epoch": 22.01340530814215, "grad_norm": 6.491533713415265e-05, "learning_rate": 1.4070075473584247e-05, "loss": 0.0, "step": 16600 }, { "epoch": 22.013855150697257, "grad_norm": 0.00020082131959497929, "learning_rate": 1.4045084220522817e-05, "loss": 0.0, "step": 16610 }, { "epoch": 22.014304993252363, "grad_norm": 0.00010515145550016314, "learning_rate": 1.4020092967461391e-05, "loss": 0.0, "step": 16620 }, { "epoch": 22.014754835807466, "grad_norm": 2.1495878172572702e-05, "learning_rate": 1.3995101714399961e-05, "loss": 0.0, "step": 16630 }, { "epoch": 22.015204678362572, "grad_norm": 2.3377557226922363e-05, "learning_rate": 1.3970110461338532e-05, "loss": 0.0, "step": 16640 }, { "epoch": 22.01565452091768, "grad_norm": 0.00013570005830843002, "learning_rate": 1.3945119208277104e-05, "loss": 0.2202, "step": 16650 }, { "epoch": 22.016104363472785, "grad_norm": 2.253225102322176e-05, "learning_rate": 1.3920127955215675e-05, "loss": 0.0, "step": 16660 }, { "epoch": 22.01655420602789, "grad_norm": 0.00014195748372003436, "learning_rate": 1.3895136702154247e-05, "loss": 0.0, "step": 16670 }, { "epoch": 22.017004048582997, "grad_norm": 8.678180893184617e-05, "learning_rate": 1.3870145449092819e-05, "loss": 0.0, "step": 16680 }, { "epoch": 22.0174538911381, "grad_norm": 2.3750821128487587e-05, "learning_rate": 1.384515419603139e-05, "loss": 0.0, "step": 16690 }, { "epoch": 22.017903733693206, "grad_norm": 9.922748722601682e-05, "learning_rate": 1.382016294296996e-05, "loss": 0.0, "step": 16700 }, { "epoch": 22.018353576248312, "grad_norm": 3.073050902457908e-05, "learning_rate": 1.379517168990853e-05, "loss": 0.189, "step": 16710 }, { "epoch": 22.01880341880342, "grad_norm": 6.869553908472881e-05, "learning_rate": 1.3770180436847105e-05, "loss": 0.0, "step": 16720 }, { "epoch": 22.019253261358525, "grad_norm": 2.4198412575060502e-05, "learning_rate": 1.3745189183785675e-05, "loss": 0.0, "step": 16730 }, { "epoch": 22.01970310391363, "grad_norm": 1.4448706679104362e-05, "learning_rate": 1.3720197930724246e-05, "loss": 0.0, "step": 16740 }, { "epoch": 22.020152946468738, "grad_norm": 7.461678615072742e-05, "learning_rate": 1.369520667766282e-05, "loss": 0.0, "step": 16750 }, { "epoch": 22.02060278902384, "grad_norm": 7.355320849455893e-05, "learning_rate": 1.367021542460139e-05, "loss": 0.0, "step": 16760 }, { "epoch": 22.021052631578947, "grad_norm": 1.980231718334835e-05, "learning_rate": 1.364522417153996e-05, "loss": 0.0, "step": 16770 }, { "epoch": 22.021502474134053, "grad_norm": 7.697223190916702e-05, "learning_rate": 1.3620232918478534e-05, "loss": 0.0234, "step": 16780 }, { "epoch": 22.02195231668916, "grad_norm": 1.462399450247176e-05, "learning_rate": 1.3595241665417105e-05, "loss": 0.0, "step": 16790 }, { "epoch": 22.022402159244265, "grad_norm": 1.1642520803434309e-05, "learning_rate": 1.3570250412355676e-05, "loss": 0.0, "step": 16800 }, { "epoch": 22.02285200179937, "grad_norm": 5.9208872698945925e-05, "learning_rate": 1.354525915929425e-05, "loss": 0.0, "step": 16810 }, { "epoch": 22.023301844354474, "grad_norm": 4.6848759666318074e-05, "learning_rate": 1.352026790623282e-05, "loss": 0.0, "step": 16820 }, { "epoch": 22.02375168690958, "grad_norm": 0.00011021130194421858, "learning_rate": 1.349527665317139e-05, "loss": 0.0, "step": 16830 }, { "epoch": 22.024201529464687, "grad_norm": 1.1446140888438094e-05, "learning_rate": 1.3470285400109961e-05, "loss": 0.0169, "step": 16840 }, { "epoch": 22.024651372019793, "grad_norm": 0.00019674422219395638, "learning_rate": 1.3445294147048535e-05, "loss": 0.0, "step": 16850 }, { "epoch": 22.0251012145749, "grad_norm": 2.068349567707628e-05, "learning_rate": 1.3420302893987105e-05, "loss": 0.0, "step": 16860 }, { "epoch": 22.025551057130006, "grad_norm": 6.41013975837268e-05, "learning_rate": 1.3395311640925676e-05, "loss": 0.0, "step": 16870 }, { "epoch": 22.02600089968511, "grad_norm": 9.799375402508304e-05, "learning_rate": 1.337032038786425e-05, "loss": 0.0, "step": 16880 }, { "epoch": 22.026450742240215, "grad_norm": 3.078604277106933e-05, "learning_rate": 1.334532913480282e-05, "loss": 0.0, "step": 16890 }, { "epoch": 22.02690058479532, "grad_norm": 6.727012078044936e-05, "learning_rate": 1.332033788174139e-05, "loss": 0.0, "step": 16900 }, { "epoch": 22.027350427350427, "grad_norm": 2.0783510990440845e-05, "learning_rate": 1.3295346628679963e-05, "loss": 0.0, "step": 16910 }, { "epoch": 22.027800269905534, "grad_norm": 8.768763655098155e-05, "learning_rate": 1.3270355375618535e-05, "loss": 0.0, "step": 16920 }, { "epoch": 22.02825011246064, "grad_norm": 0.0001105818446376361, "learning_rate": 1.3245364122557106e-05, "loss": 0.0, "step": 16930 }, { "epoch": 22.028699955015746, "grad_norm": 6.362546992022544e-05, "learning_rate": 1.3220372869495676e-05, "loss": 0.0, "step": 16940 }, { "epoch": 22.02914979757085, "grad_norm": 5.6985183618962765e-05, "learning_rate": 1.3195381616434249e-05, "loss": 0.0, "step": 16950 }, { "epoch": 22.029599640125955, "grad_norm": 1.5035168871690985e-05, "learning_rate": 1.3170390363372819e-05, "loss": 0.0, "step": 16960 }, { "epoch": 22.03004948268106, "grad_norm": 8.474278001813218e-05, "learning_rate": 1.3145399110311391e-05, "loss": 0.0, "step": 16970 }, { "epoch": 22.030499325236168, "grad_norm": 8.718929893802851e-05, "learning_rate": 1.3120407857249963e-05, "loss": 0.0, "step": 16980 }, { "epoch": 22.030949167791274, "grad_norm": 0.0001404480281053111, "learning_rate": 1.3095416604188534e-05, "loss": 0.0, "step": 16990 }, { "epoch": 22.03139901034638, "grad_norm": 5.927497477387078e-05, "learning_rate": 1.3070425351127104e-05, "loss": 0.0, "step": 17000 }, { "epoch": 22.031848852901483, "grad_norm": 7.672845822526142e-05, "learning_rate": 1.3045434098065678e-05, "loss": 0.0, "step": 17010 }, { "epoch": 22.03229869545659, "grad_norm": 5.550547575694509e-05, "learning_rate": 1.3020442845004249e-05, "loss": 0.0, "step": 17020 }, { "epoch": 22.032748538011695, "grad_norm": 0.0003152852295897901, "learning_rate": 1.299545159194282e-05, "loss": 0.0, "step": 17030 }, { "epoch": 22.0331983805668, "grad_norm": 7.615220965817571e-05, "learning_rate": 1.2970460338881393e-05, "loss": 0.0, "step": 17040 }, { "epoch": 22.033333333333335, "eval_accuracy": 0.9116279069767442, "eval_f1": 0.9107290378585786, "eval_loss": 0.7398874163627625, "eval_runtime": 145.1728, "eval_samples_per_second": 1.481, "eval_steps_per_second": 1.481, "step": 17043 }, { "epoch": 23.000314889788573, "grad_norm": 8.533633808838204e-05, "learning_rate": 1.2945469085819964e-05, "loss": 0.0, "step": 17050 }, { "epoch": 23.00076473234368, "grad_norm": 0.00016142489039339125, "learning_rate": 1.2920477832758534e-05, "loss": 0.0, "step": 17060 }, { "epoch": 23.001214574898786, "grad_norm": 1.563602199894376e-05, "learning_rate": 1.2895486579697105e-05, "loss": 0.0, "step": 17070 }, { "epoch": 23.001664417453892, "grad_norm": 0.00012546544894576073, "learning_rate": 1.2870495326635679e-05, "loss": 0.0, "step": 17080 }, { "epoch": 23.002114260008998, "grad_norm": 3.8199003029149026e-05, "learning_rate": 1.284550407357425e-05, "loss": 0.0, "step": 17090 }, { "epoch": 23.002564102564104, "grad_norm": 0.00017885587294586003, "learning_rate": 1.282051282051282e-05, "loss": 0.0, "step": 17100 }, { "epoch": 23.003013945119207, "grad_norm": 5.190909359953366e-05, "learning_rate": 1.2795521567451394e-05, "loss": 0.0, "step": 17110 }, { "epoch": 23.003463787674313, "grad_norm": 2.7162528567714617e-05, "learning_rate": 1.2770530314389964e-05, "loss": 0.0, "step": 17120 }, { "epoch": 23.00391363022942, "grad_norm": 8.696081931702793e-05, "learning_rate": 1.2745539061328535e-05, "loss": 0.0, "step": 17130 }, { "epoch": 23.004363472784526, "grad_norm": 0.00015178235480561852, "learning_rate": 1.2720547808267109e-05, "loss": 0.212, "step": 17140 }, { "epoch": 23.004813315339632, "grad_norm": 2.0018293071188964e-05, "learning_rate": 1.269555655520568e-05, "loss": 0.0, "step": 17150 }, { "epoch": 23.00526315789474, "grad_norm": 5.057204907643609e-05, "learning_rate": 1.267056530214425e-05, "loss": 0.0, "step": 17160 }, { "epoch": 23.00571300044984, "grad_norm": 6.211805157363415e-05, "learning_rate": 1.2645574049082824e-05, "loss": 0.0, "step": 17170 }, { "epoch": 23.006162843004947, "grad_norm": 1.1010720299964305e-05, "learning_rate": 1.2620582796021394e-05, "loss": 0.0, "step": 17180 }, { "epoch": 23.006612685560054, "grad_norm": 1.984611662919633e-05, "learning_rate": 1.2595591542959965e-05, "loss": 0.0, "step": 17190 }, { "epoch": 23.00706252811516, "grad_norm": 5.641800453304313e-05, "learning_rate": 1.2570600289898535e-05, "loss": 0.0169, "step": 17200 }, { "epoch": 23.007512370670266, "grad_norm": 0.00011283885396551341, "learning_rate": 1.2545609036837107e-05, "loss": 0.0, "step": 17210 }, { "epoch": 23.007962213225372, "grad_norm": 0.0001210238624480553, "learning_rate": 1.252061778377568e-05, "loss": 0.0, "step": 17220 }, { "epoch": 23.008412055780475, "grad_norm": 8.198981231544167e-05, "learning_rate": 1.249562653071425e-05, "loss": 0.0, "step": 17230 }, { "epoch": 23.00886189833558, "grad_norm": 5.8402605645824224e-05, "learning_rate": 1.2470635277652822e-05, "loss": 0.0, "step": 17240 }, { "epoch": 23.009311740890688, "grad_norm": 0.00014611425285693258, "learning_rate": 1.2445644024591393e-05, "loss": 0.0, "step": 17250 }, { "epoch": 23.009761583445794, "grad_norm": 5.7104443840216845e-05, "learning_rate": 1.2420652771529965e-05, "loss": 0.0, "step": 17260 }, { "epoch": 23.0102114260009, "grad_norm": 4.097040437045507e-05, "learning_rate": 1.2395661518468536e-05, "loss": 0.0, "step": 17270 }, { "epoch": 23.010661268556007, "grad_norm": 6.87441643094644e-05, "learning_rate": 1.2370670265407108e-05, "loss": 0.0, "step": 17280 }, { "epoch": 23.011111111111113, "grad_norm": 6.479059084085748e-05, "learning_rate": 1.2345679012345678e-05, "loss": 0.0, "step": 17290 }, { "epoch": 23.011560953666216, "grad_norm": 6.475632835645229e-05, "learning_rate": 1.232068775928425e-05, "loss": 0.0, "step": 17300 }, { "epoch": 23.012010796221322, "grad_norm": 3.658682544482872e-05, "learning_rate": 1.2295696506222823e-05, "loss": 0.0, "step": 17310 }, { "epoch": 23.012460638776428, "grad_norm": 3.7255959510803223, "learning_rate": 1.2270705253161393e-05, "loss": 0.0127, "step": 17320 }, { "epoch": 23.012910481331534, "grad_norm": 8.939913095673546e-05, "learning_rate": 1.2245714000099965e-05, "loss": 0.0, "step": 17330 }, { "epoch": 23.01336032388664, "grad_norm": 0.0009572944836691022, "learning_rate": 1.2220722747038538e-05, "loss": 0.0, "step": 17340 }, { "epoch": 23.013810166441747, "grad_norm": 5.8545792853692546e-05, "learning_rate": 1.2195731493977108e-05, "loss": 0.0, "step": 17350 }, { "epoch": 23.01426000899685, "grad_norm": 1.49254992720671e-05, "learning_rate": 1.217074024091568e-05, "loss": 0.0, "step": 17360 }, { "epoch": 23.014709851551956, "grad_norm": 5.762762884842232e-05, "learning_rate": 1.2145748987854251e-05, "loss": 0.0, "step": 17370 }, { "epoch": 23.015159694107062, "grad_norm": 5.760184285463765e-05, "learning_rate": 1.2120757734792823e-05, "loss": 0.0, "step": 17380 }, { "epoch": 23.01560953666217, "grad_norm": 4.390942558529787e-05, "learning_rate": 1.2095766481731395e-05, "loss": 0.0, "step": 17390 }, { "epoch": 23.016059379217275, "grad_norm": 1.1090855878137518e-05, "learning_rate": 1.2070775228669966e-05, "loss": 0.0, "step": 17400 }, { "epoch": 23.01650922177238, "grad_norm": 1.5837080354685895e-05, "learning_rate": 1.2045783975608538e-05, "loss": 0.0, "step": 17410 }, { "epoch": 23.016959064327484, "grad_norm": 5.5962460464797914e-05, "learning_rate": 1.2020792722547109e-05, "loss": 0.0, "step": 17420 }, { "epoch": 23.01740890688259, "grad_norm": 6.750640022801235e-05, "learning_rate": 1.199580146948568e-05, "loss": 0.0, "step": 17430 }, { "epoch": 23.017858749437696, "grad_norm": 5.9126628912054e-05, "learning_rate": 1.1970810216424253e-05, "loss": 0.0, "step": 17440 }, { "epoch": 23.018308591992803, "grad_norm": 0.00012606965901795775, "learning_rate": 1.1945818963362824e-05, "loss": 0.0, "step": 17450 }, { "epoch": 23.01875843454791, "grad_norm": 9.246710396837443e-05, "learning_rate": 1.1920827710301396e-05, "loss": 0.0, "step": 17460 }, { "epoch": 23.019208277103015, "grad_norm": 0.00011985372111666948, "learning_rate": 1.1895836457239968e-05, "loss": 0.0, "step": 17470 }, { "epoch": 23.01965811965812, "grad_norm": 0.00021879107225686312, "learning_rate": 1.1870845204178538e-05, "loss": 0.0, "step": 17480 }, { "epoch": 23.020107962213224, "grad_norm": 9.994024003390223e-05, "learning_rate": 1.184585395111711e-05, "loss": 0.0, "step": 17490 }, { "epoch": 23.02055780476833, "grad_norm": 6.422989827115089e-05, "learning_rate": 1.1820862698055681e-05, "loss": 0.0, "step": 17500 }, { "epoch": 23.021007647323437, "grad_norm": 8.600562432548031e-05, "learning_rate": 1.1795871444994253e-05, "loss": 0.0, "step": 17510 }, { "epoch": 23.021457489878543, "grad_norm": 1.1497855666675605e-05, "learning_rate": 1.1770880191932824e-05, "loss": 0.0, "step": 17520 }, { "epoch": 23.02190733243365, "grad_norm": 4.8972764489008114e-05, "learning_rate": 1.1745888938871394e-05, "loss": 0.0, "step": 17530 }, { "epoch": 23.022357174988755, "grad_norm": 9.681389929028228e-05, "learning_rate": 1.1720897685809967e-05, "loss": 0.0, "step": 17540 }, { "epoch": 23.022807017543858, "grad_norm": 6.702203245367855e-05, "learning_rate": 1.1695906432748537e-05, "loss": 0.2432, "step": 17550 }, { "epoch": 23.023256860098964, "grad_norm": 7.384753553196788e-05, "learning_rate": 1.167091517968711e-05, "loss": 0.0, "step": 17560 }, { "epoch": 23.02370670265407, "grad_norm": 0.0001464281667722389, "learning_rate": 1.1645923926625682e-05, "loss": 0.0, "step": 17570 }, { "epoch": 23.024156545209177, "grad_norm": 5.367913763620891e-05, "learning_rate": 1.1620932673564252e-05, "loss": 0.0131, "step": 17580 }, { "epoch": 23.024606387764283, "grad_norm": 0.00015286209236364812, "learning_rate": 1.1595941420502824e-05, "loss": 0.2375, "step": 17590 }, { "epoch": 23.02505623031939, "grad_norm": 0.0001469031412852928, "learning_rate": 1.1570950167441397e-05, "loss": 0.0, "step": 17600 }, { "epoch": 23.025506072874492, "grad_norm": 6.043909161235206e-05, "learning_rate": 1.1545958914379967e-05, "loss": 0.0, "step": 17610 }, { "epoch": 23.0259559154296, "grad_norm": 0.0001146083086496219, "learning_rate": 1.152096766131854e-05, "loss": 0.0, "step": 17620 }, { "epoch": 23.026405757984705, "grad_norm": 0.0001174517601612024, "learning_rate": 1.149597640825711e-05, "loss": 0.0, "step": 17630 }, { "epoch": 23.02685560053981, "grad_norm": 6.389032205333933e-05, "learning_rate": 1.1470985155195682e-05, "loss": 0.0, "step": 17640 }, { "epoch": 23.027305443094917, "grad_norm": 5.768553091911599e-05, "learning_rate": 1.1445993902134254e-05, "loss": 0.0, "step": 17650 }, { "epoch": 23.027755285650024, "grad_norm": 0.0001224432053277269, "learning_rate": 1.1421002649072825e-05, "loss": 0.0, "step": 17660 }, { "epoch": 23.02820512820513, "grad_norm": 5.849784429301508e-05, "learning_rate": 1.1396011396011397e-05, "loss": 0.0, "step": 17670 }, { "epoch": 23.028654970760233, "grad_norm": 2.6603076548781246e-05, "learning_rate": 1.1371020142949967e-05, "loss": 0.0, "step": 17680 }, { "epoch": 23.02910481331534, "grad_norm": 1.9667755623231642e-05, "learning_rate": 1.134602888988854e-05, "loss": 0.0, "step": 17690 }, { "epoch": 23.029554655870445, "grad_norm": 1.0527923222980462e-05, "learning_rate": 1.1321037636827112e-05, "loss": 0.0, "step": 17700 }, { "epoch": 23.03000449842555, "grad_norm": 0.0002108281769324094, "learning_rate": 1.1296046383765682e-05, "loss": 0.0, "step": 17710 }, { "epoch": 23.030454340980658, "grad_norm": 0.00010548167483648285, "learning_rate": 1.1271055130704255e-05, "loss": 0.0, "step": 17720 }, { "epoch": 23.030904183535764, "grad_norm": 0.00010709363414207473, "learning_rate": 1.1246063877642827e-05, "loss": 0.0, "step": 17730 }, { "epoch": 23.031354026090867, "grad_norm": 1.1038410775654484e-05, "learning_rate": 1.1221072624581397e-05, "loss": 0.0, "step": 17740 }, { "epoch": 23.031803868645973, "grad_norm": 2.7871232305187732e-05, "learning_rate": 1.119608137151997e-05, "loss": 0.0, "step": 17750 }, { "epoch": 23.03225371120108, "grad_norm": 0.00011219466250622645, "learning_rate": 1.117109011845854e-05, "loss": 0.0, "step": 17760 }, { "epoch": 23.032703553756185, "grad_norm": 5.003081969334744e-05, "learning_rate": 1.1146098865397112e-05, "loss": 0.0, "step": 17770 }, { "epoch": 23.03315339631129, "grad_norm": 2.147640952898655e-05, "learning_rate": 1.1121107612335683e-05, "loss": 0.0, "step": 17780 }, { "epoch": 23.033333333333335, "eval_accuracy": 0.9116279069767442, "eval_f1": 0.9107290378585786, "eval_loss": 0.745821475982666, "eval_runtime": 142.6386, "eval_samples_per_second": 1.507, "eval_steps_per_second": 1.507, "step": 17784 }, { "epoch": 24.000269905533063, "grad_norm": 5.603049794444814e-05, "learning_rate": 1.1096116359274255e-05, "loss": 0.0, "step": 17790 }, { "epoch": 24.00071974808817, "grad_norm": 5.759598207077943e-05, "learning_rate": 1.1071125106212825e-05, "loss": 0.0, "step": 17800 }, { "epoch": 24.001169590643276, "grad_norm": 0.00021178045426495373, "learning_rate": 1.1046133853151398e-05, "loss": 0.0, "step": 17810 }, { "epoch": 24.001619433198382, "grad_norm": 6.592089630430564e-05, "learning_rate": 1.1021142600089968e-05, "loss": 0.0, "step": 17820 }, { "epoch": 24.002069275753485, "grad_norm": 0.0006991674308665097, "learning_rate": 1.099615134702854e-05, "loss": 0.0, "step": 17830 }, { "epoch": 24.00251911830859, "grad_norm": 9.876254625851288e-05, "learning_rate": 1.0971160093967111e-05, "loss": 0.0, "step": 17840 }, { "epoch": 24.002968960863697, "grad_norm": 9.231301373802125e-05, "learning_rate": 1.0946168840905683e-05, "loss": 0.2142, "step": 17850 }, { "epoch": 24.003418803418803, "grad_norm": 0.00012407703616190702, "learning_rate": 1.0921177587844255e-05, "loss": 0.0, "step": 17860 }, { "epoch": 24.00386864597391, "grad_norm": 8.260818140115589e-05, "learning_rate": 1.0896186334782826e-05, "loss": 0.0, "step": 17870 }, { "epoch": 24.004318488529016, "grad_norm": 0.00018968188669532537, "learning_rate": 1.0871195081721398e-05, "loss": 0.0, "step": 17880 }, { "epoch": 24.004768331084122, "grad_norm": 9.18388323043473e-05, "learning_rate": 1.0846203828659969e-05, "loss": 0.1794, "step": 17890 }, { "epoch": 24.005218173639225, "grad_norm": 0.0003018390270881355, "learning_rate": 1.082121257559854e-05, "loss": 0.0, "step": 17900 }, { "epoch": 24.00566801619433, "grad_norm": 0.00011420969531172886, "learning_rate": 1.0796221322537113e-05, "loss": 0.0, "step": 17910 }, { "epoch": 24.006117858749437, "grad_norm": 4.873359284829348e-05, "learning_rate": 1.0771230069475684e-05, "loss": 0.0, "step": 17920 }, { "epoch": 24.006567701304544, "grad_norm": 9.458748536417261e-05, "learning_rate": 1.0746238816414256e-05, "loss": 0.0, "step": 17930 }, { "epoch": 24.00701754385965, "grad_norm": 0.0001366679643979296, "learning_rate": 1.0721247563352826e-05, "loss": 0.0, "step": 17940 }, { "epoch": 24.007467386414756, "grad_norm": 0.00024823175044730306, "learning_rate": 1.0696256310291398e-05, "loss": 0.0, "step": 17950 }, { "epoch": 24.00791722896986, "grad_norm": 1.4808477317274082e-05, "learning_rate": 1.067126505722997e-05, "loss": 0.0, "step": 17960 }, { "epoch": 24.008367071524965, "grad_norm": 1.1253870979999192e-05, "learning_rate": 1.0646273804168541e-05, "loss": 0.0, "step": 17970 }, { "epoch": 24.00881691408007, "grad_norm": 0.00030266205430962145, "learning_rate": 1.0621282551107113e-05, "loss": 0.0, "step": 17980 }, { "epoch": 24.009266756635178, "grad_norm": 7.682810974074528e-05, "learning_rate": 1.0596291298045686e-05, "loss": 0.0, "step": 17990 }, { "epoch": 24.009716599190284, "grad_norm": 4.628812166629359e-05, "learning_rate": 1.0571300044984256e-05, "loss": 0.0, "step": 18000 }, { "epoch": 24.01016644174539, "grad_norm": 2.8186883355374448e-05, "learning_rate": 1.0546308791922828e-05, "loss": 0.0, "step": 18010 }, { "epoch": 24.010616284300493, "grad_norm": 7.12798282620497e-05, "learning_rate": 1.0521317538861399e-05, "loss": 0.1902, "step": 18020 }, { "epoch": 24.0110661268556, "grad_norm": 5.6434309954056516e-05, "learning_rate": 1.0496326285799971e-05, "loss": 0.0, "step": 18030 }, { "epoch": 24.011515969410706, "grad_norm": 8.612764213467017e-05, "learning_rate": 1.0471335032738543e-05, "loss": 0.0, "step": 18040 }, { "epoch": 24.011965811965812, "grad_norm": 1.4709163224324584e-05, "learning_rate": 1.0446343779677114e-05, "loss": 0.0, "step": 18050 }, { "epoch": 24.012415654520918, "grad_norm": 0.00013793316611554474, "learning_rate": 1.0421352526615686e-05, "loss": 0.2625, "step": 18060 }, { "epoch": 24.012865497076024, "grad_norm": 7.536695920862257e-05, "learning_rate": 1.0396361273554257e-05, "loss": 0.0, "step": 18070 }, { "epoch": 24.01331533963113, "grad_norm": 0.00020756882440764457, "learning_rate": 1.0371370020492829e-05, "loss": 0.0, "step": 18080 }, { "epoch": 24.013765182186233, "grad_norm": 5.0054117309628054e-05, "learning_rate": 1.03463787674314e-05, "loss": 0.0, "step": 18090 }, { "epoch": 24.01421502474134, "grad_norm": 0.0001415008446201682, "learning_rate": 1.032138751436997e-05, "loss": 0.0, "step": 18100 }, { "epoch": 24.014664867296446, "grad_norm": 3.128729804302566e-05, "learning_rate": 1.0296396261308542e-05, "loss": 0.4243, "step": 18110 }, { "epoch": 24.015114709851552, "grad_norm": 1.1264657587162219e-05, "learning_rate": 1.0271405008247113e-05, "loss": 0.0, "step": 18120 }, { "epoch": 24.01556455240666, "grad_norm": 5.5820470151957124e-05, "learning_rate": 1.0246413755185685e-05, "loss": 0.0, "step": 18130 }, { "epoch": 24.016014394961765, "grad_norm": 8.980453276308253e-05, "learning_rate": 1.0221422502124257e-05, "loss": 0.0, "step": 18140 }, { "epoch": 24.016464237516868, "grad_norm": 1.0743220627773553e-05, "learning_rate": 1.0196431249062827e-05, "loss": 0.0, "step": 18150 }, { "epoch": 24.016914080071974, "grad_norm": 0.00011442044342402369, "learning_rate": 1.01714399960014e-05, "loss": 0.1653, "step": 18160 }, { "epoch": 24.01736392262708, "grad_norm": 8.183925274352077e-06, "learning_rate": 1.0146448742939972e-05, "loss": 0.0, "step": 18170 }, { "epoch": 24.017813765182186, "grad_norm": 0.00018126594659406692, "learning_rate": 1.0121457489878542e-05, "loss": 0.0, "step": 18180 }, { "epoch": 24.018263607737293, "grad_norm": 0.0001915957109304145, "learning_rate": 1.0096466236817115e-05, "loss": 0.0, "step": 18190 }, { "epoch": 24.0187134502924, "grad_norm": 3.368945908732712e-05, "learning_rate": 1.0071474983755685e-05, "loss": 0.0, "step": 18200 }, { "epoch": 24.0191632928475, "grad_norm": 7.003805512795225e-05, "learning_rate": 1.0046483730694257e-05, "loss": 0.0, "step": 18210 }, { "epoch": 24.019613135402608, "grad_norm": 1.5302082829293795e-05, "learning_rate": 1.002149247763283e-05, "loss": 0.0, "step": 18220 }, { "epoch": 24.020062977957714, "grad_norm": 0.00014703479246236384, "learning_rate": 9.9965012245714e-06, "loss": 0.0, "step": 18230 }, { "epoch": 24.02051282051282, "grad_norm": 2.128718733729329e-05, "learning_rate": 9.971509971509972e-06, "loss": 0.0, "step": 18240 }, { "epoch": 24.020962663067927, "grad_norm": 0.0002198404399678111, "learning_rate": 9.946518718448543e-06, "loss": 0.0, "step": 18250 }, { "epoch": 24.021412505623033, "grad_norm": 9.207950643030927e-05, "learning_rate": 9.921527465387115e-06, "loss": 0.0, "step": 18260 }, { "epoch": 24.02186234817814, "grad_norm": 4.8545443860348314e-05, "learning_rate": 9.896536212325687e-06, "loss": 0.0, "step": 18270 }, { "epoch": 24.022312190733242, "grad_norm": 0.00014104934234637767, "learning_rate": 9.871544959264258e-06, "loss": 0.0266, "step": 18280 }, { "epoch": 24.02276203328835, "grad_norm": 5.212486212258227e-05, "learning_rate": 9.84655370620283e-06, "loss": 0.0, "step": 18290 }, { "epoch": 24.023211875843455, "grad_norm": 0.00012106738722650334, "learning_rate": 9.821562453141402e-06, "loss": 0.0219, "step": 18300 }, { "epoch": 24.02366171839856, "grad_norm": 0.0001401987683493644, "learning_rate": 9.796571200079973e-06, "loss": 0.0, "step": 18310 }, { "epoch": 24.024111560953667, "grad_norm": 6.417468102881685e-05, "learning_rate": 9.771579947018545e-06, "loss": 0.0, "step": 18320 }, { "epoch": 24.024561403508773, "grad_norm": 5.155670805834234e-05, "learning_rate": 9.746588693957115e-06, "loss": 0.0, "step": 18330 }, { "epoch": 24.025011246063876, "grad_norm": 8.428020009887405e-06, "learning_rate": 9.721597440895688e-06, "loss": 0.0, "step": 18340 }, { "epoch": 24.025461088618982, "grad_norm": 0.00018475548131391406, "learning_rate": 9.69660618783426e-06, "loss": 0.0, "step": 18350 }, { "epoch": 24.02591093117409, "grad_norm": 4.1850860725389794e-05, "learning_rate": 9.67161493477283e-06, "loss": 0.0, "step": 18360 }, { "epoch": 24.026360773729195, "grad_norm": 0.003346325596794486, "learning_rate": 9.646623681711401e-06, "loss": 0.0, "step": 18370 }, { "epoch": 24.0268106162843, "grad_norm": 2.0896746718790382e-05, "learning_rate": 9.621632428649973e-06, "loss": 0.0, "step": 18380 }, { "epoch": 24.027260458839407, "grad_norm": 6.863420276204124e-05, "learning_rate": 9.596641175588544e-06, "loss": 0.0, "step": 18390 }, { "epoch": 24.027710301394514, "grad_norm": 1.2015420907118823e-05, "learning_rate": 9.571649922527116e-06, "loss": 0.0, "step": 18400 }, { "epoch": 24.028160143949616, "grad_norm": 7.34830682631582e-05, "learning_rate": 9.546658669465686e-06, "loss": 0.0, "step": 18410 }, { "epoch": 24.028609986504723, "grad_norm": 9.017364936880767e-05, "learning_rate": 9.521667416404259e-06, "loss": 0.0, "step": 18420 }, { "epoch": 24.02905982905983, "grad_norm": 0.0007917342009022832, "learning_rate": 9.49667616334283e-06, "loss": 0.0, "step": 18430 }, { "epoch": 24.029509671614935, "grad_norm": 7.96579297457356e-06, "learning_rate": 9.471684910281401e-06, "loss": 0.0, "step": 18440 }, { "epoch": 24.02995951417004, "grad_norm": 5.128119300934486e-05, "learning_rate": 9.446693657219973e-06, "loss": 0.0, "step": 18450 }, { "epoch": 24.030409356725148, "grad_norm": 0.00017732007836457342, "learning_rate": 9.421702404158544e-06, "loss": 0.0176, "step": 18460 }, { "epoch": 24.03085919928025, "grad_norm": 1.0804946214193478e-05, "learning_rate": 9.396711151097116e-06, "loss": 0.0, "step": 18470 }, { "epoch": 24.031309041835357, "grad_norm": 0.0001877601898740977, "learning_rate": 9.371719898035688e-06, "loss": 0.0, "step": 18480 }, { "epoch": 24.031758884390463, "grad_norm": 0.00010587155702523887, "learning_rate": 9.346728644974259e-06, "loss": 0.0, "step": 18490 }, { "epoch": 24.03220872694557, "grad_norm": 0.0001443439832655713, "learning_rate": 9.321737391912831e-06, "loss": 0.0, "step": 18500 }, { "epoch": 24.032658569500676, "grad_norm": 4.6121596824377775e-05, "learning_rate": 9.296746138851402e-06, "loss": 0.0, "step": 18510 }, { "epoch": 24.033108412055782, "grad_norm": 0.0020109680481255054, "learning_rate": 9.271754885789974e-06, "loss": 0.0, "step": 18520 }, { "epoch": 24.033333333333335, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.8969144292188345, "eval_loss": 0.7701441049575806, "eval_runtime": 143.8777, "eval_samples_per_second": 1.494, "eval_steps_per_second": 1.494, "step": 18525 }, { "epoch": 25.000224921277553, "grad_norm": 5.768218761659227e-05, "learning_rate": 9.246763632728546e-06, "loss": 0.0, "step": 18530 }, { "epoch": 25.00067476383266, "grad_norm": 0.0002671243855729699, "learning_rate": 9.221772379667117e-06, "loss": 0.0, "step": 18540 }, { "epoch": 25.001124606387766, "grad_norm": 6.234197644516826e-05, "learning_rate": 9.196781126605689e-06, "loss": 0.0, "step": 18550 }, { "epoch": 25.00157444894287, "grad_norm": 1.260911267308984e-05, "learning_rate": 9.171789873544261e-06, "loss": 0.0, "step": 18560 }, { "epoch": 25.002024291497975, "grad_norm": 1.4868866855977103e-05, "learning_rate": 9.146798620482832e-06, "loss": 0.0, "step": 18570 }, { "epoch": 25.00247413405308, "grad_norm": 4.202577838441357e-05, "learning_rate": 9.121807367421404e-06, "loss": 0.0, "step": 18580 }, { "epoch": 25.002923976608187, "grad_norm": 1.4761310922040138e-05, "learning_rate": 9.096816114359974e-06, "loss": 0.0, "step": 18590 }, { "epoch": 25.003373819163293, "grad_norm": 1.469064773118589e-05, "learning_rate": 9.071824861298546e-06, "loss": 0.0, "step": 18600 }, { "epoch": 25.0038236617184, "grad_norm": 7.887441825005226e-06, "learning_rate": 9.046833608237119e-06, "loss": 0.0, "step": 18610 }, { "epoch": 25.004273504273506, "grad_norm": 9.396842506248504e-05, "learning_rate": 9.02184235517569e-06, "loss": 0.0, "step": 18620 }, { "epoch": 25.00472334682861, "grad_norm": 5.436800347524695e-05, "learning_rate": 8.996851102114261e-06, "loss": 0.0, "step": 18630 }, { "epoch": 25.005173189383715, "grad_norm": 4.8900499677984044e-05, "learning_rate": 8.971859849052832e-06, "loss": 0.0, "step": 18640 }, { "epoch": 25.00562303193882, "grad_norm": 0.00016522601072210819, "learning_rate": 8.946868595991404e-06, "loss": 0.0, "step": 18650 }, { "epoch": 25.006072874493928, "grad_norm": 6.457780546043068e-05, "learning_rate": 8.921877342929975e-06, "loss": 0.0, "step": 18660 }, { "epoch": 25.006522717049034, "grad_norm": 9.141473128693178e-05, "learning_rate": 8.896886089868547e-06, "loss": 0.0, "step": 18670 }, { "epoch": 25.00697255960414, "grad_norm": 4.942928717355244e-05, "learning_rate": 8.871894836807117e-06, "loss": 0.0, "step": 18680 }, { "epoch": 25.007422402159243, "grad_norm": 0.00021431494678836316, "learning_rate": 8.84690358374569e-06, "loss": 0.0, "step": 18690 }, { "epoch": 25.00787224471435, "grad_norm": 2.4247996407211758e-05, "learning_rate": 8.82191233068426e-06, "loss": 0.0, "step": 18700 }, { "epoch": 25.008322087269455, "grad_norm": 4.603596971719526e-05, "learning_rate": 8.796921077622832e-06, "loss": 0.0, "step": 18710 }, { "epoch": 25.00877192982456, "grad_norm": 0.00018338690279051661, "learning_rate": 8.771929824561403e-06, "loss": 0.0, "step": 18720 }, { "epoch": 25.009221772379668, "grad_norm": 0.00014903802366461605, "learning_rate": 8.746938571499975e-06, "loss": 0.0, "step": 18730 }, { "epoch": 25.009671614934774, "grad_norm": 9.139598842011765e-05, "learning_rate": 8.721947318438547e-06, "loss": 0.0, "step": 18740 }, { "epoch": 25.010121457489877, "grad_norm": 5.374592001317069e-05, "learning_rate": 8.696956065377118e-06, "loss": 0.0, "step": 18750 }, { "epoch": 25.010571300044983, "grad_norm": 6.045843110769056e-05, "learning_rate": 8.67196481231569e-06, "loss": 0.0, "step": 18760 }, { "epoch": 25.01102114260009, "grad_norm": 0.0001278681302210316, "learning_rate": 8.64697355925426e-06, "loss": 0.0, "step": 18770 }, { "epoch": 25.011470985155196, "grad_norm": 9.893133392324671e-05, "learning_rate": 8.621982306192833e-06, "loss": 0.0, "step": 18780 }, { "epoch": 25.011920827710302, "grad_norm": 6.618231418542564e-05, "learning_rate": 8.596991053131405e-06, "loss": 0.2267, "step": 18790 }, { "epoch": 25.01237067026541, "grad_norm": 5.454689380712807e-05, "learning_rate": 8.571999800069975e-06, "loss": 0.0, "step": 18800 }, { "epoch": 25.012820512820515, "grad_norm": 5.66052804060746e-05, "learning_rate": 8.547008547008548e-06, "loss": 0.0, "step": 18810 }, { "epoch": 25.013270355375617, "grad_norm": 0.00016078574117273092, "learning_rate": 8.522017293947118e-06, "loss": 0.0, "step": 18820 }, { "epoch": 25.013720197930724, "grad_norm": 4.056755642523058e-05, "learning_rate": 8.49702604088569e-06, "loss": 0.0, "step": 18830 }, { "epoch": 25.01417004048583, "grad_norm": 6.79106087773107e-05, "learning_rate": 8.472034787824263e-06, "loss": 0.0, "step": 18840 }, { "epoch": 25.014619883040936, "grad_norm": 8.359259481949266e-06, "learning_rate": 8.447043534762833e-06, "loss": 0.0, "step": 18850 }, { "epoch": 25.015069725596042, "grad_norm": 0.00025252869818359613, "learning_rate": 8.422052281701405e-06, "loss": 0.0, "step": 18860 }, { "epoch": 25.01551956815115, "grad_norm": 0.00014866738638374954, "learning_rate": 8.397061028639978e-06, "loss": 0.0, "step": 18870 }, { "epoch": 25.01596941070625, "grad_norm": 1.038740902004065e-05, "learning_rate": 8.372069775578548e-06, "loss": 0.0, "step": 18880 }, { "epoch": 25.016419253261358, "grad_norm": 0.00011278248712187633, "learning_rate": 8.34707852251712e-06, "loss": 0.0, "step": 18890 }, { "epoch": 25.016869095816464, "grad_norm": 6.214394670678303e-05, "learning_rate": 8.32208726945569e-06, "loss": 0.0, "step": 18900 }, { "epoch": 25.01731893837157, "grad_norm": 3.8402187783503905e-05, "learning_rate": 8.297096016394263e-06, "loss": 0.0, "step": 18910 }, { "epoch": 25.017768780926676, "grad_norm": 6.363236025208607e-05, "learning_rate": 8.272104763332835e-06, "loss": 0.0, "step": 18920 }, { "epoch": 25.018218623481783, "grad_norm": 0.0001524594408692792, "learning_rate": 8.247113510271406e-06, "loss": 0.0, "step": 18930 }, { "epoch": 25.018668466036885, "grad_norm": 9.741685062181205e-05, "learning_rate": 8.222122257209976e-06, "loss": 0.0, "step": 18940 }, { "epoch": 25.01911830859199, "grad_norm": 5.036893708165735e-05, "learning_rate": 8.197131004148548e-06, "loss": 0.0, "step": 18950 }, { "epoch": 25.019568151147098, "grad_norm": 1.089521811081795e-05, "learning_rate": 8.172139751087119e-06, "loss": 0.0, "step": 18960 }, { "epoch": 25.020017993702204, "grad_norm": 4.6289151214296e-05, "learning_rate": 8.147148498025691e-06, "loss": 0.0, "step": 18970 }, { "epoch": 25.02046783625731, "grad_norm": 0.00026391420396976173, "learning_rate": 8.122157244964262e-06, "loss": 0.0, "step": 18980 }, { "epoch": 25.020917678812417, "grad_norm": 0.0009327546576969326, "learning_rate": 8.097165991902834e-06, "loss": 0.0, "step": 18990 }, { "epoch": 25.021367521367523, "grad_norm": 3.346587982377969e-05, "learning_rate": 8.072174738841406e-06, "loss": 0.0, "step": 19000 }, { "epoch": 25.021817363922626, "grad_norm": 4.9290916649624705e-05, "learning_rate": 8.047183485779977e-06, "loss": 0.0, "step": 19010 }, { "epoch": 25.022267206477732, "grad_norm": 0.00024089630460366607, "learning_rate": 8.022192232718549e-06, "loss": 0.0, "step": 19020 }, { "epoch": 25.02271704903284, "grad_norm": 2.4198923711082898e-05, "learning_rate": 7.99720097965712e-06, "loss": 0.0, "step": 19030 }, { "epoch": 25.023166891587945, "grad_norm": 6.38297206023708e-05, "learning_rate": 7.972209726595692e-06, "loss": 0.0136, "step": 19040 }, { "epoch": 25.02361673414305, "grad_norm": 6.877033592900261e-05, "learning_rate": 7.947218473534264e-06, "loss": 0.0122, "step": 19050 }, { "epoch": 25.024066576698157, "grad_norm": 4.8008467274485156e-05, "learning_rate": 7.922227220472834e-06, "loss": 0.0, "step": 19060 }, { "epoch": 25.02451641925326, "grad_norm": 5.659965972881764e-05, "learning_rate": 7.897235967411407e-06, "loss": 0.0, "step": 19070 }, { "epoch": 25.024966261808366, "grad_norm": 0.0007093020831234753, "learning_rate": 7.872244714349977e-06, "loss": 0.0, "step": 19080 }, { "epoch": 25.025416104363472, "grad_norm": 8.034856000449508e-05, "learning_rate": 7.84725346128855e-06, "loss": 0.0, "step": 19090 }, { "epoch": 25.02586594691858, "grad_norm": 0.0002575552207417786, "learning_rate": 7.822262208227121e-06, "loss": 0.0, "step": 19100 }, { "epoch": 25.026315789473685, "grad_norm": 0.00012933016114402562, "learning_rate": 7.797270955165692e-06, "loss": 0.0001, "step": 19110 }, { "epoch": 25.02676563202879, "grad_norm": 0.0001859216863522306, "learning_rate": 7.772279702104264e-06, "loss": 0.0, "step": 19120 }, { "epoch": 25.027215474583894, "grad_norm": 8.868815348250791e-05, "learning_rate": 7.747288449042836e-06, "loss": 0.0, "step": 19130 }, { "epoch": 25.027665317139, "grad_norm": 1.0929978998319712e-05, "learning_rate": 7.722297195981407e-06, "loss": 0.0, "step": 19140 }, { "epoch": 25.028115159694106, "grad_norm": 4.939647624269128e-05, "learning_rate": 7.697305942919979e-06, "loss": 0.0, "step": 19150 }, { "epoch": 25.028565002249213, "grad_norm": 8.83165339473635e-05, "learning_rate": 7.67231468985855e-06, "loss": 0.0, "step": 19160 }, { "epoch": 25.02901484480432, "grad_norm": 4.967578570358455e-05, "learning_rate": 7.647323436797122e-06, "loss": 0.0, "step": 19170 }, { "epoch": 25.029464687359425, "grad_norm": 0.00014863147225696594, "learning_rate": 7.622332183735693e-06, "loss": 0.0, "step": 19180 }, { "epoch": 25.02991452991453, "grad_norm": 0.0002585393376648426, "learning_rate": 7.597340930674264e-06, "loss": 0.0, "step": 19190 }, { "epoch": 25.030364372469634, "grad_norm": 5.8682879171101376e-05, "learning_rate": 7.572349677612836e-06, "loss": 0.4894, "step": 19200 }, { "epoch": 25.03081421502474, "grad_norm": 9.62483900366351e-05, "learning_rate": 7.5473584245514065e-06, "loss": 0.0, "step": 19210 }, { "epoch": 25.031264057579847, "grad_norm": 5.3955449402565137e-05, "learning_rate": 7.522367171489979e-06, "loss": 0.0, "step": 19220 }, { "epoch": 25.031713900134953, "grad_norm": 4.975936099071987e-05, "learning_rate": 7.497375918428551e-06, "loss": 0.0, "step": 19230 }, { "epoch": 25.03216374269006, "grad_norm": 3.1714382203062996e-05, "learning_rate": 7.4723846653671214e-06, "loss": 0.0, "step": 19240 }, { "epoch": 25.032613585245166, "grad_norm": 2.1250783902360126e-05, "learning_rate": 7.447393412305694e-06, "loss": 0.0, "step": 19250 }, { "epoch": 25.03306342780027, "grad_norm": 7.753247336950153e-05, "learning_rate": 7.422402159244266e-06, "loss": 0.0, "step": 19260 }, { "epoch": 25.033333333333335, "eval_accuracy": 0.9023255813953488, "eval_f1": 0.9014498806822582, "eval_loss": 0.7924391627311707, "eval_runtime": 141.4809, "eval_samples_per_second": 1.52, "eval_steps_per_second": 1.52, "step": 19266 }, { "epoch": 26.000179937022043, "grad_norm": 5.020639218855649e-05, "learning_rate": 7.397410906182836e-06, "loss": 0.0, "step": 19270 }, { "epoch": 26.00062977957715, "grad_norm": 3.383470539120026e-05, "learning_rate": 7.3724196531214086e-06, "loss": 0.0, "step": 19280 }, { "epoch": 26.001079622132252, "grad_norm": 5.749919728259556e-05, "learning_rate": 7.347428400059979e-06, "loss": 0.0, "step": 19290 }, { "epoch": 26.00152946468736, "grad_norm": 0.0001222777646034956, "learning_rate": 7.3224371469985505e-06, "loss": 0.0, "step": 19300 }, { "epoch": 26.001979307242465, "grad_norm": 4.8774643801152706e-05, "learning_rate": 7.297445893937123e-06, "loss": 0.0, "step": 19310 }, { "epoch": 26.00242914979757, "grad_norm": 9.118792513618246e-05, "learning_rate": 7.272454640875693e-06, "loss": 0.0131, "step": 19320 }, { "epoch": 26.002878992352677, "grad_norm": 0.00012592706480063498, "learning_rate": 7.247463387814265e-06, "loss": 0.0, "step": 19330 }, { "epoch": 26.003328834907784, "grad_norm": 6.0971717175561935e-05, "learning_rate": 7.222472134752836e-06, "loss": 0.0, "step": 19340 }, { "epoch": 26.003778677462886, "grad_norm": 4.9173719162354246e-05, "learning_rate": 7.197480881691408e-06, "loss": 0.0, "step": 19350 }, { "epoch": 26.004228520017993, "grad_norm": 0.00023535304353572428, "learning_rate": 7.17248962862998e-06, "loss": 0.0, "step": 19360 }, { "epoch": 26.0046783625731, "grad_norm": 6.919256702531129e-05, "learning_rate": 7.147498375568551e-06, "loss": 0.0, "step": 19370 }, { "epoch": 26.005128205128205, "grad_norm": 0.00013647721789311618, "learning_rate": 7.122507122507123e-06, "loss": 0.0, "step": 19380 }, { "epoch": 26.00557804768331, "grad_norm": 3.22272717312444e-05, "learning_rate": 7.097515869445695e-06, "loss": 0.0, "step": 19390 }, { "epoch": 26.006027890238418, "grad_norm": 0.00014954402286093682, "learning_rate": 7.072524616384266e-06, "loss": 0.011, "step": 19400 }, { "epoch": 26.006477732793524, "grad_norm": 9.461242007091641e-05, "learning_rate": 7.047533363322838e-06, "loss": 0.0, "step": 19410 }, { "epoch": 26.006927575348627, "grad_norm": 5.468566087074578e-05, "learning_rate": 7.0225421102614085e-06, "loss": 0.0, "step": 19420 }, { "epoch": 26.007377417903733, "grad_norm": 0.00011054614878958091, "learning_rate": 6.997550857199981e-06, "loss": 0.0, "step": 19430 }, { "epoch": 26.00782726045884, "grad_norm": 8.279627763840836e-06, "learning_rate": 6.972559604138552e-06, "loss": 0.0, "step": 19440 }, { "epoch": 26.008277103013945, "grad_norm": 0.00010697323159547523, "learning_rate": 6.9475683510771235e-06, "loss": 0.0, "step": 19450 }, { "epoch": 26.00872694556905, "grad_norm": 7.881387864472345e-05, "learning_rate": 6.922577098015695e-06, "loss": 0.0, "step": 19460 }, { "epoch": 26.009176788124158, "grad_norm": 1.0763211321318522e-05, "learning_rate": 6.897585844954265e-06, "loss": 0.0, "step": 19470 }, { "epoch": 26.00962663067926, "grad_norm": 4.906982576358132e-05, "learning_rate": 6.8725945918928376e-06, "loss": 0.0, "step": 19480 }, { "epoch": 26.010076473234367, "grad_norm": 8.990523929242045e-05, "learning_rate": 6.84760333883141e-06, "loss": 0.0, "step": 19490 }, { "epoch": 26.010526315789473, "grad_norm": 0.00014429763541556895, "learning_rate": 6.82261208576998e-06, "loss": 0.0, "step": 19500 }, { "epoch": 26.01097615834458, "grad_norm": 3.263429243816063e-05, "learning_rate": 6.7976208327085525e-06, "loss": 0.0095, "step": 19510 }, { "epoch": 26.011426000899686, "grad_norm": 7.242514129757183e-06, "learning_rate": 6.772629579647125e-06, "loss": 0.2558, "step": 19520 }, { "epoch": 26.011875843454792, "grad_norm": 6.0492795455502346e-05, "learning_rate": 6.747638326585695e-06, "loss": 0.0, "step": 19530 }, { "epoch": 26.012325686009895, "grad_norm": 4.674341471400112e-05, "learning_rate": 6.7226470735242674e-06, "loss": 0.0, "step": 19540 }, { "epoch": 26.012775528565, "grad_norm": 4.5961307478137314e-05, "learning_rate": 6.697655820462838e-06, "loss": 0.0, "step": 19550 }, { "epoch": 26.013225371120107, "grad_norm": 1.6524625607416965e-05, "learning_rate": 6.67266456740141e-06, "loss": 0.0, "step": 19560 }, { "epoch": 26.013675213675214, "grad_norm": 0.00012202440848341212, "learning_rate": 6.6476733143399815e-06, "loss": 0.0, "step": 19570 }, { "epoch": 26.01412505623032, "grad_norm": 4.494684253586456e-05, "learning_rate": 6.622682061278553e-06, "loss": 0.0, "step": 19580 }, { "epoch": 26.014574898785426, "grad_norm": 7.73922056396259e-06, "learning_rate": 6.597690808217124e-06, "loss": 0.0, "step": 19590 }, { "epoch": 26.015024741340532, "grad_norm": 8.580759458709508e-05, "learning_rate": 6.572699555155696e-06, "loss": 0.0, "step": 19600 }, { "epoch": 26.015474583895635, "grad_norm": 0.00011487273877719417, "learning_rate": 6.547708302094267e-06, "loss": 0.0, "step": 19610 }, { "epoch": 26.01592442645074, "grad_norm": 6.581631896551698e-05, "learning_rate": 6.522717049032839e-06, "loss": 0.0, "step": 19620 }, { "epoch": 26.016374269005848, "grad_norm": 1.0582040886220057e-05, "learning_rate": 6.49772579597141e-06, "loss": 0.0, "step": 19630 }, { "epoch": 26.016824111560954, "grad_norm": 0.0001323604374192655, "learning_rate": 6.472734542909982e-06, "loss": 0.0, "step": 19640 }, { "epoch": 26.01727395411606, "grad_norm": 1.7104133803513832e-05, "learning_rate": 6.4477432898485524e-06, "loss": 0.0, "step": 19650 }, { "epoch": 26.017723796671167, "grad_norm": 0.00028111826395615935, "learning_rate": 6.422752036787125e-06, "loss": 0.0, "step": 19660 }, { "epoch": 26.01817363922627, "grad_norm": 1.0770091648737434e-05, "learning_rate": 6.397760783725697e-06, "loss": 0.0, "step": 19670 }, { "epoch": 26.018623481781376, "grad_norm": 0.00013393415429163724, "learning_rate": 6.372769530664267e-06, "loss": 0.0, "step": 19680 }, { "epoch": 26.019073324336482, "grad_norm": 6.0589027270907536e-05, "learning_rate": 6.34777827760284e-06, "loss": 0.0, "step": 19690 }, { "epoch": 26.019523166891588, "grad_norm": 6.314905476756394e-05, "learning_rate": 6.322787024541412e-06, "loss": 0.0, "step": 19700 }, { "epoch": 26.019973009446694, "grad_norm": 3.8313562981784344e-05, "learning_rate": 6.297795771479982e-06, "loss": 0.0, "step": 19710 }, { "epoch": 26.0204228520018, "grad_norm": 0.00010379942978033796, "learning_rate": 6.272804518418554e-06, "loss": 0.0, "step": 19720 }, { "epoch": 26.020872694556903, "grad_norm": 5.5127384257502854e-05, "learning_rate": 6.247813265357125e-06, "loss": 0.0, "step": 19730 }, { "epoch": 26.02132253711201, "grad_norm": 1.1031749636458699e-05, "learning_rate": 6.222822012295696e-06, "loss": 0.0, "step": 19740 }, { "epoch": 26.021772379667116, "grad_norm": 4.589645322994329e-05, "learning_rate": 6.197830759234268e-06, "loss": 0.0, "step": 19750 }, { "epoch": 26.022222222222222, "grad_norm": 0.00010614321945467964, "learning_rate": 6.172839506172839e-06, "loss": 0.0, "step": 19760 }, { "epoch": 26.02267206477733, "grad_norm": 5.87588656344451e-05, "learning_rate": 6.147848253111411e-06, "loss": 0.0, "step": 19770 }, { "epoch": 26.023121907332435, "grad_norm": 0.00013276493700686842, "learning_rate": 6.122857000049983e-06, "loss": 0.0, "step": 19780 }, { "epoch": 26.02357174988754, "grad_norm": 0.00011957735114265233, "learning_rate": 6.097865746988554e-06, "loss": 0.2426, "step": 19790 }, { "epoch": 26.024021592442644, "grad_norm": 1.0441100130265113e-05, "learning_rate": 6.0728744939271254e-06, "loss": 0.0, "step": 19800 }, { "epoch": 26.02447143499775, "grad_norm": 4.557161082630046e-05, "learning_rate": 6.047883240865698e-06, "loss": 0.0, "step": 19810 }, { "epoch": 26.024921277552856, "grad_norm": 6.694184412481263e-05, "learning_rate": 6.022891987804269e-06, "loss": 0.0, "step": 19820 }, { "epoch": 26.025371120107962, "grad_norm": 1.121989498642506e-05, "learning_rate": 5.99790073474284e-06, "loss": 0.0, "step": 19830 }, { "epoch": 26.02582096266307, "grad_norm": 4.009645272162743e-05, "learning_rate": 5.972909481681412e-06, "loss": 0.0, "step": 19840 }, { "epoch": 26.026270805218175, "grad_norm": 1.0906830539170187e-05, "learning_rate": 5.947918228619984e-06, "loss": 0.0, "step": 19850 }, { "epoch": 26.026720647773278, "grad_norm": 2.0681591195170768e-05, "learning_rate": 5.922926975558555e-06, "loss": 0.0, "step": 19860 }, { "epoch": 26.027170490328384, "grad_norm": 2.325711102457717e-05, "learning_rate": 5.897935722497127e-06, "loss": 0.0, "step": 19870 }, { "epoch": 26.02762033288349, "grad_norm": 0.00018761688261292875, "learning_rate": 5.872944469435697e-06, "loss": 0.0, "step": 19880 }, { "epoch": 26.028070175438597, "grad_norm": 9.765683353180066e-05, "learning_rate": 5.8479532163742686e-06, "loss": 0.0, "step": 19890 }, { "epoch": 26.028520017993703, "grad_norm": 0.00016801420133560896, "learning_rate": 5.822961963312841e-06, "loss": 0.0, "step": 19900 }, { "epoch": 26.02896986054881, "grad_norm": 4.9909223889699206e-05, "learning_rate": 5.797970710251412e-06, "loss": 0.0, "step": 19910 }, { "epoch": 26.029419703103915, "grad_norm": 4.9165264499606565e-05, "learning_rate": 5.7729794571899835e-06, "loss": 0.0, "step": 19920 }, { "epoch": 26.029869545659018, "grad_norm": 3.794000804191455e-05, "learning_rate": 5.747988204128555e-06, "loss": 0.0, "step": 19930 }, { "epoch": 26.030319388214124, "grad_norm": 8.395969780394807e-05, "learning_rate": 5.722996951067127e-06, "loss": 0.0, "step": 19940 }, { "epoch": 26.03076923076923, "grad_norm": 6.791610940126702e-05, "learning_rate": 5.6980056980056985e-06, "loss": 0.0, "step": 19950 }, { "epoch": 26.031219073324337, "grad_norm": 2.966651663882658e-05, "learning_rate": 5.67301444494427e-06, "loss": 0.0, "step": 19960 }, { "epoch": 26.031668915879443, "grad_norm": 1.0486045539437328e-05, "learning_rate": 5.648023191882841e-06, "loss": 0.0, "step": 19970 }, { "epoch": 26.03211875843455, "grad_norm": 28.197132110595703, "learning_rate": 5.623031938821413e-06, "loss": 0.2298, "step": 19980 }, { "epoch": 26.032568600989652, "grad_norm": 2.38013581110863e-05, "learning_rate": 5.598040685759985e-06, "loss": 0.0, "step": 19990 }, { "epoch": 26.03301844354476, "grad_norm": 2.8620495868381113e-05, "learning_rate": 5.573049432698556e-06, "loss": 0.0, "step": 20000 }, { "epoch": 26.033333333333335, "eval_accuracy": 0.9023255813953488, "eval_f1": 0.9014498806822582, "eval_loss": 0.795507550239563, "eval_runtime": 138.8088, "eval_samples_per_second": 1.549, "eval_steps_per_second": 1.549, "step": 20007 }, { "epoch": 27.000134952766533, "grad_norm": 4.600766988005489e-05, "learning_rate": 5.5480581796371275e-06, "loss": 0.0, "step": 20010 }, { "epoch": 27.000584795321636, "grad_norm": 1.0620598004607018e-05, "learning_rate": 5.523066926575699e-06, "loss": 0.0, "step": 20020 }, { "epoch": 27.001034637876742, "grad_norm": 2.8910704713780433e-05, "learning_rate": 5.49807567351427e-06, "loss": 0.0, "step": 20030 }, { "epoch": 27.00148448043185, "grad_norm": 8.400883234571666e-05, "learning_rate": 5.473084420452842e-06, "loss": 0.0, "step": 20040 }, { "epoch": 27.001934322986955, "grad_norm": 5.019545278628357e-05, "learning_rate": 5.448093167391413e-06, "loss": 0.0, "step": 20050 }, { "epoch": 27.00238416554206, "grad_norm": 7.78531830292195e-06, "learning_rate": 5.423101914329984e-06, "loss": 0.0, "step": 20060 }, { "epoch": 27.002834008097167, "grad_norm": 4.2451465560588986e-05, "learning_rate": 5.3981106612685565e-06, "loss": 0.0, "step": 20070 }, { "epoch": 27.00328385065227, "grad_norm": 3.849409768008627e-05, "learning_rate": 5.373119408207128e-06, "loss": 0.0, "step": 20080 }, { "epoch": 27.003733693207376, "grad_norm": 27.75694465637207, "learning_rate": 5.348128155145699e-06, "loss": 0.213, "step": 20090 }, { "epoch": 27.004183535762483, "grad_norm": 8.791284199105576e-05, "learning_rate": 5.323136902084271e-06, "loss": 0.0, "step": 20100 }, { "epoch": 27.00463337831759, "grad_norm": 27.43233871459961, "learning_rate": 5.298145649022843e-06, "loss": 0.2221, "step": 20110 }, { "epoch": 27.005083220872695, "grad_norm": 8.236223948188126e-05, "learning_rate": 5.273154395961414e-06, "loss": 0.0, "step": 20120 }, { "epoch": 27.0055330634278, "grad_norm": 4.70541745016817e-05, "learning_rate": 5.2481631428999856e-06, "loss": 0.0, "step": 20130 }, { "epoch": 27.005982905982908, "grad_norm": 2.0352146748336963e-05, "learning_rate": 5.223171889838557e-06, "loss": 0.0, "step": 20140 }, { "epoch": 27.00643274853801, "grad_norm": 6.139372271718457e-05, "learning_rate": 5.198180636777128e-06, "loss": 0.0, "step": 20150 }, { "epoch": 27.006882591093117, "grad_norm": 3.6918532714480534e-05, "learning_rate": 5.1731893837157e-06, "loss": 0.0, "step": 20160 }, { "epoch": 27.007332433648223, "grad_norm": 1.087849432224175e-05, "learning_rate": 5.148198130654271e-06, "loss": 0.0, "step": 20170 }, { "epoch": 27.00778227620333, "grad_norm": 0.00010525659308768809, "learning_rate": 5.123206877592842e-06, "loss": 0.0, "step": 20180 }, { "epoch": 27.008232118758436, "grad_norm": 1.4330070371215697e-05, "learning_rate": 5.098215624531414e-06, "loss": 0.2035, "step": 20190 }, { "epoch": 27.008681961313542, "grad_norm": 0.00023004523245617747, "learning_rate": 5.073224371469986e-06, "loss": 0.0, "step": 20200 }, { "epoch": 27.009131803868645, "grad_norm": 0.0001088624558178708, "learning_rate": 5.048233118408557e-06, "loss": 0.0, "step": 20210 }, { "epoch": 27.00958164642375, "grad_norm": 4.165528662269935e-05, "learning_rate": 5.023241865347129e-06, "loss": 0.0, "step": 20220 }, { "epoch": 27.010031488978857, "grad_norm": 0.0001164772838819772, "learning_rate": 4.9982506122857e-06, "loss": 0.0, "step": 20230 }, { "epoch": 27.010481331533963, "grad_norm": 5.537069228012115e-05, "learning_rate": 4.973259359224271e-06, "loss": 0.0, "step": 20240 }, { "epoch": 27.01093117408907, "grad_norm": 4.1998027882073075e-05, "learning_rate": 4.948268106162844e-06, "loss": 0.0, "step": 20250 }, { "epoch": 27.011381016644176, "grad_norm": 4.55666049674619e-05, "learning_rate": 4.923276853101415e-06, "loss": 0.0, "step": 20260 }, { "epoch": 27.01183085919928, "grad_norm": 0.0002126196341123432, "learning_rate": 4.898285600039986e-06, "loss": 0.0005, "step": 20270 }, { "epoch": 27.012280701754385, "grad_norm": 1.1003239706042223e-05, "learning_rate": 4.873294346978558e-06, "loss": 0.0, "step": 20280 }, { "epoch": 27.01273054430949, "grad_norm": 5.206066998653114e-05, "learning_rate": 4.84830309391713e-06, "loss": 0.0, "step": 20290 }, { "epoch": 27.013180386864597, "grad_norm": 4.412904672790319e-05, "learning_rate": 4.8233118408557004e-06, "loss": 0.0, "step": 20300 }, { "epoch": 27.013630229419704, "grad_norm": 0.00010732950613601133, "learning_rate": 4.798320587794272e-06, "loss": 0.0, "step": 20310 }, { "epoch": 27.01408007197481, "grad_norm": 3.871810986311175e-05, "learning_rate": 4.773329334732843e-06, "loss": 0.0, "step": 20320 }, { "epoch": 27.014529914529916, "grad_norm": 0.00045583664905279875, "learning_rate": 4.748338081671415e-06, "loss": 0.0, "step": 20330 }, { "epoch": 27.01497975708502, "grad_norm": 7.374841061391635e-06, "learning_rate": 4.723346828609987e-06, "loss": 0.0, "step": 20340 }, { "epoch": 27.015429599640125, "grad_norm": 0.00014448180445469916, "learning_rate": 4.698355575548558e-06, "loss": 0.0, "step": 20350 }, { "epoch": 27.01587944219523, "grad_norm": 4.5370332372840494e-05, "learning_rate": 4.6733643224871295e-06, "loss": 0.0, "step": 20360 }, { "epoch": 27.016329284750338, "grad_norm": 0.00012769374006893486, "learning_rate": 4.648373069425701e-06, "loss": 0.0, "step": 20370 }, { "epoch": 27.016779127305444, "grad_norm": 8.324395457748324e-05, "learning_rate": 4.623381816364273e-06, "loss": 0.0, "step": 20380 }, { "epoch": 27.01722896986055, "grad_norm": 9.110749670071527e-05, "learning_rate": 4.598390563302844e-06, "loss": 0.0, "step": 20390 }, { "epoch": 27.017678812415653, "grad_norm": 8.744793740333989e-05, "learning_rate": 4.573399310241416e-06, "loss": 0.0, "step": 20400 }, { "epoch": 27.01812865497076, "grad_norm": 4.7513774916296825e-05, "learning_rate": 4.548408057179987e-06, "loss": 0.0, "step": 20410 }, { "epoch": 27.018578497525866, "grad_norm": 4.2235496948705986e-05, "learning_rate": 4.523416804118559e-06, "loss": 0.0, "step": 20420 }, { "epoch": 27.019028340080972, "grad_norm": 4.154513590037823e-05, "learning_rate": 4.498425551057131e-06, "loss": 0.0, "step": 20430 }, { "epoch": 27.019478182636078, "grad_norm": 4.476084723137319e-05, "learning_rate": 4.473434297995702e-06, "loss": 0.0, "step": 20440 }, { "epoch": 27.019928025191184, "grad_norm": 8.015440835151821e-05, "learning_rate": 4.4484430449342734e-06, "loss": 0.0, "step": 20450 }, { "epoch": 27.020377867746287, "grad_norm": 1.0889186341955792e-05, "learning_rate": 4.423451791872845e-06, "loss": 0.0, "step": 20460 }, { "epoch": 27.020827710301393, "grad_norm": 4.376696961116977e-05, "learning_rate": 4.398460538811416e-06, "loss": 0.0, "step": 20470 }, { "epoch": 27.0212775528565, "grad_norm": 4.285398972569965e-05, "learning_rate": 4.3734692857499875e-06, "loss": 0.0, "step": 20480 }, { "epoch": 27.021727395411606, "grad_norm": 0.00010403386113466695, "learning_rate": 4.348478032688559e-06, "loss": 0.0, "step": 20490 }, { "epoch": 27.022177237966712, "grad_norm": 9.626185055822134e-05, "learning_rate": 4.32348677962713e-06, "loss": 0.0, "step": 20500 }, { "epoch": 27.02262708052182, "grad_norm": 4.611338590621017e-05, "learning_rate": 4.2984955265657025e-06, "loss": 0.0, "step": 20510 }, { "epoch": 27.023076923076925, "grad_norm": 2.3320217223954387e-05, "learning_rate": 4.273504273504274e-06, "loss": 0.0, "step": 20520 }, { "epoch": 27.023526765632027, "grad_norm": 5.852384856552817e-05, "learning_rate": 4.248513020442845e-06, "loss": 0.0, "step": 20530 }, { "epoch": 27.023976608187134, "grad_norm": 4.2648494854802266e-05, "learning_rate": 4.2235217673814166e-06, "loss": 0.0, "step": 20540 }, { "epoch": 27.02442645074224, "grad_norm": 2.290113116032444e-05, "learning_rate": 4.198530514319989e-06, "loss": 0.0, "step": 20550 }, { "epoch": 27.024876293297346, "grad_norm": 3.6734138120664284e-05, "learning_rate": 4.17353926125856e-06, "loss": 0.0, "step": 20560 }, { "epoch": 27.025326135852453, "grad_norm": 3.8403090002248064e-05, "learning_rate": 4.1485480081971315e-06, "loss": 0.0, "step": 20570 }, { "epoch": 27.02577597840756, "grad_norm": 3.8590562326135114e-05, "learning_rate": 4.123556755135703e-06, "loss": 0.0, "step": 20580 }, { "epoch": 27.02622582096266, "grad_norm": 1.193082698591752e-05, "learning_rate": 4.098565502074274e-06, "loss": 0.0167, "step": 20590 }, { "epoch": 27.026675663517768, "grad_norm": 1.0765789738798048e-05, "learning_rate": 4.073574249012846e-06, "loss": 0.0, "step": 20600 }, { "epoch": 27.027125506072874, "grad_norm": 8.50547367008403e-05, "learning_rate": 4.048582995951417e-06, "loss": 0.0, "step": 20610 }, { "epoch": 27.02757534862798, "grad_norm": 1.2438663361535873e-05, "learning_rate": 4.023591742889988e-06, "loss": 0.0, "step": 20620 }, { "epoch": 27.028025191183087, "grad_norm": 5.018794399802573e-05, "learning_rate": 3.99860048982856e-06, "loss": 0.0, "step": 20630 }, { "epoch": 27.028475033738193, "grad_norm": 4.261262893676758, "learning_rate": 3.973609236767132e-06, "loss": 0.0147, "step": 20640 }, { "epoch": 27.028924876293296, "grad_norm": 2.408621003269218e-05, "learning_rate": 3.948617983705703e-06, "loss": 0.0, "step": 20650 }, { "epoch": 27.029374718848402, "grad_norm": 7.914843990874942e-06, "learning_rate": 3.923626730644275e-06, "loss": 0.0, "step": 20660 }, { "epoch": 27.029824561403508, "grad_norm": 0.0001244724408024922, "learning_rate": 3.898635477582846e-06, "loss": 0.0, "step": 20670 }, { "epoch": 27.030274403958614, "grad_norm": 5.987916301819496e-05, "learning_rate": 3.873644224521418e-06, "loss": 0.0, "step": 20680 }, { "epoch": 27.03072424651372, "grad_norm": 4.55908702861052e-05, "learning_rate": 3.8486529714599896e-06, "loss": 0.0, "step": 20690 }, { "epoch": 27.031174089068827, "grad_norm": 0.00010045659291790798, "learning_rate": 3.823661718398561e-06, "loss": 0.0, "step": 20700 }, { "epoch": 27.031623931623933, "grad_norm": 4.627270027413033e-05, "learning_rate": 3.798670465337132e-06, "loss": 0.0, "step": 20710 }, { "epoch": 27.032073774179036, "grad_norm": 0.00013196065265219659, "learning_rate": 3.7736792122757032e-06, "loss": 0.0, "step": 20720 }, { "epoch": 27.032523616734142, "grad_norm": 0.00010084197856485844, "learning_rate": 3.7486879592142755e-06, "loss": 0.0, "step": 20730 }, { "epoch": 27.03297345928925, "grad_norm": 0.0001419501204509288, "learning_rate": 3.723696706152847e-06, "loss": 0.0, "step": 20740 }, { "epoch": 27.033333333333335, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.8969236153087519, "eval_loss": 0.8675167560577393, "eval_runtime": 135.5689, "eval_samples_per_second": 1.586, "eval_steps_per_second": 1.586, "step": 20748 }, { "epoch": 28.00008996851102, "grad_norm": 5.391028389567509e-05, "learning_rate": 3.698705453091418e-06, "loss": 0.0, "step": 20750 }, { "epoch": 28.000539811066126, "grad_norm": 0.00010820186435012147, "learning_rate": 3.6737142000299895e-06, "loss": 0.0, "step": 20760 }, { "epoch": 28.000989653621232, "grad_norm": 5.733988655265421e-05, "learning_rate": 3.6487229469685613e-06, "loss": 0.0, "step": 20770 }, { "epoch": 28.00143949617634, "grad_norm": 0.0001874390000011772, "learning_rate": 3.6237316939071327e-06, "loss": 0.0, "step": 20780 }, { "epoch": 28.001889338731445, "grad_norm": 3.218042911612429e-05, "learning_rate": 3.598740440845704e-06, "loss": 0.0, "step": 20790 }, { "epoch": 28.00233918128655, "grad_norm": 5.542067810893059e-05, "learning_rate": 3.5737491877842754e-06, "loss": 0.0, "step": 20800 }, { "epoch": 28.002789023841654, "grad_norm": 4.4881504436489195e-05, "learning_rate": 3.5487579347228476e-06, "loss": 0.0, "step": 20810 }, { "epoch": 28.00323886639676, "grad_norm": 4.439896656549536e-05, "learning_rate": 3.523766681661419e-06, "loss": 0.0, "step": 20820 }, { "epoch": 28.003688708951866, "grad_norm": 1.50968890011427e-05, "learning_rate": 3.4987754285999904e-06, "loss": 0.0, "step": 20830 }, { "epoch": 28.004138551506973, "grad_norm": 0.00016015196160878986, "learning_rate": 3.4737841755385617e-06, "loss": 0.0, "step": 20840 }, { "epoch": 28.00458839406208, "grad_norm": 7.997457578312606e-05, "learning_rate": 3.4487929224771327e-06, "loss": 0.0, "step": 20850 }, { "epoch": 28.005038236617185, "grad_norm": 1.0388976079411805e-05, "learning_rate": 3.423801669415705e-06, "loss": 0.0, "step": 20860 }, { "epoch": 28.005488079172288, "grad_norm": 9.666564437793568e-05, "learning_rate": 3.3988104163542762e-06, "loss": 0.0, "step": 20870 }, { "epoch": 28.005937921727394, "grad_norm": 6.22380175627768e-05, "learning_rate": 3.3738191632928476e-06, "loss": 0.0, "step": 20880 }, { "epoch": 28.0063877642825, "grad_norm": 1.1780727618315723e-05, "learning_rate": 3.348827910231419e-06, "loss": 0.0, "step": 20890 }, { "epoch": 28.006837606837607, "grad_norm": 1.218922534462763e-05, "learning_rate": 3.3238366571699908e-06, "loss": 0.0, "step": 20900 }, { "epoch": 28.007287449392713, "grad_norm": 2.3350770788965747e-05, "learning_rate": 3.298845404108562e-06, "loss": 0.2106, "step": 20910 }, { "epoch": 28.00773729194782, "grad_norm": 2.3298463929677382e-05, "learning_rate": 3.2738541510471335e-06, "loss": 0.0, "step": 20920 }, { "epoch": 28.008187134502926, "grad_norm": 5.071295890957117e-05, "learning_rate": 3.248862897985705e-06, "loss": 0.0, "step": 20930 }, { "epoch": 28.00863697705803, "grad_norm": 5.1322767831152305e-05, "learning_rate": 3.2238716449242762e-06, "loss": 0.2027, "step": 20940 }, { "epoch": 28.009086819613135, "grad_norm": 0.00015820717089809477, "learning_rate": 3.1988803918628484e-06, "loss": 0.0, "step": 20950 }, { "epoch": 28.00953666216824, "grad_norm": 0.00026298066950403154, "learning_rate": 3.17388913880142e-06, "loss": 0.0, "step": 20960 }, { "epoch": 28.009986504723347, "grad_norm": 5.402839451562613e-05, "learning_rate": 3.148897885739991e-06, "loss": 0.0312, "step": 20970 }, { "epoch": 28.010436347278453, "grad_norm": 3.89511878893245e-05, "learning_rate": 3.1239066326785625e-06, "loss": 0.0143, "step": 20980 }, { "epoch": 28.01088618983356, "grad_norm": 9.082374162971973e-05, "learning_rate": 3.098915379617134e-06, "loss": 0.0, "step": 20990 }, { "epoch": 28.011336032388662, "grad_norm": 7.091991574270651e-05, "learning_rate": 3.0739241265557057e-06, "loss": 0.0, "step": 21000 }, { "epoch": 28.01178587494377, "grad_norm": 4.923396772937849e-05, "learning_rate": 3.048932873494277e-06, "loss": 0.0, "step": 21010 }, { "epoch": 28.012235717498875, "grad_norm": 7.257238848978886e-06, "learning_rate": 3.023941620432849e-06, "loss": 0.0, "step": 21020 }, { "epoch": 28.01268556005398, "grad_norm": 4.1952956962632015e-05, "learning_rate": 2.99895036737142e-06, "loss": 0.0, "step": 21030 }, { "epoch": 28.013135402609088, "grad_norm": 0.0003491560055408627, "learning_rate": 2.973959114309992e-06, "loss": 0.0, "step": 21040 }, { "epoch": 28.013585245164194, "grad_norm": 4.565096242004074e-05, "learning_rate": 2.9489678612485633e-06, "loss": 0.0, "step": 21050 }, { "epoch": 28.014035087719297, "grad_norm": 8.577969128964469e-05, "learning_rate": 2.9239766081871343e-06, "loss": 0.0, "step": 21060 }, { "epoch": 28.014484930274403, "grad_norm": 1.5221750800265e-05, "learning_rate": 2.898985355125706e-06, "loss": 0.0, "step": 21070 }, { "epoch": 28.01493477282951, "grad_norm": 8.05446834419854e-05, "learning_rate": 2.8739941020642774e-06, "loss": 0.0, "step": 21080 }, { "epoch": 28.015384615384615, "grad_norm": 1.090555997507181e-05, "learning_rate": 2.8490028490028492e-06, "loss": 0.0, "step": 21090 }, { "epoch": 28.01583445793972, "grad_norm": 0.00010907348769251257, "learning_rate": 2.8240115959414206e-06, "loss": 0.0, "step": 21100 }, { "epoch": 28.016284300494828, "grad_norm": 5.595383117906749e-05, "learning_rate": 2.7990203428799924e-06, "loss": 0.0, "step": 21110 }, { "epoch": 28.016734143049934, "grad_norm": 7.039815682219341e-05, "learning_rate": 2.7740290898185637e-06, "loss": 0.0, "step": 21120 }, { "epoch": 28.017183985605037, "grad_norm": 1.0920294698735233e-05, "learning_rate": 2.749037836757135e-06, "loss": 0.0, "step": 21130 }, { "epoch": 28.017633828160143, "grad_norm": 5.476304795593023e-05, "learning_rate": 2.7240465836957065e-06, "loss": 0.0, "step": 21140 }, { "epoch": 28.01808367071525, "grad_norm": 0.00028146442491561174, "learning_rate": 2.6990553306342783e-06, "loss": 0.0, "step": 21150 }, { "epoch": 28.018533513270356, "grad_norm": 4.760458250530064e-05, "learning_rate": 2.6740640775728496e-06, "loss": 0.0, "step": 21160 }, { "epoch": 28.018983355825462, "grad_norm": 4.100651858607307e-05, "learning_rate": 2.6490728245114214e-06, "loss": 0.0, "step": 21170 }, { "epoch": 28.01943319838057, "grad_norm": 1.519304441899294e-05, "learning_rate": 2.6240815714499928e-06, "loss": 0.0, "step": 21180 }, { "epoch": 28.01988304093567, "grad_norm": 7.503447704948485e-06, "learning_rate": 2.599090318388564e-06, "loss": 0.0, "step": 21190 }, { "epoch": 28.020332883490777, "grad_norm": 3.8472000596811995e-05, "learning_rate": 2.5740990653271355e-06, "loss": 0.0, "step": 21200 }, { "epoch": 28.020782726045883, "grad_norm": 0.00015251630975399166, "learning_rate": 2.549107812265707e-06, "loss": 0.0, "step": 21210 }, { "epoch": 28.02123256860099, "grad_norm": 2.6364947188994847e-05, "learning_rate": 2.5241165592042787e-06, "loss": 0.0, "step": 21220 }, { "epoch": 28.021682411156096, "grad_norm": 4.5791071897838265e-05, "learning_rate": 2.49912530614285e-06, "loss": 0.0, "step": 21230 }, { "epoch": 28.022132253711202, "grad_norm": 4.219662514515221e-05, "learning_rate": 2.474134053081422e-06, "loss": 0.0, "step": 21240 }, { "epoch": 28.022582096266305, "grad_norm": 7.687362995056901e-06, "learning_rate": 2.449142800019993e-06, "loss": 0.0, "step": 21250 }, { "epoch": 28.02303193882141, "grad_norm": 5.0626680604182184e-05, "learning_rate": 2.424151546958565e-06, "loss": 0.0, "step": 21260 }, { "epoch": 28.023481781376518, "grad_norm": 3.8360118196578696e-05, "learning_rate": 2.399160293897136e-06, "loss": 0.0, "step": 21270 }, { "epoch": 28.023931623931624, "grad_norm": 4.1833860450424254e-05, "learning_rate": 2.3741690408357077e-06, "loss": 0.0, "step": 21280 }, { "epoch": 28.02438146648673, "grad_norm": 3.8545415009139106e-05, "learning_rate": 2.349177787774279e-06, "loss": 0.0, "step": 21290 }, { "epoch": 28.024831309041836, "grad_norm": 4.265516690793447e-05, "learning_rate": 2.3241865347128504e-06, "loss": 0.0, "step": 21300 }, { "epoch": 28.025281151596943, "grad_norm": 5.8191926655126736e-05, "learning_rate": 2.299195281651422e-06, "loss": 0.0, "step": 21310 }, { "epoch": 28.025730994152045, "grad_norm": 4.5987777411937714e-05, "learning_rate": 2.2742040285899936e-06, "loss": 0.0, "step": 21320 }, { "epoch": 28.02618083670715, "grad_norm": 7.349880434048828e-06, "learning_rate": 2.2492127755285654e-06, "loss": 0.0, "step": 21330 }, { "epoch": 28.026630679262258, "grad_norm": 8.564944437239319e-05, "learning_rate": 2.2242215224671367e-06, "loss": 0.0, "step": 21340 }, { "epoch": 28.027080521817364, "grad_norm": 4.335294579504989e-05, "learning_rate": 2.199230269405708e-06, "loss": 0.0, "step": 21350 }, { "epoch": 28.02753036437247, "grad_norm": 0.000654156319797039, "learning_rate": 2.1742390163442794e-06, "loss": 0.0, "step": 21360 }, { "epoch": 28.027980206927577, "grad_norm": 4.8597441491438076e-05, "learning_rate": 2.1492477632828512e-06, "loss": 0.0, "step": 21370 }, { "epoch": 28.02843004948268, "grad_norm": 0.00012680674262810498, "learning_rate": 2.1242565102214226e-06, "loss": 0.0, "step": 21380 }, { "epoch": 28.028879892037786, "grad_norm": 1.0983603715430945e-05, "learning_rate": 2.0992652571599944e-06, "loss": 0.0, "step": 21390 }, { "epoch": 28.029329734592892, "grad_norm": 4.509378777584061e-05, "learning_rate": 2.0742740040985658e-06, "loss": 0.0, "step": 21400 }, { "epoch": 28.029779577148, "grad_norm": 4.7664558223914355e-05, "learning_rate": 2.049282751037137e-06, "loss": 0.0, "step": 21410 }, { "epoch": 28.030229419703105, "grad_norm": 4.569100201479159e-05, "learning_rate": 2.0242914979757085e-06, "loss": 0.0, "step": 21420 }, { "epoch": 28.03067926225821, "grad_norm": 9.251729352399707e-05, "learning_rate": 1.99930024491428e-06, "loss": 0.0, "step": 21430 }, { "epoch": 28.031129104813317, "grad_norm": 2.9391207135631703e-05, "learning_rate": 1.9743089918528516e-06, "loss": 0.0, "step": 21440 }, { "epoch": 28.03157894736842, "grad_norm": 7.009374530753121e-05, "learning_rate": 1.949317738791423e-06, "loss": 0.0, "step": 21450 }, { "epoch": 28.032028789923526, "grad_norm": 2.051427509286441e-05, "learning_rate": 1.9243264857299948e-06, "loss": 0.0, "step": 21460 }, { "epoch": 28.032478632478632, "grad_norm": 9.45365900406614e-05, "learning_rate": 1.899335232668566e-06, "loss": 0.0, "step": 21470 }, { "epoch": 28.03292847503374, "grad_norm": 2.9379485567915253e-05, "learning_rate": 1.8743439796071377e-06, "loss": 0.0, "step": 21480 }, { "epoch": 28.033333333333335, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.8969236153087519, "eval_loss": 0.8670749664306641, "eval_runtime": 138.7741, "eval_samples_per_second": 1.549, "eval_steps_per_second": 1.549, "step": 21489 }, { "epoch": 29.00004498425551, "grad_norm": 3.8332724216161296e-05, "learning_rate": 1.849352726545709e-06, "loss": 0.2139, "step": 21490 }, { "epoch": 29.000494826810616, "grad_norm": 5.128050906932913e-05, "learning_rate": 1.8243614734842807e-06, "loss": 0.0, "step": 21500 }, { "epoch": 29.000944669365722, "grad_norm": 0.00010231976921204478, "learning_rate": 1.799370220422852e-06, "loss": 0.0, "step": 21510 }, { "epoch": 29.00139451192083, "grad_norm": 3.8453330489574e-05, "learning_rate": 1.7743789673614238e-06, "loss": 0.0, "step": 21520 }, { "epoch": 29.001844354475935, "grad_norm": 1.981775858439505e-05, "learning_rate": 1.7493877142999952e-06, "loss": 0.0, "step": 21530 }, { "epoch": 29.002294197031038, "grad_norm": 1.0789018233481329e-05, "learning_rate": 1.7243964612385663e-06, "loss": 0.0, "step": 21540 }, { "epoch": 29.002744039586144, "grad_norm": 7.4757767833943944e-06, "learning_rate": 1.6994052081771381e-06, "loss": 0.0, "step": 21550 }, { "epoch": 29.00319388214125, "grad_norm": 4.1318849980598316e-05, "learning_rate": 1.6744139551157095e-06, "loss": 0.0, "step": 21560 }, { "epoch": 29.003643724696357, "grad_norm": 4.039881605422124e-05, "learning_rate": 1.649422702054281e-06, "loss": 0.0, "step": 21570 }, { "epoch": 29.004093567251463, "grad_norm": 3.9775441109668463e-05, "learning_rate": 1.6244314489928524e-06, "loss": 0.0, "step": 21580 }, { "epoch": 29.00454340980657, "grad_norm": 4.567190262605436e-05, "learning_rate": 1.5994401959314242e-06, "loss": 0.0, "step": 21590 }, { "epoch": 29.004993252361672, "grad_norm": 9.383063297718763e-05, "learning_rate": 1.5744489428699956e-06, "loss": 0.0, "step": 21600 }, { "epoch": 29.005443094916778, "grad_norm": 2.3032833269098774e-05, "learning_rate": 1.549457689808567e-06, "loss": 0.0, "step": 21610 }, { "epoch": 29.005892937471884, "grad_norm": 3.0074230380705558e-05, "learning_rate": 1.5244664367471385e-06, "loss": 0.0, "step": 21620 }, { "epoch": 29.00634278002699, "grad_norm": 8.613972022430971e-05, "learning_rate": 1.49947518368571e-06, "loss": 0.0, "step": 21630 }, { "epoch": 29.006792622582097, "grad_norm": 0.00014007705613039434, "learning_rate": 1.4744839306242817e-06, "loss": 0.0, "step": 21640 }, { "epoch": 29.007242465137203, "grad_norm": 1.0985687367792707e-05, "learning_rate": 1.449492677562853e-06, "loss": 0.0, "step": 21650 }, { "epoch": 29.00769230769231, "grad_norm": 4.126510611968115e-05, "learning_rate": 1.4245014245014246e-06, "loss": 0.2103, "step": 21660 }, { "epoch": 29.008142150247412, "grad_norm": 8.374947356060147e-05, "learning_rate": 1.3995101714399962e-06, "loss": 0.0, "step": 21670 }, { "epoch": 29.00859199280252, "grad_norm": 4.466345490072854e-05, "learning_rate": 1.3745189183785676e-06, "loss": 0.0, "step": 21680 }, { "epoch": 29.009041835357625, "grad_norm": 4.391442780615762e-05, "learning_rate": 1.3495276653171391e-06, "loss": 0.0, "step": 21690 }, { "epoch": 29.00949167791273, "grad_norm": 3.8136146031320095e-05, "learning_rate": 1.3245364122557107e-06, "loss": 0.0, "step": 21700 }, { "epoch": 29.009941520467837, "grad_norm": 3.3664549846434966e-05, "learning_rate": 1.299545159194282e-06, "loss": 0.0, "step": 21710 }, { "epoch": 29.010391363022944, "grad_norm": 6.718199438182637e-05, "learning_rate": 1.2745539061328534e-06, "loss": 0.0, "step": 21720 }, { "epoch": 29.010841205578046, "grad_norm": 3.8779060560045764e-05, "learning_rate": 1.249562653071425e-06, "loss": 0.0, "step": 21730 }, { "epoch": 29.011291048133153, "grad_norm": 5.063170465291478e-05, "learning_rate": 1.2245714000099966e-06, "loss": 0.0, "step": 21740 }, { "epoch": 29.01174089068826, "grad_norm": 4.247555261827074e-05, "learning_rate": 1.199580146948568e-06, "loss": 0.0, "step": 21750 }, { "epoch": 29.012190733243365, "grad_norm": 0.00012306317512411624, "learning_rate": 1.1745888938871395e-06, "loss": 0.0, "step": 21760 }, { "epoch": 29.01264057579847, "grad_norm": 0.00011309176625218242, "learning_rate": 1.149597640825711e-06, "loss": 0.0, "step": 21770 }, { "epoch": 29.013090418353578, "grad_norm": 3.8427380786743015e-05, "learning_rate": 1.1246063877642827e-06, "loss": 0.0, "step": 21780 }, { "epoch": 29.01354026090868, "grad_norm": 0.00010062212822958827, "learning_rate": 1.099615134702854e-06, "loss": 0.0, "step": 21790 }, { "epoch": 29.013990103463787, "grad_norm": 9.29554007598199e-05, "learning_rate": 1.0746238816414256e-06, "loss": 0.0, "step": 21800 }, { "epoch": 29.014439946018893, "grad_norm": 8.449432243651245e-06, "learning_rate": 1.0496326285799972e-06, "loss": 0.0138, "step": 21810 }, { "epoch": 29.014889788574, "grad_norm": 4.278255073586479e-05, "learning_rate": 1.0246413755185686e-06, "loss": 0.0, "step": 21820 }, { "epoch": 29.015339631129105, "grad_norm": 3.810771886492148e-05, "learning_rate": 9.9965012245714e-07, "loss": 0.0, "step": 21830 }, { "epoch": 29.01578947368421, "grad_norm": 8.027945295907557e-05, "learning_rate": 9.746588693957115e-07, "loss": 0.0, "step": 21840 }, { "epoch": 29.016239316239318, "grad_norm": 4.350775634520687e-05, "learning_rate": 9.49667616334283e-07, "loss": 0.0, "step": 21850 }, { "epoch": 29.01668915879442, "grad_norm": 1.762642750691157e-05, "learning_rate": 9.246763632728545e-07, "loss": 0.0, "step": 21860 }, { "epoch": 29.017139001349527, "grad_norm": 1.9369786969036795e-05, "learning_rate": 8.99685110211426e-07, "loss": 0.0, "step": 21870 }, { "epoch": 29.017588843904633, "grad_norm": 3.915031629730947e-05, "learning_rate": 8.746938571499976e-07, "loss": 0.0, "step": 21880 }, { "epoch": 29.01803868645974, "grad_norm": 1.098432949220296e-05, "learning_rate": 8.497026040885691e-07, "loss": 0.0, "step": 21890 }, { "epoch": 29.018488529014846, "grad_norm": 4.99308189318981e-05, "learning_rate": 8.247113510271405e-07, "loss": 0.0, "step": 21900 }, { "epoch": 29.018938371569952, "grad_norm": 4.772571992361918e-05, "learning_rate": 7.997200979657121e-07, "loss": 0.0, "step": 21910 }, { "epoch": 29.019388214125055, "grad_norm": 0.0001329783262917772, "learning_rate": 7.747288449042835e-07, "loss": 0.0, "step": 21920 }, { "epoch": 29.01983805668016, "grad_norm": 9.853947267401963e-05, "learning_rate": 7.49737591842855e-07, "loss": 0.0, "step": 21930 }, { "epoch": 29.020287899235267, "grad_norm": 0.00039622452459298074, "learning_rate": 7.247463387814265e-07, "loss": 0.0, "step": 21940 }, { "epoch": 29.020737741790374, "grad_norm": 0.0001378514862153679, "learning_rate": 6.997550857199981e-07, "loss": 0.0, "step": 21950 }, { "epoch": 29.02118758434548, "grad_norm": 1.4763545550522394e-05, "learning_rate": 6.747638326585696e-07, "loss": 0.0, "step": 21960 }, { "epoch": 29.021637426900586, "grad_norm": 4.487818296183832e-05, "learning_rate": 6.49772579597141e-07, "loss": 0.2075, "step": 21970 }, { "epoch": 29.02208726945569, "grad_norm": 8.74214674695395e-05, "learning_rate": 6.247813265357125e-07, "loss": 0.0, "step": 21980 }, { "epoch": 29.022537112010795, "grad_norm": 4.1720089939190075e-05, "learning_rate": 5.99790073474284e-07, "loss": 0.0, "step": 21990 }, { "epoch": 29.0229869545659, "grad_norm": 4.365522545413114e-05, "learning_rate": 5.747988204128556e-07, "loss": 0.0, "step": 22000 }, { "epoch": 29.023436797121008, "grad_norm": 4.5024207793176174e-05, "learning_rate": 5.49807567351427e-07, "loss": 0.0, "step": 22010 }, { "epoch": 29.023886639676114, "grad_norm": 4.660045669879764e-05, "learning_rate": 5.248163142899986e-07, "loss": 0.0135, "step": 22020 }, { "epoch": 29.02433648223122, "grad_norm": 3.340061448398046e-05, "learning_rate": 4.9982506122857e-07, "loss": 0.0, "step": 22030 }, { "epoch": 29.024786324786326, "grad_norm": 8.851500751916319e-05, "learning_rate": 4.748338081671415e-07, "loss": 0.0, "step": 22040 }, { "epoch": 29.02523616734143, "grad_norm": 8.366268593817949e-05, "learning_rate": 4.49842555105713e-07, "loss": 0.0, "step": 22050 }, { "epoch": 29.025686009896535, "grad_norm": 9.44876010180451e-05, "learning_rate": 4.2485130204428453e-07, "loss": 0.0, "step": 22060 }, { "epoch": 29.02613585245164, "grad_norm": 2.0123570720897987e-05, "learning_rate": 3.9986004898285605e-07, "loss": 0.0, "step": 22070 }, { "epoch": 29.026585695006748, "grad_norm": 3.9800732338335365e-05, "learning_rate": 3.748687959214275e-07, "loss": 0.0, "step": 22080 }, { "epoch": 29.027035537561854, "grad_norm": 4.2405870772199705e-05, "learning_rate": 3.4987754285999905e-07, "loss": 0.0, "step": 22090 }, { "epoch": 29.02748538011696, "grad_norm": 0.000219037348870188, "learning_rate": 3.248862897985705e-07, "loss": 0.0, "step": 22100 }, { "epoch": 29.027935222672063, "grad_norm": 4.169054955127649e-05, "learning_rate": 2.99895036737142e-07, "loss": 0.0, "step": 22110 }, { "epoch": 29.02838506522717, "grad_norm": 8.256129513029009e-05, "learning_rate": 2.749037836757135e-07, "loss": 0.0, "step": 22120 }, { "epoch": 29.028834907782276, "grad_norm": 4.798980080522597e-05, "learning_rate": 2.49912530614285e-07, "loss": 0.0, "step": 22130 }, { "epoch": 29.029284750337382, "grad_norm": 8.0281519331038e-05, "learning_rate": 2.249212775528565e-07, "loss": 0.0, "step": 22140 }, { "epoch": 29.02973459289249, "grad_norm": 4.184588397038169e-05, "learning_rate": 1.9993002449142803e-07, "loss": 0.0, "step": 22150 }, { "epoch": 29.030184435447595, "grad_norm": 3.803205254371278e-05, "learning_rate": 1.7493877142999952e-07, "loss": 0.2093, "step": 22160 }, { "epoch": 29.030634278002697, "grad_norm": 0.00011323133367113769, "learning_rate": 1.49947518368571e-07, "loss": 0.0, "step": 22170 }, { "epoch": 29.031084120557804, "grad_norm": 0.00015793886268511415, "learning_rate": 1.249562653071425e-07, "loss": 0.0, "step": 22180 }, { "epoch": 29.03153396311291, "grad_norm": 3.221273436793126e-05, "learning_rate": 9.996501224571401e-08, "loss": 0.0137, "step": 22190 }, { "epoch": 29.031983805668016, "grad_norm": 0.0001762841857271269, "learning_rate": 7.49737591842855e-08, "loss": 0.0, "step": 22200 }, { "epoch": 29.032433648223122, "grad_norm": 9.130711987381801e-05, "learning_rate": 4.998250612285701e-08, "loss": 0.0, "step": 22210 }, { "epoch": 29.03288349077823, "grad_norm": 4.299340434954502e-05, "learning_rate": 2.4991253061428503e-08, "loss": 0.0, "step": 22220 }, { "epoch": 29.033333333333335, "grad_norm": 8.290063306048978e-06, "learning_rate": 0.0, "loss": 0.0, "step": 22230 }, { "epoch": 29.033333333333335, "eval_accuracy": 0.8976744186046511, "eval_f1": 0.8969236153087519, "eval_loss": 0.8665033578872681, "eval_runtime": 142.8744, "eval_samples_per_second": 1.505, "eval_steps_per_second": 1.505, "step": 22230 }, { "epoch": 29.033333333333335, "step": 22230, "total_flos": 5.695530156049564e+19, "train_loss": 0.05779985365885791, "train_runtime": 28260.3722, "train_samples_per_second": 0.787, "train_steps_per_second": 0.787 }, { "epoch": 29.033333333333335, "eval_accuracy": 0.9375, "eval_f1": 0.9386944198366612, "eval_loss": 0.5316240191459656, "eval_runtime": 76.8207, "eval_samples_per_second": 1.458, "eval_steps_per_second": 1.458, "step": 22230 }, { "epoch": 29.033333333333335, "eval_accuracy": 0.9162790697674419, "eval_f1": 0.9153581572237279, "eval_loss": 0.7608880996704102, "eval_runtime": 142.5038, "eval_samples_per_second": 1.509, "eval_steps_per_second": 1.509, "step": 22230 } ], "logging_steps": 10, "max_steps": 22230, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.695530156049564e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }