{ "best_metric": 0.9789719626168224, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-1870", "epoch": 10.0, "eval_steps": 500, "global_step": 1870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.9260491132736206, "learning_rate": 0.004973262032085562, "loss": 1.5983, "step": 10 }, { "epoch": 0.11, "grad_norm": 1.1307735443115234, "learning_rate": 0.004946524064171123, "loss": 0.9417, "step": 20 }, { "epoch": 0.16, "grad_norm": 0.9537946581840515, "learning_rate": 0.004919786096256685, "loss": 0.7642, "step": 30 }, { "epoch": 0.21, "grad_norm": 0.8597701191902161, "learning_rate": 0.004893048128342246, "loss": 0.6992, "step": 40 }, { "epoch": 0.27, "grad_norm": 1.104675531387329, "learning_rate": 0.004866310160427808, "loss": 0.627, "step": 50 }, { "epoch": 0.32, "grad_norm": 0.846555233001709, "learning_rate": 0.004839572192513369, "loss": 0.5047, "step": 60 }, { "epoch": 0.37, "grad_norm": 1.423182487487793, "learning_rate": 0.004812834224598931, "loss": 0.5431, "step": 70 }, { "epoch": 0.43, "grad_norm": 0.8424627780914307, "learning_rate": 0.004786096256684492, "loss": 0.5962, "step": 80 }, { "epoch": 0.48, "grad_norm": 0.6608781814575195, "learning_rate": 0.004759358288770054, "loss": 0.4084, "step": 90 }, { "epoch": 0.53, "grad_norm": 1.130247712135315, "learning_rate": 0.004732620320855615, "loss": 0.4932, "step": 100 }, { "epoch": 0.59, "grad_norm": 0.6054658889770508, "learning_rate": 0.004705882352941177, "loss": 0.4684, "step": 110 }, { "epoch": 0.64, "grad_norm": 0.8725093603134155, "learning_rate": 0.004679144385026738, "loss": 0.4429, "step": 120 }, { "epoch": 0.7, "grad_norm": 0.6343618035316467, "learning_rate": 0.0046524064171123, "loss": 0.3952, "step": 130 }, { "epoch": 0.75, "grad_norm": 0.9175045490264893, "learning_rate": 0.0046256684491978615, "loss": 0.4592, "step": 140 }, { "epoch": 0.8, "grad_norm": 1.0295114517211914, "learning_rate": 0.004598930481283423, "loss": 0.4212, "step": 150 }, { "epoch": 0.86, "grad_norm": 0.4232007563114166, "learning_rate": 0.004572192513368984, "loss": 0.4165, "step": 160 }, { "epoch": 0.91, "grad_norm": 1.18360435962677, "learning_rate": 0.00454812834224599, "loss": 0.4245, "step": 170 }, { "epoch": 0.96, "grad_norm": 0.7265322804450989, "learning_rate": 0.004521390374331551, "loss": 0.4059, "step": 180 }, { "epoch": 1.0, "eval_accuracy": 0.9310747663551402, "eval_f1": 0.9201346862223367, "eval_loss": 0.18775394558906555, "eval_precision": 0.913178148427007, "eval_recall": 0.9327948208695145, "eval_runtime": 9.5068, "eval_samples_per_second": 180.082, "eval_steps_per_second": 11.255, "step": 187 }, { "epoch": 1.02, "grad_norm": 1.179917335510254, "learning_rate": 0.004494652406417113, "loss": 0.3646, "step": 190 }, { "epoch": 1.07, "grad_norm": 1.1189391613006592, "learning_rate": 0.004467914438502674, "loss": 0.4339, "step": 200 }, { "epoch": 1.12, "grad_norm": 0.8059839010238647, "learning_rate": 0.004441176470588235, "loss": 0.373, "step": 210 }, { "epoch": 1.18, "grad_norm": 1.5934990644454956, "learning_rate": 0.004414438502673797, "loss": 0.4089, "step": 220 }, { "epoch": 1.23, "grad_norm": 0.5738559365272522, "learning_rate": 0.004387700534759359, "loss": 0.4181, "step": 230 }, { "epoch": 1.28, "grad_norm": 1.0053284168243408, "learning_rate": 0.00436096256684492, "loss": 0.354, "step": 240 }, { "epoch": 1.34, "grad_norm": 0.6736829280853271, "learning_rate": 0.004334224598930481, "loss": 0.2862, "step": 250 }, { "epoch": 1.39, "grad_norm": 0.7684084177017212, "learning_rate": 0.0043074866310160425, "loss": 0.3533, "step": 260 }, { "epoch": 1.44, "grad_norm": 1.04612135887146, "learning_rate": 0.004280748663101605, "loss": 0.3654, "step": 270 }, { "epoch": 1.5, "grad_norm": 0.7823394536972046, "learning_rate": 0.004254010695187166, "loss": 0.4385, "step": 280 }, { "epoch": 1.55, "grad_norm": 0.9472429752349854, "learning_rate": 0.004227272727272727, "loss": 0.4417, "step": 290 }, { "epoch": 1.6, "grad_norm": 0.889252245426178, "learning_rate": 0.004200534759358289, "loss": 0.3873, "step": 300 }, { "epoch": 1.66, "grad_norm": 0.7252718806266785, "learning_rate": 0.00417379679144385, "loss": 0.3717, "step": 310 }, { "epoch": 1.71, "grad_norm": 0.8687788844108582, "learning_rate": 0.004147058823529412, "loss": 0.3854, "step": 320 }, { "epoch": 1.76, "grad_norm": 0.6197172999382019, "learning_rate": 0.004122994652406417, "loss": 0.3748, "step": 330 }, { "epoch": 1.82, "grad_norm": 0.6506063342094421, "learning_rate": 0.004096256684491978, "loss": 0.2923, "step": 340 }, { "epoch": 1.87, "grad_norm": 0.5267966389656067, "learning_rate": 0.00406951871657754, "loss": 0.4045, "step": 350 }, { "epoch": 1.93, "grad_norm": 1.1251919269561768, "learning_rate": 0.004042780748663102, "loss": 0.3988, "step": 360 }, { "epoch": 1.98, "grad_norm": 1.114890456199646, "learning_rate": 0.004016042780748663, "loss": 0.3796, "step": 370 }, { "epoch": 2.0, "eval_accuracy": 0.9082943925233645, "eval_f1": 0.886066241884805, "eval_loss": 0.27294662594795227, "eval_precision": 0.9131012141299326, "eval_recall": 0.887497540228883, "eval_runtime": 9.2331, "eval_samples_per_second": 185.419, "eval_steps_per_second": 11.589, "step": 374 }, { "epoch": 2.03, "grad_norm": 0.7117612361907959, "learning_rate": 0.003989304812834224, "loss": 0.3724, "step": 380 }, { "epoch": 2.09, "grad_norm": 0.9159232974052429, "learning_rate": 0.00396524064171123, "loss": 0.3155, "step": 390 }, { "epoch": 2.14, "grad_norm": 0.6797966957092285, "learning_rate": 0.003938502673796792, "loss": 0.3531, "step": 400 }, { "epoch": 2.19, "grad_norm": 0.912696361541748, "learning_rate": 0.003911764705882353, "loss": 0.2788, "step": 410 }, { "epoch": 2.25, "grad_norm": 1.0336519479751587, "learning_rate": 0.0038850267379679144, "loss": 0.3692, "step": 420 }, { "epoch": 2.3, "grad_norm": 0.8013398051261902, "learning_rate": 0.003858288770053476, "loss": 0.3561, "step": 430 }, { "epoch": 2.35, "grad_norm": 0.6950948238372803, "learning_rate": 0.003831550802139038, "loss": 0.3295, "step": 440 }, { "epoch": 2.41, "grad_norm": 0.7441625595092773, "learning_rate": 0.003804812834224599, "loss": 0.3285, "step": 450 }, { "epoch": 2.46, "grad_norm": 4.745124816894531, "learning_rate": 0.0037780748663101605, "loss": 0.4162, "step": 460 }, { "epoch": 2.51, "grad_norm": 1.3873414993286133, "learning_rate": 0.003751336898395722, "loss": 0.3424, "step": 470 }, { "epoch": 2.57, "grad_norm": 0.7891167402267456, "learning_rate": 0.0037272727272727275, "loss": 0.3043, "step": 480 }, { "epoch": 2.62, "grad_norm": 1.013873815536499, "learning_rate": 0.003700534759358289, "loss": 0.3754, "step": 490 }, { "epoch": 2.67, "grad_norm": 0.9377150535583496, "learning_rate": 0.00367379679144385, "loss": 0.3675, "step": 500 }, { "epoch": 2.73, "grad_norm": 2.7368648052215576, "learning_rate": 0.0036470588235294117, "loss": 0.2901, "step": 510 }, { "epoch": 2.78, "grad_norm": 1.5487793684005737, "learning_rate": 0.0036203208556149736, "loss": 0.482, "step": 520 }, { "epoch": 2.83, "grad_norm": 8.680522918701172, "learning_rate": 0.003593582887700535, "loss": 0.378, "step": 530 }, { "epoch": 2.89, "grad_norm": 1.3777785301208496, "learning_rate": 0.0035668449197860962, "loss": 0.4919, "step": 540 }, { "epoch": 2.94, "grad_norm": 2.1192550659179688, "learning_rate": 0.0035401069518716578, "loss": 0.3751, "step": 550 }, { "epoch": 2.99, "grad_norm": 9.656478881835938, "learning_rate": 0.0035133689839572193, "loss": 0.424, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.866822429906542, "eval_f1": 0.8491520459723211, "eval_loss": 0.3701097071170807, "eval_precision": 0.8797339861417046, "eval_recall": 0.8520089249800192, "eval_runtime": 9.219, "eval_samples_per_second": 185.702, "eval_steps_per_second": 11.606, "step": 561 }, { "epoch": 3.05, "grad_norm": 1.5421924591064453, "learning_rate": 0.0034866310160427804, "loss": 0.4643, "step": 570 }, { "epoch": 3.1, "grad_norm": 0.9370782375335693, "learning_rate": 0.0034598930481283424, "loss": 0.4274, "step": 580 }, { "epoch": 3.16, "grad_norm": 1.6456141471862793, "learning_rate": 0.003433155080213904, "loss": 0.3616, "step": 590 }, { "epoch": 3.21, "grad_norm": 1.2138258218765259, "learning_rate": 0.0034064171122994654, "loss": 0.4241, "step": 600 }, { "epoch": 3.26, "grad_norm": 0.8959400057792664, "learning_rate": 0.0033796791443850265, "loss": 0.3392, "step": 610 }, { "epoch": 3.32, "grad_norm": 0.8747026324272156, "learning_rate": 0.003352941176470588, "loss": 0.3533, "step": 620 }, { "epoch": 3.37, "grad_norm": 1.7161656618118286, "learning_rate": 0.00332620320855615, "loss": 0.3407, "step": 630 }, { "epoch": 3.42, "grad_norm": 0.9229569435119629, "learning_rate": 0.0032994652406417115, "loss": 0.3098, "step": 640 }, { "epoch": 3.48, "grad_norm": 0.9468969702720642, "learning_rate": 0.0032727272727272726, "loss": 0.3896, "step": 650 }, { "epoch": 3.53, "grad_norm": 1.4430208206176758, "learning_rate": 0.003245989304812834, "loss": 0.3395, "step": 660 }, { "epoch": 3.58, "grad_norm": 1.20052969455719, "learning_rate": 0.0032192513368983957, "loss": 0.3448, "step": 670 }, { "epoch": 3.64, "grad_norm": 1.1726669073104858, "learning_rate": 0.0031925133689839577, "loss": 0.342, "step": 680 }, { "epoch": 3.69, "grad_norm": 0.7881722450256348, "learning_rate": 0.0031657754010695188, "loss": 0.301, "step": 690 }, { "epoch": 3.74, "grad_norm": 0.7960072159767151, "learning_rate": 0.0031390374331550803, "loss": 0.2633, "step": 700 }, { "epoch": 3.8, "grad_norm": 0.964872419834137, "learning_rate": 0.003112299465240642, "loss": 0.2691, "step": 710 }, { "epoch": 3.85, "grad_norm": 0.9894037246704102, "learning_rate": 0.003085561497326203, "loss": 0.2859, "step": 720 }, { "epoch": 3.9, "grad_norm": 1.0027267932891846, "learning_rate": 0.003058823529411765, "loss": 0.3027, "step": 730 }, { "epoch": 3.96, "grad_norm": 1.0325654745101929, "learning_rate": 0.0030320855614973264, "loss": 0.3141, "step": 740 }, { "epoch": 4.0, "eval_accuracy": 0.9380841121495327, "eval_f1": 0.9283105641226367, "eval_loss": 0.18485769629478455, "eval_precision": 0.9266830676466586, "eval_recall": 0.9336478146798447, "eval_runtime": 9.3787, "eval_samples_per_second": 182.542, "eval_steps_per_second": 11.409, "step": 748 }, { "epoch": 4.01, "grad_norm": 1.263634443283081, "learning_rate": 0.003005347593582888, "loss": 0.3592, "step": 750 }, { "epoch": 4.06, "grad_norm": 1.8158007860183716, "learning_rate": 0.002978609625668449, "loss": 0.364, "step": 760 }, { "epoch": 4.12, "grad_norm": 0.9459696412086487, "learning_rate": 0.0029518716577540106, "loss": 0.3587, "step": 770 }, { "epoch": 4.17, "grad_norm": 0.7624779343605042, "learning_rate": 0.0029251336898395725, "loss": 0.304, "step": 780 }, { "epoch": 4.22, "grad_norm": 0.8625235557556152, "learning_rate": 0.002898395721925134, "loss": 0.2726, "step": 790 }, { "epoch": 4.28, "grad_norm": 0.962257444858551, "learning_rate": 0.002871657754010695, "loss": 0.2601, "step": 800 }, { "epoch": 4.33, "grad_norm": 0.6333624720573425, "learning_rate": 0.0028449197860962567, "loss": 0.3448, "step": 810 }, { "epoch": 4.39, "grad_norm": 1.3983910083770752, "learning_rate": 0.002818181818181818, "loss": 0.3202, "step": 820 }, { "epoch": 4.44, "grad_norm": 0.6626348495483398, "learning_rate": 0.00279144385026738, "loss": 0.2529, "step": 830 }, { "epoch": 4.49, "grad_norm": 0.8221544027328491, "learning_rate": 0.0027647058823529413, "loss": 0.2523, "step": 840 }, { "epoch": 4.55, "grad_norm": 0.7872591018676758, "learning_rate": 0.002737967914438503, "loss": 0.2832, "step": 850 }, { "epoch": 4.6, "grad_norm": 1.50129234790802, "learning_rate": 0.0027112299465240643, "loss": 0.2912, "step": 860 }, { "epoch": 4.65, "grad_norm": 0.7471727728843689, "learning_rate": 0.0026844919786096254, "loss": 0.3097, "step": 870 }, { "epoch": 4.71, "grad_norm": 0.6078329086303711, "learning_rate": 0.002657754010695187, "loss": 0.2657, "step": 880 }, { "epoch": 4.76, "grad_norm": 0.8674110174179077, "learning_rate": 0.002631016042780749, "loss": 0.2633, "step": 890 }, { "epoch": 4.81, "grad_norm": 0.5421575307846069, "learning_rate": 0.0026042780748663104, "loss": 0.257, "step": 900 }, { "epoch": 4.87, "grad_norm": 1.314867377281189, "learning_rate": 0.0025775401069518715, "loss": 0.2688, "step": 910 }, { "epoch": 4.92, "grad_norm": 0.698221743106842, "learning_rate": 0.002550802139037433, "loss": 0.2506, "step": 920 }, { "epoch": 4.97, "grad_norm": 0.5437451004981995, "learning_rate": 0.0025240641711229946, "loss": 0.2553, "step": 930 }, { "epoch": 5.0, "eval_accuracy": 0.9643691588785047, "eval_f1": 0.9617344813251135, "eval_loss": 0.1074606254696846, "eval_precision": 0.9630090863077152, "eval_recall": 0.9611619604560873, "eval_runtime": 9.213, "eval_samples_per_second": 185.824, "eval_steps_per_second": 11.614, "step": 935 }, { "epoch": 5.03, "grad_norm": 0.9639925956726074, "learning_rate": 0.002497326203208556, "loss": 0.2186, "step": 940 }, { "epoch": 5.08, "grad_norm": 1.0346194505691528, "learning_rate": 0.0024705882352941176, "loss": 0.3163, "step": 950 }, { "epoch": 5.13, "grad_norm": 0.9101438522338867, "learning_rate": 0.002443850267379679, "loss": 0.257, "step": 960 }, { "epoch": 5.19, "grad_norm": 0.9387779831886292, "learning_rate": 0.0024171122994652407, "loss": 0.2745, "step": 970 }, { "epoch": 5.24, "grad_norm": 1.3407084941864014, "learning_rate": 0.0023903743315508022, "loss": 0.2775, "step": 980 }, { "epoch": 5.29, "grad_norm": 0.7988283038139343, "learning_rate": 0.0023636363636363638, "loss": 0.2568, "step": 990 }, { "epoch": 5.35, "grad_norm": 0.8980028033256531, "learning_rate": 0.0023368983957219253, "loss": 0.296, "step": 1000 }, { "epoch": 5.4, "grad_norm": 0.8847124576568604, "learning_rate": 0.002310160427807487, "loss": 0.2525, "step": 1010 }, { "epoch": 5.45, "grad_norm": 1.3140696287155151, "learning_rate": 0.002283422459893048, "loss": 0.2967, "step": 1020 }, { "epoch": 5.51, "grad_norm": 0.6774911284446716, "learning_rate": 0.00225668449197861, "loss": 0.2735, "step": 1030 }, { "epoch": 5.56, "grad_norm": 0.9686025977134705, "learning_rate": 0.002229946524064171, "loss": 0.2415, "step": 1040 }, { "epoch": 5.61, "grad_norm": 1.3379433155059814, "learning_rate": 0.0022032085561497325, "loss": 0.2656, "step": 1050 }, { "epoch": 5.67, "grad_norm": 0.6908765435218811, "learning_rate": 0.002176470588235294, "loss": 0.2532, "step": 1060 }, { "epoch": 5.72, "grad_norm": 0.8308853507041931, "learning_rate": 0.0021497326203208556, "loss": 0.2428, "step": 1070 }, { "epoch": 5.78, "grad_norm": 1.2064207792282104, "learning_rate": 0.002122994652406417, "loss": 0.2989, "step": 1080 }, { "epoch": 5.83, "grad_norm": 0.8376064896583557, "learning_rate": 0.0020962566844919786, "loss": 0.2061, "step": 1090 }, { "epoch": 5.88, "grad_norm": 0.9363247156143188, "learning_rate": 0.00206951871657754, "loss": 0.2447, "step": 1100 }, { "epoch": 5.94, "grad_norm": 7.874444007873535, "learning_rate": 0.0020427807486631017, "loss": 0.2254, "step": 1110 }, { "epoch": 5.99, "grad_norm": 0.9535788297653198, "learning_rate": 0.002016042780748663, "loss": 0.2686, "step": 1120 }, { "epoch": 6.0, "eval_accuracy": 0.9485981308411215, "eval_f1": 0.9488981890553403, "eval_loss": 0.16793404519557953, "eval_precision": 0.9560571498851578, "eval_recall": 0.9437216744429628, "eval_runtime": 9.2543, "eval_samples_per_second": 184.995, "eval_steps_per_second": 11.562, "step": 1122 }, { "epoch": 6.04, "grad_norm": 0.9278040528297424, "learning_rate": 0.0019893048128342247, "loss": 0.256, "step": 1130 }, { "epoch": 6.1, "grad_norm": 1.0177885293960571, "learning_rate": 0.0019625668449197863, "loss": 0.2173, "step": 1140 }, { "epoch": 6.15, "grad_norm": 0.5898217558860779, "learning_rate": 0.0019358288770053476, "loss": 0.2257, "step": 1150 }, { "epoch": 6.2, "grad_norm": 5.235673904418945, "learning_rate": 0.0019090909090909091, "loss": 0.2388, "step": 1160 }, { "epoch": 6.26, "grad_norm": 1.1271004676818848, "learning_rate": 0.0018823529411764706, "loss": 0.2544, "step": 1170 }, { "epoch": 6.31, "grad_norm": 0.6136900186538696, "learning_rate": 0.001855614973262032, "loss": 0.2785, "step": 1180 }, { "epoch": 6.36, "grad_norm": 0.9343350529670715, "learning_rate": 0.0018288770053475937, "loss": 0.2304, "step": 1190 }, { "epoch": 6.42, "grad_norm": 0.7129714488983154, "learning_rate": 0.001802139037433155, "loss": 0.1709, "step": 1200 }, { "epoch": 6.47, "grad_norm": 0.8645954132080078, "learning_rate": 0.0017754010695187168, "loss": 0.2099, "step": 1210 }, { "epoch": 6.52, "grad_norm": 0.4692780375480652, "learning_rate": 0.001748663101604278, "loss": 0.1801, "step": 1220 }, { "epoch": 6.58, "grad_norm": 1.1131465435028076, "learning_rate": 0.0017219251336898396, "loss": 0.2187, "step": 1230 }, { "epoch": 6.63, "grad_norm": 1.0496641397476196, "learning_rate": 0.0016951871657754011, "loss": 0.2381, "step": 1240 }, { "epoch": 6.68, "grad_norm": 0.7512268424034119, "learning_rate": 0.0016684491978609627, "loss": 0.2171, "step": 1250 }, { "epoch": 6.74, "grad_norm": 0.9206662774085999, "learning_rate": 0.0016417112299465242, "loss": 0.1716, "step": 1260 }, { "epoch": 6.79, "grad_norm": 1.044285535812378, "learning_rate": 0.0016149732620320857, "loss": 0.1996, "step": 1270 }, { "epoch": 6.84, "grad_norm": 1.5523549318313599, "learning_rate": 0.001588235294117647, "loss": 0.198, "step": 1280 }, { "epoch": 6.9, "grad_norm": 0.7654513120651245, "learning_rate": 0.0015614973262032088, "loss": 0.2341, "step": 1290 }, { "epoch": 6.95, "grad_norm": 1.145663857460022, "learning_rate": 0.00153475935828877, "loss": 0.2556, "step": 1300 }, { "epoch": 7.0, "eval_accuracy": 0.9661214953271028, "eval_f1": 0.9619479557860847, "eval_loss": 0.09340371936559677, "eval_precision": 0.9651383824240083, "eval_recall": 0.9598949442531882, "eval_runtime": 9.0216, "eval_samples_per_second": 189.767, "eval_steps_per_second": 11.86, "step": 1309 }, { "epoch": 7.01, "grad_norm": 0.8554219603538513, "learning_rate": 0.0015080213903743314, "loss": 0.237, "step": 1310 }, { "epoch": 7.06, "grad_norm": 0.7055748701095581, "learning_rate": 0.0014812834224598931, "loss": 0.2317, "step": 1320 }, { "epoch": 7.11, "grad_norm": 1.0891897678375244, "learning_rate": 0.0014545454545454545, "loss": 0.1723, "step": 1330 }, { "epoch": 7.17, "grad_norm": 0.5554465651512146, "learning_rate": 0.0014278074866310162, "loss": 0.1986, "step": 1340 }, { "epoch": 7.22, "grad_norm": 1.0232211351394653, "learning_rate": 0.0014010695187165775, "loss": 0.2222, "step": 1350 }, { "epoch": 7.27, "grad_norm": 0.6204003095626831, "learning_rate": 0.001374331550802139, "loss": 0.1827, "step": 1360 }, { "epoch": 7.33, "grad_norm": 0.7353977560997009, "learning_rate": 0.0013475935828877006, "loss": 0.1649, "step": 1370 }, { "epoch": 7.38, "grad_norm": 0.734186053276062, "learning_rate": 0.001320855614973262, "loss": 0.194, "step": 1380 }, { "epoch": 7.43, "grad_norm": 0.47959616780281067, "learning_rate": 0.0012941176470588236, "loss": 0.1763, "step": 1390 }, { "epoch": 7.49, "grad_norm": 0.6939826607704163, "learning_rate": 0.0012673796791443852, "loss": 0.2286, "step": 1400 }, { "epoch": 7.54, "grad_norm": 0.948558509349823, "learning_rate": 0.0012406417112299467, "loss": 0.2506, "step": 1410 }, { "epoch": 7.59, "grad_norm": 0.8466843962669373, "learning_rate": 0.001213903743315508, "loss": 0.2175, "step": 1420 }, { "epoch": 7.65, "grad_norm": 0.6146303415298462, "learning_rate": 0.0011871657754010695, "loss": 0.1641, "step": 1430 }, { "epoch": 7.7, "grad_norm": 0.8321207761764526, "learning_rate": 0.001160427807486631, "loss": 0.1903, "step": 1440 }, { "epoch": 7.75, "grad_norm": 0.7309682965278625, "learning_rate": 0.0011336898395721926, "loss": 0.1981, "step": 1450 }, { "epoch": 7.81, "grad_norm": 0.5901007652282715, "learning_rate": 0.0011069518716577541, "loss": 0.2011, "step": 1460 }, { "epoch": 7.86, "grad_norm": 0.9141890406608582, "learning_rate": 0.0010802139037433154, "loss": 0.2735, "step": 1470 }, { "epoch": 7.91, "grad_norm": 0.813578724861145, "learning_rate": 0.001053475935828877, "loss": 0.2093, "step": 1480 }, { "epoch": 7.97, "grad_norm": 0.4584049582481384, "learning_rate": 0.0010267379679144385, "loss": 0.1777, "step": 1490 }, { "epoch": 8.0, "eval_accuracy": 0.969626168224299, "eval_f1": 0.9686486797969157, "eval_loss": 0.08350867033004761, "eval_precision": 0.9696703038283683, "eval_recall": 0.9682591946397131, "eval_runtime": 9.2254, "eval_samples_per_second": 185.574, "eval_steps_per_second": 11.598, "step": 1496 }, { "epoch": 8.02, "grad_norm": 0.7080217599868774, "learning_rate": 0.001, "loss": 0.1999, "step": 1500 }, { "epoch": 8.07, "grad_norm": 0.9281997084617615, "learning_rate": 0.0009732620320855614, "loss": 0.1688, "step": 1510 }, { "epoch": 8.13, "grad_norm": 0.8174493312835693, "learning_rate": 0.000946524064171123, "loss": 0.1731, "step": 1520 }, { "epoch": 8.18, "grad_norm": 0.6349031925201416, "learning_rate": 0.0009197860962566845, "loss": 0.1672, "step": 1530 }, { "epoch": 8.24, "grad_norm": 0.8174115419387817, "learning_rate": 0.000893048128342246, "loss": 0.1839, "step": 1540 }, { "epoch": 8.29, "grad_norm": 0.6900407671928406, "learning_rate": 0.0008663101604278075, "loss": 0.2044, "step": 1550 }, { "epoch": 8.34, "grad_norm": 0.2948859930038452, "learning_rate": 0.000839572192513369, "loss": 0.1328, "step": 1560 }, { "epoch": 8.4, "grad_norm": 0.7020041942596436, "learning_rate": 0.0008128342245989305, "loss": 0.1759, "step": 1570 }, { "epoch": 8.45, "grad_norm": 1.0418401956558228, "learning_rate": 0.000786096256684492, "loss": 0.1777, "step": 1580 }, { "epoch": 8.5, "grad_norm": 0.7473070025444031, "learning_rate": 0.0007593582887700536, "loss": 0.1631, "step": 1590 }, { "epoch": 8.56, "grad_norm": 0.8006024360656738, "learning_rate": 0.000732620320855615, "loss": 0.1566, "step": 1600 }, { "epoch": 8.61, "grad_norm": 1.0594407320022583, "learning_rate": 0.0007058823529411765, "loss": 0.184, "step": 1610 }, { "epoch": 8.66, "grad_norm": 0.6014285087585449, "learning_rate": 0.000679144385026738, "loss": 0.1583, "step": 1620 }, { "epoch": 8.72, "grad_norm": 0.6736869812011719, "learning_rate": 0.0006524064171122996, "loss": 0.1468, "step": 1630 }, { "epoch": 8.77, "grad_norm": 0.6957813501358032, "learning_rate": 0.0006256684491978609, "loss": 0.1731, "step": 1640 }, { "epoch": 8.82, "grad_norm": 0.5073075294494629, "learning_rate": 0.0005989304812834224, "loss": 0.176, "step": 1650 }, { "epoch": 8.88, "grad_norm": 0.5485414862632751, "learning_rate": 0.000572192513368984, "loss": 0.1936, "step": 1660 }, { "epoch": 8.93, "grad_norm": 0.8590062856674194, "learning_rate": 0.0005454545454545455, "loss": 0.1795, "step": 1670 }, { "epoch": 8.98, "grad_norm": 0.49274083971977234, "learning_rate": 0.000518716577540107, "loss": 0.1607, "step": 1680 }, { "epoch": 9.0, "eval_accuracy": 0.9772196261682243, "eval_f1": 0.9758896890562156, "eval_loss": 0.07392112910747528, "eval_precision": 0.9732910812266744, "eval_recall": 0.97920005624388, "eval_runtime": 9.2433, "eval_samples_per_second": 185.214, "eval_steps_per_second": 11.576, "step": 1683 }, { "epoch": 9.04, "grad_norm": 0.4997323751449585, "learning_rate": 0.0004919786096256684, "loss": 0.1352, "step": 1690 }, { "epoch": 9.09, "grad_norm": 0.5221167206764221, "learning_rate": 0.00046524064171122996, "loss": 0.1597, "step": 1700 }, { "epoch": 9.14, "grad_norm": 0.6731162071228027, "learning_rate": 0.0004385026737967915, "loss": 0.1639, "step": 1710 }, { "epoch": 9.2, "grad_norm": 0.5156794786453247, "learning_rate": 0.00041176470588235296, "loss": 0.1667, "step": 1720 }, { "epoch": 9.25, "grad_norm": 0.767203152179718, "learning_rate": 0.0003850267379679145, "loss": 0.1672, "step": 1730 }, { "epoch": 9.3, "grad_norm": 0.5664710402488708, "learning_rate": 0.0003582887700534759, "loss": 0.1428, "step": 1740 }, { "epoch": 9.36, "grad_norm": 0.37641459703445435, "learning_rate": 0.00033155080213903744, "loss": 0.1667, "step": 1750 }, { "epoch": 9.41, "grad_norm": 0.5527117252349854, "learning_rate": 0.0003048128342245989, "loss": 0.1723, "step": 1760 }, { "epoch": 9.47, "grad_norm": 0.8746387958526611, "learning_rate": 0.00027807486631016044, "loss": 0.1596, "step": 1770 }, { "epoch": 9.52, "grad_norm": 0.5461722612380981, "learning_rate": 0.0002513368983957219, "loss": 0.17, "step": 1780 }, { "epoch": 9.57, "grad_norm": 0.5201784372329712, "learning_rate": 0.00022459893048128345, "loss": 0.1268, "step": 1790 }, { "epoch": 9.63, "grad_norm": 0.44921737909317017, "learning_rate": 0.00019786096256684492, "loss": 0.1537, "step": 1800 }, { "epoch": 9.68, "grad_norm": 0.6538177728652954, "learning_rate": 0.00017112299465240642, "loss": 0.1564, "step": 1810 }, { "epoch": 9.73, "grad_norm": 0.39654332399368286, "learning_rate": 0.00014438502673796793, "loss": 0.1196, "step": 1820 }, { "epoch": 9.79, "grad_norm": 0.5751528143882751, "learning_rate": 0.00011764705882352942, "loss": 0.1953, "step": 1830 }, { "epoch": 9.84, "grad_norm": 0.7018762826919556, "learning_rate": 9.09090909090909e-05, "loss": 0.1414, "step": 1840 }, { "epoch": 9.89, "grad_norm": 0.8955555558204651, "learning_rate": 6.41711229946524e-05, "loss": 0.1415, "step": 1850 }, { "epoch": 9.95, "grad_norm": 0.29650095105171204, "learning_rate": 3.74331550802139e-05, "loss": 0.1361, "step": 1860 }, { "epoch": 10.0, "grad_norm": 0.6939311623573303, "learning_rate": 1.0695187165775402e-05, "loss": 0.1898, "step": 1870 }, { "epoch": 10.0, "eval_accuracy": 0.9789719626168224, "eval_f1": 0.9786328578443323, "eval_loss": 0.06271301954984665, "eval_precision": 0.9764445771965571, "eval_recall": 0.9811556249771411, "eval_runtime": 8.9863, "eval_samples_per_second": 190.512, "eval_steps_per_second": 11.907, "step": 1870 }, { "epoch": 10.0, "step": 1870, "total_flos": 9.332136680499118e+18, "train_loss": 0.29726991015959553, "train_runtime": 1395.7704, "train_samples_per_second": 85.68, "train_steps_per_second": 1.34 } ], "logging_steps": 10, "max_steps": 1870, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 9.332136680499118e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }