{ "best_metric": 0.8176672128705497, "best_model_checkpoint": "convnext-tiny-224_album_vitVMMRdb_make_model_album_pred/checkpoint-12585", "epoch": 14.999702114983616, "global_step": 12585, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.971405877680699e-07, "loss": 5.9416, "step": 10 }, { "epoch": 0.02, "learning_rate": 7.942811755361398e-07, "loss": 5.9452, "step": 20 }, { "epoch": 0.04, "learning_rate": 1.1914217633042098e-06, "loss": 5.9409, "step": 30 }, { "epoch": 0.05, "learning_rate": 1.5885623510722797e-06, "loss": 5.9378, "step": 40 }, { "epoch": 0.06, "learning_rate": 1.9857029388403496e-06, "loss": 5.9394, "step": 50 }, { "epoch": 0.07, "learning_rate": 2.3828435266084195e-06, "loss": 5.9288, "step": 60 }, { "epoch": 0.08, "learning_rate": 2.7799841143764895e-06, "loss": 5.9298, "step": 70 }, { "epoch": 0.1, "learning_rate": 3.1771247021445594e-06, "loss": 5.9304, "step": 80 }, { "epoch": 0.11, "learning_rate": 3.5742652899126297e-06, "loss": 5.9255, "step": 90 }, { "epoch": 0.12, "learning_rate": 3.971405877680699e-06, "loss": 5.9241, "step": 100 }, { "epoch": 0.13, "learning_rate": 4.368546465448769e-06, "loss": 5.9103, "step": 110 }, { "epoch": 0.14, "learning_rate": 4.765687053216839e-06, "loss": 5.9143, "step": 120 }, { "epoch": 0.15, "learning_rate": 5.162827640984909e-06, "loss": 5.9092, "step": 130 }, { "epoch": 0.17, "learning_rate": 5.559968228752979e-06, "loss": 5.8914, "step": 140 }, { "epoch": 0.18, "learning_rate": 5.957108816521049e-06, "loss": 5.8836, "step": 150 }, { "epoch": 0.19, "learning_rate": 6.354249404289119e-06, "loss": 5.8774, "step": 160 }, { "epoch": 0.2, "learning_rate": 6.751389992057189e-06, "loss": 5.87, "step": 170 }, { "epoch": 0.21, "learning_rate": 7.1485305798252594e-06, "loss": 5.8626, "step": 180 }, { "epoch": 0.23, "learning_rate": 7.5456711675933285e-06, "loss": 5.8501, "step": 190 }, { "epoch": 0.24, "learning_rate": 7.942811755361398e-06, "loss": 5.8369, "step": 200 }, { "epoch": 0.25, "learning_rate": 8.33995234312947e-06, "loss": 5.8174, "step": 210 }, { "epoch": 0.26, "learning_rate": 8.737092930897538e-06, "loss": 5.8033, "step": 220 }, { "epoch": 0.27, "learning_rate": 9.134233518665607e-06, "loss": 5.7707, "step": 230 }, { "epoch": 0.29, "learning_rate": 9.531374106433678e-06, "loss": 5.765, "step": 240 }, { "epoch": 0.3, "learning_rate": 9.928514694201747e-06, "loss": 5.7387, "step": 250 }, { "epoch": 0.31, "learning_rate": 1.0325655281969818e-05, "loss": 5.7149, "step": 260 }, { "epoch": 0.32, "learning_rate": 1.0722795869737889e-05, "loss": 5.6734, "step": 270 }, { "epoch": 0.33, "learning_rate": 1.1119936457505958e-05, "loss": 5.6429, "step": 280 }, { "epoch": 0.35, "learning_rate": 1.1517077045274027e-05, "loss": 5.62, "step": 290 }, { "epoch": 0.36, "learning_rate": 1.1914217633042098e-05, "loss": 5.5929, "step": 300 }, { "epoch": 0.37, "learning_rate": 1.2311358220810168e-05, "loss": 5.565, "step": 310 }, { "epoch": 0.38, "learning_rate": 1.2708498808578237e-05, "loss": 5.5256, "step": 320 }, { "epoch": 0.39, "learning_rate": 1.3105639396346307e-05, "loss": 5.5036, "step": 330 }, { "epoch": 0.41, "learning_rate": 1.3502779984114377e-05, "loss": 5.4775, "step": 340 }, { "epoch": 0.42, "learning_rate": 1.3899920571882446e-05, "loss": 5.4254, "step": 350 }, { "epoch": 0.43, "learning_rate": 1.4297061159650519e-05, "loss": 5.4182, "step": 360 }, { "epoch": 0.44, "learning_rate": 1.4694201747418588e-05, "loss": 5.39, "step": 370 }, { "epoch": 0.45, "learning_rate": 1.5091342335186657e-05, "loss": 5.3802, "step": 380 }, { "epoch": 0.46, "learning_rate": 1.548848292295473e-05, "loss": 5.3357, "step": 390 }, { "epoch": 0.48, "learning_rate": 1.5885623510722797e-05, "loss": 5.3103, "step": 400 }, { "epoch": 0.49, "learning_rate": 1.6282764098490864e-05, "loss": 5.3291, "step": 410 }, { "epoch": 0.5, "learning_rate": 1.667990468625894e-05, "loss": 5.2987, "step": 420 }, { "epoch": 0.51, "learning_rate": 1.7077045274027006e-05, "loss": 5.2742, "step": 430 }, { "epoch": 0.52, "learning_rate": 1.7474185861795077e-05, "loss": 5.281, "step": 440 }, { "epoch": 0.54, "learning_rate": 1.7871326449563147e-05, "loss": 5.2443, "step": 450 }, { "epoch": 0.55, "learning_rate": 1.8268467037331215e-05, "loss": 5.2933, "step": 460 }, { "epoch": 0.56, "learning_rate": 1.8665607625099285e-05, "loss": 5.2445, "step": 470 }, { "epoch": 0.57, "learning_rate": 1.9062748212867356e-05, "loss": 5.2277, "step": 480 }, { "epoch": 0.58, "learning_rate": 1.9459888800635427e-05, "loss": 5.2166, "step": 490 }, { "epoch": 0.6, "learning_rate": 1.9857029388403494e-05, "loss": 5.217, "step": 500 }, { "epoch": 0.61, "learning_rate": 2.0254169976171565e-05, "loss": 5.2192, "step": 510 }, { "epoch": 0.62, "learning_rate": 2.0651310563939636e-05, "loss": 5.1772, "step": 520 }, { "epoch": 0.63, "learning_rate": 2.1048451151707703e-05, "loss": 5.1665, "step": 530 }, { "epoch": 0.64, "learning_rate": 2.1445591739475777e-05, "loss": 5.123, "step": 540 }, { "epoch": 0.66, "learning_rate": 2.1842732327243845e-05, "loss": 5.1441, "step": 550 }, { "epoch": 0.67, "learning_rate": 2.2239872915011916e-05, "loss": 5.1312, "step": 560 }, { "epoch": 0.68, "learning_rate": 2.2637013502779986e-05, "loss": 5.0611, "step": 570 }, { "epoch": 0.69, "learning_rate": 2.3034154090548054e-05, "loss": 5.0637, "step": 580 }, { "epoch": 0.7, "learning_rate": 2.3431294678316128e-05, "loss": 5.0366, "step": 590 }, { "epoch": 0.71, "learning_rate": 2.3828435266084195e-05, "loss": 5.0064, "step": 600 }, { "epoch": 0.73, "learning_rate": 2.4225575853852263e-05, "loss": 4.9881, "step": 610 }, { "epoch": 0.74, "learning_rate": 2.4622716441620337e-05, "loss": 4.9946, "step": 620 }, { "epoch": 0.75, "learning_rate": 2.5019857029388404e-05, "loss": 4.9487, "step": 630 }, { "epoch": 0.76, "learning_rate": 2.5416997617156475e-05, "loss": 4.9289, "step": 640 }, { "epoch": 0.77, "learning_rate": 2.5814138204924542e-05, "loss": 4.8561, "step": 650 }, { "epoch": 0.79, "learning_rate": 2.6211278792692613e-05, "loss": 4.9333, "step": 660 }, { "epoch": 0.8, "learning_rate": 2.6608419380460687e-05, "loss": 4.8653, "step": 670 }, { "epoch": 0.81, "learning_rate": 2.7005559968228755e-05, "loss": 4.8375, "step": 680 }, { "epoch": 0.82, "learning_rate": 2.7402700555996825e-05, "loss": 4.848, "step": 690 }, { "epoch": 0.83, "learning_rate": 2.7799841143764893e-05, "loss": 4.7894, "step": 700 }, { "epoch": 0.85, "learning_rate": 2.8196981731532964e-05, "loss": 4.7765, "step": 710 }, { "epoch": 0.86, "learning_rate": 2.8594122319301038e-05, "loss": 4.7187, "step": 720 }, { "epoch": 0.87, "learning_rate": 2.8991262907069105e-05, "loss": 4.7285, "step": 730 }, { "epoch": 0.88, "learning_rate": 2.9388403494837176e-05, "loss": 4.7499, "step": 740 }, { "epoch": 0.89, "learning_rate": 2.9785544082605243e-05, "loss": 4.7581, "step": 750 }, { "epoch": 0.91, "learning_rate": 3.0182684670373314e-05, "loss": 4.712, "step": 760 }, { "epoch": 0.92, "learning_rate": 3.057982525814138e-05, "loss": 4.747, "step": 770 }, { "epoch": 0.93, "learning_rate": 3.097696584590946e-05, "loss": 4.6972, "step": 780 }, { "epoch": 0.94, "learning_rate": 3.1374106433677526e-05, "loss": 4.7124, "step": 790 }, { "epoch": 0.95, "learning_rate": 3.1771247021445594e-05, "loss": 4.6658, "step": 800 }, { "epoch": 0.97, "learning_rate": 3.216838760921366e-05, "loss": 4.6369, "step": 810 }, { "epoch": 0.98, "learning_rate": 3.256552819698173e-05, "loss": 4.6082, "step": 820 }, { "epoch": 0.99, "learning_rate": 3.29626687847498e-05, "loss": 4.6105, "step": 830 }, { "epoch": 1.0, "eval_accuracy": 0.10967525696409951, "eval_f1": 0.04032957193561421, "eval_loss": 4.524773597717285, "eval_precision": 0.05793827286588486, "eval_recall": 0.10967525696409951, "eval_runtime": 98.7079, "eval_samples_per_second": 272.035, "eval_steps_per_second": 4.255, "step": 839 }, { "epoch": 1.0, "learning_rate": 3.335980937251788e-05, "loss": 4.7548, "step": 840 }, { "epoch": 1.01, "learning_rate": 3.3756949960285944e-05, "loss": 4.5241, "step": 850 }, { "epoch": 1.03, "learning_rate": 3.415409054805401e-05, "loss": 4.5317, "step": 860 }, { "epoch": 1.04, "learning_rate": 3.455123113582208e-05, "loss": 4.5292, "step": 870 }, { "epoch": 1.05, "learning_rate": 3.494837172359015e-05, "loss": 4.4944, "step": 880 }, { "epoch": 1.06, "learning_rate": 3.534551231135822e-05, "loss": 4.4926, "step": 890 }, { "epoch": 1.07, "learning_rate": 3.5742652899126295e-05, "loss": 4.4883, "step": 900 }, { "epoch": 1.08, "learning_rate": 3.613979348689436e-05, "loss": 4.4853, "step": 910 }, { "epoch": 1.1, "learning_rate": 3.653693407466243e-05, "loss": 4.4041, "step": 920 }, { "epoch": 1.11, "learning_rate": 3.6934074662430504e-05, "loss": 4.4882, "step": 930 }, { "epoch": 1.12, "learning_rate": 3.733121525019857e-05, "loss": 4.3713, "step": 940 }, { "epoch": 1.13, "learning_rate": 3.7728355837966645e-05, "loss": 4.4213, "step": 950 }, { "epoch": 1.14, "learning_rate": 3.812549642573471e-05, "loss": 4.4329, "step": 960 }, { "epoch": 1.16, "learning_rate": 3.852263701350278e-05, "loss": 4.3754, "step": 970 }, { "epoch": 1.17, "learning_rate": 3.8919777601270854e-05, "loss": 4.3711, "step": 980 }, { "epoch": 1.18, "learning_rate": 3.931691818903892e-05, "loss": 4.3894, "step": 990 }, { "epoch": 1.19, "learning_rate": 3.971405877680699e-05, "loss": 4.3531, "step": 1000 }, { "epoch": 1.2, "learning_rate": 4.011119936457506e-05, "loss": 4.3371, "step": 1010 }, { "epoch": 1.22, "learning_rate": 4.050833995234313e-05, "loss": 4.26, "step": 1020 }, { "epoch": 1.23, "learning_rate": 4.0905480540111204e-05, "loss": 4.3503, "step": 1030 }, { "epoch": 1.24, "learning_rate": 4.130262112787927e-05, "loss": 4.254, "step": 1040 }, { "epoch": 1.25, "learning_rate": 4.169976171564734e-05, "loss": 4.309, "step": 1050 }, { "epoch": 1.26, "learning_rate": 4.209690230341541e-05, "loss": 4.2973, "step": 1060 }, { "epoch": 1.28, "learning_rate": 4.249404289118348e-05, "loss": 4.274, "step": 1070 }, { "epoch": 1.29, "learning_rate": 4.2891183478951555e-05, "loss": 4.2529, "step": 1080 }, { "epoch": 1.3, "learning_rate": 4.328832406671962e-05, "loss": 4.1877, "step": 1090 }, { "epoch": 1.31, "learning_rate": 4.368546465448769e-05, "loss": 4.2293, "step": 1100 }, { "epoch": 1.32, "learning_rate": 4.408260524225576e-05, "loss": 4.1833, "step": 1110 }, { "epoch": 1.33, "learning_rate": 4.447974583002383e-05, "loss": 4.1317, "step": 1120 }, { "epoch": 1.35, "learning_rate": 4.4876886417791905e-05, "loss": 4.1578, "step": 1130 }, { "epoch": 1.36, "learning_rate": 4.527402700555997e-05, "loss": 4.1565, "step": 1140 }, { "epoch": 1.37, "learning_rate": 4.567116759332804e-05, "loss": 4.1517, "step": 1150 }, { "epoch": 1.38, "learning_rate": 4.606830818109611e-05, "loss": 4.1748, "step": 1160 }, { "epoch": 1.39, "learning_rate": 4.6465448768864175e-05, "loss": 4.118, "step": 1170 }, { "epoch": 1.41, "learning_rate": 4.6862589356632256e-05, "loss": 4.1787, "step": 1180 }, { "epoch": 1.42, "learning_rate": 4.725972994440032e-05, "loss": 4.0838, "step": 1190 }, { "epoch": 1.43, "learning_rate": 4.765687053216839e-05, "loss": 4.1386, "step": 1200 }, { "epoch": 1.44, "learning_rate": 4.805401111993646e-05, "loss": 4.032, "step": 1210 }, { "epoch": 1.45, "learning_rate": 4.8451151707704525e-05, "loss": 4.0394, "step": 1220 }, { "epoch": 1.47, "learning_rate": 4.88482922954726e-05, "loss": 4.0293, "step": 1230 }, { "epoch": 1.48, "learning_rate": 4.9245432883240674e-05, "loss": 3.9968, "step": 1240 }, { "epoch": 1.49, "learning_rate": 4.964257347100874e-05, "loss": 4.0275, "step": 1250 }, { "epoch": 1.5, "learning_rate": 4.99955853787745e-05, "loss": 3.9625, "step": 1260 }, { "epoch": 1.51, "learning_rate": 4.995143916651951e-05, "loss": 4.0545, "step": 1270 }, { "epoch": 1.53, "learning_rate": 4.990729295426453e-05, "loss": 3.9338, "step": 1280 }, { "epoch": 1.54, "learning_rate": 4.9863146742009535e-05, "loss": 3.9301, "step": 1290 }, { "epoch": 1.55, "learning_rate": 4.981900052975455e-05, "loss": 3.953, "step": 1300 }, { "epoch": 1.56, "learning_rate": 4.977485431749956e-05, "loss": 3.9407, "step": 1310 }, { "epoch": 1.57, "learning_rate": 4.973070810524457e-05, "loss": 3.9255, "step": 1320 }, { "epoch": 1.59, "learning_rate": 4.968656189298959e-05, "loss": 3.9394, "step": 1330 }, { "epoch": 1.6, "learning_rate": 4.9642415680734595e-05, "loss": 3.9143, "step": 1340 }, { "epoch": 1.61, "learning_rate": 4.959826946847961e-05, "loss": 3.8728, "step": 1350 }, { "epoch": 1.62, "learning_rate": 4.955412325622462e-05, "loss": 3.8672, "step": 1360 }, { "epoch": 1.63, "learning_rate": 4.950997704396963e-05, "loss": 3.8059, "step": 1370 }, { "epoch": 1.64, "learning_rate": 4.9465830831714646e-05, "loss": 3.8209, "step": 1380 }, { "epoch": 1.66, "learning_rate": 4.9421684619459654e-05, "loss": 3.7519, "step": 1390 }, { "epoch": 1.67, "learning_rate": 4.937753840720466e-05, "loss": 3.7815, "step": 1400 }, { "epoch": 1.68, "learning_rate": 4.933339219494968e-05, "loss": 3.8434, "step": 1410 }, { "epoch": 1.69, "learning_rate": 4.9289245982694685e-05, "loss": 3.7892, "step": 1420 }, { "epoch": 1.7, "learning_rate": 4.92450997704397e-05, "loss": 3.7054, "step": 1430 }, { "epoch": 1.72, "learning_rate": 4.920095355818471e-05, "loss": 3.8061, "step": 1440 }, { "epoch": 1.73, "learning_rate": 4.915680734592972e-05, "loss": 3.7292, "step": 1450 }, { "epoch": 1.74, "learning_rate": 4.911266113367473e-05, "loss": 3.7181, "step": 1460 }, { "epoch": 1.75, "learning_rate": 4.9068514921419744e-05, "loss": 3.6944, "step": 1470 }, { "epoch": 1.76, "learning_rate": 4.902436870916476e-05, "loss": 3.7, "step": 1480 }, { "epoch": 1.78, "learning_rate": 4.898022249690977e-05, "loss": 3.6496, "step": 1490 }, { "epoch": 1.79, "learning_rate": 4.893607628465478e-05, "loss": 3.6937, "step": 1500 }, { "epoch": 1.8, "learning_rate": 4.889193007239979e-05, "loss": 3.6367, "step": 1510 }, { "epoch": 1.81, "learning_rate": 4.8847783860144804e-05, "loss": 3.656, "step": 1520 }, { "epoch": 1.82, "learning_rate": 4.880363764788982e-05, "loss": 3.6302, "step": 1530 }, { "epoch": 1.84, "learning_rate": 4.875949143563483e-05, "loss": 3.6144, "step": 1540 }, { "epoch": 1.85, "learning_rate": 4.871534522337984e-05, "loss": 3.6227, "step": 1550 }, { "epoch": 1.86, "learning_rate": 4.867119901112485e-05, "loss": 3.6088, "step": 1560 }, { "epoch": 1.87, "learning_rate": 4.862705279886986e-05, "loss": 3.5556, "step": 1570 }, { "epoch": 1.88, "learning_rate": 4.8582906586614865e-05, "loss": 3.5657, "step": 1580 }, { "epoch": 1.89, "learning_rate": 4.853876037435988e-05, "loss": 3.5833, "step": 1590 }, { "epoch": 1.91, "learning_rate": 4.8494614162104894e-05, "loss": 3.517, "step": 1600 }, { "epoch": 1.92, "learning_rate": 4.84504679498499e-05, "loss": 3.4891, "step": 1610 }, { "epoch": 1.93, "learning_rate": 4.840632173759492e-05, "loss": 3.4938, "step": 1620 }, { "epoch": 1.94, "learning_rate": 4.8362175525339925e-05, "loss": 3.4694, "step": 1630 }, { "epoch": 1.95, "learning_rate": 4.831802931308494e-05, "loss": 3.5104, "step": 1640 }, { "epoch": 1.97, "learning_rate": 4.8273883100829954e-05, "loss": 3.5442, "step": 1650 }, { "epoch": 1.98, "learning_rate": 4.822973688857496e-05, "loss": 3.5078, "step": 1660 }, { "epoch": 1.99, "learning_rate": 4.8185590676319976e-05, "loss": 3.4711, "step": 1670 }, { "epoch": 2.0, "eval_accuracy": 0.2999776552956949, "eval_f1": 0.20968639879082807, "eval_loss": 3.316246509552002, "eval_precision": 0.23018076164128848, "eval_recall": 0.2999776552956949, "eval_runtime": 98.3168, "eval_samples_per_second": 273.117, "eval_steps_per_second": 4.272, "step": 1678 }, { "epoch": 2.0, "learning_rate": 4.8141444464064984e-05, "loss": 3.5429, "step": 1680 }, { "epoch": 2.01, "learning_rate": 4.809729825181e-05, "loss": 3.4651, "step": 1690 }, { "epoch": 2.03, "learning_rate": 4.8053152039555014e-05, "loss": 3.3769, "step": 1700 }, { "epoch": 2.04, "learning_rate": 4.800900582730002e-05, "loss": 3.3783, "step": 1710 }, { "epoch": 2.05, "learning_rate": 4.796485961504503e-05, "loss": 3.4053, "step": 1720 }, { "epoch": 2.06, "learning_rate": 4.7920713402790044e-05, "loss": 3.3744, "step": 1730 }, { "epoch": 2.07, "learning_rate": 4.787656719053505e-05, "loss": 3.3675, "step": 1740 }, { "epoch": 2.09, "learning_rate": 4.7832420978280066e-05, "loss": 3.3607, "step": 1750 }, { "epoch": 2.1, "learning_rate": 4.7788274766025074e-05, "loss": 3.4022, "step": 1760 }, { "epoch": 2.11, "learning_rate": 4.774412855377009e-05, "loss": 3.2748, "step": 1770 }, { "epoch": 2.12, "learning_rate": 4.76999823415151e-05, "loss": 3.3338, "step": 1780 }, { "epoch": 2.13, "learning_rate": 4.765583612926011e-05, "loss": 3.3384, "step": 1790 }, { "epoch": 2.15, "learning_rate": 4.7611689917005126e-05, "loss": 3.2855, "step": 1800 }, { "epoch": 2.16, "learning_rate": 4.7567543704750134e-05, "loss": 3.2991, "step": 1810 }, { "epoch": 2.17, "learning_rate": 4.752339749249515e-05, "loss": 3.3359, "step": 1820 }, { "epoch": 2.18, "learning_rate": 4.7479251280240157e-05, "loss": 3.2541, "step": 1830 }, { "epoch": 2.19, "learning_rate": 4.743510506798517e-05, "loss": 3.2962, "step": 1840 }, { "epoch": 2.2, "learning_rate": 4.7390958855730186e-05, "loss": 3.2443, "step": 1850 }, { "epoch": 2.22, "learning_rate": 4.7346812643475194e-05, "loss": 3.2785, "step": 1860 }, { "epoch": 2.23, "learning_rate": 4.730266643122021e-05, "loss": 3.1409, "step": 1870 }, { "epoch": 2.24, "learning_rate": 4.7258520218965216e-05, "loss": 3.2309, "step": 1880 }, { "epoch": 2.25, "learning_rate": 4.7214374006710224e-05, "loss": 3.1422, "step": 1890 }, { "epoch": 2.26, "learning_rate": 4.717022779445524e-05, "loss": 3.1799, "step": 1900 }, { "epoch": 2.28, "learning_rate": 4.712608158220025e-05, "loss": 3.163, "step": 1910 }, { "epoch": 2.29, "learning_rate": 4.708193536994526e-05, "loss": 3.1483, "step": 1920 }, { "epoch": 2.3, "learning_rate": 4.703778915769027e-05, "loss": 3.2082, "step": 1930 }, { "epoch": 2.31, "learning_rate": 4.6993642945435284e-05, "loss": 3.1304, "step": 1940 }, { "epoch": 2.32, "learning_rate": 4.694949673318029e-05, "loss": 3.1022, "step": 1950 }, { "epoch": 2.34, "learning_rate": 4.6905350520925306e-05, "loss": 3.174, "step": 1960 }, { "epoch": 2.35, "learning_rate": 4.686120430867032e-05, "loss": 3.0886, "step": 1970 }, { "epoch": 2.36, "learning_rate": 4.681705809641533e-05, "loss": 3.0919, "step": 1980 }, { "epoch": 2.37, "learning_rate": 4.6772911884160344e-05, "loss": 3.1014, "step": 1990 }, { "epoch": 2.38, "learning_rate": 4.672876567190535e-05, "loss": 3.0974, "step": 2000 }, { "epoch": 2.4, "learning_rate": 4.6684619459650366e-05, "loss": 3.0405, "step": 2010 }, { "epoch": 2.41, "learning_rate": 4.664047324739538e-05, "loss": 3.1053, "step": 2020 }, { "epoch": 2.42, "learning_rate": 4.659632703514039e-05, "loss": 3.0749, "step": 2030 }, { "epoch": 2.43, "learning_rate": 4.6552180822885396e-05, "loss": 3.0606, "step": 2040 }, { "epoch": 2.44, "learning_rate": 4.6508034610630404e-05, "loss": 3.0498, "step": 2050 }, { "epoch": 2.46, "learning_rate": 4.646388839837542e-05, "loss": 3.0625, "step": 2060 }, { "epoch": 2.47, "learning_rate": 4.6419742186120434e-05, "loss": 3.0172, "step": 2070 }, { "epoch": 2.48, "learning_rate": 4.637559597386544e-05, "loss": 2.9855, "step": 2080 }, { "epoch": 2.49, "learning_rate": 4.6331449761610456e-05, "loss": 2.9864, "step": 2090 }, { "epoch": 2.5, "learning_rate": 4.6287303549355464e-05, "loss": 2.9722, "step": 2100 }, { "epoch": 2.51, "learning_rate": 4.624315733710048e-05, "loss": 2.89, "step": 2110 }, { "epoch": 2.53, "learning_rate": 4.619901112484549e-05, "loss": 3.0054, "step": 2120 }, { "epoch": 2.54, "learning_rate": 4.61548649125905e-05, "loss": 2.9484, "step": 2130 }, { "epoch": 2.55, "learning_rate": 4.6110718700335516e-05, "loss": 2.8977, "step": 2140 }, { "epoch": 2.56, "learning_rate": 4.6066572488080524e-05, "loss": 2.9717, "step": 2150 }, { "epoch": 2.57, "learning_rate": 4.602242627582554e-05, "loss": 2.9702, "step": 2160 }, { "epoch": 2.59, "learning_rate": 4.597828006357055e-05, "loss": 2.9394, "step": 2170 }, { "epoch": 2.6, "learning_rate": 4.593413385131556e-05, "loss": 2.9298, "step": 2180 }, { "epoch": 2.61, "learning_rate": 4.588998763906057e-05, "loss": 2.863, "step": 2190 }, { "epoch": 2.62, "learning_rate": 4.5845841426805583e-05, "loss": 2.8856, "step": 2200 }, { "epoch": 2.63, "learning_rate": 4.580169521455059e-05, "loss": 2.8582, "step": 2210 }, { "epoch": 2.65, "learning_rate": 4.5757549002295606e-05, "loss": 2.9151, "step": 2220 }, { "epoch": 2.66, "learning_rate": 4.5713402790040614e-05, "loss": 2.8773, "step": 2230 }, { "epoch": 2.67, "learning_rate": 4.566925657778563e-05, "loss": 2.8786, "step": 2240 }, { "epoch": 2.68, "learning_rate": 4.5625110365530636e-05, "loss": 2.8755, "step": 2250 }, { "epoch": 2.69, "learning_rate": 4.558096415327565e-05, "loss": 2.854, "step": 2260 }, { "epoch": 2.71, "learning_rate": 4.5536817941020666e-05, "loss": 2.8562, "step": 2270 }, { "epoch": 2.72, "learning_rate": 4.5492671728765674e-05, "loss": 2.8179, "step": 2280 }, { "epoch": 2.73, "learning_rate": 4.544852551651069e-05, "loss": 2.8217, "step": 2290 }, { "epoch": 2.74, "learning_rate": 4.5404379304255696e-05, "loss": 2.7928, "step": 2300 }, { "epoch": 2.75, "learning_rate": 4.536023309200071e-05, "loss": 2.804, "step": 2310 }, { "epoch": 2.76, "learning_rate": 4.5316086879745725e-05, "loss": 2.7497, "step": 2320 }, { "epoch": 2.78, "learning_rate": 4.527194066749073e-05, "loss": 2.7463, "step": 2330 }, { "epoch": 2.79, "learning_rate": 4.522779445523575e-05, "loss": 2.7182, "step": 2340 }, { "epoch": 2.8, "learning_rate": 4.5183648242980756e-05, "loss": 2.7447, "step": 2350 }, { "epoch": 2.81, "learning_rate": 4.5139502030725764e-05, "loss": 2.7624, "step": 2360 }, { "epoch": 2.82, "learning_rate": 4.509535581847077e-05, "loss": 2.7295, "step": 2370 }, { "epoch": 2.84, "learning_rate": 4.5051209606215786e-05, "loss": 2.69, "step": 2380 }, { "epoch": 2.85, "learning_rate": 4.50070633939608e-05, "loss": 2.6543, "step": 2390 }, { "epoch": 2.86, "learning_rate": 4.496291718170581e-05, "loss": 2.7529, "step": 2400 }, { "epoch": 2.87, "learning_rate": 4.491877096945082e-05, "loss": 2.74, "step": 2410 }, { "epoch": 2.88, "learning_rate": 4.487462475719583e-05, "loss": 2.6917, "step": 2420 }, { "epoch": 2.9, "learning_rate": 4.4830478544940846e-05, "loss": 2.6828, "step": 2430 }, { "epoch": 2.91, "learning_rate": 4.478633233268586e-05, "loss": 2.6397, "step": 2440 }, { "epoch": 2.92, "learning_rate": 4.474218612043087e-05, "loss": 2.7281, "step": 2450 }, { "epoch": 2.93, "learning_rate": 4.469803990817588e-05, "loss": 2.656, "step": 2460 }, { "epoch": 2.94, "learning_rate": 4.465389369592089e-05, "loss": 2.6689, "step": 2470 }, { "epoch": 2.96, "learning_rate": 4.4609747483665905e-05, "loss": 2.6667, "step": 2480 }, { "epoch": 2.97, "learning_rate": 4.456560127141092e-05, "loss": 2.6502, "step": 2490 }, { "epoch": 2.98, "learning_rate": 4.452145505915593e-05, "loss": 2.6483, "step": 2500 }, { "epoch": 2.99, "learning_rate": 4.4477308846900936e-05, "loss": 2.6202, "step": 2510 }, { "epoch": 3.0, "eval_accuracy": 0.4708774020557128, "eval_f1": 0.3938664278992793, "eval_loss": 2.444504499435425, "eval_precision": 0.41196094659916344, "eval_recall": 0.4708774020557128, "eval_runtime": 98.7509, "eval_samples_per_second": 271.916, "eval_steps_per_second": 4.253, "step": 2517 }, { "epoch": 3.0, "learning_rate": 4.443316263464595e-05, "loss": 2.6666, "step": 2520 }, { "epoch": 3.02, "learning_rate": 4.438901642239096e-05, "loss": 2.522, "step": 2530 }, { "epoch": 3.03, "learning_rate": 4.434487021013597e-05, "loss": 2.5387, "step": 2540 }, { "epoch": 3.04, "learning_rate": 4.430072399788098e-05, "loss": 2.588, "step": 2550 }, { "epoch": 3.05, "learning_rate": 4.4256577785625996e-05, "loss": 2.5588, "step": 2560 }, { "epoch": 3.06, "learning_rate": 4.4212431573371003e-05, "loss": 2.5374, "step": 2570 }, { "epoch": 3.08, "learning_rate": 4.416828536111602e-05, "loss": 2.6003, "step": 2580 }, { "epoch": 3.09, "learning_rate": 4.412413914886103e-05, "loss": 2.4761, "step": 2590 }, { "epoch": 3.1, "learning_rate": 4.407999293660604e-05, "loss": 2.5166, "step": 2600 }, { "epoch": 3.11, "learning_rate": 4.4035846724351055e-05, "loss": 2.5305, "step": 2610 }, { "epoch": 3.12, "learning_rate": 4.399170051209606e-05, "loss": 2.5378, "step": 2620 }, { "epoch": 3.13, "learning_rate": 4.394755429984108e-05, "loss": 2.5443, "step": 2630 }, { "epoch": 3.15, "learning_rate": 4.390340808758609e-05, "loss": 2.5134, "step": 2640 }, { "epoch": 3.16, "learning_rate": 4.38592618753311e-05, "loss": 2.5105, "step": 2650 }, { "epoch": 3.17, "learning_rate": 4.3815115663076115e-05, "loss": 2.5155, "step": 2660 }, { "epoch": 3.18, "learning_rate": 4.377096945082112e-05, "loss": 2.5207, "step": 2670 }, { "epoch": 3.19, "learning_rate": 4.372682323856613e-05, "loss": 2.4432, "step": 2680 }, { "epoch": 3.21, "learning_rate": 4.3682677026311145e-05, "loss": 2.4199, "step": 2690 }, { "epoch": 3.22, "learning_rate": 4.363853081405615e-05, "loss": 2.4083, "step": 2700 }, { "epoch": 3.23, "learning_rate": 4.359438460180117e-05, "loss": 2.5031, "step": 2710 }, { "epoch": 3.24, "learning_rate": 4.3550238389546176e-05, "loss": 2.421, "step": 2720 }, { "epoch": 3.25, "learning_rate": 4.350609217729119e-05, "loss": 2.4005, "step": 2730 }, { "epoch": 3.27, "learning_rate": 4.34619459650362e-05, "loss": 2.4384, "step": 2740 }, { "epoch": 3.28, "learning_rate": 4.341779975278121e-05, "loss": 2.3936, "step": 2750 }, { "epoch": 3.29, "learning_rate": 4.337365354052623e-05, "loss": 2.3977, "step": 2760 }, { "epoch": 3.3, "learning_rate": 4.3329507328271235e-05, "loss": 2.4, "step": 2770 }, { "epoch": 3.31, "learning_rate": 4.328536111601625e-05, "loss": 2.3995, "step": 2780 }, { "epoch": 3.33, "learning_rate": 4.324121490376126e-05, "loss": 2.3718, "step": 2790 }, { "epoch": 3.34, "learning_rate": 4.319706869150627e-05, "loss": 2.3718, "step": 2800 }, { "epoch": 3.35, "learning_rate": 4.315292247925129e-05, "loss": 2.3832, "step": 2810 }, { "epoch": 3.36, "learning_rate": 4.3108776266996295e-05, "loss": 2.4285, "step": 2820 }, { "epoch": 3.37, "learning_rate": 4.30646300547413e-05, "loss": 2.3217, "step": 2830 }, { "epoch": 3.38, "learning_rate": 4.302048384248632e-05, "loss": 2.3315, "step": 2840 }, { "epoch": 3.4, "learning_rate": 4.2976337630231326e-05, "loss": 2.3445, "step": 2850 }, { "epoch": 3.41, "learning_rate": 4.293219141797634e-05, "loss": 2.34, "step": 2860 }, { "epoch": 3.42, "learning_rate": 4.288804520572135e-05, "loss": 2.3481, "step": 2870 }, { "epoch": 3.43, "learning_rate": 4.284389899346636e-05, "loss": 2.312, "step": 2880 }, { "epoch": 3.44, "learning_rate": 4.279975278121137e-05, "loss": 2.306, "step": 2890 }, { "epoch": 3.46, "learning_rate": 4.2755606568956385e-05, "loss": 2.3284, "step": 2900 }, { "epoch": 3.47, "learning_rate": 4.27114603567014e-05, "loss": 2.3372, "step": 2910 }, { "epoch": 3.48, "learning_rate": 4.266731414444641e-05, "loss": 2.2843, "step": 2920 }, { "epoch": 3.49, "learning_rate": 4.262316793219142e-05, "loss": 2.2336, "step": 2930 }, { "epoch": 3.5, "learning_rate": 4.257902171993643e-05, "loss": 2.2738, "step": 2940 }, { "epoch": 3.52, "learning_rate": 4.2534875507681445e-05, "loss": 2.2537, "step": 2950 }, { "epoch": 3.53, "learning_rate": 4.249072929542646e-05, "loss": 2.3713, "step": 2960 }, { "epoch": 3.54, "learning_rate": 4.244658308317147e-05, "loss": 2.3185, "step": 2970 }, { "epoch": 3.55, "learning_rate": 4.240243687091648e-05, "loss": 2.2767, "step": 2980 }, { "epoch": 3.56, "learning_rate": 4.235829065866149e-05, "loss": 2.2159, "step": 2990 }, { "epoch": 3.58, "learning_rate": 4.23141444464065e-05, "loss": 2.2576, "step": 3000 }, { "epoch": 3.59, "learning_rate": 4.226999823415151e-05, "loss": 2.2698, "step": 3010 }, { "epoch": 3.6, "learning_rate": 4.222585202189652e-05, "loss": 2.2491, "step": 3020 }, { "epoch": 3.61, "learning_rate": 4.2181705809641535e-05, "loss": 2.2619, "step": 3030 }, { "epoch": 3.62, "learning_rate": 4.213755959738654e-05, "loss": 2.2013, "step": 3040 }, { "epoch": 3.64, "learning_rate": 4.209341338513156e-05, "loss": 2.2161, "step": 3050 }, { "epoch": 3.65, "learning_rate": 4.204926717287657e-05, "loss": 2.2614, "step": 3060 }, { "epoch": 3.66, "learning_rate": 4.200512096062158e-05, "loss": 2.1594, "step": 3070 }, { "epoch": 3.67, "learning_rate": 4.1960974748366595e-05, "loss": 2.2023, "step": 3080 }, { "epoch": 3.68, "learning_rate": 4.19168285361116e-05, "loss": 2.2037, "step": 3090 }, { "epoch": 3.69, "learning_rate": 4.187268232385662e-05, "loss": 2.2178, "step": 3100 }, { "epoch": 3.71, "learning_rate": 4.1828536111601625e-05, "loss": 2.1852, "step": 3110 }, { "epoch": 3.72, "learning_rate": 4.178438989934664e-05, "loss": 2.118, "step": 3120 }, { "epoch": 3.73, "learning_rate": 4.1740243687091654e-05, "loss": 2.1234, "step": 3130 }, { "epoch": 3.74, "learning_rate": 4.169609747483666e-05, "loss": 2.1954, "step": 3140 }, { "epoch": 3.75, "learning_rate": 4.165195126258167e-05, "loss": 2.2305, "step": 3150 }, { "epoch": 3.77, "learning_rate": 4.160780505032668e-05, "loss": 2.1876, "step": 3160 }, { "epoch": 3.78, "learning_rate": 4.156365883807169e-05, "loss": 2.208, "step": 3170 }, { "epoch": 3.79, "learning_rate": 4.151951262581671e-05, "loss": 2.1111, "step": 3180 }, { "epoch": 3.8, "learning_rate": 4.1475366413561715e-05, "loss": 2.1647, "step": 3190 }, { "epoch": 3.81, "learning_rate": 4.143122020130673e-05, "loss": 2.1881, "step": 3200 }, { "epoch": 3.83, "learning_rate": 4.138707398905174e-05, "loss": 2.1371, "step": 3210 }, { "epoch": 3.84, "learning_rate": 4.134292777679675e-05, "loss": 2.1696, "step": 3220 }, { "epoch": 3.85, "learning_rate": 4.129878156454177e-05, "loss": 2.1368, "step": 3230 }, { "epoch": 3.86, "learning_rate": 4.1254635352286775e-05, "loss": 2.0675, "step": 3240 }, { "epoch": 3.87, "learning_rate": 4.121048914003179e-05, "loss": 2.1286, "step": 3250 }, { "epoch": 3.89, "learning_rate": 4.11663429277768e-05, "loss": 2.0987, "step": 3260 }, { "epoch": 3.9, "learning_rate": 4.112219671552181e-05, "loss": 2.1006, "step": 3270 }, { "epoch": 3.91, "learning_rate": 4.107805050326683e-05, "loss": 2.0706, "step": 3280 }, { "epoch": 3.92, "learning_rate": 4.1033904291011835e-05, "loss": 2.0452, "step": 3290 }, { "epoch": 3.93, "learning_rate": 4.098975807875684e-05, "loss": 2.0613, "step": 3300 }, { "epoch": 3.94, "learning_rate": 4.094561186650186e-05, "loss": 2.0598, "step": 3310 }, { "epoch": 3.96, "learning_rate": 4.0901465654246865e-05, "loss": 2.0953, "step": 3320 }, { "epoch": 3.97, "learning_rate": 4.085731944199188e-05, "loss": 2.1055, "step": 3330 }, { "epoch": 3.98, "learning_rate": 4.081317322973689e-05, "loss": 2.1009, "step": 3340 }, { "epoch": 3.99, "learning_rate": 4.07690270174819e-05, "loss": 2.0614, "step": 3350 }, { "epoch": 4.0, "eval_accuracy": 0.5741844182928646, "eval_f1": 0.5167663284927785, "eval_loss": 1.8839434385299683, "eval_precision": 0.5388953620960228, "eval_recall": 0.5741844182928646, "eval_runtime": 98.9624, "eval_samples_per_second": 271.335, "eval_steps_per_second": 4.244, "step": 3356 }, { "epoch": 4.0, "learning_rate": 4.072488080522691e-05, "loss": 2.0478, "step": 3360 }, { "epoch": 4.02, "learning_rate": 4.0680734592971925e-05, "loss": 2.0201, "step": 3370 }, { "epoch": 4.03, "learning_rate": 4.063658838071694e-05, "loss": 2.0014, "step": 3380 }, { "epoch": 4.04, "learning_rate": 4.059244216846195e-05, "loss": 2.0108, "step": 3390 }, { "epoch": 4.05, "learning_rate": 4.054829595620696e-05, "loss": 2.0173, "step": 3400 }, { "epoch": 4.06, "learning_rate": 4.050414974395197e-05, "loss": 1.9934, "step": 3410 }, { "epoch": 4.08, "learning_rate": 4.0460003531696984e-05, "loss": 1.9927, "step": 3420 }, { "epoch": 4.09, "learning_rate": 4.0415857319442e-05, "loss": 2.0354, "step": 3430 }, { "epoch": 4.1, "learning_rate": 4.037171110718701e-05, "loss": 1.9639, "step": 3440 }, { "epoch": 4.11, "learning_rate": 4.032756489493202e-05, "loss": 1.9947, "step": 3450 }, { "epoch": 4.12, "learning_rate": 4.028341868267703e-05, "loss": 1.9709, "step": 3460 }, { "epoch": 4.14, "learning_rate": 4.023927247042204e-05, "loss": 1.9729, "step": 3470 }, { "epoch": 4.15, "learning_rate": 4.0195126258167045e-05, "loss": 1.9609, "step": 3480 }, { "epoch": 4.16, "learning_rate": 4.015098004591206e-05, "loss": 1.9675, "step": 3490 }, { "epoch": 4.17, "learning_rate": 4.0106833833657074e-05, "loss": 1.9618, "step": 3500 }, { "epoch": 4.18, "learning_rate": 4.006268762140208e-05, "loss": 1.9164, "step": 3510 }, { "epoch": 4.2, "learning_rate": 4.00185414091471e-05, "loss": 1.949, "step": 3520 }, { "epoch": 4.21, "learning_rate": 3.9974395196892105e-05, "loss": 1.9324, "step": 3530 }, { "epoch": 4.22, "learning_rate": 3.993024898463712e-05, "loss": 1.9665, "step": 3540 }, { "epoch": 4.23, "learning_rate": 3.9886102772382134e-05, "loss": 2.0005, "step": 3550 }, { "epoch": 4.24, "learning_rate": 3.984195656012714e-05, "loss": 1.9234, "step": 3560 }, { "epoch": 4.25, "learning_rate": 3.979781034787216e-05, "loss": 1.9317, "step": 3570 }, { "epoch": 4.27, "learning_rate": 3.9753664135617165e-05, "loss": 1.9583, "step": 3580 }, { "epoch": 4.28, "learning_rate": 3.970951792336218e-05, "loss": 1.869, "step": 3590 }, { "epoch": 4.29, "learning_rate": 3.9665371711107194e-05, "loss": 1.9407, "step": 3600 }, { "epoch": 4.3, "learning_rate": 3.96212254988522e-05, "loss": 1.8964, "step": 3610 }, { "epoch": 4.31, "learning_rate": 3.957707928659721e-05, "loss": 1.8768, "step": 3620 }, { "epoch": 4.33, "learning_rate": 3.9532933074342224e-05, "loss": 1.915, "step": 3630 }, { "epoch": 4.34, "learning_rate": 3.948878686208723e-05, "loss": 1.8625, "step": 3640 }, { "epoch": 4.35, "learning_rate": 3.944464064983225e-05, "loss": 1.9401, "step": 3650 }, { "epoch": 4.36, "learning_rate": 3.9400494437577255e-05, "loss": 1.9032, "step": 3660 }, { "epoch": 4.37, "learning_rate": 3.935634822532227e-05, "loss": 1.9214, "step": 3670 }, { "epoch": 4.39, "learning_rate": 3.931220201306728e-05, "loss": 1.8298, "step": 3680 }, { "epoch": 4.4, "learning_rate": 3.926805580081229e-05, "loss": 1.9058, "step": 3690 }, { "epoch": 4.41, "learning_rate": 3.9223909588557306e-05, "loss": 1.8939, "step": 3700 }, { "epoch": 4.42, "learning_rate": 3.9179763376302314e-05, "loss": 1.8385, "step": 3710 }, { "epoch": 4.43, "learning_rate": 3.913561716404733e-05, "loss": 1.8823, "step": 3720 }, { "epoch": 4.45, "learning_rate": 3.909147095179234e-05, "loss": 1.8207, "step": 3730 }, { "epoch": 4.46, "learning_rate": 3.904732473953735e-05, "loss": 1.8259, "step": 3740 }, { "epoch": 4.47, "learning_rate": 3.9003178527282366e-05, "loss": 1.8235, "step": 3750 }, { "epoch": 4.48, "learning_rate": 3.8959032315027374e-05, "loss": 1.8281, "step": 3760 }, { "epoch": 4.49, "learning_rate": 3.891488610277239e-05, "loss": 1.8559, "step": 3770 }, { "epoch": 4.51, "learning_rate": 3.8870739890517397e-05, "loss": 1.8934, "step": 3780 }, { "epoch": 4.52, "learning_rate": 3.8826593678262404e-05, "loss": 1.8855, "step": 3790 }, { "epoch": 4.53, "learning_rate": 3.878244746600742e-05, "loss": 1.7935, "step": 3800 }, { "epoch": 4.54, "learning_rate": 3.873830125375243e-05, "loss": 1.8148, "step": 3810 }, { "epoch": 4.55, "learning_rate": 3.869415504149744e-05, "loss": 1.7876, "step": 3820 }, { "epoch": 4.56, "learning_rate": 3.865000882924245e-05, "loss": 1.7597, "step": 3830 }, { "epoch": 4.58, "learning_rate": 3.8605862616987464e-05, "loss": 1.8237, "step": 3840 }, { "epoch": 4.59, "learning_rate": 3.856171640473247e-05, "loss": 1.8599, "step": 3850 }, { "epoch": 4.6, "learning_rate": 3.851757019247749e-05, "loss": 1.7719, "step": 3860 }, { "epoch": 4.61, "learning_rate": 3.84734239802225e-05, "loss": 1.8413, "step": 3870 }, { "epoch": 4.62, "learning_rate": 3.842927776796751e-05, "loss": 1.7742, "step": 3880 }, { "epoch": 4.64, "learning_rate": 3.8385131555712524e-05, "loss": 1.8499, "step": 3890 }, { "epoch": 4.65, "learning_rate": 3.834098534345753e-05, "loss": 1.7576, "step": 3900 }, { "epoch": 4.66, "learning_rate": 3.8296839131202546e-05, "loss": 1.7575, "step": 3910 }, { "epoch": 4.67, "learning_rate": 3.825269291894756e-05, "loss": 1.8016, "step": 3920 }, { "epoch": 4.68, "learning_rate": 3.820854670669257e-05, "loss": 1.7705, "step": 3930 }, { "epoch": 4.7, "learning_rate": 3.816440049443758e-05, "loss": 1.7676, "step": 3940 }, { "epoch": 4.71, "learning_rate": 3.812025428218259e-05, "loss": 1.7615, "step": 3950 }, { "epoch": 4.72, "learning_rate": 3.80761080699276e-05, "loss": 1.7477, "step": 3960 }, { "epoch": 4.73, "learning_rate": 3.8031961857672614e-05, "loss": 1.7753, "step": 3970 }, { "epoch": 4.74, "learning_rate": 3.798781564541762e-05, "loss": 1.7486, "step": 3980 }, { "epoch": 4.76, "learning_rate": 3.7943669433162636e-05, "loss": 1.7403, "step": 3990 }, { "epoch": 4.77, "learning_rate": 3.7899523220907644e-05, "loss": 1.6858, "step": 4000 }, { "epoch": 4.78, "learning_rate": 3.785537700865266e-05, "loss": 1.7315, "step": 4010 }, { "epoch": 4.79, "learning_rate": 3.7811230796397674e-05, "loss": 1.7583, "step": 4020 }, { "epoch": 4.8, "learning_rate": 3.776708458414268e-05, "loss": 1.7067, "step": 4030 }, { "epoch": 4.82, "learning_rate": 3.7722938371887696e-05, "loss": 1.7346, "step": 4040 }, { "epoch": 4.83, "learning_rate": 3.7678792159632704e-05, "loss": 1.7225, "step": 4050 }, { "epoch": 4.84, "learning_rate": 3.763464594737772e-05, "loss": 1.7022, "step": 4060 }, { "epoch": 4.85, "learning_rate": 3.759049973512273e-05, "loss": 1.7202, "step": 4070 }, { "epoch": 4.86, "learning_rate": 3.754635352286774e-05, "loss": 1.7266, "step": 4080 }, { "epoch": 4.87, "learning_rate": 3.7502207310612756e-05, "loss": 1.6832, "step": 4090 }, { "epoch": 4.89, "learning_rate": 3.7458061098357764e-05, "loss": 1.6745, "step": 4100 }, { "epoch": 4.9, "learning_rate": 3.741391488610277e-05, "loss": 1.6977, "step": 4110 }, { "epoch": 4.91, "learning_rate": 3.7369768673847786e-05, "loss": 1.6997, "step": 4120 }, { "epoch": 4.92, "learning_rate": 3.7325622461592794e-05, "loss": 1.724, "step": 4130 }, { "epoch": 4.93, "learning_rate": 3.728147624933781e-05, "loss": 1.6449, "step": 4140 }, { "epoch": 4.95, "learning_rate": 3.7237330037082817e-05, "loss": 1.6715, "step": 4150 }, { "epoch": 4.96, "learning_rate": 3.719318382482783e-05, "loss": 1.6833, "step": 4160 }, { "epoch": 4.97, "learning_rate": 3.7149037612572846e-05, "loss": 1.7102, "step": 4170 }, { "epoch": 4.98, "learning_rate": 3.7104891400317854e-05, "loss": 1.7087, "step": 4180 }, { "epoch": 4.99, "learning_rate": 3.706074518806287e-05, "loss": 1.7026, "step": 4190 }, { "epoch": 5.0, "eval_accuracy": 0.6436019663339788, "eval_f1": 0.6012686796803567, "eval_loss": 1.5246539115905762, "eval_precision": 0.6179627814058779, "eval_recall": 0.6436019663339788, "eval_runtime": 98.9599, "eval_samples_per_second": 271.342, "eval_steps_per_second": 4.244, "step": 4195 }, { "epoch": 5.01, "learning_rate": 3.7016598975807876e-05, "loss": 1.7138, "step": 4200 }, { "epoch": 5.02, "learning_rate": 3.697245276355289e-05, "loss": 1.5906, "step": 4210 }, { "epoch": 5.03, "learning_rate": 3.6928306551297906e-05, "loss": 1.6526, "step": 4220 }, { "epoch": 5.04, "learning_rate": 3.6884160339042913e-05, "loss": 1.568, "step": 4230 }, { "epoch": 5.05, "learning_rate": 3.684001412678793e-05, "loss": 1.6482, "step": 4240 }, { "epoch": 5.07, "learning_rate": 3.6795867914532936e-05, "loss": 1.6047, "step": 4250 }, { "epoch": 5.08, "learning_rate": 3.6751721702277944e-05, "loss": 1.6349, "step": 4260 }, { "epoch": 5.09, "learning_rate": 3.670757549002296e-05, "loss": 1.643, "step": 4270 }, { "epoch": 5.1, "learning_rate": 3.6663429277767966e-05, "loss": 1.6335, "step": 4280 }, { "epoch": 5.11, "learning_rate": 3.661928306551298e-05, "loss": 1.616, "step": 4290 }, { "epoch": 5.13, "learning_rate": 3.657513685325799e-05, "loss": 1.5813, "step": 4300 }, { "epoch": 5.14, "learning_rate": 3.6530990641003004e-05, "loss": 1.5871, "step": 4310 }, { "epoch": 5.15, "learning_rate": 3.648684442874801e-05, "loss": 1.6759, "step": 4320 }, { "epoch": 5.16, "learning_rate": 3.6442698216493026e-05, "loss": 1.5666, "step": 4330 }, { "epoch": 5.17, "learning_rate": 3.639855200423804e-05, "loss": 1.6558, "step": 4340 }, { "epoch": 5.18, "learning_rate": 3.635440579198305e-05, "loss": 1.5853, "step": 4350 }, { "epoch": 5.2, "learning_rate": 3.631025957972806e-05, "loss": 1.641, "step": 4360 }, { "epoch": 5.21, "learning_rate": 3.626611336747307e-05, "loss": 1.5875, "step": 4370 }, { "epoch": 5.22, "learning_rate": 3.6221967155218086e-05, "loss": 1.6086, "step": 4380 }, { "epoch": 5.23, "learning_rate": 3.61778209429631e-05, "loss": 1.5143, "step": 4390 }, { "epoch": 5.24, "learning_rate": 3.613367473070811e-05, "loss": 1.5948, "step": 4400 }, { "epoch": 5.26, "learning_rate": 3.608952851845312e-05, "loss": 1.6024, "step": 4410 }, { "epoch": 5.27, "learning_rate": 3.604538230619813e-05, "loss": 1.5302, "step": 4420 }, { "epoch": 5.28, "learning_rate": 3.600123609394314e-05, "loss": 1.5198, "step": 4430 }, { "epoch": 5.29, "learning_rate": 3.595708988168815e-05, "loss": 1.6065, "step": 4440 }, { "epoch": 5.3, "learning_rate": 3.591294366943316e-05, "loss": 1.568, "step": 4450 }, { "epoch": 5.32, "learning_rate": 3.5868797457178176e-05, "loss": 1.6374, "step": 4460 }, { "epoch": 5.33, "learning_rate": 3.5824651244923184e-05, "loss": 1.5732, "step": 4470 }, { "epoch": 5.34, "learning_rate": 3.57805050326682e-05, "loss": 1.5805, "step": 4480 }, { "epoch": 5.35, "learning_rate": 3.573635882041321e-05, "loss": 1.623, "step": 4490 }, { "epoch": 5.36, "learning_rate": 3.569221260815822e-05, "loss": 1.5536, "step": 4500 }, { "epoch": 5.38, "learning_rate": 3.5648066395903236e-05, "loss": 1.5806, "step": 4510 }, { "epoch": 5.39, "learning_rate": 3.5603920183648243e-05, "loss": 1.5798, "step": 4520 }, { "epoch": 5.4, "learning_rate": 3.555977397139326e-05, "loss": 1.5538, "step": 4530 }, { "epoch": 5.41, "learning_rate": 3.551562775913827e-05, "loss": 1.5041, "step": 4540 }, { "epoch": 5.42, "learning_rate": 3.547148154688328e-05, "loss": 1.5274, "step": 4550 }, { "epoch": 5.43, "learning_rate": 3.5427335334628295e-05, "loss": 1.5284, "step": 4560 }, { "epoch": 5.45, "learning_rate": 3.53831891223733e-05, "loss": 1.4944, "step": 4570 }, { "epoch": 5.46, "learning_rate": 3.533904291011831e-05, "loss": 1.5106, "step": 4580 }, { "epoch": 5.47, "learning_rate": 3.5294896697863326e-05, "loss": 1.549, "step": 4590 }, { "epoch": 5.48, "learning_rate": 3.5250750485608334e-05, "loss": 1.5478, "step": 4600 }, { "epoch": 5.49, "learning_rate": 3.520660427335335e-05, "loss": 1.5143, "step": 4610 }, { "epoch": 5.51, "learning_rate": 3.5162458061098356e-05, "loss": 1.5077, "step": 4620 }, { "epoch": 5.52, "learning_rate": 3.511831184884337e-05, "loss": 1.5095, "step": 4630 }, { "epoch": 5.53, "learning_rate": 3.507416563658838e-05, "loss": 1.4635, "step": 4640 }, { "epoch": 5.54, "learning_rate": 3.503001942433339e-05, "loss": 1.4623, "step": 4650 }, { "epoch": 5.55, "learning_rate": 3.498587321207841e-05, "loss": 1.5481, "step": 4660 }, { "epoch": 5.57, "learning_rate": 3.4941726999823416e-05, "loss": 1.4816, "step": 4670 }, { "epoch": 5.58, "learning_rate": 3.489758078756843e-05, "loss": 1.4665, "step": 4680 }, { "epoch": 5.59, "learning_rate": 3.485343457531344e-05, "loss": 1.4978, "step": 4690 }, { "epoch": 5.6, "learning_rate": 3.480928836305845e-05, "loss": 1.4649, "step": 4700 }, { "epoch": 5.61, "learning_rate": 3.476514215080347e-05, "loss": 1.4676, "step": 4710 }, { "epoch": 5.63, "learning_rate": 3.4720995938548475e-05, "loss": 1.4891, "step": 4720 }, { "epoch": 5.64, "learning_rate": 3.467684972629348e-05, "loss": 1.4662, "step": 4730 }, { "epoch": 5.65, "learning_rate": 3.46327035140385e-05, "loss": 1.5112, "step": 4740 }, { "epoch": 5.66, "learning_rate": 3.4588557301783506e-05, "loss": 1.4777, "step": 4750 }, { "epoch": 5.67, "learning_rate": 3.454441108952852e-05, "loss": 1.4861, "step": 4760 }, { "epoch": 5.69, "learning_rate": 3.450026487727353e-05, "loss": 1.4659, "step": 4770 }, { "epoch": 5.7, "learning_rate": 3.445611866501854e-05, "loss": 1.4698, "step": 4780 }, { "epoch": 5.71, "learning_rate": 3.441197245276355e-05, "loss": 1.4517, "step": 4790 }, { "epoch": 5.72, "learning_rate": 3.4367826240508566e-05, "loss": 1.4347, "step": 4800 }, { "epoch": 5.73, "learning_rate": 3.432368002825358e-05, "loss": 1.4547, "step": 4810 }, { "epoch": 5.74, "learning_rate": 3.427953381599859e-05, "loss": 1.4362, "step": 4820 }, { "epoch": 5.76, "learning_rate": 3.42353876037436e-05, "loss": 1.4627, "step": 4830 }, { "epoch": 5.77, "learning_rate": 3.419124139148861e-05, "loss": 1.4835, "step": 4840 }, { "epoch": 5.78, "learning_rate": 3.4147095179233625e-05, "loss": 1.4554, "step": 4850 }, { "epoch": 5.79, "learning_rate": 3.410294896697864e-05, "loss": 1.4479, "step": 4860 }, { "epoch": 5.8, "learning_rate": 3.405880275472365e-05, "loss": 1.4177, "step": 4870 }, { "epoch": 5.82, "learning_rate": 3.401465654246866e-05, "loss": 1.4393, "step": 4880 }, { "epoch": 5.83, "learning_rate": 3.397051033021367e-05, "loss": 1.4812, "step": 4890 }, { "epoch": 5.84, "learning_rate": 3.392636411795868e-05, "loss": 1.4318, "step": 4900 }, { "epoch": 5.85, "learning_rate": 3.388221790570369e-05, "loss": 1.4818, "step": 4910 }, { "epoch": 5.86, "learning_rate": 3.38380716934487e-05, "loss": 1.472, "step": 4920 }, { "epoch": 5.88, "learning_rate": 3.3793925481193715e-05, "loss": 1.4164, "step": 4930 }, { "epoch": 5.89, "learning_rate": 3.374977926893872e-05, "loss": 1.4173, "step": 4940 }, { "epoch": 5.9, "learning_rate": 3.370563305668374e-05, "loss": 1.4133, "step": 4950 }, { "epoch": 5.91, "learning_rate": 3.366148684442875e-05, "loss": 1.4526, "step": 4960 }, { "epoch": 5.92, "learning_rate": 3.361734063217376e-05, "loss": 1.418, "step": 4970 }, { "epoch": 5.94, "learning_rate": 3.3573194419918775e-05, "loss": 1.4083, "step": 4980 }, { "epoch": 5.95, "learning_rate": 3.352904820766378e-05, "loss": 1.4273, "step": 4990 }, { "epoch": 5.96, "learning_rate": 3.34849019954088e-05, "loss": 1.3889, "step": 5000 }, { "epoch": 5.97, "learning_rate": 3.3440755783153805e-05, "loss": 1.3927, "step": 5010 }, { "epoch": 5.98, "learning_rate": 3.339660957089882e-05, "loss": 1.3998, "step": 5020 }, { "epoch": 5.99, "learning_rate": 3.3352463358643835e-05, "loss": 1.4288, "step": 5030 }, { "epoch": 6.0, "eval_accuracy": 0.6978995977953225, "eval_f1": 0.6686418903782712, "eval_loss": 1.2768018245697021, "eval_precision": 0.6810067175098012, "eval_recall": 0.6978995977953225, "eval_runtime": 99.1055, "eval_samples_per_second": 270.944, "eval_steps_per_second": 4.238, "step": 5034 }, { "epoch": 6.01, "learning_rate": 3.330831714638884e-05, "loss": 1.4183, "step": 5040 }, { "epoch": 6.02, "learning_rate": 3.326417093413385e-05, "loss": 1.3548, "step": 5050 }, { "epoch": 6.03, "learning_rate": 3.3220024721878865e-05, "loss": 1.352, "step": 5060 }, { "epoch": 6.04, "learning_rate": 3.317587850962387e-05, "loss": 1.3463, "step": 5070 }, { "epoch": 6.05, "learning_rate": 3.313173229736889e-05, "loss": 1.3743, "step": 5080 }, { "epoch": 6.07, "learning_rate": 3.3087586085113895e-05, "loss": 1.3347, "step": 5090 }, { "epoch": 6.08, "learning_rate": 3.304343987285891e-05, "loss": 1.3853, "step": 5100 }, { "epoch": 6.09, "learning_rate": 3.299929366060392e-05, "loss": 1.3626, "step": 5110 }, { "epoch": 6.1, "learning_rate": 3.295514744834893e-05, "loss": 1.3255, "step": 5120 }, { "epoch": 6.11, "learning_rate": 3.291100123609395e-05, "loss": 1.3577, "step": 5130 }, { "epoch": 6.13, "learning_rate": 3.2866855023838955e-05, "loss": 1.3738, "step": 5140 }, { "epoch": 6.14, "learning_rate": 3.282270881158397e-05, "loss": 1.3802, "step": 5150 }, { "epoch": 6.15, "learning_rate": 3.277856259932898e-05, "loss": 1.3946, "step": 5160 }, { "epoch": 6.16, "learning_rate": 3.273441638707399e-05, "loss": 1.3322, "step": 5170 }, { "epoch": 6.17, "learning_rate": 3.269027017481901e-05, "loss": 1.403, "step": 5180 }, { "epoch": 6.19, "learning_rate": 3.2646123962564015e-05, "loss": 1.3271, "step": 5190 }, { "epoch": 6.2, "learning_rate": 3.260197775030903e-05, "loss": 1.3338, "step": 5200 }, { "epoch": 6.21, "learning_rate": 3.255783153805404e-05, "loss": 1.3484, "step": 5210 }, { "epoch": 6.22, "learning_rate": 3.2513685325799045e-05, "loss": 1.3764, "step": 5220 }, { "epoch": 6.23, "learning_rate": 3.246953911354406e-05, "loss": 1.2798, "step": 5230 }, { "epoch": 6.25, "learning_rate": 3.242539290128907e-05, "loss": 1.3485, "step": 5240 }, { "epoch": 6.26, "learning_rate": 3.238124668903408e-05, "loss": 1.3634, "step": 5250 }, { "epoch": 6.27, "learning_rate": 3.233710047677909e-05, "loss": 1.3283, "step": 5260 }, { "epoch": 6.28, "learning_rate": 3.2292954264524105e-05, "loss": 1.2951, "step": 5270 }, { "epoch": 6.29, "learning_rate": 3.224880805226912e-05, "loss": 1.3562, "step": 5280 }, { "epoch": 6.31, "learning_rate": 3.220466184001413e-05, "loss": 1.3264, "step": 5290 }, { "epoch": 6.32, "learning_rate": 3.216051562775914e-05, "loss": 1.2662, "step": 5300 }, { "epoch": 6.33, "learning_rate": 3.211636941550415e-05, "loss": 1.3093, "step": 5310 }, { "epoch": 6.34, "learning_rate": 3.2072223203249165e-05, "loss": 1.3059, "step": 5320 }, { "epoch": 6.35, "learning_rate": 3.202807699099418e-05, "loss": 1.3101, "step": 5330 }, { "epoch": 6.36, "learning_rate": 3.198393077873919e-05, "loss": 1.2837, "step": 5340 }, { "epoch": 6.38, "learning_rate": 3.19397845664842e-05, "loss": 1.3089, "step": 5350 }, { "epoch": 6.39, "learning_rate": 3.189563835422921e-05, "loss": 1.2935, "step": 5360 }, { "epoch": 6.4, "learning_rate": 3.185149214197422e-05, "loss": 1.2767, "step": 5370 }, { "epoch": 6.41, "learning_rate": 3.180734592971923e-05, "loss": 1.3082, "step": 5380 }, { "epoch": 6.42, "learning_rate": 3.176319971746424e-05, "loss": 1.2936, "step": 5390 }, { "epoch": 6.44, "learning_rate": 3.1719053505209255e-05, "loss": 1.2872, "step": 5400 }, { "epoch": 6.45, "learning_rate": 3.167490729295426e-05, "loss": 1.4089, "step": 5410 }, { "epoch": 6.46, "learning_rate": 3.163076108069928e-05, "loss": 1.3171, "step": 5420 }, { "epoch": 6.47, "learning_rate": 3.1586614868444285e-05, "loss": 1.2926, "step": 5430 }, { "epoch": 6.48, "learning_rate": 3.15424686561893e-05, "loss": 1.2932, "step": 5440 }, { "epoch": 6.5, "learning_rate": 3.1498322443934314e-05, "loss": 1.3524, "step": 5450 }, { "epoch": 6.51, "learning_rate": 3.145417623167932e-05, "loss": 1.3105, "step": 5460 }, { "epoch": 6.52, "learning_rate": 3.141003001942434e-05, "loss": 1.2973, "step": 5470 }, { "epoch": 6.53, "learning_rate": 3.1365883807169345e-05, "loss": 1.2757, "step": 5480 }, { "epoch": 6.54, "learning_rate": 3.132173759491436e-05, "loss": 1.2243, "step": 5490 }, { "epoch": 6.56, "learning_rate": 3.1277591382659374e-05, "loss": 1.323, "step": 5500 }, { "epoch": 6.57, "learning_rate": 3.123344517040438e-05, "loss": 1.2593, "step": 5510 }, { "epoch": 6.58, "learning_rate": 3.11892989581494e-05, "loss": 1.3141, "step": 5520 }, { "epoch": 6.59, "learning_rate": 3.1145152745894405e-05, "loss": 1.2202, "step": 5530 }, { "epoch": 6.6, "learning_rate": 3.110100653363941e-05, "loss": 1.2719, "step": 5540 }, { "epoch": 6.61, "learning_rate": 3.105686032138443e-05, "loss": 1.3169, "step": 5550 }, { "epoch": 6.63, "learning_rate": 3.1012714109129435e-05, "loss": 1.3548, "step": 5560 }, { "epoch": 6.64, "learning_rate": 3.096856789687445e-05, "loss": 1.3031, "step": 5570 }, { "epoch": 6.65, "learning_rate": 3.092442168461946e-05, "loss": 1.2254, "step": 5580 }, { "epoch": 6.66, "learning_rate": 3.088027547236447e-05, "loss": 1.2795, "step": 5590 }, { "epoch": 6.67, "learning_rate": 3.083612926010949e-05, "loss": 1.2555, "step": 5600 }, { "epoch": 6.69, "learning_rate": 3.0791983047854495e-05, "loss": 1.2079, "step": 5610 }, { "epoch": 6.7, "learning_rate": 3.074783683559951e-05, "loss": 1.2155, "step": 5620 }, { "epoch": 6.71, "learning_rate": 3.070369062334452e-05, "loss": 1.254, "step": 5630 }, { "epoch": 6.72, "learning_rate": 3.065954441108953e-05, "loss": 1.2222, "step": 5640 }, { "epoch": 6.73, "learning_rate": 3.0615398198834546e-05, "loss": 1.2446, "step": 5650 }, { "epoch": 6.75, "learning_rate": 3.0571251986579554e-05, "loss": 1.2586, "step": 5660 }, { "epoch": 6.76, "learning_rate": 3.052710577432457e-05, "loss": 1.2234, "step": 5670 }, { "epoch": 6.77, "learning_rate": 3.0482959562069573e-05, "loss": 1.2368, "step": 5680 }, { "epoch": 6.78, "learning_rate": 3.0438813349814588e-05, "loss": 1.2462, "step": 5690 }, { "epoch": 6.79, "learning_rate": 3.0394667137559603e-05, "loss": 1.2774, "step": 5700 }, { "epoch": 6.81, "learning_rate": 3.035052092530461e-05, "loss": 1.2202, "step": 5710 }, { "epoch": 6.82, "learning_rate": 3.0306374713049622e-05, "loss": 1.2673, "step": 5720 }, { "epoch": 6.83, "learning_rate": 3.0262228500794633e-05, "loss": 1.2337, "step": 5730 }, { "epoch": 6.84, "learning_rate": 3.0218082288539644e-05, "loss": 1.2289, "step": 5740 }, { "epoch": 6.85, "learning_rate": 3.0173936076284652e-05, "loss": 1.2486, "step": 5750 }, { "epoch": 6.87, "learning_rate": 3.0129789864029667e-05, "loss": 1.2476, "step": 5760 }, { "epoch": 6.88, "learning_rate": 3.008564365177468e-05, "loss": 1.2412, "step": 5770 }, { "epoch": 6.89, "learning_rate": 3.004149743951969e-05, "loss": 1.2322, "step": 5780 }, { "epoch": 6.9, "learning_rate": 2.9997351227264704e-05, "loss": 1.2127, "step": 5790 }, { "epoch": 6.91, "learning_rate": 2.9953205015009712e-05, "loss": 1.2725, "step": 5800 }, { "epoch": 6.92, "learning_rate": 2.9909058802754723e-05, "loss": 1.2347, "step": 5810 }, { "epoch": 6.94, "learning_rate": 2.9864912590499738e-05, "loss": 1.2351, "step": 5820 }, { "epoch": 6.95, "learning_rate": 2.9820766378244746e-05, "loss": 1.2328, "step": 5830 }, { "epoch": 6.96, "learning_rate": 2.977662016598976e-05, "loss": 1.2027, "step": 5840 }, { "epoch": 6.97, "learning_rate": 2.9732473953734768e-05, "loss": 1.2175, "step": 5850 }, { "epoch": 6.98, "learning_rate": 2.9688327741479783e-05, "loss": 1.1781, "step": 5860 }, { "epoch": 7.0, "learning_rate": 2.9644181529224798e-05, "loss": 1.1953, "step": 5870 }, { "epoch": 7.0, "eval_accuracy": 0.732347683598987, "eval_f1": 0.7077241444760507, "eval_loss": 1.09598970413208, "eval_precision": 0.7217957955270089, "eval_recall": 0.732347683598987, "eval_runtime": 99.334, "eval_samples_per_second": 270.32, "eval_steps_per_second": 4.228, "step": 5873 }, { "epoch": 7.01, "learning_rate": 2.9600035316969805e-05, "loss": 1.181, "step": 5880 }, { "epoch": 7.02, "learning_rate": 2.9555889104714817e-05, "loss": 1.2091, "step": 5890 }, { "epoch": 7.03, "learning_rate": 2.9511742892459825e-05, "loss": 1.2003, "step": 5900 }, { "epoch": 7.04, "learning_rate": 2.946759668020484e-05, "loss": 1.143, "step": 5910 }, { "epoch": 7.06, "learning_rate": 2.9423450467949854e-05, "loss": 1.1644, "step": 5920 }, { "epoch": 7.07, "learning_rate": 2.9379304255694862e-05, "loss": 1.2121, "step": 5930 }, { "epoch": 7.08, "learning_rate": 2.9335158043439876e-05, "loss": 1.1864, "step": 5940 }, { "epoch": 7.09, "learning_rate": 2.9291011831184884e-05, "loss": 1.1574, "step": 5950 }, { "epoch": 7.1, "learning_rate": 2.92468656189299e-05, "loss": 1.1473, "step": 5960 }, { "epoch": 7.12, "learning_rate": 2.920271940667491e-05, "loss": 1.1431, "step": 5970 }, { "epoch": 7.13, "learning_rate": 2.9158573194419918e-05, "loss": 1.1197, "step": 5980 }, { "epoch": 7.14, "learning_rate": 2.9114426982164933e-05, "loss": 1.1862, "step": 5990 }, { "epoch": 7.15, "learning_rate": 2.907028076990994e-05, "loss": 1.1715, "step": 6000 }, { "epoch": 7.16, "learning_rate": 2.9026134557654955e-05, "loss": 1.1822, "step": 6010 }, { "epoch": 7.18, "learning_rate": 2.898198834539997e-05, "loss": 1.1529, "step": 6020 }, { "epoch": 7.19, "learning_rate": 2.8937842133144978e-05, "loss": 1.1397, "step": 6030 }, { "epoch": 7.2, "learning_rate": 2.889369592088999e-05, "loss": 1.1757, "step": 6040 }, { "epoch": 7.21, "learning_rate": 2.8849549708635e-05, "loss": 1.1489, "step": 6050 }, { "epoch": 7.22, "learning_rate": 2.880540349638001e-05, "loss": 1.1669, "step": 6060 }, { "epoch": 7.23, "learning_rate": 2.8761257284125026e-05, "loss": 1.1342, "step": 6070 }, { "epoch": 7.25, "learning_rate": 2.8717111071870034e-05, "loss": 1.1516, "step": 6080 }, { "epoch": 7.26, "learning_rate": 2.867296485961505e-05, "loss": 1.134, "step": 6090 }, { "epoch": 7.27, "learning_rate": 2.8628818647360057e-05, "loss": 1.1583, "step": 6100 }, { "epoch": 7.28, "learning_rate": 2.858467243510507e-05, "loss": 1.1387, "step": 6110 }, { "epoch": 7.29, "learning_rate": 2.8540526222850082e-05, "loss": 1.1366, "step": 6120 }, { "epoch": 7.31, "learning_rate": 2.849638001059509e-05, "loss": 1.1627, "step": 6130 }, { "epoch": 7.32, "learning_rate": 2.8452233798340105e-05, "loss": 1.1384, "step": 6140 }, { "epoch": 7.33, "learning_rate": 2.8408087586085113e-05, "loss": 1.15, "step": 6150 }, { "epoch": 7.34, "learning_rate": 2.8363941373830128e-05, "loss": 1.1452, "step": 6160 }, { "epoch": 7.35, "learning_rate": 2.8319795161575135e-05, "loss": 1.1839, "step": 6170 }, { "epoch": 7.37, "learning_rate": 2.827564894932015e-05, "loss": 1.1116, "step": 6180 }, { "epoch": 7.38, "learning_rate": 2.8231502737065165e-05, "loss": 1.1167, "step": 6190 }, { "epoch": 7.39, "learning_rate": 2.8187356524810173e-05, "loss": 1.1393, "step": 6200 }, { "epoch": 7.4, "learning_rate": 2.8143210312555184e-05, "loss": 1.1897, "step": 6210 }, { "epoch": 7.41, "learning_rate": 2.8099064100300192e-05, "loss": 1.0915, "step": 6220 }, { "epoch": 7.43, "learning_rate": 2.8054917888045206e-05, "loss": 1.1432, "step": 6230 }, { "epoch": 7.44, "learning_rate": 2.801077167579022e-05, "loss": 1.123, "step": 6240 }, { "epoch": 7.45, "learning_rate": 2.796662546353523e-05, "loss": 1.1394, "step": 6250 }, { "epoch": 7.46, "learning_rate": 2.7922479251280244e-05, "loss": 1.1488, "step": 6260 }, { "epoch": 7.47, "learning_rate": 2.787833303902525e-05, "loss": 1.1482, "step": 6270 }, { "epoch": 7.48, "learning_rate": 2.7834186826770266e-05, "loss": 1.1294, "step": 6280 }, { "epoch": 7.5, "learning_rate": 2.7790040614515277e-05, "loss": 1.1433, "step": 6290 }, { "epoch": 7.51, "learning_rate": 2.7745894402260285e-05, "loss": 1.0743, "step": 6300 }, { "epoch": 7.52, "learning_rate": 2.77017481900053e-05, "loss": 1.1177, "step": 6310 }, { "epoch": 7.53, "learning_rate": 2.7657601977750308e-05, "loss": 1.1416, "step": 6320 }, { "epoch": 7.54, "learning_rate": 2.7613455765495322e-05, "loss": 1.082, "step": 6330 }, { "epoch": 7.56, "learning_rate": 2.7569309553240337e-05, "loss": 1.1172, "step": 6340 }, { "epoch": 7.57, "learning_rate": 2.7525163340985345e-05, "loss": 1.0803, "step": 6350 }, { "epoch": 7.58, "learning_rate": 2.7481017128730356e-05, "loss": 1.1274, "step": 6360 }, { "epoch": 7.59, "learning_rate": 2.7436870916475364e-05, "loss": 1.1648, "step": 6370 }, { "epoch": 7.6, "learning_rate": 2.739272470422038e-05, "loss": 1.1242, "step": 6380 }, { "epoch": 7.62, "learning_rate": 2.7348578491965393e-05, "loss": 1.0659, "step": 6390 }, { "epoch": 7.63, "learning_rate": 2.73044322797104e-05, "loss": 1.0619, "step": 6400 }, { "epoch": 7.64, "learning_rate": 2.7260286067455416e-05, "loss": 1.1214, "step": 6410 }, { "epoch": 7.65, "learning_rate": 2.7216139855200424e-05, "loss": 1.1358, "step": 6420 }, { "epoch": 7.66, "learning_rate": 2.717199364294544e-05, "loss": 1.0954, "step": 6430 }, { "epoch": 7.68, "learning_rate": 2.712784743069045e-05, "loss": 1.138, "step": 6440 }, { "epoch": 7.69, "learning_rate": 2.7083701218435457e-05, "loss": 1.1409, "step": 6450 }, { "epoch": 7.7, "learning_rate": 2.7039555006180472e-05, "loss": 1.0962, "step": 6460 }, { "epoch": 7.71, "learning_rate": 2.699540879392548e-05, "loss": 1.1233, "step": 6470 }, { "epoch": 7.72, "learning_rate": 2.6951262581670495e-05, "loss": 1.0884, "step": 6480 }, { "epoch": 7.74, "learning_rate": 2.690711636941551e-05, "loss": 1.0763, "step": 6490 }, { "epoch": 7.75, "learning_rate": 2.6862970157160517e-05, "loss": 1.0832, "step": 6500 }, { "epoch": 7.76, "learning_rate": 2.681882394490553e-05, "loss": 1.0683, "step": 6510 }, { "epoch": 7.77, "learning_rate": 2.677467773265054e-05, "loss": 1.1104, "step": 6520 }, { "epoch": 7.78, "learning_rate": 2.673053152039555e-05, "loss": 1.125, "step": 6530 }, { "epoch": 7.79, "learning_rate": 2.668638530814056e-05, "loss": 1.1071, "step": 6540 }, { "epoch": 7.81, "learning_rate": 2.6642239095885573e-05, "loss": 1.1547, "step": 6550 }, { "epoch": 7.82, "learning_rate": 2.6598092883630588e-05, "loss": 1.0844, "step": 6560 }, { "epoch": 7.83, "learning_rate": 2.6553946671375596e-05, "loss": 1.1004, "step": 6570 }, { "epoch": 7.84, "learning_rate": 2.650980045912061e-05, "loss": 1.092, "step": 6580 }, { "epoch": 7.85, "learning_rate": 2.646565424686562e-05, "loss": 1.1057, "step": 6590 }, { "epoch": 7.87, "learning_rate": 2.642150803461063e-05, "loss": 1.0887, "step": 6600 }, { "epoch": 7.88, "learning_rate": 2.6377361822355644e-05, "loss": 1.07, "step": 6610 }, { "epoch": 7.89, "learning_rate": 2.6333215610100652e-05, "loss": 1.0863, "step": 6620 }, { "epoch": 7.9, "learning_rate": 2.6289069397845667e-05, "loss": 1.1196, "step": 6630 }, { "epoch": 7.91, "learning_rate": 2.6244923185590675e-05, "loss": 1.0892, "step": 6640 }, { "epoch": 7.93, "learning_rate": 2.620077697333569e-05, "loss": 1.1039, "step": 6650 }, { "epoch": 7.94, "learning_rate": 2.6156630761080704e-05, "loss": 1.0494, "step": 6660 }, { "epoch": 7.95, "learning_rate": 2.6112484548825712e-05, "loss": 1.0824, "step": 6670 }, { "epoch": 7.96, "learning_rate": 2.6068338336570723e-05, "loss": 1.1038, "step": 6680 }, { "epoch": 7.97, "learning_rate": 2.602419212431573e-05, "loss": 1.0558, "step": 6690 }, { "epoch": 7.99, "learning_rate": 2.5980045912060746e-05, "loss": 1.0946, "step": 6700 }, { "epoch": 8.0, "learning_rate": 2.593589969980576e-05, "loss": 1.058, "step": 6710 }, { "epoch": 8.0, "eval_accuracy": 0.7548041114255921, "eval_f1": 0.7350043558463751, "eval_loss": 0.9828243255615234, "eval_precision": 0.7440513899220582, "eval_recall": 0.7548041114255921, "eval_runtime": 100.0502, "eval_samples_per_second": 268.385, "eval_steps_per_second": 4.198, "step": 6712 }, { "epoch": 8.01, "learning_rate": 2.5891753487550768e-05, "loss": 1.042, "step": 6720 }, { "epoch": 8.02, "learning_rate": 2.5847607275295783e-05, "loss": 1.0727, "step": 6730 }, { "epoch": 8.03, "learning_rate": 2.580346106304079e-05, "loss": 1.0546, "step": 6740 }, { "epoch": 8.05, "learning_rate": 2.5759314850785805e-05, "loss": 1.0544, "step": 6750 }, { "epoch": 8.06, "learning_rate": 2.5715168638530817e-05, "loss": 1.0355, "step": 6760 }, { "epoch": 8.07, "learning_rate": 2.5671022426275825e-05, "loss": 0.9754, "step": 6770 }, { "epoch": 8.08, "learning_rate": 2.562687621402084e-05, "loss": 1.0294, "step": 6780 }, { "epoch": 8.09, "learning_rate": 2.5582730001765847e-05, "loss": 1.0225, "step": 6790 }, { "epoch": 8.1, "learning_rate": 2.5538583789510862e-05, "loss": 1.0651, "step": 6800 }, { "epoch": 8.12, "learning_rate": 2.5494437577255876e-05, "loss": 1.0233, "step": 6810 }, { "epoch": 8.13, "learning_rate": 2.5450291365000884e-05, "loss": 1.0067, "step": 6820 }, { "epoch": 8.14, "learning_rate": 2.5406145152745896e-05, "loss": 1.0843, "step": 6830 }, { "epoch": 8.15, "learning_rate": 2.5361998940490907e-05, "loss": 1.0345, "step": 6840 }, { "epoch": 8.16, "learning_rate": 2.5317852728235918e-05, "loss": 1.0876, "step": 6850 }, { "epoch": 8.18, "learning_rate": 2.5273706515980933e-05, "loss": 1.0309, "step": 6860 }, { "epoch": 8.19, "learning_rate": 2.522956030372594e-05, "loss": 1.0354, "step": 6870 }, { "epoch": 8.2, "learning_rate": 2.5185414091470955e-05, "loss": 0.9963, "step": 6880 }, { "epoch": 8.21, "learning_rate": 2.5141267879215963e-05, "loss": 1.0118, "step": 6890 }, { "epoch": 8.22, "learning_rate": 2.5097121666960978e-05, "loss": 1.0476, "step": 6900 }, { "epoch": 8.24, "learning_rate": 2.5052975454705986e-05, "loss": 1.079, "step": 6910 }, { "epoch": 8.25, "learning_rate": 2.5008829242450997e-05, "loss": 0.9995, "step": 6920 }, { "epoch": 8.26, "learning_rate": 2.4964683030196008e-05, "loss": 1.061, "step": 6930 }, { "epoch": 8.27, "learning_rate": 2.4920536817941023e-05, "loss": 1.0008, "step": 6940 }, { "epoch": 8.28, "learning_rate": 2.4876390605686034e-05, "loss": 1.0351, "step": 6950 }, { "epoch": 8.3, "learning_rate": 2.4832244393431045e-05, "loss": 1.062, "step": 6960 }, { "epoch": 8.31, "learning_rate": 2.4788098181176057e-05, "loss": 0.9703, "step": 6970 }, { "epoch": 8.32, "learning_rate": 2.4743951968921068e-05, "loss": 1.0638, "step": 6980 }, { "epoch": 8.33, "learning_rate": 2.469980575666608e-05, "loss": 1.0217, "step": 6990 }, { "epoch": 8.34, "learning_rate": 2.465565954441109e-05, "loss": 1.0515, "step": 7000 }, { "epoch": 8.36, "learning_rate": 2.46115133321561e-05, "loss": 0.9567, "step": 7010 }, { "epoch": 8.37, "learning_rate": 2.4567367119901113e-05, "loss": 1.0541, "step": 7020 }, { "epoch": 8.38, "learning_rate": 2.4523220907646124e-05, "loss": 1.0051, "step": 7030 }, { "epoch": 8.39, "learning_rate": 2.4479074695391135e-05, "loss": 0.9657, "step": 7040 }, { "epoch": 8.4, "learning_rate": 2.443492848313615e-05, "loss": 1.0323, "step": 7050 }, { "epoch": 8.41, "learning_rate": 2.439078227088116e-05, "loss": 1.0378, "step": 7060 }, { "epoch": 8.43, "learning_rate": 2.4346636058626173e-05, "loss": 0.974, "step": 7070 }, { "epoch": 8.44, "learning_rate": 2.430248984637118e-05, "loss": 1.0138, "step": 7080 }, { "epoch": 8.45, "learning_rate": 2.4258343634116192e-05, "loss": 1.0023, "step": 7090 }, { "epoch": 8.46, "learning_rate": 2.4214197421861206e-05, "loss": 1.0473, "step": 7100 }, { "epoch": 8.47, "learning_rate": 2.4170051209606218e-05, "loss": 0.9473, "step": 7110 }, { "epoch": 8.49, "learning_rate": 2.412590499735123e-05, "loss": 1.0078, "step": 7120 }, { "epoch": 8.5, "learning_rate": 2.408175878509624e-05, "loss": 1.0177, "step": 7130 }, { "epoch": 8.51, "learning_rate": 2.403761257284125e-05, "loss": 0.9631, "step": 7140 }, { "epoch": 8.52, "learning_rate": 2.3993466360586263e-05, "loss": 0.9581, "step": 7150 }, { "epoch": 8.53, "learning_rate": 2.3949320148331274e-05, "loss": 1.0014, "step": 7160 }, { "epoch": 8.55, "learning_rate": 2.3905173936076285e-05, "loss": 0.9788, "step": 7170 }, { "epoch": 8.56, "learning_rate": 2.3861027723821296e-05, "loss": 0.9712, "step": 7180 }, { "epoch": 8.57, "learning_rate": 2.3816881511566308e-05, "loss": 0.9748, "step": 7190 }, { "epoch": 8.58, "learning_rate": 2.377273529931132e-05, "loss": 0.9946, "step": 7200 }, { "epoch": 8.59, "learning_rate": 2.3728589087056334e-05, "loss": 0.9634, "step": 7210 }, { "epoch": 8.61, "learning_rate": 2.3684442874801345e-05, "loss": 1.0009, "step": 7220 }, { "epoch": 8.62, "learning_rate": 2.3640296662546356e-05, "loss": 0.9781, "step": 7230 }, { "epoch": 8.63, "learning_rate": 2.3596150450291364e-05, "loss": 0.9896, "step": 7240 }, { "epoch": 8.64, "learning_rate": 2.3552004238036375e-05, "loss": 0.975, "step": 7250 }, { "epoch": 8.65, "learning_rate": 2.350785802578139e-05, "loss": 1.0166, "step": 7260 }, { "epoch": 8.66, "learning_rate": 2.34637118135264e-05, "loss": 0.9855, "step": 7270 }, { "epoch": 8.68, "learning_rate": 2.3419565601271412e-05, "loss": 0.9839, "step": 7280 }, { "epoch": 8.69, "learning_rate": 2.3375419389016424e-05, "loss": 0.9924, "step": 7290 }, { "epoch": 8.7, "learning_rate": 2.3331273176761435e-05, "loss": 0.9736, "step": 7300 }, { "epoch": 8.71, "learning_rate": 2.3287126964506446e-05, "loss": 1.0229, "step": 7310 }, { "epoch": 8.72, "learning_rate": 2.3242980752251458e-05, "loss": 1.0075, "step": 7320 }, { "epoch": 8.74, "learning_rate": 2.319883453999647e-05, "loss": 1.065, "step": 7330 }, { "epoch": 8.75, "learning_rate": 2.315468832774148e-05, "loss": 0.9951, "step": 7340 }, { "epoch": 8.76, "learning_rate": 2.311054211548649e-05, "loss": 0.985, "step": 7350 }, { "epoch": 8.77, "learning_rate": 2.3066395903231503e-05, "loss": 1.0109, "step": 7360 }, { "epoch": 8.78, "learning_rate": 2.3022249690976517e-05, "loss": 0.9473, "step": 7370 }, { "epoch": 8.8, "learning_rate": 2.297810347872153e-05, "loss": 0.9864, "step": 7380 }, { "epoch": 8.81, "learning_rate": 2.293395726646654e-05, "loss": 0.9472, "step": 7390 }, { "epoch": 8.82, "learning_rate": 2.2889811054211548e-05, "loss": 0.9759, "step": 7400 }, { "epoch": 8.83, "learning_rate": 2.284566484195656e-05, "loss": 0.9271, "step": 7410 }, { "epoch": 8.84, "learning_rate": 2.2801518629701574e-05, "loss": 1.0013, "step": 7420 }, { "epoch": 8.86, "learning_rate": 2.2757372417446585e-05, "loss": 0.9603, "step": 7430 }, { "epoch": 8.87, "learning_rate": 2.2713226205191596e-05, "loss": 0.9903, "step": 7440 }, { "epoch": 8.88, "learning_rate": 2.2669079992936607e-05, "loss": 0.9983, "step": 7450 }, { "epoch": 8.89, "learning_rate": 2.262493378068162e-05, "loss": 0.9947, "step": 7460 }, { "epoch": 8.9, "learning_rate": 2.258078756842663e-05, "loss": 0.9341, "step": 7470 }, { "epoch": 8.92, "learning_rate": 2.253664135617164e-05, "loss": 0.9749, "step": 7480 }, { "epoch": 8.93, "learning_rate": 2.2492495143916652e-05, "loss": 0.9877, "step": 7490 }, { "epoch": 8.94, "learning_rate": 2.2448348931661664e-05, "loss": 1.0253, "step": 7500 }, { "epoch": 8.95, "learning_rate": 2.2404202719406675e-05, "loss": 0.9495, "step": 7510 }, { "epoch": 8.96, "learning_rate": 2.236005650715169e-05, "loss": 0.9616, "step": 7520 }, { "epoch": 8.97, "learning_rate": 2.23159102948967e-05, "loss": 0.9267, "step": 7530 }, { "epoch": 8.99, "learning_rate": 2.2271764082641712e-05, "loss": 0.957, "step": 7540 }, { "epoch": 9.0, "learning_rate": 2.222761787038672e-05, "loss": 0.9691, "step": 7550 }, { "epoch": 9.0, "eval_accuracy": 0.7718233278712945, "eval_f1": 0.7536014274909693, "eval_loss": 0.9018393754959106, "eval_precision": 0.7615836668708097, "eval_recall": 0.7718233278712945, "eval_runtime": 100.4679, "eval_samples_per_second": 267.269, "eval_steps_per_second": 4.18, "step": 7551 }, { "epoch": 9.01, "learning_rate": 2.218347165813173e-05, "loss": 0.9592, "step": 7560 }, { "epoch": 9.02, "learning_rate": 2.2139325445876742e-05, "loss": 0.8877, "step": 7570 }, { "epoch": 9.03, "learning_rate": 2.2095179233621757e-05, "loss": 0.9331, "step": 7580 }, { "epoch": 9.05, "learning_rate": 2.205103302136677e-05, "loss": 0.9374, "step": 7590 }, { "epoch": 9.06, "learning_rate": 2.200688680911178e-05, "loss": 0.9148, "step": 7600 }, { "epoch": 9.07, "learning_rate": 2.196274059685679e-05, "loss": 0.9694, "step": 7610 }, { "epoch": 9.08, "learning_rate": 2.1918594384601802e-05, "loss": 0.8986, "step": 7620 }, { "epoch": 9.09, "learning_rate": 2.1874448172346813e-05, "loss": 0.9645, "step": 7630 }, { "epoch": 9.11, "learning_rate": 2.1830301960091825e-05, "loss": 0.9577, "step": 7640 }, { "epoch": 9.12, "learning_rate": 2.1786155747836836e-05, "loss": 0.876, "step": 7650 }, { "epoch": 9.13, "learning_rate": 2.1742009535581847e-05, "loss": 0.9324, "step": 7660 }, { "epoch": 9.14, "learning_rate": 2.169786332332686e-05, "loss": 0.9094, "step": 7670 }, { "epoch": 9.15, "learning_rate": 2.1653717111071873e-05, "loss": 0.9311, "step": 7680 }, { "epoch": 9.17, "learning_rate": 2.1609570898816884e-05, "loss": 0.9111, "step": 7690 }, { "epoch": 9.18, "learning_rate": 2.1565424686561896e-05, "loss": 0.9487, "step": 7700 }, { "epoch": 9.19, "learning_rate": 2.1521278474306903e-05, "loss": 0.9526, "step": 7710 }, { "epoch": 9.2, "learning_rate": 2.1477132262051915e-05, "loss": 0.9594, "step": 7720 }, { "epoch": 9.21, "learning_rate": 2.143298604979693e-05, "loss": 1.0186, "step": 7730 }, { "epoch": 9.23, "learning_rate": 2.138883983754194e-05, "loss": 0.8755, "step": 7740 }, { "epoch": 9.24, "learning_rate": 2.1344693625286952e-05, "loss": 0.9704, "step": 7750 }, { "epoch": 9.25, "learning_rate": 2.1300547413031963e-05, "loss": 0.9313, "step": 7760 }, { "epoch": 9.26, "learning_rate": 2.1256401200776974e-05, "loss": 0.9476, "step": 7770 }, { "epoch": 9.27, "learning_rate": 2.1212254988521986e-05, "loss": 0.9674, "step": 7780 }, { "epoch": 9.28, "learning_rate": 2.1168108776266997e-05, "loss": 0.9253, "step": 7790 }, { "epoch": 9.3, "learning_rate": 2.1123962564012008e-05, "loss": 0.9806, "step": 7800 }, { "epoch": 9.31, "learning_rate": 2.107981635175702e-05, "loss": 0.9188, "step": 7810 }, { "epoch": 9.32, "learning_rate": 2.103567013950203e-05, "loss": 0.9386, "step": 7820 }, { "epoch": 9.33, "learning_rate": 2.0991523927247042e-05, "loss": 0.9111, "step": 7830 }, { "epoch": 9.34, "learning_rate": 2.0947377714992057e-05, "loss": 0.912, "step": 7840 }, { "epoch": 9.36, "learning_rate": 2.0903231502737068e-05, "loss": 0.8859, "step": 7850 }, { "epoch": 9.37, "learning_rate": 2.085908529048208e-05, "loss": 0.954, "step": 7860 }, { "epoch": 9.38, "learning_rate": 2.0814939078227087e-05, "loss": 0.9201, "step": 7870 }, { "epoch": 9.39, "learning_rate": 2.07707928659721e-05, "loss": 0.9238, "step": 7880 }, { "epoch": 9.4, "learning_rate": 2.0726646653717113e-05, "loss": 0.9186, "step": 7890 }, { "epoch": 9.42, "learning_rate": 2.0682500441462124e-05, "loss": 0.9529, "step": 7900 }, { "epoch": 9.43, "learning_rate": 2.0638354229207135e-05, "loss": 0.8942, "step": 7910 }, { "epoch": 9.44, "learning_rate": 2.0594208016952147e-05, "loss": 0.9486, "step": 7920 }, { "epoch": 9.45, "learning_rate": 2.0550061804697158e-05, "loss": 0.9249, "step": 7930 }, { "epoch": 9.46, "learning_rate": 2.050591559244217e-05, "loss": 0.9115, "step": 7940 }, { "epoch": 9.48, "learning_rate": 2.046176938018718e-05, "loss": 0.8388, "step": 7950 }, { "epoch": 9.49, "learning_rate": 2.0417623167932192e-05, "loss": 0.9013, "step": 7960 }, { "epoch": 9.5, "learning_rate": 2.0373476955677203e-05, "loss": 0.8917, "step": 7970 }, { "epoch": 9.51, "learning_rate": 2.0329330743422214e-05, "loss": 0.913, "step": 7980 }, { "epoch": 9.52, "learning_rate": 2.0285184531167226e-05, "loss": 0.9192, "step": 7990 }, { "epoch": 9.54, "learning_rate": 2.024103831891224e-05, "loss": 0.9011, "step": 8000 }, { "epoch": 9.55, "learning_rate": 2.019689210665725e-05, "loss": 0.91, "step": 8010 }, { "epoch": 9.56, "learning_rate": 2.0152745894402263e-05, "loss": 0.8934, "step": 8020 }, { "epoch": 9.57, "learning_rate": 2.010859968214727e-05, "loss": 0.9183, "step": 8030 }, { "epoch": 9.58, "learning_rate": 2.0064453469892282e-05, "loss": 0.8899, "step": 8040 }, { "epoch": 9.59, "learning_rate": 2.0020307257637297e-05, "loss": 0.8904, "step": 8050 }, { "epoch": 9.61, "learning_rate": 1.9976161045382308e-05, "loss": 0.8728, "step": 8060 }, { "epoch": 9.62, "learning_rate": 1.993201483312732e-05, "loss": 0.933, "step": 8070 }, { "epoch": 9.63, "learning_rate": 1.988786862087233e-05, "loss": 0.8514, "step": 8080 }, { "epoch": 9.64, "learning_rate": 1.984372240861734e-05, "loss": 0.9437, "step": 8090 }, { "epoch": 9.65, "learning_rate": 1.9799576196362353e-05, "loss": 0.9064, "step": 8100 }, { "epoch": 9.67, "learning_rate": 1.9755429984107364e-05, "loss": 0.8757, "step": 8110 }, { "epoch": 9.68, "learning_rate": 1.9711283771852375e-05, "loss": 0.863, "step": 8120 }, { "epoch": 9.69, "learning_rate": 1.9667137559597387e-05, "loss": 0.9145, "step": 8130 }, { "epoch": 9.7, "learning_rate": 1.9622991347342398e-05, "loss": 0.9044, "step": 8140 }, { "epoch": 9.71, "learning_rate": 1.957884513508741e-05, "loss": 0.8805, "step": 8150 }, { "epoch": 9.73, "learning_rate": 1.9534698922832424e-05, "loss": 0.9111, "step": 8160 }, { "epoch": 9.74, "learning_rate": 1.9490552710577435e-05, "loss": 0.9262, "step": 8170 }, { "epoch": 9.75, "learning_rate": 1.9446406498322446e-05, "loss": 0.9375, "step": 8180 }, { "epoch": 9.76, "learning_rate": 1.9402260286067454e-05, "loss": 0.8493, "step": 8190 }, { "epoch": 9.77, "learning_rate": 1.9358114073812465e-05, "loss": 0.9687, "step": 8200 }, { "epoch": 9.79, "learning_rate": 1.931396786155748e-05, "loss": 0.911, "step": 8210 }, { "epoch": 9.8, "learning_rate": 1.926982164930249e-05, "loss": 0.837, "step": 8220 }, { "epoch": 9.81, "learning_rate": 1.9225675437047503e-05, "loss": 0.9345, "step": 8230 }, { "epoch": 9.82, "learning_rate": 1.9181529224792514e-05, "loss": 0.8811, "step": 8240 }, { "epoch": 9.83, "learning_rate": 1.9137383012537525e-05, "loss": 0.8878, "step": 8250 }, { "epoch": 9.84, "learning_rate": 1.9093236800282536e-05, "loss": 0.8808, "step": 8260 }, { "epoch": 9.86, "learning_rate": 1.9049090588027548e-05, "loss": 0.8808, "step": 8270 }, { "epoch": 9.87, "learning_rate": 1.900494437577256e-05, "loss": 0.9018, "step": 8280 }, { "epoch": 9.88, "learning_rate": 1.896079816351757e-05, "loss": 0.9156, "step": 8290 }, { "epoch": 9.89, "learning_rate": 1.891665195126258e-05, "loss": 0.8381, "step": 8300 }, { "epoch": 9.9, "learning_rate": 1.8872505739007593e-05, "loss": 0.8914, "step": 8310 }, { "epoch": 9.92, "learning_rate": 1.8828359526752607e-05, "loss": 0.8733, "step": 8320 }, { "epoch": 9.93, "learning_rate": 1.878421331449762e-05, "loss": 0.9012, "step": 8330 }, { "epoch": 9.94, "learning_rate": 1.874006710224263e-05, "loss": 0.9081, "step": 8340 }, { "epoch": 9.95, "learning_rate": 1.8695920889987638e-05, "loss": 0.8976, "step": 8350 }, { "epoch": 9.96, "learning_rate": 1.865177467773265e-05, "loss": 0.9017, "step": 8360 }, { "epoch": 9.98, "learning_rate": 1.8607628465477664e-05, "loss": 0.8704, "step": 8370 }, { "epoch": 9.99, "learning_rate": 1.8563482253222675e-05, "loss": 0.8366, "step": 8380 }, { "epoch": 10.0, "learning_rate": 1.8519336040967686e-05, "loss": 0.8757, "step": 8390 }, { "epoch": 10.0, "eval_accuracy": 0.7892521972292567, "eval_f1": 0.7755970684102179, "eval_loss": 0.838049054145813, "eval_precision": 0.780591813364183, "eval_recall": 0.7892521972292567, "eval_runtime": 98.9975, "eval_samples_per_second": 271.239, "eval_steps_per_second": 4.243, "step": 8390 }, { "epoch": 10.01, "learning_rate": 1.8475189828712697e-05, "loss": 0.8931, "step": 8400 }, { "epoch": 10.02, "learning_rate": 1.843104361645771e-05, "loss": 0.901, "step": 8410 }, { "epoch": 10.04, "learning_rate": 1.838689740420272e-05, "loss": 0.8172, "step": 8420 }, { "epoch": 10.05, "learning_rate": 1.834275119194773e-05, "loss": 0.8767, "step": 8430 }, { "epoch": 10.06, "learning_rate": 1.8298604979692742e-05, "loss": 0.8376, "step": 8440 }, { "epoch": 10.07, "learning_rate": 1.8254458767437754e-05, "loss": 0.8581, "step": 8450 }, { "epoch": 10.08, "learning_rate": 1.8210312555182765e-05, "loss": 0.8818, "step": 8460 }, { "epoch": 10.1, "learning_rate": 1.816616634292778e-05, "loss": 0.8747, "step": 8470 }, { "epoch": 10.11, "learning_rate": 1.812202013067279e-05, "loss": 0.8558, "step": 8480 }, { "epoch": 10.12, "learning_rate": 1.8077873918417802e-05, "loss": 0.8284, "step": 8490 }, { "epoch": 10.13, "learning_rate": 1.8033727706162813e-05, "loss": 0.8434, "step": 8500 }, { "epoch": 10.14, "learning_rate": 1.798958149390782e-05, "loss": 0.83, "step": 8510 }, { "epoch": 10.15, "learning_rate": 1.7945435281652833e-05, "loss": 0.8254, "step": 8520 }, { "epoch": 10.17, "learning_rate": 1.7901289069397847e-05, "loss": 0.8608, "step": 8530 }, { "epoch": 10.18, "learning_rate": 1.785714285714286e-05, "loss": 0.8495, "step": 8540 }, { "epoch": 10.19, "learning_rate": 1.781299664488787e-05, "loss": 0.8754, "step": 8550 }, { "epoch": 10.2, "learning_rate": 1.776885043263288e-05, "loss": 0.8561, "step": 8560 }, { "epoch": 10.21, "learning_rate": 1.7724704220377892e-05, "loss": 0.8647, "step": 8570 }, { "epoch": 10.23, "learning_rate": 1.7680558008122904e-05, "loss": 0.8261, "step": 8580 }, { "epoch": 10.24, "learning_rate": 1.7636411795867915e-05, "loss": 0.8337, "step": 8590 }, { "epoch": 10.25, "learning_rate": 1.7592265583612926e-05, "loss": 0.8431, "step": 8600 }, { "epoch": 10.26, "learning_rate": 1.7548119371357937e-05, "loss": 0.8226, "step": 8610 }, { "epoch": 10.27, "learning_rate": 1.750397315910295e-05, "loss": 0.8261, "step": 8620 }, { "epoch": 10.29, "learning_rate": 1.7459826946847963e-05, "loss": 0.8508, "step": 8630 }, { "epoch": 10.3, "learning_rate": 1.7415680734592974e-05, "loss": 0.8536, "step": 8640 }, { "epoch": 10.31, "learning_rate": 1.7371534522337986e-05, "loss": 0.8555, "step": 8650 }, { "epoch": 10.32, "learning_rate": 1.7327388310082997e-05, "loss": 0.8368, "step": 8660 }, { "epoch": 10.33, "learning_rate": 1.7283242097828005e-05, "loss": 0.8585, "step": 8670 }, { "epoch": 10.35, "learning_rate": 1.723909588557302e-05, "loss": 0.869, "step": 8680 }, { "epoch": 10.36, "learning_rate": 1.719494967331803e-05, "loss": 0.8741, "step": 8690 }, { "epoch": 10.37, "learning_rate": 1.7150803461063042e-05, "loss": 0.7962, "step": 8700 }, { "epoch": 10.38, "learning_rate": 1.7106657248808053e-05, "loss": 0.8805, "step": 8710 }, { "epoch": 10.39, "learning_rate": 1.7062511036553065e-05, "loss": 0.8335, "step": 8720 }, { "epoch": 10.41, "learning_rate": 1.7018364824298076e-05, "loss": 0.8563, "step": 8730 }, { "epoch": 10.42, "learning_rate": 1.6974218612043087e-05, "loss": 0.8335, "step": 8740 }, { "epoch": 10.43, "learning_rate": 1.69300723997881e-05, "loss": 0.8624, "step": 8750 }, { "epoch": 10.44, "learning_rate": 1.688592618753311e-05, "loss": 0.835, "step": 8760 }, { "epoch": 10.45, "learning_rate": 1.684177997527812e-05, "loss": 0.8292, "step": 8770 }, { "epoch": 10.46, "learning_rate": 1.6797633763023132e-05, "loss": 0.8683, "step": 8780 }, { "epoch": 10.48, "learning_rate": 1.6753487550768147e-05, "loss": 0.8817, "step": 8790 }, { "epoch": 10.49, "learning_rate": 1.6709341338513158e-05, "loss": 0.8308, "step": 8800 }, { "epoch": 10.5, "learning_rate": 1.666519512625817e-05, "loss": 0.8214, "step": 8810 }, { "epoch": 10.51, "learning_rate": 1.6621048914003177e-05, "loss": 0.8095, "step": 8820 }, { "epoch": 10.52, "learning_rate": 1.657690270174819e-05, "loss": 0.8254, "step": 8830 }, { "epoch": 10.54, "learning_rate": 1.6532756489493203e-05, "loss": 0.846, "step": 8840 }, { "epoch": 10.55, "learning_rate": 1.6488610277238214e-05, "loss": 0.8039, "step": 8850 }, { "epoch": 10.56, "learning_rate": 1.6444464064983226e-05, "loss": 0.8354, "step": 8860 }, { "epoch": 10.57, "learning_rate": 1.6400317852728237e-05, "loss": 0.8321, "step": 8870 }, { "epoch": 10.58, "learning_rate": 1.6356171640473248e-05, "loss": 0.8048, "step": 8880 }, { "epoch": 10.6, "learning_rate": 1.631202542821826e-05, "loss": 0.8414, "step": 8890 }, { "epoch": 10.61, "learning_rate": 1.626787921596327e-05, "loss": 0.87, "step": 8900 }, { "epoch": 10.62, "learning_rate": 1.6223733003708282e-05, "loss": 0.8323, "step": 8910 }, { "epoch": 10.63, "learning_rate": 1.6179586791453293e-05, "loss": 0.8496, "step": 8920 }, { "epoch": 10.64, "learning_rate": 1.6135440579198304e-05, "loss": 0.8311, "step": 8930 }, { "epoch": 10.66, "learning_rate": 1.6091294366943316e-05, "loss": 0.8351, "step": 8940 }, { "epoch": 10.67, "learning_rate": 1.604714815468833e-05, "loss": 0.8303, "step": 8950 }, { "epoch": 10.68, "learning_rate": 1.600300194243334e-05, "loss": 0.8205, "step": 8960 }, { "epoch": 10.69, "learning_rate": 1.5958855730178353e-05, "loss": 0.8116, "step": 8970 }, { "epoch": 10.7, "learning_rate": 1.591470951792336e-05, "loss": 0.792, "step": 8980 }, { "epoch": 10.71, "learning_rate": 1.5870563305668372e-05, "loss": 0.8326, "step": 8990 }, { "epoch": 10.73, "learning_rate": 1.5826417093413387e-05, "loss": 0.8107, "step": 9000 }, { "epoch": 10.74, "learning_rate": 1.5782270881158398e-05, "loss": 0.8512, "step": 9010 }, { "epoch": 10.75, "learning_rate": 1.573812466890341e-05, "loss": 0.8814, "step": 9020 }, { "epoch": 10.76, "learning_rate": 1.569397845664842e-05, "loss": 0.8471, "step": 9030 }, { "epoch": 10.77, "learning_rate": 1.5649832244393432e-05, "loss": 0.841, "step": 9040 }, { "epoch": 10.79, "learning_rate": 1.5605686032138443e-05, "loss": 0.8457, "step": 9050 }, { "epoch": 10.8, "learning_rate": 1.5561539819883454e-05, "loss": 0.8292, "step": 9060 }, { "epoch": 10.81, "learning_rate": 1.5517393607628465e-05, "loss": 0.8242, "step": 9070 }, { "epoch": 10.82, "learning_rate": 1.5473247395373477e-05, "loss": 0.8112, "step": 9080 }, { "epoch": 10.83, "learning_rate": 1.5429101183118488e-05, "loss": 0.8051, "step": 9090 }, { "epoch": 10.85, "learning_rate": 1.53849549708635e-05, "loss": 0.8124, "step": 9100 }, { "epoch": 10.86, "learning_rate": 1.5340808758608514e-05, "loss": 0.8449, "step": 9110 }, { "epoch": 10.87, "learning_rate": 1.5296662546353525e-05, "loss": 0.8209, "step": 9120 }, { "epoch": 10.88, "learning_rate": 1.5252516334098535e-05, "loss": 0.7882, "step": 9130 }, { "epoch": 10.89, "learning_rate": 1.5208370121843546e-05, "loss": 0.8287, "step": 9140 }, { "epoch": 10.91, "learning_rate": 1.5164223909588557e-05, "loss": 0.8281, "step": 9150 }, { "epoch": 10.92, "learning_rate": 1.512007769733357e-05, "loss": 0.8646, "step": 9160 }, { "epoch": 10.93, "learning_rate": 1.5075931485078581e-05, "loss": 0.8217, "step": 9170 }, { "epoch": 10.94, "learning_rate": 1.5031785272823593e-05, "loss": 0.8445, "step": 9180 }, { "epoch": 10.95, "learning_rate": 1.4987639060568604e-05, "loss": 0.8246, "step": 9190 }, { "epoch": 10.97, "learning_rate": 1.4943492848313614e-05, "loss": 0.8449, "step": 9200 }, { "epoch": 10.98, "learning_rate": 1.4899346636058628e-05, "loss": 0.8551, "step": 9210 }, { "epoch": 10.99, "learning_rate": 1.485520042380364e-05, "loss": 0.8446, "step": 9220 }, { "epoch": 11.0, "eval_accuracy": 0.7981528377774467, "eval_f1": 0.7859063096300944, "eval_loss": 0.7904874682426453, "eval_precision": 0.7913122656849716, "eval_recall": 0.7981528377774467, "eval_runtime": 98.969, "eval_samples_per_second": 271.317, "eval_steps_per_second": 4.244, "step": 9229 }, { "epoch": 11.0, "learning_rate": 1.481105421154865e-05, "loss": 0.8497, "step": 9230 }, { "epoch": 11.01, "learning_rate": 1.476690799929366e-05, "loss": 0.7354, "step": 9240 }, { "epoch": 11.03, "learning_rate": 1.4722761787038672e-05, "loss": 0.8413, "step": 9250 }, { "epoch": 11.04, "learning_rate": 1.4678615574783683e-05, "loss": 0.842, "step": 9260 }, { "epoch": 11.05, "learning_rate": 1.4634469362528696e-05, "loss": 0.8079, "step": 9270 }, { "epoch": 11.06, "learning_rate": 1.4590323150273707e-05, "loss": 0.8281, "step": 9280 }, { "epoch": 11.07, "learning_rate": 1.4546176938018718e-05, "loss": 0.7798, "step": 9290 }, { "epoch": 11.08, "learning_rate": 1.450203072576373e-05, "loss": 0.7583, "step": 9300 }, { "epoch": 11.1, "learning_rate": 1.445788451350874e-05, "loss": 0.7882, "step": 9310 }, { "epoch": 11.11, "learning_rate": 1.4413738301253754e-05, "loss": 0.7858, "step": 9320 }, { "epoch": 11.12, "learning_rate": 1.4369592088998765e-05, "loss": 0.7928, "step": 9330 }, { "epoch": 11.13, "learning_rate": 1.4325445876743776e-05, "loss": 0.7811, "step": 9340 }, { "epoch": 11.14, "learning_rate": 1.4281299664488788e-05, "loss": 0.7588, "step": 9350 }, { "epoch": 11.16, "learning_rate": 1.4237153452233797e-05, "loss": 0.8435, "step": 9360 }, { "epoch": 11.17, "learning_rate": 1.4193007239978812e-05, "loss": 0.7808, "step": 9370 }, { "epoch": 11.18, "learning_rate": 1.4148861027723823e-05, "loss": 0.7983, "step": 9380 }, { "epoch": 11.19, "learning_rate": 1.4104714815468834e-05, "loss": 0.7767, "step": 9390 }, { "epoch": 11.2, "learning_rate": 1.4060568603213844e-05, "loss": 0.7829, "step": 9400 }, { "epoch": 11.22, "learning_rate": 1.4016422390958855e-05, "loss": 0.7967, "step": 9410 }, { "epoch": 11.23, "learning_rate": 1.397227617870387e-05, "loss": 0.7639, "step": 9420 }, { "epoch": 11.24, "learning_rate": 1.392812996644888e-05, "loss": 0.8109, "step": 9430 }, { "epoch": 11.25, "learning_rate": 1.388398375419389e-05, "loss": 0.8006, "step": 9440 }, { "epoch": 11.26, "learning_rate": 1.3839837541938902e-05, "loss": 0.7926, "step": 9450 }, { "epoch": 11.28, "learning_rate": 1.3795691329683913e-05, "loss": 0.8429, "step": 9460 }, { "epoch": 11.29, "learning_rate": 1.3751545117428924e-05, "loss": 0.8168, "step": 9470 }, { "epoch": 11.3, "learning_rate": 1.3707398905173937e-05, "loss": 0.7793, "step": 9480 }, { "epoch": 11.31, "learning_rate": 1.3663252692918949e-05, "loss": 0.8215, "step": 9490 }, { "epoch": 11.32, "learning_rate": 1.361910648066396e-05, "loss": 0.7918, "step": 9500 }, { "epoch": 11.33, "learning_rate": 1.3574960268408971e-05, "loss": 0.814, "step": 9510 }, { "epoch": 11.35, "learning_rate": 1.353081405615398e-05, "loss": 0.79, "step": 9520 }, { "epoch": 11.36, "learning_rate": 1.3486667843898995e-05, "loss": 0.8204, "step": 9530 }, { "epoch": 11.37, "learning_rate": 1.3442521631644007e-05, "loss": 0.7532, "step": 9540 }, { "epoch": 11.38, "learning_rate": 1.3398375419389018e-05, "loss": 0.7819, "step": 9550 }, { "epoch": 11.39, "learning_rate": 1.3354229207134027e-05, "loss": 0.8243, "step": 9560 }, { "epoch": 11.41, "learning_rate": 1.3310082994879039e-05, "loss": 0.7817, "step": 9570 }, { "epoch": 11.42, "learning_rate": 1.3265936782624053e-05, "loss": 0.8109, "step": 9580 }, { "epoch": 11.43, "learning_rate": 1.3221790570369063e-05, "loss": 0.8164, "step": 9590 }, { "epoch": 11.44, "learning_rate": 1.3177644358114074e-05, "loss": 0.7479, "step": 9600 }, { "epoch": 11.45, "learning_rate": 1.3133498145859085e-05, "loss": 0.8017, "step": 9610 }, { "epoch": 11.47, "learning_rate": 1.3089351933604097e-05, "loss": 0.7898, "step": 9620 }, { "epoch": 11.48, "learning_rate": 1.304520572134911e-05, "loss": 0.7693, "step": 9630 }, { "epoch": 11.49, "learning_rate": 1.3001059509094121e-05, "loss": 0.7851, "step": 9640 }, { "epoch": 11.5, "learning_rate": 1.2956913296839132e-05, "loss": 0.7936, "step": 9650 }, { "epoch": 11.51, "learning_rate": 1.2912767084584143e-05, "loss": 0.7208, "step": 9660 }, { "epoch": 11.53, "learning_rate": 1.2868620872329155e-05, "loss": 0.7852, "step": 9670 }, { "epoch": 11.54, "learning_rate": 1.2824474660074164e-05, "loss": 0.766, "step": 9680 }, { "epoch": 11.55, "learning_rate": 1.2780328447819179e-05, "loss": 0.7612, "step": 9690 }, { "epoch": 11.56, "learning_rate": 1.273618223556419e-05, "loss": 0.7778, "step": 9700 }, { "epoch": 11.57, "learning_rate": 1.2692036023309201e-05, "loss": 0.8051, "step": 9710 }, { "epoch": 11.59, "learning_rate": 1.2647889811054211e-05, "loss": 0.8111, "step": 9720 }, { "epoch": 11.6, "learning_rate": 1.2603743598799222e-05, "loss": 0.7677, "step": 9730 }, { "epoch": 11.61, "learning_rate": 1.2559597386544237e-05, "loss": 0.7886, "step": 9740 }, { "epoch": 11.62, "learning_rate": 1.2515451174289246e-05, "loss": 0.7708, "step": 9750 }, { "epoch": 11.63, "learning_rate": 1.2471304962034258e-05, "loss": 0.7954, "step": 9760 }, { "epoch": 11.64, "learning_rate": 1.2427158749779269e-05, "loss": 0.8294, "step": 9770 }, { "epoch": 11.66, "learning_rate": 1.2383012537524282e-05, "loss": 0.7807, "step": 9780 }, { "epoch": 11.67, "learning_rate": 1.2338866325269292e-05, "loss": 0.7939, "step": 9790 }, { "epoch": 11.68, "learning_rate": 1.2294720113014303e-05, "loss": 0.8255, "step": 9800 }, { "epoch": 11.69, "learning_rate": 1.2250573900759316e-05, "loss": 0.7947, "step": 9810 }, { "epoch": 11.7, "learning_rate": 1.2206427688504327e-05, "loss": 0.7623, "step": 9820 }, { "epoch": 11.72, "learning_rate": 1.2162281476249338e-05, "loss": 0.7654, "step": 9830 }, { "epoch": 11.73, "learning_rate": 1.211813526399435e-05, "loss": 0.8064, "step": 9840 }, { "epoch": 11.74, "learning_rate": 1.207398905173936e-05, "loss": 0.8024, "step": 9850 }, { "epoch": 11.75, "learning_rate": 1.2029842839484374e-05, "loss": 0.7557, "step": 9860 }, { "epoch": 11.76, "learning_rate": 1.1985696627229383e-05, "loss": 0.7451, "step": 9870 }, { "epoch": 11.78, "learning_rate": 1.1941550414974395e-05, "loss": 0.8012, "step": 9880 }, { "epoch": 11.79, "learning_rate": 1.1897404202719408e-05, "loss": 0.7658, "step": 9890 }, { "epoch": 11.8, "learning_rate": 1.1853257990464419e-05, "loss": 0.8007, "step": 9900 }, { "epoch": 11.81, "learning_rate": 1.180911177820943e-05, "loss": 0.7832, "step": 9910 }, { "epoch": 11.82, "learning_rate": 1.1764965565954441e-05, "loss": 0.7768, "step": 9920 }, { "epoch": 11.84, "learning_rate": 1.1720819353699453e-05, "loss": 0.7761, "step": 9930 }, { "epoch": 11.85, "learning_rate": 1.1676673141444466e-05, "loss": 0.7613, "step": 9940 }, { "epoch": 11.86, "learning_rate": 1.1632526929189475e-05, "loss": 0.7475, "step": 9950 }, { "epoch": 11.87, "learning_rate": 1.1588380716934488e-05, "loss": 0.7412, "step": 9960 }, { "epoch": 11.88, "learning_rate": 1.15442345046795e-05, "loss": 0.7915, "step": 9970 }, { "epoch": 11.89, "learning_rate": 1.150008829242451e-05, "loss": 0.7724, "step": 9980 }, { "epoch": 11.91, "learning_rate": 1.1455942080169522e-05, "loss": 0.8079, "step": 9990 }, { "epoch": 11.92, "learning_rate": 1.1411795867914533e-05, "loss": 0.8112, "step": 10000 }, { "epoch": 11.93, "learning_rate": 1.1367649655659544e-05, "loss": 0.8072, "step": 10010 }, { "epoch": 11.94, "learning_rate": 1.1323503443404557e-05, "loss": 0.7606, "step": 10020 }, { "epoch": 11.95, "learning_rate": 1.1279357231149567e-05, "loss": 0.739, "step": 10030 }, { "epoch": 11.97, "learning_rate": 1.123521101889458e-05, "loss": 0.7504, "step": 10040 }, { "epoch": 11.98, "learning_rate": 1.1191064806639591e-05, "loss": 0.7826, "step": 10050 }, { "epoch": 11.99, "learning_rate": 1.1146918594384602e-05, "loss": 0.7711, "step": 10060 }, { "epoch": 12.0, "eval_accuracy": 0.8069045136302696, "eval_f1": 0.794973833568864, "eval_loss": 0.7523674964904785, "eval_precision": 0.7995192234884096, "eval_recall": 0.8069045136302696, "eval_runtime": 99.9574, "eval_samples_per_second": 268.634, "eval_steps_per_second": 4.202, "step": 10068 }, { "epoch": 12.0, "learning_rate": 1.1102772382129614e-05, "loss": 0.7988, "step": 10070 }, { "epoch": 12.01, "learning_rate": 1.1058626169874625e-05, "loss": 0.7829, "step": 10080 }, { "epoch": 12.03, "learning_rate": 1.1014479957619636e-05, "loss": 0.703, "step": 10090 }, { "epoch": 12.04, "learning_rate": 1.0970333745364649e-05, "loss": 0.7694, "step": 10100 }, { "epoch": 12.05, "learning_rate": 1.0926187533109659e-05, "loss": 0.7603, "step": 10110 }, { "epoch": 12.06, "learning_rate": 1.0882041320854672e-05, "loss": 0.7369, "step": 10120 }, { "epoch": 12.07, "learning_rate": 1.0837895108599683e-05, "loss": 0.7168, "step": 10130 }, { "epoch": 12.09, "learning_rate": 1.0793748896344694e-05, "loss": 0.7568, "step": 10140 }, { "epoch": 12.1, "learning_rate": 1.0749602684089705e-05, "loss": 0.7458, "step": 10150 }, { "epoch": 12.11, "learning_rate": 1.0705456471834717e-05, "loss": 0.7414, "step": 10160 }, { "epoch": 12.12, "learning_rate": 1.0661310259579728e-05, "loss": 0.7439, "step": 10170 }, { "epoch": 12.13, "learning_rate": 1.0617164047324741e-05, "loss": 0.7865, "step": 10180 }, { "epoch": 12.15, "learning_rate": 1.057301783506975e-05, "loss": 0.7622, "step": 10190 }, { "epoch": 12.16, "learning_rate": 1.0528871622814763e-05, "loss": 0.7578, "step": 10200 }, { "epoch": 12.17, "learning_rate": 1.0484725410559775e-05, "loss": 0.7623, "step": 10210 }, { "epoch": 12.18, "learning_rate": 1.0440579198304786e-05, "loss": 0.7498, "step": 10220 }, { "epoch": 12.19, "learning_rate": 1.0396432986049797e-05, "loss": 0.7892, "step": 10230 }, { "epoch": 12.2, "learning_rate": 1.0352286773794808e-05, "loss": 0.7462, "step": 10240 }, { "epoch": 12.22, "learning_rate": 1.0308140561539821e-05, "loss": 0.7069, "step": 10250 }, { "epoch": 12.23, "learning_rate": 1.0263994349284833e-05, "loss": 0.7679, "step": 10260 }, { "epoch": 12.24, "learning_rate": 1.0219848137029842e-05, "loss": 0.7507, "step": 10270 }, { "epoch": 12.25, "learning_rate": 1.0175701924774855e-05, "loss": 0.7748, "step": 10280 }, { "epoch": 12.26, "learning_rate": 1.0131555712519866e-05, "loss": 0.7781, "step": 10290 }, { "epoch": 12.28, "learning_rate": 1.0087409500264878e-05, "loss": 0.6939, "step": 10300 }, { "epoch": 12.29, "learning_rate": 1.0043263288009889e-05, "loss": 0.6655, "step": 10310 }, { "epoch": 12.3, "learning_rate": 9.9991170757549e-06, "loss": 0.7509, "step": 10320 }, { "epoch": 12.31, "learning_rate": 9.954970863499913e-06, "loss": 0.7902, "step": 10330 }, { "epoch": 12.32, "learning_rate": 9.910824651244924e-06, "loss": 0.7459, "step": 10340 }, { "epoch": 12.34, "learning_rate": 9.866678438989934e-06, "loss": 0.7505, "step": 10350 }, { "epoch": 12.35, "learning_rate": 9.822532226734947e-06, "loss": 0.7476, "step": 10360 }, { "epoch": 12.36, "learning_rate": 9.778386014479958e-06, "loss": 0.6797, "step": 10370 }, { "epoch": 12.37, "learning_rate": 9.73423980222497e-06, "loss": 0.7846, "step": 10380 }, { "epoch": 12.38, "learning_rate": 9.69009358996998e-06, "loss": 0.7465, "step": 10390 }, { "epoch": 12.4, "learning_rate": 9.645947377714992e-06, "loss": 0.7493, "step": 10400 }, { "epoch": 12.41, "learning_rate": 9.601801165460005e-06, "loss": 0.7691, "step": 10410 }, { "epoch": 12.42, "learning_rate": 9.557654953205016e-06, "loss": 0.7668, "step": 10420 }, { "epoch": 12.43, "learning_rate": 9.513508740950026e-06, "loss": 0.743, "step": 10430 }, { "epoch": 12.44, "learning_rate": 9.469362528695039e-06, "loss": 0.7609, "step": 10440 }, { "epoch": 12.46, "learning_rate": 9.42521631644005e-06, "loss": 0.7925, "step": 10450 }, { "epoch": 12.47, "learning_rate": 9.381070104185061e-06, "loss": 0.7793, "step": 10460 }, { "epoch": 12.48, "learning_rate": 9.336923891930073e-06, "loss": 0.7548, "step": 10470 }, { "epoch": 12.49, "learning_rate": 9.292777679675084e-06, "loss": 0.7509, "step": 10480 }, { "epoch": 12.5, "learning_rate": 9.248631467420097e-06, "loss": 0.6699, "step": 10490 }, { "epoch": 12.51, "learning_rate": 9.204485255165108e-06, "loss": 0.7488, "step": 10500 }, { "epoch": 12.53, "learning_rate": 9.160339042910118e-06, "loss": 0.7262, "step": 10510 }, { "epoch": 12.54, "learning_rate": 9.11619283065513e-06, "loss": 0.7565, "step": 10520 }, { "epoch": 12.55, "learning_rate": 9.072046618400142e-06, "loss": 0.7549, "step": 10530 }, { "epoch": 12.56, "learning_rate": 9.027900406145153e-06, "loss": 0.7766, "step": 10540 }, { "epoch": 12.57, "learning_rate": 8.983754193890164e-06, "loss": 0.7764, "step": 10550 }, { "epoch": 12.59, "learning_rate": 8.939607981635176e-06, "loss": 0.7215, "step": 10560 }, { "epoch": 12.6, "learning_rate": 8.895461769380189e-06, "loss": 0.7217, "step": 10570 }, { "epoch": 12.61, "learning_rate": 8.8513155571252e-06, "loss": 0.747, "step": 10580 }, { "epoch": 12.62, "learning_rate": 8.80716934487021e-06, "loss": 0.7748, "step": 10590 }, { "epoch": 12.63, "learning_rate": 8.763023132615222e-06, "loss": 0.7711, "step": 10600 }, { "epoch": 12.65, "learning_rate": 8.718876920360234e-06, "loss": 0.743, "step": 10610 }, { "epoch": 12.66, "learning_rate": 8.674730708105245e-06, "loss": 0.7345, "step": 10620 }, { "epoch": 12.67, "learning_rate": 8.630584495850256e-06, "loss": 0.7406, "step": 10630 }, { "epoch": 12.68, "learning_rate": 8.586438283595267e-06, "loss": 0.732, "step": 10640 }, { "epoch": 12.69, "learning_rate": 8.54229207134028e-06, "loss": 0.729, "step": 10650 }, { "epoch": 12.71, "learning_rate": 8.498145859085292e-06, "loss": 0.7323, "step": 10660 }, { "epoch": 12.72, "learning_rate": 8.453999646830301e-06, "loss": 0.7796, "step": 10670 }, { "epoch": 12.73, "learning_rate": 8.409853434575314e-06, "loss": 0.7401, "step": 10680 }, { "epoch": 12.74, "learning_rate": 8.365707222320325e-06, "loss": 0.7403, "step": 10690 }, { "epoch": 12.75, "learning_rate": 8.321561010065337e-06, "loss": 0.7433, "step": 10700 }, { "epoch": 12.76, "learning_rate": 8.277414797810348e-06, "loss": 0.7227, "step": 10710 }, { "epoch": 12.78, "learning_rate": 8.233268585555359e-06, "loss": 0.7929, "step": 10720 }, { "epoch": 12.79, "learning_rate": 8.189122373300372e-06, "loss": 0.7602, "step": 10730 }, { "epoch": 12.8, "learning_rate": 8.144976161045383e-06, "loss": 0.7554, "step": 10740 }, { "epoch": 12.81, "learning_rate": 8.100829948790393e-06, "loss": 0.7635, "step": 10750 }, { "epoch": 12.82, "learning_rate": 8.056683736535406e-06, "loss": 0.7451, "step": 10760 }, { "epoch": 12.84, "learning_rate": 8.012537524280417e-06, "loss": 0.7906, "step": 10770 }, { "epoch": 12.85, "learning_rate": 7.968391312025428e-06, "loss": 0.7279, "step": 10780 }, { "epoch": 12.86, "learning_rate": 7.92424509977044e-06, "loss": 0.7292, "step": 10790 }, { "epoch": 12.87, "learning_rate": 7.880098887515451e-06, "loss": 0.7269, "step": 10800 }, { "epoch": 12.88, "learning_rate": 7.835952675260464e-06, "loss": 0.705, "step": 10810 }, { "epoch": 12.9, "learning_rate": 7.791806463005475e-06, "loss": 0.7554, "step": 10820 }, { "epoch": 12.91, "learning_rate": 7.747660250750485e-06, "loss": 0.7186, "step": 10830 }, { "epoch": 12.92, "learning_rate": 7.703514038495498e-06, "loss": 0.7928, "step": 10840 }, { "epoch": 12.93, "learning_rate": 7.659367826240509e-06, "loss": 0.7391, "step": 10850 }, { "epoch": 12.94, "learning_rate": 7.615221613985521e-06, "loss": 0.728, "step": 10860 }, { "epoch": 12.96, "learning_rate": 7.5710754017305314e-06, "loss": 0.7191, "step": 10870 }, { "epoch": 12.97, "learning_rate": 7.526929189475543e-06, "loss": 0.7436, "step": 10880 }, { "epoch": 12.98, "learning_rate": 7.482782977220555e-06, "loss": 0.71, "step": 10890 }, { "epoch": 12.99, "learning_rate": 7.438636764965566e-06, "loss": 0.7689, "step": 10900 }, { "epoch": 13.0, "eval_accuracy": 0.8123417250111723, "eval_f1": 0.8008904007472688, "eval_loss": 0.7283065915107727, "eval_precision": 0.8043071439972418, "eval_recall": 0.8123417250111723, "eval_runtime": 99.1806, "eval_samples_per_second": 270.739, "eval_steps_per_second": 4.235, "step": 10907 }, { "epoch": 13.0, "learning_rate": 7.394490552710578e-06, "loss": 0.7298, "step": 10910 }, { "epoch": 13.02, "learning_rate": 7.3503443404555894e-06, "loss": 0.7718, "step": 10920 }, { "epoch": 13.03, "learning_rate": 7.306198128200601e-06, "loss": 0.6979, "step": 10930 }, { "epoch": 13.04, "learning_rate": 7.262051915945613e-06, "loss": 0.7406, "step": 10940 }, { "epoch": 13.05, "learning_rate": 7.217905703690623e-06, "loss": 0.7342, "step": 10950 }, { "epoch": 13.06, "learning_rate": 7.1737594914356345e-06, "loss": 0.7568, "step": 10960 }, { "epoch": 13.08, "learning_rate": 7.129613279180647e-06, "loss": 0.7185, "step": 10970 }, { "epoch": 13.09, "learning_rate": 7.085467066925658e-06, "loss": 0.7223, "step": 10980 }, { "epoch": 13.1, "learning_rate": 7.04132085467067e-06, "loss": 0.7146, "step": 10990 }, { "epoch": 13.11, "learning_rate": 6.997174642415681e-06, "loss": 0.679, "step": 11000 }, { "epoch": 13.12, "learning_rate": 6.9530284301606925e-06, "loss": 0.7124, "step": 11010 }, { "epoch": 13.13, "learning_rate": 6.908882217905705e-06, "loss": 0.7503, "step": 11020 }, { "epoch": 13.15, "learning_rate": 6.864736005650715e-06, "loss": 0.7214, "step": 11030 }, { "epoch": 13.16, "learning_rate": 6.820589793395726e-06, "loss": 0.7273, "step": 11040 }, { "epoch": 13.17, "learning_rate": 6.776443581140738e-06, "loss": 0.7334, "step": 11050 }, { "epoch": 13.18, "learning_rate": 6.73229736888575e-06, "loss": 0.7317, "step": 11060 }, { "epoch": 13.19, "learning_rate": 6.688151156630762e-06, "loss": 0.7214, "step": 11070 }, { "epoch": 13.21, "learning_rate": 6.644004944375773e-06, "loss": 0.7343, "step": 11080 }, { "epoch": 13.22, "learning_rate": 6.599858732120784e-06, "loss": 0.7117, "step": 11090 }, { "epoch": 13.23, "learning_rate": 6.555712519865796e-06, "loss": 0.7101, "step": 11100 }, { "epoch": 13.24, "learning_rate": 6.511566307610807e-06, "loss": 0.67, "step": 11110 }, { "epoch": 13.25, "learning_rate": 6.467420095355818e-06, "loss": 0.7423, "step": 11120 }, { "epoch": 13.27, "learning_rate": 6.42327388310083e-06, "loss": 0.7379, "step": 11130 }, { "epoch": 13.28, "learning_rate": 6.3791276708458414e-06, "loss": 0.7012, "step": 11140 }, { "epoch": 13.29, "learning_rate": 6.3349814585908535e-06, "loss": 0.7246, "step": 11150 }, { "epoch": 13.3, "learning_rate": 6.290835246335865e-06, "loss": 0.7448, "step": 11160 }, { "epoch": 13.31, "learning_rate": 6.246689034080876e-06, "loss": 0.6997, "step": 11170 }, { "epoch": 13.33, "learning_rate": 6.202542821825888e-06, "loss": 0.7124, "step": 11180 }, { "epoch": 13.34, "learning_rate": 6.158396609570899e-06, "loss": 0.7324, "step": 11190 }, { "epoch": 13.35, "learning_rate": 6.114250397315911e-06, "loss": 0.7154, "step": 11200 }, { "epoch": 13.36, "learning_rate": 6.070104185060922e-06, "loss": 0.7044, "step": 11210 }, { "epoch": 13.37, "learning_rate": 6.025957972805934e-06, "loss": 0.73, "step": 11220 }, { "epoch": 13.38, "learning_rate": 5.9818117605509445e-06, "loss": 0.72, "step": 11230 }, { "epoch": 13.4, "learning_rate": 5.937665548295957e-06, "loss": 0.7784, "step": 11240 }, { "epoch": 13.41, "learning_rate": 5.893519336040968e-06, "loss": 0.6891, "step": 11250 }, { "epoch": 13.42, "learning_rate": 5.84937312378598e-06, "loss": 0.7191, "step": 11260 }, { "epoch": 13.43, "learning_rate": 5.80522691153099e-06, "loss": 0.7387, "step": 11270 }, { "epoch": 13.44, "learning_rate": 5.7610806992760025e-06, "loss": 0.7136, "step": 11280 }, { "epoch": 13.46, "learning_rate": 5.716934487021014e-06, "loss": 0.732, "step": 11290 }, { "epoch": 13.47, "learning_rate": 5.672788274766026e-06, "loss": 0.7582, "step": 11300 }, { "epoch": 13.48, "learning_rate": 5.628642062511036e-06, "loss": 0.7532, "step": 11310 }, { "epoch": 13.49, "learning_rate": 5.584495850256048e-06, "loss": 0.7333, "step": 11320 }, { "epoch": 13.5, "learning_rate": 5.54034963800106e-06, "loss": 0.7313, "step": 11330 }, { "epoch": 13.52, "learning_rate": 5.496203425746072e-06, "loss": 0.7207, "step": 11340 }, { "epoch": 13.53, "learning_rate": 5.452057213491082e-06, "loss": 0.7069, "step": 11350 }, { "epoch": 13.54, "learning_rate": 5.407911001236094e-06, "loss": 0.6881, "step": 11360 }, { "epoch": 13.55, "learning_rate": 5.3637647889811055e-06, "loss": 0.7396, "step": 11370 }, { "epoch": 13.56, "learning_rate": 5.319618576726118e-06, "loss": 0.6671, "step": 11380 }, { "epoch": 13.58, "learning_rate": 5.275472364471128e-06, "loss": 0.7244, "step": 11390 }, { "epoch": 13.59, "learning_rate": 5.23132615221614e-06, "loss": 0.7028, "step": 11400 }, { "epoch": 13.6, "learning_rate": 5.187179939961151e-06, "loss": 0.7315, "step": 11410 }, { "epoch": 13.61, "learning_rate": 5.1430337277061635e-06, "loss": 0.7555, "step": 11420 }, { "epoch": 13.62, "learning_rate": 5.098887515451174e-06, "loss": 0.7226, "step": 11430 }, { "epoch": 13.64, "learning_rate": 5.054741303196186e-06, "loss": 0.7307, "step": 11440 }, { "epoch": 13.65, "learning_rate": 5.010595090941197e-06, "loss": 0.7348, "step": 11450 }, { "epoch": 13.66, "learning_rate": 4.966448878686209e-06, "loss": 0.6986, "step": 11460 }, { "epoch": 13.67, "learning_rate": 4.92230266643122e-06, "loss": 0.746, "step": 11470 }, { "epoch": 13.68, "learning_rate": 4.878156454176232e-06, "loss": 0.7028, "step": 11480 }, { "epoch": 13.69, "learning_rate": 4.834010241921243e-06, "loss": 0.7215, "step": 11490 }, { "epoch": 13.71, "learning_rate": 4.789864029666255e-06, "loss": 0.7616, "step": 11500 }, { "epoch": 13.72, "learning_rate": 4.7457178174112666e-06, "loss": 0.7073, "step": 11510 }, { "epoch": 13.73, "learning_rate": 4.701571605156278e-06, "loss": 0.6928, "step": 11520 }, { "epoch": 13.74, "learning_rate": 4.657425392901289e-06, "loss": 0.6846, "step": 11530 }, { "epoch": 13.75, "learning_rate": 4.613279180646301e-06, "loss": 0.716, "step": 11540 }, { "epoch": 13.77, "learning_rate": 4.5691329683913125e-06, "loss": 0.7124, "step": 11550 }, { "epoch": 13.78, "learning_rate": 4.524986756136324e-06, "loss": 0.7151, "step": 11560 }, { "epoch": 13.79, "learning_rate": 4.480840543881335e-06, "loss": 0.7018, "step": 11570 }, { "epoch": 13.8, "learning_rate": 4.436694331626347e-06, "loss": 0.7275, "step": 11580 }, { "epoch": 13.81, "learning_rate": 4.392548119371358e-06, "loss": 0.7311, "step": 11590 }, { "epoch": 13.83, "learning_rate": 4.34840190711637e-06, "loss": 0.6931, "step": 11600 }, { "epoch": 13.84, "learning_rate": 4.304255694861381e-06, "loss": 0.7124, "step": 11610 }, { "epoch": 13.85, "learning_rate": 4.260109482606393e-06, "loss": 0.7396, "step": 11620 }, { "epoch": 13.86, "learning_rate": 4.215963270351404e-06, "loss": 0.7241, "step": 11630 }, { "epoch": 13.87, "learning_rate": 4.1718170580964155e-06, "loss": 0.7272, "step": 11640 }, { "epoch": 13.89, "learning_rate": 4.127670845841427e-06, "loss": 0.6968, "step": 11650 }, { "epoch": 13.9, "learning_rate": 4.083524633586439e-06, "loss": 0.6634, "step": 11660 }, { "epoch": 13.91, "learning_rate": 4.03937842133145e-06, "loss": 0.7018, "step": 11670 }, { "epoch": 13.92, "learning_rate": 3.995232209076461e-06, "loss": 0.7265, "step": 11680 }, { "epoch": 13.93, "learning_rate": 3.951085996821473e-06, "loss": 0.6852, "step": 11690 }, { "epoch": 13.94, "learning_rate": 3.906939784566485e-06, "loss": 0.7071, "step": 11700 }, { "epoch": 13.96, "learning_rate": 3.862793572311496e-06, "loss": 0.7031, "step": 11710 }, { "epoch": 13.97, "learning_rate": 3.818647360056507e-06, "loss": 0.7017, "step": 11720 }, { "epoch": 13.98, "learning_rate": 3.7745011478015185e-06, "loss": 0.7198, "step": 11730 }, { "epoch": 13.99, "learning_rate": 3.7303549355465302e-06, "loss": 0.6919, "step": 11740 }, { "epoch": 14.0, "eval_accuracy": 0.8147996424847311, "eval_f1": 0.8036085852328231, "eval_loss": 0.7133387327194214, "eval_precision": 0.8060988123614719, "eval_recall": 0.8147996424847311, "eval_runtime": 99.251, "eval_samples_per_second": 270.546, "eval_steps_per_second": 4.232, "step": 11746 }, { "epoch": 14.0, "learning_rate": 3.686208723291542e-06, "loss": 0.7503, "step": 11750 }, { "epoch": 14.02, "learning_rate": 3.6420625110365527e-06, "loss": 0.6886, "step": 11760 }, { "epoch": 14.03, "learning_rate": 3.5979162987815644e-06, "loss": 0.7366, "step": 11770 }, { "epoch": 14.04, "learning_rate": 3.553770086526576e-06, "loss": 0.6969, "step": 11780 }, { "epoch": 14.05, "learning_rate": 3.509623874271588e-06, "loss": 0.7285, "step": 11790 }, { "epoch": 14.06, "learning_rate": 3.4654776620165995e-06, "loss": 0.6919, "step": 11800 }, { "epoch": 14.08, "learning_rate": 3.4213314497616103e-06, "loss": 0.6904, "step": 11810 }, { "epoch": 14.09, "learning_rate": 3.377185237506622e-06, "loss": 0.6896, "step": 11820 }, { "epoch": 14.1, "learning_rate": 3.3330390252516337e-06, "loss": 0.6798, "step": 11830 }, { "epoch": 14.11, "learning_rate": 3.2888928129966454e-06, "loss": 0.7035, "step": 11840 }, { "epoch": 14.12, "learning_rate": 3.2447466007416562e-06, "loss": 0.6692, "step": 11850 }, { "epoch": 14.14, "learning_rate": 3.200600388486668e-06, "loss": 0.7203, "step": 11860 }, { "epoch": 14.15, "learning_rate": 3.1564541762316796e-06, "loss": 0.7057, "step": 11870 }, { "epoch": 14.16, "learning_rate": 3.112307963976691e-06, "loss": 0.7154, "step": 11880 }, { "epoch": 14.17, "learning_rate": 3.0681617517217025e-06, "loss": 0.6911, "step": 11890 }, { "epoch": 14.18, "learning_rate": 3.024015539466714e-06, "loss": 0.6745, "step": 11900 }, { "epoch": 14.2, "learning_rate": 2.9798693272117255e-06, "loss": 0.7365, "step": 11910 }, { "epoch": 14.21, "learning_rate": 2.9357231149567367e-06, "loss": 0.7185, "step": 11920 }, { "epoch": 14.22, "learning_rate": 2.8915769027017484e-06, "loss": 0.654, "step": 11930 }, { "epoch": 14.23, "learning_rate": 2.8474306904467597e-06, "loss": 0.7046, "step": 11940 }, { "epoch": 14.24, "learning_rate": 2.8032844781917714e-06, "loss": 0.7372, "step": 11950 }, { "epoch": 14.25, "learning_rate": 2.7591382659367826e-06, "loss": 0.7017, "step": 11960 }, { "epoch": 14.27, "learning_rate": 2.7149920536817943e-06, "loss": 0.6843, "step": 11970 }, { "epoch": 14.28, "learning_rate": 2.6708458414268056e-06, "loss": 0.6891, "step": 11980 }, { "epoch": 14.29, "learning_rate": 2.6266996291718173e-06, "loss": 0.6984, "step": 11990 }, { "epoch": 14.3, "learning_rate": 2.5825534169168285e-06, "loss": 0.7012, "step": 12000 }, { "epoch": 14.31, "learning_rate": 2.53840720466184e-06, "loss": 0.7015, "step": 12010 }, { "epoch": 14.33, "learning_rate": 2.4942609924068515e-06, "loss": 0.7127, "step": 12020 }, { "epoch": 14.34, "learning_rate": 2.450114780151863e-06, "loss": 0.7021, "step": 12030 }, { "epoch": 14.35, "learning_rate": 2.4059685678968744e-06, "loss": 0.6599, "step": 12040 }, { "epoch": 14.36, "learning_rate": 2.361822355641886e-06, "loss": 0.7266, "step": 12050 }, { "epoch": 14.37, "learning_rate": 2.3176761433868974e-06, "loss": 0.6562, "step": 12060 }, { "epoch": 14.39, "learning_rate": 2.273529931131909e-06, "loss": 0.7178, "step": 12070 }, { "epoch": 14.4, "learning_rate": 2.2293837188769203e-06, "loss": 0.683, "step": 12080 }, { "epoch": 14.41, "learning_rate": 2.185237506621932e-06, "loss": 0.7003, "step": 12090 }, { "epoch": 14.42, "learning_rate": 2.1410912943669432e-06, "loss": 0.6672, "step": 12100 }, { "epoch": 14.43, "learning_rate": 2.096945082111955e-06, "loss": 0.7333, "step": 12110 }, { "epoch": 14.45, "learning_rate": 2.0527988698569666e-06, "loss": 0.7226, "step": 12120 }, { "epoch": 14.46, "learning_rate": 2.008652657601978e-06, "loss": 0.7062, "step": 12130 }, { "epoch": 14.47, "learning_rate": 1.9645064453469896e-06, "loss": 0.6825, "step": 12140 }, { "epoch": 14.48, "learning_rate": 1.920360233092001e-06, "loss": 0.7219, "step": 12150 }, { "epoch": 14.49, "learning_rate": 1.8762140208370125e-06, "loss": 0.7151, "step": 12160 }, { "epoch": 14.51, "learning_rate": 1.8320678085820238e-06, "loss": 0.6999, "step": 12170 }, { "epoch": 14.52, "learning_rate": 1.7879215963270355e-06, "loss": 0.7182, "step": 12180 }, { "epoch": 14.53, "learning_rate": 1.7437753840720467e-06, "loss": 0.7063, "step": 12190 }, { "epoch": 14.54, "learning_rate": 1.6996291718170584e-06, "loss": 0.7208, "step": 12200 }, { "epoch": 14.55, "learning_rate": 1.6554829595620697e-06, "loss": 0.7346, "step": 12210 }, { "epoch": 14.56, "learning_rate": 1.6113367473070813e-06, "loss": 0.6791, "step": 12220 }, { "epoch": 14.58, "learning_rate": 1.5671905350520926e-06, "loss": 0.7421, "step": 12230 }, { "epoch": 14.59, "learning_rate": 1.523044322797104e-06, "loss": 0.6476, "step": 12240 }, { "epoch": 14.6, "learning_rate": 1.4788981105421156e-06, "loss": 0.6935, "step": 12250 }, { "epoch": 14.61, "learning_rate": 1.434751898287127e-06, "loss": 0.6992, "step": 12260 }, { "epoch": 14.62, "learning_rate": 1.3906056860321385e-06, "loss": 0.6843, "step": 12270 }, { "epoch": 14.64, "learning_rate": 1.34645947377715e-06, "loss": 0.6774, "step": 12280 }, { "epoch": 14.65, "learning_rate": 1.3023132615221614e-06, "loss": 0.6726, "step": 12290 }, { "epoch": 14.66, "learning_rate": 1.258167049267173e-06, "loss": 0.6871, "step": 12300 }, { "epoch": 14.67, "learning_rate": 1.2140208370121844e-06, "loss": 0.7278, "step": 12310 }, { "epoch": 14.68, "learning_rate": 1.1698746247571959e-06, "loss": 0.6631, "step": 12320 }, { "epoch": 14.7, "learning_rate": 1.1257284125022073e-06, "loss": 0.6951, "step": 12330 }, { "epoch": 14.71, "learning_rate": 1.0815822002472188e-06, "loss": 0.7023, "step": 12340 }, { "epoch": 14.72, "learning_rate": 1.0374359879922303e-06, "loss": 0.7297, "step": 12350 }, { "epoch": 14.73, "learning_rate": 9.932897757372418e-07, "loss": 0.6868, "step": 12360 }, { "epoch": 14.74, "learning_rate": 9.491435634822533e-07, "loss": 0.6741, "step": 12370 }, { "epoch": 14.76, "learning_rate": 9.049973512272648e-07, "loss": 0.6589, "step": 12380 }, { "epoch": 14.77, "learning_rate": 8.608511389722763e-07, "loss": 0.7029, "step": 12390 }, { "epoch": 14.78, "learning_rate": 8.167049267172878e-07, "loss": 0.7267, "step": 12400 }, { "epoch": 14.79, "learning_rate": 7.725587144622992e-07, "loss": 0.6523, "step": 12410 }, { "epoch": 14.8, "learning_rate": 7.284125022073107e-07, "loss": 0.7113, "step": 12420 }, { "epoch": 14.82, "learning_rate": 6.842662899523222e-07, "loss": 0.6823, "step": 12430 }, { "epoch": 14.83, "learning_rate": 6.401200776973336e-07, "loss": 0.7095, "step": 12440 }, { "epoch": 14.84, "learning_rate": 5.95973865442345e-07, "loss": 0.6959, "step": 12450 }, { "epoch": 14.85, "learning_rate": 5.518276531873565e-07, "loss": 0.7162, "step": 12460 }, { "epoch": 14.86, "learning_rate": 5.07681440932368e-07, "loss": 0.683, "step": 12470 }, { "epoch": 14.87, "learning_rate": 4.635352286773795e-07, "loss": 0.7184, "step": 12480 }, { "epoch": 14.89, "learning_rate": 4.1938901642239095e-07, "loss": 0.6654, "step": 12490 }, { "epoch": 14.9, "learning_rate": 3.752428041674025e-07, "loss": 0.6955, "step": 12500 }, { "epoch": 14.91, "learning_rate": 3.3109659191241395e-07, "loss": 0.6912, "step": 12510 }, { "epoch": 14.92, "learning_rate": 2.8695037965742537e-07, "loss": 0.7228, "step": 12520 }, { "epoch": 14.93, "learning_rate": 2.4280416740243685e-07, "loss": 0.6885, "step": 12530 }, { "epoch": 14.95, "learning_rate": 1.9865795514744835e-07, "loss": 0.7197, "step": 12540 }, { "epoch": 14.96, "learning_rate": 1.5451174289245984e-07, "loss": 0.7271, "step": 12550 }, { "epoch": 14.97, "learning_rate": 1.103655306374713e-07, "loss": 0.709, "step": 12560 }, { "epoch": 14.98, "learning_rate": 6.621931838248278e-08, "loss": 0.7227, "step": 12570 }, { "epoch": 14.99, "learning_rate": 2.2073106127494264e-08, "loss": 0.694, "step": 12580 }, { "epoch": 15.0, "eval_accuracy": 0.8176672128705497, "eval_f1": 0.8067463077825504, "eval_loss": 0.7063722014427185, "eval_precision": 0.808932447502446, "eval_recall": 0.8176672128705497, "eval_runtime": 99.9807, "eval_samples_per_second": 268.572, "eval_steps_per_second": 4.201, "step": 12585 }, { "epoch": 15.0, "step": 12585, "total_flos": 8.179980758810437e+19, "train_loss": 1.7412154973476806, "train_runtime": 20817.4419, "train_samples_per_second": 154.783, "train_steps_per_second": 0.605 }, { "epoch": 15.0, "eval_accuracy": 0.8244604584473866, "eval_f1": 0.8143373319248239, "eval_loss": 0.6935074329376221, "eval_precision": 0.8209253030287482, "eval_recall": 0.8244604584473866, "eval_runtime": 847.887, "eval_samples_per_second": 253.35, "eval_steps_per_second": 3.959, "step": 12585 }, { "epoch": 15.0, "eval_accuracy": 0.8176672128705497, "eval_f1": 0.8067463077825504, "eval_loss": 0.7063722014427185, "eval_precision": 0.808932447502446, "eval_recall": 0.8176672128705497, "eval_runtime": 100.5004, "eval_samples_per_second": 267.183, "eval_steps_per_second": 4.179, "step": 12585 }, { "epoch": 15.0, "eval_accuracy": 0.8172575599582899, "eval_f1": 0.8057061753323418, "eval_loss": 0.7021384835243225, "eval_precision": 0.8093542627058361, "eval_recall": 0.8172575599582899, "eval_runtime": 100.0753, "eval_samples_per_second": 268.318, "eval_steps_per_second": 4.197, "step": 12585 } ], "max_steps": 12585, "num_train_epochs": 15, "total_flos": 8.179980758810437e+19, "trial_name": null, "trial_params": null }