diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7789 @@ +{ + "best_metric": 0.8176672128705497, + "best_model_checkpoint": "convnext-tiny-224_album_vitVMMRdb_make_model_album_pred/checkpoint-12585", + "epoch": 14.999702114983616, + "global_step": 12585, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 3.971405877680699e-07, + "loss": 5.9416, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 7.942811755361398e-07, + "loss": 5.9452, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 1.1914217633042098e-06, + "loss": 5.9409, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 1.5885623510722797e-06, + "loss": 5.9378, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 1.9857029388403496e-06, + "loss": 5.9394, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 2.3828435266084195e-06, + "loss": 5.9288, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 2.7799841143764895e-06, + "loss": 5.9298, + "step": 70 + }, + { + "epoch": 0.1, + "learning_rate": 3.1771247021445594e-06, + "loss": 5.9304, + "step": 80 + }, + { + "epoch": 0.11, + "learning_rate": 3.5742652899126297e-06, + "loss": 5.9255, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 3.971405877680699e-06, + "loss": 5.9241, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 4.368546465448769e-06, + "loss": 5.9103, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 4.765687053216839e-06, + "loss": 5.9143, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 5.162827640984909e-06, + "loss": 5.9092, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 5.559968228752979e-06, + "loss": 5.8914, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 5.957108816521049e-06, + "loss": 5.8836, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 6.354249404289119e-06, + "loss": 5.8774, + "step": 160 + }, + { + "epoch": 0.2, + "learning_rate": 6.751389992057189e-06, + "loss": 5.87, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 7.1485305798252594e-06, + "loss": 5.8626, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 7.5456711675933285e-06, + "loss": 5.8501, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 7.942811755361398e-06, + "loss": 5.8369, + "step": 200 + }, + { + "epoch": 0.25, + "learning_rate": 8.33995234312947e-06, + "loss": 5.8174, + "step": 210 + }, + { + "epoch": 0.26, + "learning_rate": 8.737092930897538e-06, + "loss": 5.8033, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 9.134233518665607e-06, + "loss": 5.7707, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 9.531374106433678e-06, + "loss": 5.765, + "step": 240 + }, + { + "epoch": 0.3, + "learning_rate": 9.928514694201747e-06, + "loss": 5.7387, + "step": 250 + }, + { + "epoch": 0.31, + "learning_rate": 1.0325655281969818e-05, + "loss": 5.7149, + "step": 260 + }, + { + "epoch": 0.32, + "learning_rate": 1.0722795869737889e-05, + "loss": 5.6734, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 1.1119936457505958e-05, + "loss": 5.6429, + "step": 280 + }, + { + "epoch": 0.35, + "learning_rate": 1.1517077045274027e-05, + "loss": 5.62, + "step": 290 + }, + { + "epoch": 0.36, + "learning_rate": 1.1914217633042098e-05, + "loss": 5.5929, + "step": 300 + }, + { + "epoch": 0.37, + "learning_rate": 1.2311358220810168e-05, + "loss": 5.565, + "step": 310 + }, + { + "epoch": 0.38, + "learning_rate": 1.2708498808578237e-05, + "loss": 5.5256, + "step": 320 + }, + { + "epoch": 0.39, + "learning_rate": 1.3105639396346307e-05, + "loss": 5.5036, + "step": 330 + }, + { + "epoch": 0.41, + "learning_rate": 1.3502779984114377e-05, + "loss": 5.4775, + "step": 340 + }, + { + "epoch": 0.42, + "learning_rate": 1.3899920571882446e-05, + "loss": 5.4254, + "step": 350 + }, + { + "epoch": 0.43, + "learning_rate": 1.4297061159650519e-05, + "loss": 5.4182, + "step": 360 + }, + { + "epoch": 0.44, + "learning_rate": 1.4694201747418588e-05, + "loss": 5.39, + "step": 370 + }, + { + "epoch": 0.45, + "learning_rate": 1.5091342335186657e-05, + "loss": 5.3802, + "step": 380 + }, + { + "epoch": 0.46, + "learning_rate": 1.548848292295473e-05, + "loss": 5.3357, + "step": 390 + }, + { + "epoch": 0.48, + "learning_rate": 1.5885623510722797e-05, + "loss": 5.3103, + "step": 400 + }, + { + "epoch": 0.49, + "learning_rate": 1.6282764098490864e-05, + "loss": 5.3291, + "step": 410 + }, + { + "epoch": 0.5, + "learning_rate": 1.667990468625894e-05, + "loss": 5.2987, + "step": 420 + }, + { + "epoch": 0.51, + "learning_rate": 1.7077045274027006e-05, + "loss": 5.2742, + "step": 430 + }, + { + "epoch": 0.52, + "learning_rate": 1.7474185861795077e-05, + "loss": 5.281, + "step": 440 + }, + { + "epoch": 0.54, + "learning_rate": 1.7871326449563147e-05, + "loss": 5.2443, + "step": 450 + }, + { + "epoch": 0.55, + "learning_rate": 1.8268467037331215e-05, + "loss": 5.2933, + "step": 460 + }, + { + "epoch": 0.56, + "learning_rate": 1.8665607625099285e-05, + "loss": 5.2445, + "step": 470 + }, + { + "epoch": 0.57, + "learning_rate": 1.9062748212867356e-05, + "loss": 5.2277, + "step": 480 + }, + { + "epoch": 0.58, + "learning_rate": 1.9459888800635427e-05, + "loss": 5.2166, + "step": 490 + }, + { + "epoch": 0.6, + "learning_rate": 1.9857029388403494e-05, + "loss": 5.217, + "step": 500 + }, + { + "epoch": 0.61, + "learning_rate": 2.0254169976171565e-05, + "loss": 5.2192, + "step": 510 + }, + { + "epoch": 0.62, + "learning_rate": 2.0651310563939636e-05, + "loss": 5.1772, + "step": 520 + }, + { + "epoch": 0.63, + "learning_rate": 2.1048451151707703e-05, + "loss": 5.1665, + "step": 530 + }, + { + "epoch": 0.64, + "learning_rate": 2.1445591739475777e-05, + "loss": 5.123, + "step": 540 + }, + { + "epoch": 0.66, + "learning_rate": 2.1842732327243845e-05, + "loss": 5.1441, + "step": 550 + }, + { + "epoch": 0.67, + "learning_rate": 2.2239872915011916e-05, + "loss": 5.1312, + "step": 560 + }, + { + "epoch": 0.68, + "learning_rate": 2.2637013502779986e-05, + "loss": 5.0611, + "step": 570 + }, + { + "epoch": 0.69, + "learning_rate": 2.3034154090548054e-05, + "loss": 5.0637, + "step": 580 + }, + { + "epoch": 0.7, + "learning_rate": 2.3431294678316128e-05, + "loss": 5.0366, + "step": 590 + }, + { + "epoch": 0.71, + "learning_rate": 2.3828435266084195e-05, + "loss": 5.0064, + "step": 600 + }, + { + "epoch": 0.73, + "learning_rate": 2.4225575853852263e-05, + "loss": 4.9881, + "step": 610 + }, + { + "epoch": 0.74, + "learning_rate": 2.4622716441620337e-05, + "loss": 4.9946, + "step": 620 + }, + { + "epoch": 0.75, + "learning_rate": 2.5019857029388404e-05, + "loss": 4.9487, + "step": 630 + }, + { + "epoch": 0.76, + "learning_rate": 2.5416997617156475e-05, + "loss": 4.9289, + "step": 640 + }, + { + "epoch": 0.77, + "learning_rate": 2.5814138204924542e-05, + "loss": 4.8561, + "step": 650 + }, + { + "epoch": 0.79, + "learning_rate": 2.6211278792692613e-05, + "loss": 4.9333, + "step": 660 + }, + { + "epoch": 0.8, + "learning_rate": 2.6608419380460687e-05, + "loss": 4.8653, + "step": 670 + }, + { + "epoch": 0.81, + "learning_rate": 2.7005559968228755e-05, + "loss": 4.8375, + "step": 680 + }, + { + "epoch": 0.82, + "learning_rate": 2.7402700555996825e-05, + "loss": 4.848, + "step": 690 + }, + { + "epoch": 0.83, + "learning_rate": 2.7799841143764893e-05, + "loss": 4.7894, + "step": 700 + }, + { + "epoch": 0.85, + "learning_rate": 2.8196981731532964e-05, + "loss": 4.7765, + "step": 710 + }, + { + "epoch": 0.86, + "learning_rate": 2.8594122319301038e-05, + "loss": 4.7187, + "step": 720 + }, + { + "epoch": 0.87, + "learning_rate": 2.8991262907069105e-05, + "loss": 4.7285, + "step": 730 + }, + { + "epoch": 0.88, + "learning_rate": 2.9388403494837176e-05, + "loss": 4.7499, + "step": 740 + }, + { + "epoch": 0.89, + "learning_rate": 2.9785544082605243e-05, + "loss": 4.7581, + "step": 750 + }, + { + "epoch": 0.91, + "learning_rate": 3.0182684670373314e-05, + "loss": 4.712, + "step": 760 + }, + { + "epoch": 0.92, + "learning_rate": 3.057982525814138e-05, + "loss": 4.747, + "step": 770 + }, + { + "epoch": 0.93, + "learning_rate": 3.097696584590946e-05, + "loss": 4.6972, + "step": 780 + }, + { + "epoch": 0.94, + "learning_rate": 3.1374106433677526e-05, + "loss": 4.7124, + "step": 790 + }, + { + "epoch": 0.95, + "learning_rate": 3.1771247021445594e-05, + "loss": 4.6658, + "step": 800 + }, + { + "epoch": 0.97, + "learning_rate": 3.216838760921366e-05, + "loss": 4.6369, + "step": 810 + }, + { + "epoch": 0.98, + "learning_rate": 3.256552819698173e-05, + "loss": 4.6082, + "step": 820 + }, + { + "epoch": 0.99, + "learning_rate": 3.29626687847498e-05, + "loss": 4.6105, + "step": 830 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.10967525696409951, + "eval_f1": 0.04032957193561421, + "eval_loss": 4.524773597717285, + "eval_precision": 0.05793827286588486, + "eval_recall": 0.10967525696409951, + "eval_runtime": 98.7079, + "eval_samples_per_second": 272.035, + "eval_steps_per_second": 4.255, + "step": 839 + }, + { + "epoch": 1.0, + "learning_rate": 3.335980937251788e-05, + "loss": 4.7548, + "step": 840 + }, + { + "epoch": 1.01, + "learning_rate": 3.3756949960285944e-05, + "loss": 4.5241, + "step": 850 + }, + { + "epoch": 1.03, + "learning_rate": 3.415409054805401e-05, + "loss": 4.5317, + "step": 860 + }, + { + "epoch": 1.04, + "learning_rate": 3.455123113582208e-05, + "loss": 4.5292, + "step": 870 + }, + { + "epoch": 1.05, + "learning_rate": 3.494837172359015e-05, + "loss": 4.4944, + "step": 880 + }, + { + "epoch": 1.06, + "learning_rate": 3.534551231135822e-05, + "loss": 4.4926, + "step": 890 + }, + { + "epoch": 1.07, + "learning_rate": 3.5742652899126295e-05, + "loss": 4.4883, + "step": 900 + }, + { + "epoch": 1.08, + "learning_rate": 3.613979348689436e-05, + "loss": 4.4853, + "step": 910 + }, + { + "epoch": 1.1, + "learning_rate": 3.653693407466243e-05, + "loss": 4.4041, + "step": 920 + }, + { + "epoch": 1.11, + "learning_rate": 3.6934074662430504e-05, + "loss": 4.4882, + "step": 930 + }, + { + "epoch": 1.12, + "learning_rate": 3.733121525019857e-05, + "loss": 4.3713, + "step": 940 + }, + { + "epoch": 1.13, + "learning_rate": 3.7728355837966645e-05, + "loss": 4.4213, + "step": 950 + }, + { + "epoch": 1.14, + "learning_rate": 3.812549642573471e-05, + "loss": 4.4329, + "step": 960 + }, + { + "epoch": 1.16, + "learning_rate": 3.852263701350278e-05, + "loss": 4.3754, + "step": 970 + }, + { + "epoch": 1.17, + "learning_rate": 3.8919777601270854e-05, + "loss": 4.3711, + "step": 980 + }, + { + "epoch": 1.18, + "learning_rate": 3.931691818903892e-05, + "loss": 4.3894, + "step": 990 + }, + { + "epoch": 1.19, + "learning_rate": 3.971405877680699e-05, + "loss": 4.3531, + "step": 1000 + }, + { + "epoch": 1.2, + "learning_rate": 4.011119936457506e-05, + "loss": 4.3371, + "step": 1010 + }, + { + "epoch": 1.22, + "learning_rate": 4.050833995234313e-05, + "loss": 4.26, + "step": 1020 + }, + { + "epoch": 1.23, + "learning_rate": 4.0905480540111204e-05, + "loss": 4.3503, + "step": 1030 + }, + { + "epoch": 1.24, + "learning_rate": 4.130262112787927e-05, + "loss": 4.254, + "step": 1040 + }, + { + "epoch": 1.25, + "learning_rate": 4.169976171564734e-05, + "loss": 4.309, + "step": 1050 + }, + { + "epoch": 1.26, + "learning_rate": 4.209690230341541e-05, + "loss": 4.2973, + "step": 1060 + }, + { + "epoch": 1.28, + "learning_rate": 4.249404289118348e-05, + "loss": 4.274, + "step": 1070 + }, + { + "epoch": 1.29, + "learning_rate": 4.2891183478951555e-05, + "loss": 4.2529, + "step": 1080 + }, + { + "epoch": 1.3, + "learning_rate": 4.328832406671962e-05, + "loss": 4.1877, + "step": 1090 + }, + { + "epoch": 1.31, + "learning_rate": 4.368546465448769e-05, + "loss": 4.2293, + "step": 1100 + }, + { + "epoch": 1.32, + "learning_rate": 4.408260524225576e-05, + "loss": 4.1833, + "step": 1110 + }, + { + "epoch": 1.33, + "learning_rate": 4.447974583002383e-05, + "loss": 4.1317, + "step": 1120 + }, + { + "epoch": 1.35, + "learning_rate": 4.4876886417791905e-05, + "loss": 4.1578, + "step": 1130 + }, + { + "epoch": 1.36, + "learning_rate": 4.527402700555997e-05, + "loss": 4.1565, + "step": 1140 + }, + { + "epoch": 1.37, + "learning_rate": 4.567116759332804e-05, + "loss": 4.1517, + "step": 1150 + }, + { + "epoch": 1.38, + "learning_rate": 4.606830818109611e-05, + "loss": 4.1748, + "step": 1160 + }, + { + "epoch": 1.39, + "learning_rate": 4.6465448768864175e-05, + "loss": 4.118, + "step": 1170 + }, + { + "epoch": 1.41, + "learning_rate": 4.6862589356632256e-05, + "loss": 4.1787, + "step": 1180 + }, + { + "epoch": 1.42, + "learning_rate": 4.725972994440032e-05, + "loss": 4.0838, + "step": 1190 + }, + { + "epoch": 1.43, + "learning_rate": 4.765687053216839e-05, + "loss": 4.1386, + "step": 1200 + }, + { + "epoch": 1.44, + "learning_rate": 4.805401111993646e-05, + "loss": 4.032, + "step": 1210 + }, + { + "epoch": 1.45, + "learning_rate": 4.8451151707704525e-05, + "loss": 4.0394, + "step": 1220 + }, + { + "epoch": 1.47, + "learning_rate": 4.88482922954726e-05, + "loss": 4.0293, + "step": 1230 + }, + { + "epoch": 1.48, + "learning_rate": 4.9245432883240674e-05, + "loss": 3.9968, + "step": 1240 + }, + { + "epoch": 1.49, + "learning_rate": 4.964257347100874e-05, + "loss": 4.0275, + "step": 1250 + }, + { + "epoch": 1.5, + "learning_rate": 4.99955853787745e-05, + "loss": 3.9625, + "step": 1260 + }, + { + "epoch": 1.51, + "learning_rate": 4.995143916651951e-05, + "loss": 4.0545, + "step": 1270 + }, + { + "epoch": 1.53, + "learning_rate": 4.990729295426453e-05, + "loss": 3.9338, + "step": 1280 + }, + { + "epoch": 1.54, + "learning_rate": 4.9863146742009535e-05, + "loss": 3.9301, + "step": 1290 + }, + { + "epoch": 1.55, + "learning_rate": 4.981900052975455e-05, + "loss": 3.953, + "step": 1300 + }, + { + "epoch": 1.56, + "learning_rate": 4.977485431749956e-05, + "loss": 3.9407, + "step": 1310 + }, + { + "epoch": 1.57, + "learning_rate": 4.973070810524457e-05, + "loss": 3.9255, + "step": 1320 + }, + { + "epoch": 1.59, + "learning_rate": 4.968656189298959e-05, + "loss": 3.9394, + "step": 1330 + }, + { + "epoch": 1.6, + "learning_rate": 4.9642415680734595e-05, + "loss": 3.9143, + "step": 1340 + }, + { + "epoch": 1.61, + "learning_rate": 4.959826946847961e-05, + "loss": 3.8728, + "step": 1350 + }, + { + "epoch": 1.62, + "learning_rate": 4.955412325622462e-05, + "loss": 3.8672, + "step": 1360 + }, + { + "epoch": 1.63, + "learning_rate": 4.950997704396963e-05, + "loss": 3.8059, + "step": 1370 + }, + { + "epoch": 1.64, + "learning_rate": 4.9465830831714646e-05, + "loss": 3.8209, + "step": 1380 + }, + { + "epoch": 1.66, + "learning_rate": 4.9421684619459654e-05, + "loss": 3.7519, + "step": 1390 + }, + { + "epoch": 1.67, + "learning_rate": 4.937753840720466e-05, + "loss": 3.7815, + "step": 1400 + }, + { + "epoch": 1.68, + "learning_rate": 4.933339219494968e-05, + "loss": 3.8434, + "step": 1410 + }, + { + "epoch": 1.69, + "learning_rate": 4.9289245982694685e-05, + "loss": 3.7892, + "step": 1420 + }, + { + "epoch": 1.7, + "learning_rate": 4.92450997704397e-05, + "loss": 3.7054, + "step": 1430 + }, + { + "epoch": 1.72, + "learning_rate": 4.920095355818471e-05, + "loss": 3.8061, + "step": 1440 + }, + { + "epoch": 1.73, + "learning_rate": 4.915680734592972e-05, + "loss": 3.7292, + "step": 1450 + }, + { + "epoch": 1.74, + "learning_rate": 4.911266113367473e-05, + "loss": 3.7181, + "step": 1460 + }, + { + "epoch": 1.75, + "learning_rate": 4.9068514921419744e-05, + "loss": 3.6944, + "step": 1470 + }, + { + "epoch": 1.76, + "learning_rate": 4.902436870916476e-05, + "loss": 3.7, + "step": 1480 + }, + { + "epoch": 1.78, + "learning_rate": 4.898022249690977e-05, + "loss": 3.6496, + "step": 1490 + }, + { + "epoch": 1.79, + "learning_rate": 4.893607628465478e-05, + "loss": 3.6937, + "step": 1500 + }, + { + "epoch": 1.8, + "learning_rate": 4.889193007239979e-05, + "loss": 3.6367, + "step": 1510 + }, + { + "epoch": 1.81, + "learning_rate": 4.8847783860144804e-05, + "loss": 3.656, + "step": 1520 + }, + { + "epoch": 1.82, + "learning_rate": 4.880363764788982e-05, + "loss": 3.6302, + "step": 1530 + }, + { + "epoch": 1.84, + "learning_rate": 4.875949143563483e-05, + "loss": 3.6144, + "step": 1540 + }, + { + "epoch": 1.85, + "learning_rate": 4.871534522337984e-05, + "loss": 3.6227, + "step": 1550 + }, + { + "epoch": 1.86, + "learning_rate": 4.867119901112485e-05, + "loss": 3.6088, + "step": 1560 + }, + { + "epoch": 1.87, + "learning_rate": 4.862705279886986e-05, + "loss": 3.5556, + "step": 1570 + }, + { + "epoch": 1.88, + "learning_rate": 4.8582906586614865e-05, + "loss": 3.5657, + "step": 1580 + }, + { + "epoch": 1.89, + "learning_rate": 4.853876037435988e-05, + "loss": 3.5833, + "step": 1590 + }, + { + "epoch": 1.91, + "learning_rate": 4.8494614162104894e-05, + "loss": 3.517, + "step": 1600 + }, + { + "epoch": 1.92, + "learning_rate": 4.84504679498499e-05, + "loss": 3.4891, + "step": 1610 + }, + { + "epoch": 1.93, + "learning_rate": 4.840632173759492e-05, + "loss": 3.4938, + "step": 1620 + }, + { + "epoch": 1.94, + "learning_rate": 4.8362175525339925e-05, + "loss": 3.4694, + "step": 1630 + }, + { + "epoch": 1.95, + "learning_rate": 4.831802931308494e-05, + "loss": 3.5104, + "step": 1640 + }, + { + "epoch": 1.97, + "learning_rate": 4.8273883100829954e-05, + "loss": 3.5442, + "step": 1650 + }, + { + "epoch": 1.98, + "learning_rate": 4.822973688857496e-05, + "loss": 3.5078, + "step": 1660 + }, + { + "epoch": 1.99, + "learning_rate": 4.8185590676319976e-05, + "loss": 3.4711, + "step": 1670 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.2999776552956949, + "eval_f1": 0.20968639879082807, + "eval_loss": 3.316246509552002, + "eval_precision": 0.23018076164128848, + "eval_recall": 0.2999776552956949, + "eval_runtime": 98.3168, + "eval_samples_per_second": 273.117, + "eval_steps_per_second": 4.272, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 4.8141444464064984e-05, + "loss": 3.5429, + "step": 1680 + }, + { + "epoch": 2.01, + "learning_rate": 4.809729825181e-05, + "loss": 3.4651, + "step": 1690 + }, + { + "epoch": 2.03, + "learning_rate": 4.8053152039555014e-05, + "loss": 3.3769, + "step": 1700 + }, + { + "epoch": 2.04, + "learning_rate": 4.800900582730002e-05, + "loss": 3.3783, + "step": 1710 + }, + { + "epoch": 2.05, + "learning_rate": 4.796485961504503e-05, + "loss": 3.4053, + "step": 1720 + }, + { + "epoch": 2.06, + "learning_rate": 4.7920713402790044e-05, + "loss": 3.3744, + "step": 1730 + }, + { + "epoch": 2.07, + "learning_rate": 4.787656719053505e-05, + "loss": 3.3675, + "step": 1740 + }, + { + "epoch": 2.09, + "learning_rate": 4.7832420978280066e-05, + "loss": 3.3607, + "step": 1750 + }, + { + "epoch": 2.1, + "learning_rate": 4.7788274766025074e-05, + "loss": 3.4022, + "step": 1760 + }, + { + "epoch": 2.11, + "learning_rate": 4.774412855377009e-05, + "loss": 3.2748, + "step": 1770 + }, + { + "epoch": 2.12, + "learning_rate": 4.76999823415151e-05, + "loss": 3.3338, + "step": 1780 + }, + { + "epoch": 2.13, + "learning_rate": 4.765583612926011e-05, + "loss": 3.3384, + "step": 1790 + }, + { + "epoch": 2.15, + "learning_rate": 4.7611689917005126e-05, + "loss": 3.2855, + "step": 1800 + }, + { + "epoch": 2.16, + "learning_rate": 4.7567543704750134e-05, + "loss": 3.2991, + "step": 1810 + }, + { + "epoch": 2.17, + "learning_rate": 4.752339749249515e-05, + "loss": 3.3359, + "step": 1820 + }, + { + "epoch": 2.18, + "learning_rate": 4.7479251280240157e-05, + "loss": 3.2541, + "step": 1830 + }, + { + "epoch": 2.19, + "learning_rate": 4.743510506798517e-05, + "loss": 3.2962, + "step": 1840 + }, + { + "epoch": 2.2, + "learning_rate": 4.7390958855730186e-05, + "loss": 3.2443, + "step": 1850 + }, + { + "epoch": 2.22, + "learning_rate": 4.7346812643475194e-05, + "loss": 3.2785, + "step": 1860 + }, + { + "epoch": 2.23, + "learning_rate": 4.730266643122021e-05, + "loss": 3.1409, + "step": 1870 + }, + { + "epoch": 2.24, + "learning_rate": 4.7258520218965216e-05, + "loss": 3.2309, + "step": 1880 + }, + { + "epoch": 2.25, + "learning_rate": 4.7214374006710224e-05, + "loss": 3.1422, + "step": 1890 + }, + { + "epoch": 2.26, + "learning_rate": 4.717022779445524e-05, + "loss": 3.1799, + "step": 1900 + }, + { + "epoch": 2.28, + "learning_rate": 4.712608158220025e-05, + "loss": 3.163, + "step": 1910 + }, + { + "epoch": 2.29, + "learning_rate": 4.708193536994526e-05, + "loss": 3.1483, + "step": 1920 + }, + { + "epoch": 2.3, + "learning_rate": 4.703778915769027e-05, + "loss": 3.2082, + "step": 1930 + }, + { + "epoch": 2.31, + "learning_rate": 4.6993642945435284e-05, + "loss": 3.1304, + "step": 1940 + }, + { + "epoch": 2.32, + "learning_rate": 4.694949673318029e-05, + "loss": 3.1022, + "step": 1950 + }, + { + "epoch": 2.34, + "learning_rate": 4.6905350520925306e-05, + "loss": 3.174, + "step": 1960 + }, + { + "epoch": 2.35, + "learning_rate": 4.686120430867032e-05, + "loss": 3.0886, + "step": 1970 + }, + { + "epoch": 2.36, + "learning_rate": 4.681705809641533e-05, + "loss": 3.0919, + "step": 1980 + }, + { + "epoch": 2.37, + "learning_rate": 4.6772911884160344e-05, + "loss": 3.1014, + "step": 1990 + }, + { + "epoch": 2.38, + "learning_rate": 4.672876567190535e-05, + "loss": 3.0974, + "step": 2000 + }, + { + "epoch": 2.4, + "learning_rate": 4.6684619459650366e-05, + "loss": 3.0405, + "step": 2010 + }, + { + "epoch": 2.41, + "learning_rate": 4.664047324739538e-05, + "loss": 3.1053, + "step": 2020 + }, + { + "epoch": 2.42, + "learning_rate": 4.659632703514039e-05, + "loss": 3.0749, + "step": 2030 + }, + { + "epoch": 2.43, + "learning_rate": 4.6552180822885396e-05, + "loss": 3.0606, + "step": 2040 + }, + { + "epoch": 2.44, + "learning_rate": 4.6508034610630404e-05, + "loss": 3.0498, + "step": 2050 + }, + { + "epoch": 2.46, + "learning_rate": 4.646388839837542e-05, + "loss": 3.0625, + "step": 2060 + }, + { + "epoch": 2.47, + "learning_rate": 4.6419742186120434e-05, + "loss": 3.0172, + "step": 2070 + }, + { + "epoch": 2.48, + "learning_rate": 4.637559597386544e-05, + "loss": 2.9855, + "step": 2080 + }, + { + "epoch": 2.49, + "learning_rate": 4.6331449761610456e-05, + "loss": 2.9864, + "step": 2090 + }, + { + "epoch": 2.5, + "learning_rate": 4.6287303549355464e-05, + "loss": 2.9722, + "step": 2100 + }, + { + "epoch": 2.51, + "learning_rate": 4.624315733710048e-05, + "loss": 2.89, + "step": 2110 + }, + { + "epoch": 2.53, + "learning_rate": 4.619901112484549e-05, + "loss": 3.0054, + "step": 2120 + }, + { + "epoch": 2.54, + "learning_rate": 4.61548649125905e-05, + "loss": 2.9484, + "step": 2130 + }, + { + "epoch": 2.55, + "learning_rate": 4.6110718700335516e-05, + "loss": 2.8977, + "step": 2140 + }, + { + "epoch": 2.56, + "learning_rate": 4.6066572488080524e-05, + "loss": 2.9717, + "step": 2150 + }, + { + "epoch": 2.57, + "learning_rate": 4.602242627582554e-05, + "loss": 2.9702, + "step": 2160 + }, + { + "epoch": 2.59, + "learning_rate": 4.597828006357055e-05, + "loss": 2.9394, + "step": 2170 + }, + { + "epoch": 2.6, + "learning_rate": 4.593413385131556e-05, + "loss": 2.9298, + "step": 2180 + }, + { + "epoch": 2.61, + "learning_rate": 4.588998763906057e-05, + "loss": 2.863, + "step": 2190 + }, + { + "epoch": 2.62, + "learning_rate": 4.5845841426805583e-05, + "loss": 2.8856, + "step": 2200 + }, + { + "epoch": 2.63, + "learning_rate": 4.580169521455059e-05, + "loss": 2.8582, + "step": 2210 + }, + { + "epoch": 2.65, + "learning_rate": 4.5757549002295606e-05, + "loss": 2.9151, + "step": 2220 + }, + { + "epoch": 2.66, + "learning_rate": 4.5713402790040614e-05, + "loss": 2.8773, + "step": 2230 + }, + { + "epoch": 2.67, + "learning_rate": 4.566925657778563e-05, + "loss": 2.8786, + "step": 2240 + }, + { + "epoch": 2.68, + "learning_rate": 4.5625110365530636e-05, + "loss": 2.8755, + "step": 2250 + }, + { + "epoch": 2.69, + "learning_rate": 4.558096415327565e-05, + "loss": 2.854, + "step": 2260 + }, + { + "epoch": 2.71, + "learning_rate": 4.5536817941020666e-05, + "loss": 2.8562, + "step": 2270 + }, + { + "epoch": 2.72, + "learning_rate": 4.5492671728765674e-05, + "loss": 2.8179, + "step": 2280 + }, + { + "epoch": 2.73, + "learning_rate": 4.544852551651069e-05, + "loss": 2.8217, + "step": 2290 + }, + { + "epoch": 2.74, + "learning_rate": 4.5404379304255696e-05, + "loss": 2.7928, + "step": 2300 + }, + { + "epoch": 2.75, + "learning_rate": 4.536023309200071e-05, + "loss": 2.804, + "step": 2310 + }, + { + "epoch": 2.76, + "learning_rate": 4.5316086879745725e-05, + "loss": 2.7497, + "step": 2320 + }, + { + "epoch": 2.78, + "learning_rate": 4.527194066749073e-05, + "loss": 2.7463, + "step": 2330 + }, + { + "epoch": 2.79, + "learning_rate": 4.522779445523575e-05, + "loss": 2.7182, + "step": 2340 + }, + { + "epoch": 2.8, + "learning_rate": 4.5183648242980756e-05, + "loss": 2.7447, + "step": 2350 + }, + { + "epoch": 2.81, + "learning_rate": 4.5139502030725764e-05, + "loss": 2.7624, + "step": 2360 + }, + { + "epoch": 2.82, + "learning_rate": 4.509535581847077e-05, + "loss": 2.7295, + "step": 2370 + }, + { + "epoch": 2.84, + "learning_rate": 4.5051209606215786e-05, + "loss": 2.69, + "step": 2380 + }, + { + "epoch": 2.85, + "learning_rate": 4.50070633939608e-05, + "loss": 2.6543, + "step": 2390 + }, + { + "epoch": 2.86, + "learning_rate": 4.496291718170581e-05, + "loss": 2.7529, + "step": 2400 + }, + { + "epoch": 2.87, + "learning_rate": 4.491877096945082e-05, + "loss": 2.74, + "step": 2410 + }, + { + "epoch": 2.88, + "learning_rate": 4.487462475719583e-05, + "loss": 2.6917, + "step": 2420 + }, + { + "epoch": 2.9, + "learning_rate": 4.4830478544940846e-05, + "loss": 2.6828, + "step": 2430 + }, + { + "epoch": 2.91, + "learning_rate": 4.478633233268586e-05, + "loss": 2.6397, + "step": 2440 + }, + { + "epoch": 2.92, + "learning_rate": 4.474218612043087e-05, + "loss": 2.7281, + "step": 2450 + }, + { + "epoch": 2.93, + "learning_rate": 4.469803990817588e-05, + "loss": 2.656, + "step": 2460 + }, + { + "epoch": 2.94, + "learning_rate": 4.465389369592089e-05, + "loss": 2.6689, + "step": 2470 + }, + { + "epoch": 2.96, + "learning_rate": 4.4609747483665905e-05, + "loss": 2.6667, + "step": 2480 + }, + { + "epoch": 2.97, + "learning_rate": 4.456560127141092e-05, + "loss": 2.6502, + "step": 2490 + }, + { + "epoch": 2.98, + "learning_rate": 4.452145505915593e-05, + "loss": 2.6483, + "step": 2500 + }, + { + "epoch": 2.99, + "learning_rate": 4.4477308846900936e-05, + "loss": 2.6202, + "step": 2510 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.4708774020557128, + "eval_f1": 0.3938664278992793, + "eval_loss": 2.444504499435425, + "eval_precision": 0.41196094659916344, + "eval_recall": 0.4708774020557128, + "eval_runtime": 98.7509, + "eval_samples_per_second": 271.916, + "eval_steps_per_second": 4.253, + "step": 2517 + }, + { + "epoch": 3.0, + "learning_rate": 4.443316263464595e-05, + "loss": 2.6666, + "step": 2520 + }, + { + "epoch": 3.02, + "learning_rate": 4.438901642239096e-05, + "loss": 2.522, + "step": 2530 + }, + { + "epoch": 3.03, + "learning_rate": 4.434487021013597e-05, + "loss": 2.5387, + "step": 2540 + }, + { + "epoch": 3.04, + "learning_rate": 4.430072399788098e-05, + "loss": 2.588, + "step": 2550 + }, + { + "epoch": 3.05, + "learning_rate": 4.4256577785625996e-05, + "loss": 2.5588, + "step": 2560 + }, + { + "epoch": 3.06, + "learning_rate": 4.4212431573371003e-05, + "loss": 2.5374, + "step": 2570 + }, + { + "epoch": 3.08, + "learning_rate": 4.416828536111602e-05, + "loss": 2.6003, + "step": 2580 + }, + { + "epoch": 3.09, + "learning_rate": 4.412413914886103e-05, + "loss": 2.4761, + "step": 2590 + }, + { + "epoch": 3.1, + "learning_rate": 4.407999293660604e-05, + "loss": 2.5166, + "step": 2600 + }, + { + "epoch": 3.11, + "learning_rate": 4.4035846724351055e-05, + "loss": 2.5305, + "step": 2610 + }, + { + "epoch": 3.12, + "learning_rate": 4.399170051209606e-05, + "loss": 2.5378, + "step": 2620 + }, + { + "epoch": 3.13, + "learning_rate": 4.394755429984108e-05, + "loss": 2.5443, + "step": 2630 + }, + { + "epoch": 3.15, + "learning_rate": 4.390340808758609e-05, + "loss": 2.5134, + "step": 2640 + }, + { + "epoch": 3.16, + "learning_rate": 4.38592618753311e-05, + "loss": 2.5105, + "step": 2650 + }, + { + "epoch": 3.17, + "learning_rate": 4.3815115663076115e-05, + "loss": 2.5155, + "step": 2660 + }, + { + "epoch": 3.18, + "learning_rate": 4.377096945082112e-05, + "loss": 2.5207, + "step": 2670 + }, + { + "epoch": 3.19, + "learning_rate": 4.372682323856613e-05, + "loss": 2.4432, + "step": 2680 + }, + { + "epoch": 3.21, + "learning_rate": 4.3682677026311145e-05, + "loss": 2.4199, + "step": 2690 + }, + { + "epoch": 3.22, + "learning_rate": 4.363853081405615e-05, + "loss": 2.4083, + "step": 2700 + }, + { + "epoch": 3.23, + "learning_rate": 4.359438460180117e-05, + "loss": 2.5031, + "step": 2710 + }, + { + "epoch": 3.24, + "learning_rate": 4.3550238389546176e-05, + "loss": 2.421, + "step": 2720 + }, + { + "epoch": 3.25, + "learning_rate": 4.350609217729119e-05, + "loss": 2.4005, + "step": 2730 + }, + { + "epoch": 3.27, + "learning_rate": 4.34619459650362e-05, + "loss": 2.4384, + "step": 2740 + }, + { + "epoch": 3.28, + "learning_rate": 4.341779975278121e-05, + "loss": 2.3936, + "step": 2750 + }, + { + "epoch": 3.29, + "learning_rate": 4.337365354052623e-05, + "loss": 2.3977, + "step": 2760 + }, + { + "epoch": 3.3, + "learning_rate": 4.3329507328271235e-05, + "loss": 2.4, + "step": 2770 + }, + { + "epoch": 3.31, + "learning_rate": 4.328536111601625e-05, + "loss": 2.3995, + "step": 2780 + }, + { + "epoch": 3.33, + "learning_rate": 4.324121490376126e-05, + "loss": 2.3718, + "step": 2790 + }, + { + "epoch": 3.34, + "learning_rate": 4.319706869150627e-05, + "loss": 2.3718, + "step": 2800 + }, + { + "epoch": 3.35, + "learning_rate": 4.315292247925129e-05, + "loss": 2.3832, + "step": 2810 + }, + { + "epoch": 3.36, + "learning_rate": 4.3108776266996295e-05, + "loss": 2.4285, + "step": 2820 + }, + { + "epoch": 3.37, + "learning_rate": 4.30646300547413e-05, + "loss": 2.3217, + "step": 2830 + }, + { + "epoch": 3.38, + "learning_rate": 4.302048384248632e-05, + "loss": 2.3315, + "step": 2840 + }, + { + "epoch": 3.4, + "learning_rate": 4.2976337630231326e-05, + "loss": 2.3445, + "step": 2850 + }, + { + "epoch": 3.41, + "learning_rate": 4.293219141797634e-05, + "loss": 2.34, + "step": 2860 + }, + { + "epoch": 3.42, + "learning_rate": 4.288804520572135e-05, + "loss": 2.3481, + "step": 2870 + }, + { + "epoch": 3.43, + "learning_rate": 4.284389899346636e-05, + "loss": 2.312, + "step": 2880 + }, + { + "epoch": 3.44, + "learning_rate": 4.279975278121137e-05, + "loss": 2.306, + "step": 2890 + }, + { + "epoch": 3.46, + "learning_rate": 4.2755606568956385e-05, + "loss": 2.3284, + "step": 2900 + }, + { + "epoch": 3.47, + "learning_rate": 4.27114603567014e-05, + "loss": 2.3372, + "step": 2910 + }, + { + "epoch": 3.48, + "learning_rate": 4.266731414444641e-05, + "loss": 2.2843, + "step": 2920 + }, + { + "epoch": 3.49, + "learning_rate": 4.262316793219142e-05, + "loss": 2.2336, + "step": 2930 + }, + { + "epoch": 3.5, + "learning_rate": 4.257902171993643e-05, + "loss": 2.2738, + "step": 2940 + }, + { + "epoch": 3.52, + "learning_rate": 4.2534875507681445e-05, + "loss": 2.2537, + "step": 2950 + }, + { + "epoch": 3.53, + "learning_rate": 4.249072929542646e-05, + "loss": 2.3713, + "step": 2960 + }, + { + "epoch": 3.54, + "learning_rate": 4.244658308317147e-05, + "loss": 2.3185, + "step": 2970 + }, + { + "epoch": 3.55, + "learning_rate": 4.240243687091648e-05, + "loss": 2.2767, + "step": 2980 + }, + { + "epoch": 3.56, + "learning_rate": 4.235829065866149e-05, + "loss": 2.2159, + "step": 2990 + }, + { + "epoch": 3.58, + "learning_rate": 4.23141444464065e-05, + "loss": 2.2576, + "step": 3000 + }, + { + "epoch": 3.59, + "learning_rate": 4.226999823415151e-05, + "loss": 2.2698, + "step": 3010 + }, + { + "epoch": 3.6, + "learning_rate": 4.222585202189652e-05, + "loss": 2.2491, + "step": 3020 + }, + { + "epoch": 3.61, + "learning_rate": 4.2181705809641535e-05, + "loss": 2.2619, + "step": 3030 + }, + { + "epoch": 3.62, + "learning_rate": 4.213755959738654e-05, + "loss": 2.2013, + "step": 3040 + }, + { + "epoch": 3.64, + "learning_rate": 4.209341338513156e-05, + "loss": 2.2161, + "step": 3050 + }, + { + "epoch": 3.65, + "learning_rate": 4.204926717287657e-05, + "loss": 2.2614, + "step": 3060 + }, + { + "epoch": 3.66, + "learning_rate": 4.200512096062158e-05, + "loss": 2.1594, + "step": 3070 + }, + { + "epoch": 3.67, + "learning_rate": 4.1960974748366595e-05, + "loss": 2.2023, + "step": 3080 + }, + { + "epoch": 3.68, + "learning_rate": 4.19168285361116e-05, + "loss": 2.2037, + "step": 3090 + }, + { + "epoch": 3.69, + "learning_rate": 4.187268232385662e-05, + "loss": 2.2178, + "step": 3100 + }, + { + "epoch": 3.71, + "learning_rate": 4.1828536111601625e-05, + "loss": 2.1852, + "step": 3110 + }, + { + "epoch": 3.72, + "learning_rate": 4.178438989934664e-05, + "loss": 2.118, + "step": 3120 + }, + { + "epoch": 3.73, + "learning_rate": 4.1740243687091654e-05, + "loss": 2.1234, + "step": 3130 + }, + { + "epoch": 3.74, + "learning_rate": 4.169609747483666e-05, + "loss": 2.1954, + "step": 3140 + }, + { + "epoch": 3.75, + "learning_rate": 4.165195126258167e-05, + "loss": 2.2305, + "step": 3150 + }, + { + "epoch": 3.77, + "learning_rate": 4.160780505032668e-05, + "loss": 2.1876, + "step": 3160 + }, + { + "epoch": 3.78, + "learning_rate": 4.156365883807169e-05, + "loss": 2.208, + "step": 3170 + }, + { + "epoch": 3.79, + "learning_rate": 4.151951262581671e-05, + "loss": 2.1111, + "step": 3180 + }, + { + "epoch": 3.8, + "learning_rate": 4.1475366413561715e-05, + "loss": 2.1647, + "step": 3190 + }, + { + "epoch": 3.81, + "learning_rate": 4.143122020130673e-05, + "loss": 2.1881, + "step": 3200 + }, + { + "epoch": 3.83, + "learning_rate": 4.138707398905174e-05, + "loss": 2.1371, + "step": 3210 + }, + { + "epoch": 3.84, + "learning_rate": 4.134292777679675e-05, + "loss": 2.1696, + "step": 3220 + }, + { + "epoch": 3.85, + "learning_rate": 4.129878156454177e-05, + "loss": 2.1368, + "step": 3230 + }, + { + "epoch": 3.86, + "learning_rate": 4.1254635352286775e-05, + "loss": 2.0675, + "step": 3240 + }, + { + "epoch": 3.87, + "learning_rate": 4.121048914003179e-05, + "loss": 2.1286, + "step": 3250 + }, + { + "epoch": 3.89, + "learning_rate": 4.11663429277768e-05, + "loss": 2.0987, + "step": 3260 + }, + { + "epoch": 3.9, + "learning_rate": 4.112219671552181e-05, + "loss": 2.1006, + "step": 3270 + }, + { + "epoch": 3.91, + "learning_rate": 4.107805050326683e-05, + "loss": 2.0706, + "step": 3280 + }, + { + "epoch": 3.92, + "learning_rate": 4.1033904291011835e-05, + "loss": 2.0452, + "step": 3290 + }, + { + "epoch": 3.93, + "learning_rate": 4.098975807875684e-05, + "loss": 2.0613, + "step": 3300 + }, + { + "epoch": 3.94, + "learning_rate": 4.094561186650186e-05, + "loss": 2.0598, + "step": 3310 + }, + { + "epoch": 3.96, + "learning_rate": 4.0901465654246865e-05, + "loss": 2.0953, + "step": 3320 + }, + { + "epoch": 3.97, + "learning_rate": 4.085731944199188e-05, + "loss": 2.1055, + "step": 3330 + }, + { + "epoch": 3.98, + "learning_rate": 4.081317322973689e-05, + "loss": 2.1009, + "step": 3340 + }, + { + "epoch": 3.99, + "learning_rate": 4.07690270174819e-05, + "loss": 2.0614, + "step": 3350 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5741844182928646, + "eval_f1": 0.5167663284927785, + "eval_loss": 1.8839434385299683, + "eval_precision": 0.5388953620960228, + "eval_recall": 0.5741844182928646, + "eval_runtime": 98.9624, + "eval_samples_per_second": 271.335, + "eval_steps_per_second": 4.244, + "step": 3356 + }, + { + "epoch": 4.0, + "learning_rate": 4.072488080522691e-05, + "loss": 2.0478, + "step": 3360 + }, + { + "epoch": 4.02, + "learning_rate": 4.0680734592971925e-05, + "loss": 2.0201, + "step": 3370 + }, + { + "epoch": 4.03, + "learning_rate": 4.063658838071694e-05, + "loss": 2.0014, + "step": 3380 + }, + { + "epoch": 4.04, + "learning_rate": 4.059244216846195e-05, + "loss": 2.0108, + "step": 3390 + }, + { + "epoch": 4.05, + "learning_rate": 4.054829595620696e-05, + "loss": 2.0173, + "step": 3400 + }, + { + "epoch": 4.06, + "learning_rate": 4.050414974395197e-05, + "loss": 1.9934, + "step": 3410 + }, + { + "epoch": 4.08, + "learning_rate": 4.0460003531696984e-05, + "loss": 1.9927, + "step": 3420 + }, + { + "epoch": 4.09, + "learning_rate": 4.0415857319442e-05, + "loss": 2.0354, + "step": 3430 + }, + { + "epoch": 4.1, + "learning_rate": 4.037171110718701e-05, + "loss": 1.9639, + "step": 3440 + }, + { + "epoch": 4.11, + "learning_rate": 4.032756489493202e-05, + "loss": 1.9947, + "step": 3450 + }, + { + "epoch": 4.12, + "learning_rate": 4.028341868267703e-05, + "loss": 1.9709, + "step": 3460 + }, + { + "epoch": 4.14, + "learning_rate": 4.023927247042204e-05, + "loss": 1.9729, + "step": 3470 + }, + { + "epoch": 4.15, + "learning_rate": 4.0195126258167045e-05, + "loss": 1.9609, + "step": 3480 + }, + { + "epoch": 4.16, + "learning_rate": 4.015098004591206e-05, + "loss": 1.9675, + "step": 3490 + }, + { + "epoch": 4.17, + "learning_rate": 4.0106833833657074e-05, + "loss": 1.9618, + "step": 3500 + }, + { + "epoch": 4.18, + "learning_rate": 4.006268762140208e-05, + "loss": 1.9164, + "step": 3510 + }, + { + "epoch": 4.2, + "learning_rate": 4.00185414091471e-05, + "loss": 1.949, + "step": 3520 + }, + { + "epoch": 4.21, + "learning_rate": 3.9974395196892105e-05, + "loss": 1.9324, + "step": 3530 + }, + { + "epoch": 4.22, + "learning_rate": 3.993024898463712e-05, + "loss": 1.9665, + "step": 3540 + }, + { + "epoch": 4.23, + "learning_rate": 3.9886102772382134e-05, + "loss": 2.0005, + "step": 3550 + }, + { + "epoch": 4.24, + "learning_rate": 3.984195656012714e-05, + "loss": 1.9234, + "step": 3560 + }, + { + "epoch": 4.25, + "learning_rate": 3.979781034787216e-05, + "loss": 1.9317, + "step": 3570 + }, + { + "epoch": 4.27, + "learning_rate": 3.9753664135617165e-05, + "loss": 1.9583, + "step": 3580 + }, + { + "epoch": 4.28, + "learning_rate": 3.970951792336218e-05, + "loss": 1.869, + "step": 3590 + }, + { + "epoch": 4.29, + "learning_rate": 3.9665371711107194e-05, + "loss": 1.9407, + "step": 3600 + }, + { + "epoch": 4.3, + "learning_rate": 3.96212254988522e-05, + "loss": 1.8964, + "step": 3610 + }, + { + "epoch": 4.31, + "learning_rate": 3.957707928659721e-05, + "loss": 1.8768, + "step": 3620 + }, + { + "epoch": 4.33, + "learning_rate": 3.9532933074342224e-05, + "loss": 1.915, + "step": 3630 + }, + { + "epoch": 4.34, + "learning_rate": 3.948878686208723e-05, + "loss": 1.8625, + "step": 3640 + }, + { + "epoch": 4.35, + "learning_rate": 3.944464064983225e-05, + "loss": 1.9401, + "step": 3650 + }, + { + "epoch": 4.36, + "learning_rate": 3.9400494437577255e-05, + "loss": 1.9032, + "step": 3660 + }, + { + "epoch": 4.37, + "learning_rate": 3.935634822532227e-05, + "loss": 1.9214, + "step": 3670 + }, + { + "epoch": 4.39, + "learning_rate": 3.931220201306728e-05, + "loss": 1.8298, + "step": 3680 + }, + { + "epoch": 4.4, + "learning_rate": 3.926805580081229e-05, + "loss": 1.9058, + "step": 3690 + }, + { + "epoch": 4.41, + "learning_rate": 3.9223909588557306e-05, + "loss": 1.8939, + "step": 3700 + }, + { + "epoch": 4.42, + "learning_rate": 3.9179763376302314e-05, + "loss": 1.8385, + "step": 3710 + }, + { + "epoch": 4.43, + "learning_rate": 3.913561716404733e-05, + "loss": 1.8823, + "step": 3720 + }, + { + "epoch": 4.45, + "learning_rate": 3.909147095179234e-05, + "loss": 1.8207, + "step": 3730 + }, + { + "epoch": 4.46, + "learning_rate": 3.904732473953735e-05, + "loss": 1.8259, + "step": 3740 + }, + { + "epoch": 4.47, + "learning_rate": 3.9003178527282366e-05, + "loss": 1.8235, + "step": 3750 + }, + { + "epoch": 4.48, + "learning_rate": 3.8959032315027374e-05, + "loss": 1.8281, + "step": 3760 + }, + { + "epoch": 4.49, + "learning_rate": 3.891488610277239e-05, + "loss": 1.8559, + "step": 3770 + }, + { + "epoch": 4.51, + "learning_rate": 3.8870739890517397e-05, + "loss": 1.8934, + "step": 3780 + }, + { + "epoch": 4.52, + "learning_rate": 3.8826593678262404e-05, + "loss": 1.8855, + "step": 3790 + }, + { + "epoch": 4.53, + "learning_rate": 3.878244746600742e-05, + "loss": 1.7935, + "step": 3800 + }, + { + "epoch": 4.54, + "learning_rate": 3.873830125375243e-05, + "loss": 1.8148, + "step": 3810 + }, + { + "epoch": 4.55, + "learning_rate": 3.869415504149744e-05, + "loss": 1.7876, + "step": 3820 + }, + { + "epoch": 4.56, + "learning_rate": 3.865000882924245e-05, + "loss": 1.7597, + "step": 3830 + }, + { + "epoch": 4.58, + "learning_rate": 3.8605862616987464e-05, + "loss": 1.8237, + "step": 3840 + }, + { + "epoch": 4.59, + "learning_rate": 3.856171640473247e-05, + "loss": 1.8599, + "step": 3850 + }, + { + "epoch": 4.6, + "learning_rate": 3.851757019247749e-05, + "loss": 1.7719, + "step": 3860 + }, + { + "epoch": 4.61, + "learning_rate": 3.84734239802225e-05, + "loss": 1.8413, + "step": 3870 + }, + { + "epoch": 4.62, + "learning_rate": 3.842927776796751e-05, + "loss": 1.7742, + "step": 3880 + }, + { + "epoch": 4.64, + "learning_rate": 3.8385131555712524e-05, + "loss": 1.8499, + "step": 3890 + }, + { + "epoch": 4.65, + "learning_rate": 3.834098534345753e-05, + "loss": 1.7576, + "step": 3900 + }, + { + "epoch": 4.66, + "learning_rate": 3.8296839131202546e-05, + "loss": 1.7575, + "step": 3910 + }, + { + "epoch": 4.67, + "learning_rate": 3.825269291894756e-05, + "loss": 1.8016, + "step": 3920 + }, + { + "epoch": 4.68, + "learning_rate": 3.820854670669257e-05, + "loss": 1.7705, + "step": 3930 + }, + { + "epoch": 4.7, + "learning_rate": 3.816440049443758e-05, + "loss": 1.7676, + "step": 3940 + }, + { + "epoch": 4.71, + "learning_rate": 3.812025428218259e-05, + "loss": 1.7615, + "step": 3950 + }, + { + "epoch": 4.72, + "learning_rate": 3.80761080699276e-05, + "loss": 1.7477, + "step": 3960 + }, + { + "epoch": 4.73, + "learning_rate": 3.8031961857672614e-05, + "loss": 1.7753, + "step": 3970 + }, + { + "epoch": 4.74, + "learning_rate": 3.798781564541762e-05, + "loss": 1.7486, + "step": 3980 + }, + { + "epoch": 4.76, + "learning_rate": 3.7943669433162636e-05, + "loss": 1.7403, + "step": 3990 + }, + { + "epoch": 4.77, + "learning_rate": 3.7899523220907644e-05, + "loss": 1.6858, + "step": 4000 + }, + { + "epoch": 4.78, + "learning_rate": 3.785537700865266e-05, + "loss": 1.7315, + "step": 4010 + }, + { + "epoch": 4.79, + "learning_rate": 3.7811230796397674e-05, + "loss": 1.7583, + "step": 4020 + }, + { + "epoch": 4.8, + "learning_rate": 3.776708458414268e-05, + "loss": 1.7067, + "step": 4030 + }, + { + "epoch": 4.82, + "learning_rate": 3.7722938371887696e-05, + "loss": 1.7346, + "step": 4040 + }, + { + "epoch": 4.83, + "learning_rate": 3.7678792159632704e-05, + "loss": 1.7225, + "step": 4050 + }, + { + "epoch": 4.84, + "learning_rate": 3.763464594737772e-05, + "loss": 1.7022, + "step": 4060 + }, + { + "epoch": 4.85, + "learning_rate": 3.759049973512273e-05, + "loss": 1.7202, + "step": 4070 + }, + { + "epoch": 4.86, + "learning_rate": 3.754635352286774e-05, + "loss": 1.7266, + "step": 4080 + }, + { + "epoch": 4.87, + "learning_rate": 3.7502207310612756e-05, + "loss": 1.6832, + "step": 4090 + }, + { + "epoch": 4.89, + "learning_rate": 3.7458061098357764e-05, + "loss": 1.6745, + "step": 4100 + }, + { + "epoch": 4.9, + "learning_rate": 3.741391488610277e-05, + "loss": 1.6977, + "step": 4110 + }, + { + "epoch": 4.91, + "learning_rate": 3.7369768673847786e-05, + "loss": 1.6997, + "step": 4120 + }, + { + "epoch": 4.92, + "learning_rate": 3.7325622461592794e-05, + "loss": 1.724, + "step": 4130 + }, + { + "epoch": 4.93, + "learning_rate": 3.728147624933781e-05, + "loss": 1.6449, + "step": 4140 + }, + { + "epoch": 4.95, + "learning_rate": 3.7237330037082817e-05, + "loss": 1.6715, + "step": 4150 + }, + { + "epoch": 4.96, + "learning_rate": 3.719318382482783e-05, + "loss": 1.6833, + "step": 4160 + }, + { + "epoch": 4.97, + "learning_rate": 3.7149037612572846e-05, + "loss": 1.7102, + "step": 4170 + }, + { + "epoch": 4.98, + "learning_rate": 3.7104891400317854e-05, + "loss": 1.7087, + "step": 4180 + }, + { + "epoch": 4.99, + "learning_rate": 3.706074518806287e-05, + "loss": 1.7026, + "step": 4190 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.6436019663339788, + "eval_f1": 0.6012686796803567, + "eval_loss": 1.5246539115905762, + "eval_precision": 0.6179627814058779, + "eval_recall": 0.6436019663339788, + "eval_runtime": 98.9599, + "eval_samples_per_second": 271.342, + "eval_steps_per_second": 4.244, + "step": 4195 + }, + { + "epoch": 5.01, + "learning_rate": 3.7016598975807876e-05, + "loss": 1.7138, + "step": 4200 + }, + { + "epoch": 5.02, + "learning_rate": 3.697245276355289e-05, + "loss": 1.5906, + "step": 4210 + }, + { + "epoch": 5.03, + "learning_rate": 3.6928306551297906e-05, + "loss": 1.6526, + "step": 4220 + }, + { + "epoch": 5.04, + "learning_rate": 3.6884160339042913e-05, + "loss": 1.568, + "step": 4230 + }, + { + "epoch": 5.05, + "learning_rate": 3.684001412678793e-05, + "loss": 1.6482, + "step": 4240 + }, + { + "epoch": 5.07, + "learning_rate": 3.6795867914532936e-05, + "loss": 1.6047, + "step": 4250 + }, + { + "epoch": 5.08, + "learning_rate": 3.6751721702277944e-05, + "loss": 1.6349, + "step": 4260 + }, + { + "epoch": 5.09, + "learning_rate": 3.670757549002296e-05, + "loss": 1.643, + "step": 4270 + }, + { + "epoch": 5.1, + "learning_rate": 3.6663429277767966e-05, + "loss": 1.6335, + "step": 4280 + }, + { + "epoch": 5.11, + "learning_rate": 3.661928306551298e-05, + "loss": 1.616, + "step": 4290 + }, + { + "epoch": 5.13, + "learning_rate": 3.657513685325799e-05, + "loss": 1.5813, + "step": 4300 + }, + { + "epoch": 5.14, + "learning_rate": 3.6530990641003004e-05, + "loss": 1.5871, + "step": 4310 + }, + { + "epoch": 5.15, + "learning_rate": 3.648684442874801e-05, + "loss": 1.6759, + "step": 4320 + }, + { + "epoch": 5.16, + "learning_rate": 3.6442698216493026e-05, + "loss": 1.5666, + "step": 4330 + }, + { + "epoch": 5.17, + "learning_rate": 3.639855200423804e-05, + "loss": 1.6558, + "step": 4340 + }, + { + "epoch": 5.18, + "learning_rate": 3.635440579198305e-05, + "loss": 1.5853, + "step": 4350 + }, + { + "epoch": 5.2, + "learning_rate": 3.631025957972806e-05, + "loss": 1.641, + "step": 4360 + }, + { + "epoch": 5.21, + "learning_rate": 3.626611336747307e-05, + "loss": 1.5875, + "step": 4370 + }, + { + "epoch": 5.22, + "learning_rate": 3.6221967155218086e-05, + "loss": 1.6086, + "step": 4380 + }, + { + "epoch": 5.23, + "learning_rate": 3.61778209429631e-05, + "loss": 1.5143, + "step": 4390 + }, + { + "epoch": 5.24, + "learning_rate": 3.613367473070811e-05, + "loss": 1.5948, + "step": 4400 + }, + { + "epoch": 5.26, + "learning_rate": 3.608952851845312e-05, + "loss": 1.6024, + "step": 4410 + }, + { + "epoch": 5.27, + "learning_rate": 3.604538230619813e-05, + "loss": 1.5302, + "step": 4420 + }, + { + "epoch": 5.28, + "learning_rate": 3.600123609394314e-05, + "loss": 1.5198, + "step": 4430 + }, + { + "epoch": 5.29, + "learning_rate": 3.595708988168815e-05, + "loss": 1.6065, + "step": 4440 + }, + { + "epoch": 5.3, + "learning_rate": 3.591294366943316e-05, + "loss": 1.568, + "step": 4450 + }, + { + "epoch": 5.32, + "learning_rate": 3.5868797457178176e-05, + "loss": 1.6374, + "step": 4460 + }, + { + "epoch": 5.33, + "learning_rate": 3.5824651244923184e-05, + "loss": 1.5732, + "step": 4470 + }, + { + "epoch": 5.34, + "learning_rate": 3.57805050326682e-05, + "loss": 1.5805, + "step": 4480 + }, + { + "epoch": 5.35, + "learning_rate": 3.573635882041321e-05, + "loss": 1.623, + "step": 4490 + }, + { + "epoch": 5.36, + "learning_rate": 3.569221260815822e-05, + "loss": 1.5536, + "step": 4500 + }, + { + "epoch": 5.38, + "learning_rate": 3.5648066395903236e-05, + "loss": 1.5806, + "step": 4510 + }, + { + "epoch": 5.39, + "learning_rate": 3.5603920183648243e-05, + "loss": 1.5798, + "step": 4520 + }, + { + "epoch": 5.4, + "learning_rate": 3.555977397139326e-05, + "loss": 1.5538, + "step": 4530 + }, + { + "epoch": 5.41, + "learning_rate": 3.551562775913827e-05, + "loss": 1.5041, + "step": 4540 + }, + { + "epoch": 5.42, + "learning_rate": 3.547148154688328e-05, + "loss": 1.5274, + "step": 4550 + }, + { + "epoch": 5.43, + "learning_rate": 3.5427335334628295e-05, + "loss": 1.5284, + "step": 4560 + }, + { + "epoch": 5.45, + "learning_rate": 3.53831891223733e-05, + "loss": 1.4944, + "step": 4570 + }, + { + "epoch": 5.46, + "learning_rate": 3.533904291011831e-05, + "loss": 1.5106, + "step": 4580 + }, + { + "epoch": 5.47, + "learning_rate": 3.5294896697863326e-05, + "loss": 1.549, + "step": 4590 + }, + { + "epoch": 5.48, + "learning_rate": 3.5250750485608334e-05, + "loss": 1.5478, + "step": 4600 + }, + { + "epoch": 5.49, + "learning_rate": 3.520660427335335e-05, + "loss": 1.5143, + "step": 4610 + }, + { + "epoch": 5.51, + "learning_rate": 3.5162458061098356e-05, + "loss": 1.5077, + "step": 4620 + }, + { + "epoch": 5.52, + "learning_rate": 3.511831184884337e-05, + "loss": 1.5095, + "step": 4630 + }, + { + "epoch": 5.53, + "learning_rate": 3.507416563658838e-05, + "loss": 1.4635, + "step": 4640 + }, + { + "epoch": 5.54, + "learning_rate": 3.503001942433339e-05, + "loss": 1.4623, + "step": 4650 + }, + { + "epoch": 5.55, + "learning_rate": 3.498587321207841e-05, + "loss": 1.5481, + "step": 4660 + }, + { + "epoch": 5.57, + "learning_rate": 3.4941726999823416e-05, + "loss": 1.4816, + "step": 4670 + }, + { + "epoch": 5.58, + "learning_rate": 3.489758078756843e-05, + "loss": 1.4665, + "step": 4680 + }, + { + "epoch": 5.59, + "learning_rate": 3.485343457531344e-05, + "loss": 1.4978, + "step": 4690 + }, + { + "epoch": 5.6, + "learning_rate": 3.480928836305845e-05, + "loss": 1.4649, + "step": 4700 + }, + { + "epoch": 5.61, + "learning_rate": 3.476514215080347e-05, + "loss": 1.4676, + "step": 4710 + }, + { + "epoch": 5.63, + "learning_rate": 3.4720995938548475e-05, + "loss": 1.4891, + "step": 4720 + }, + { + "epoch": 5.64, + "learning_rate": 3.467684972629348e-05, + "loss": 1.4662, + "step": 4730 + }, + { + "epoch": 5.65, + "learning_rate": 3.46327035140385e-05, + "loss": 1.5112, + "step": 4740 + }, + { + "epoch": 5.66, + "learning_rate": 3.4588557301783506e-05, + "loss": 1.4777, + "step": 4750 + }, + { + "epoch": 5.67, + "learning_rate": 3.454441108952852e-05, + "loss": 1.4861, + "step": 4760 + }, + { + "epoch": 5.69, + "learning_rate": 3.450026487727353e-05, + "loss": 1.4659, + "step": 4770 + }, + { + "epoch": 5.7, + "learning_rate": 3.445611866501854e-05, + "loss": 1.4698, + "step": 4780 + }, + { + "epoch": 5.71, + "learning_rate": 3.441197245276355e-05, + "loss": 1.4517, + "step": 4790 + }, + { + "epoch": 5.72, + "learning_rate": 3.4367826240508566e-05, + "loss": 1.4347, + "step": 4800 + }, + { + "epoch": 5.73, + "learning_rate": 3.432368002825358e-05, + "loss": 1.4547, + "step": 4810 + }, + { + "epoch": 5.74, + "learning_rate": 3.427953381599859e-05, + "loss": 1.4362, + "step": 4820 + }, + { + "epoch": 5.76, + "learning_rate": 3.42353876037436e-05, + "loss": 1.4627, + "step": 4830 + }, + { + "epoch": 5.77, + "learning_rate": 3.419124139148861e-05, + "loss": 1.4835, + "step": 4840 + }, + { + "epoch": 5.78, + "learning_rate": 3.4147095179233625e-05, + "loss": 1.4554, + "step": 4850 + }, + { + "epoch": 5.79, + "learning_rate": 3.410294896697864e-05, + "loss": 1.4479, + "step": 4860 + }, + { + "epoch": 5.8, + "learning_rate": 3.405880275472365e-05, + "loss": 1.4177, + "step": 4870 + }, + { + "epoch": 5.82, + "learning_rate": 3.401465654246866e-05, + "loss": 1.4393, + "step": 4880 + }, + { + "epoch": 5.83, + "learning_rate": 3.397051033021367e-05, + "loss": 1.4812, + "step": 4890 + }, + { + "epoch": 5.84, + "learning_rate": 3.392636411795868e-05, + "loss": 1.4318, + "step": 4900 + }, + { + "epoch": 5.85, + "learning_rate": 3.388221790570369e-05, + "loss": 1.4818, + "step": 4910 + }, + { + "epoch": 5.86, + "learning_rate": 3.38380716934487e-05, + "loss": 1.472, + "step": 4920 + }, + { + "epoch": 5.88, + "learning_rate": 3.3793925481193715e-05, + "loss": 1.4164, + "step": 4930 + }, + { + "epoch": 5.89, + "learning_rate": 3.374977926893872e-05, + "loss": 1.4173, + "step": 4940 + }, + { + "epoch": 5.9, + "learning_rate": 3.370563305668374e-05, + "loss": 1.4133, + "step": 4950 + }, + { + "epoch": 5.91, + "learning_rate": 3.366148684442875e-05, + "loss": 1.4526, + "step": 4960 + }, + { + "epoch": 5.92, + "learning_rate": 3.361734063217376e-05, + "loss": 1.418, + "step": 4970 + }, + { + "epoch": 5.94, + "learning_rate": 3.3573194419918775e-05, + "loss": 1.4083, + "step": 4980 + }, + { + "epoch": 5.95, + "learning_rate": 3.352904820766378e-05, + "loss": 1.4273, + "step": 4990 + }, + { + "epoch": 5.96, + "learning_rate": 3.34849019954088e-05, + "loss": 1.3889, + "step": 5000 + }, + { + "epoch": 5.97, + "learning_rate": 3.3440755783153805e-05, + "loss": 1.3927, + "step": 5010 + }, + { + "epoch": 5.98, + "learning_rate": 3.339660957089882e-05, + "loss": 1.3998, + "step": 5020 + }, + { + "epoch": 5.99, + "learning_rate": 3.3352463358643835e-05, + "loss": 1.4288, + "step": 5030 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6978995977953225, + "eval_f1": 0.6686418903782712, + "eval_loss": 1.2768018245697021, + "eval_precision": 0.6810067175098012, + "eval_recall": 0.6978995977953225, + "eval_runtime": 99.1055, + "eval_samples_per_second": 270.944, + "eval_steps_per_second": 4.238, + "step": 5034 + }, + { + "epoch": 6.01, + "learning_rate": 3.330831714638884e-05, + "loss": 1.4183, + "step": 5040 + }, + { + "epoch": 6.02, + "learning_rate": 3.326417093413385e-05, + "loss": 1.3548, + "step": 5050 + }, + { + "epoch": 6.03, + "learning_rate": 3.3220024721878865e-05, + "loss": 1.352, + "step": 5060 + }, + { + "epoch": 6.04, + "learning_rate": 3.317587850962387e-05, + "loss": 1.3463, + "step": 5070 + }, + { + "epoch": 6.05, + "learning_rate": 3.313173229736889e-05, + "loss": 1.3743, + "step": 5080 + }, + { + "epoch": 6.07, + "learning_rate": 3.3087586085113895e-05, + "loss": 1.3347, + "step": 5090 + }, + { + "epoch": 6.08, + "learning_rate": 3.304343987285891e-05, + "loss": 1.3853, + "step": 5100 + }, + { + "epoch": 6.09, + "learning_rate": 3.299929366060392e-05, + "loss": 1.3626, + "step": 5110 + }, + { + "epoch": 6.1, + "learning_rate": 3.295514744834893e-05, + "loss": 1.3255, + "step": 5120 + }, + { + "epoch": 6.11, + "learning_rate": 3.291100123609395e-05, + "loss": 1.3577, + "step": 5130 + }, + { + "epoch": 6.13, + "learning_rate": 3.2866855023838955e-05, + "loss": 1.3738, + "step": 5140 + }, + { + "epoch": 6.14, + "learning_rate": 3.282270881158397e-05, + "loss": 1.3802, + "step": 5150 + }, + { + "epoch": 6.15, + "learning_rate": 3.277856259932898e-05, + "loss": 1.3946, + "step": 5160 + }, + { + "epoch": 6.16, + "learning_rate": 3.273441638707399e-05, + "loss": 1.3322, + "step": 5170 + }, + { + "epoch": 6.17, + "learning_rate": 3.269027017481901e-05, + "loss": 1.403, + "step": 5180 + }, + { + "epoch": 6.19, + "learning_rate": 3.2646123962564015e-05, + "loss": 1.3271, + "step": 5190 + }, + { + "epoch": 6.2, + "learning_rate": 3.260197775030903e-05, + "loss": 1.3338, + "step": 5200 + }, + { + "epoch": 6.21, + "learning_rate": 3.255783153805404e-05, + "loss": 1.3484, + "step": 5210 + }, + { + "epoch": 6.22, + "learning_rate": 3.2513685325799045e-05, + "loss": 1.3764, + "step": 5220 + }, + { + "epoch": 6.23, + "learning_rate": 3.246953911354406e-05, + "loss": 1.2798, + "step": 5230 + }, + { + "epoch": 6.25, + "learning_rate": 3.242539290128907e-05, + "loss": 1.3485, + "step": 5240 + }, + { + "epoch": 6.26, + "learning_rate": 3.238124668903408e-05, + "loss": 1.3634, + "step": 5250 + }, + { + "epoch": 6.27, + "learning_rate": 3.233710047677909e-05, + "loss": 1.3283, + "step": 5260 + }, + { + "epoch": 6.28, + "learning_rate": 3.2292954264524105e-05, + "loss": 1.2951, + "step": 5270 + }, + { + "epoch": 6.29, + "learning_rate": 3.224880805226912e-05, + "loss": 1.3562, + "step": 5280 + }, + { + "epoch": 6.31, + "learning_rate": 3.220466184001413e-05, + "loss": 1.3264, + "step": 5290 + }, + { + "epoch": 6.32, + "learning_rate": 3.216051562775914e-05, + "loss": 1.2662, + "step": 5300 + }, + { + "epoch": 6.33, + "learning_rate": 3.211636941550415e-05, + "loss": 1.3093, + "step": 5310 + }, + { + "epoch": 6.34, + "learning_rate": 3.2072223203249165e-05, + "loss": 1.3059, + "step": 5320 + }, + { + "epoch": 6.35, + "learning_rate": 3.202807699099418e-05, + "loss": 1.3101, + "step": 5330 + }, + { + "epoch": 6.36, + "learning_rate": 3.198393077873919e-05, + "loss": 1.2837, + "step": 5340 + }, + { + "epoch": 6.38, + "learning_rate": 3.19397845664842e-05, + "loss": 1.3089, + "step": 5350 + }, + { + "epoch": 6.39, + "learning_rate": 3.189563835422921e-05, + "loss": 1.2935, + "step": 5360 + }, + { + "epoch": 6.4, + "learning_rate": 3.185149214197422e-05, + "loss": 1.2767, + "step": 5370 + }, + { + "epoch": 6.41, + "learning_rate": 3.180734592971923e-05, + "loss": 1.3082, + "step": 5380 + }, + { + "epoch": 6.42, + "learning_rate": 3.176319971746424e-05, + "loss": 1.2936, + "step": 5390 + }, + { + "epoch": 6.44, + "learning_rate": 3.1719053505209255e-05, + "loss": 1.2872, + "step": 5400 + }, + { + "epoch": 6.45, + "learning_rate": 3.167490729295426e-05, + "loss": 1.4089, + "step": 5410 + }, + { + "epoch": 6.46, + "learning_rate": 3.163076108069928e-05, + "loss": 1.3171, + "step": 5420 + }, + { + "epoch": 6.47, + "learning_rate": 3.1586614868444285e-05, + "loss": 1.2926, + "step": 5430 + }, + { + "epoch": 6.48, + "learning_rate": 3.15424686561893e-05, + "loss": 1.2932, + "step": 5440 + }, + { + "epoch": 6.5, + "learning_rate": 3.1498322443934314e-05, + "loss": 1.3524, + "step": 5450 + }, + { + "epoch": 6.51, + "learning_rate": 3.145417623167932e-05, + "loss": 1.3105, + "step": 5460 + }, + { + "epoch": 6.52, + "learning_rate": 3.141003001942434e-05, + "loss": 1.2973, + "step": 5470 + }, + { + "epoch": 6.53, + "learning_rate": 3.1365883807169345e-05, + "loss": 1.2757, + "step": 5480 + }, + { + "epoch": 6.54, + "learning_rate": 3.132173759491436e-05, + "loss": 1.2243, + "step": 5490 + }, + { + "epoch": 6.56, + "learning_rate": 3.1277591382659374e-05, + "loss": 1.323, + "step": 5500 + }, + { + "epoch": 6.57, + "learning_rate": 3.123344517040438e-05, + "loss": 1.2593, + "step": 5510 + }, + { + "epoch": 6.58, + "learning_rate": 3.11892989581494e-05, + "loss": 1.3141, + "step": 5520 + }, + { + "epoch": 6.59, + "learning_rate": 3.1145152745894405e-05, + "loss": 1.2202, + "step": 5530 + }, + { + "epoch": 6.6, + "learning_rate": 3.110100653363941e-05, + "loss": 1.2719, + "step": 5540 + }, + { + "epoch": 6.61, + "learning_rate": 3.105686032138443e-05, + "loss": 1.3169, + "step": 5550 + }, + { + "epoch": 6.63, + "learning_rate": 3.1012714109129435e-05, + "loss": 1.3548, + "step": 5560 + }, + { + "epoch": 6.64, + "learning_rate": 3.096856789687445e-05, + "loss": 1.3031, + "step": 5570 + }, + { + "epoch": 6.65, + "learning_rate": 3.092442168461946e-05, + "loss": 1.2254, + "step": 5580 + }, + { + "epoch": 6.66, + "learning_rate": 3.088027547236447e-05, + "loss": 1.2795, + "step": 5590 + }, + { + "epoch": 6.67, + "learning_rate": 3.083612926010949e-05, + "loss": 1.2555, + "step": 5600 + }, + { + "epoch": 6.69, + "learning_rate": 3.0791983047854495e-05, + "loss": 1.2079, + "step": 5610 + }, + { + "epoch": 6.7, + "learning_rate": 3.074783683559951e-05, + "loss": 1.2155, + "step": 5620 + }, + { + "epoch": 6.71, + "learning_rate": 3.070369062334452e-05, + "loss": 1.254, + "step": 5630 + }, + { + "epoch": 6.72, + "learning_rate": 3.065954441108953e-05, + "loss": 1.2222, + "step": 5640 + }, + { + "epoch": 6.73, + "learning_rate": 3.0615398198834546e-05, + "loss": 1.2446, + "step": 5650 + }, + { + "epoch": 6.75, + "learning_rate": 3.0571251986579554e-05, + "loss": 1.2586, + "step": 5660 + }, + { + "epoch": 6.76, + "learning_rate": 3.052710577432457e-05, + "loss": 1.2234, + "step": 5670 + }, + { + "epoch": 6.77, + "learning_rate": 3.0482959562069573e-05, + "loss": 1.2368, + "step": 5680 + }, + { + "epoch": 6.78, + "learning_rate": 3.0438813349814588e-05, + "loss": 1.2462, + "step": 5690 + }, + { + "epoch": 6.79, + "learning_rate": 3.0394667137559603e-05, + "loss": 1.2774, + "step": 5700 + }, + { + "epoch": 6.81, + "learning_rate": 3.035052092530461e-05, + "loss": 1.2202, + "step": 5710 + }, + { + "epoch": 6.82, + "learning_rate": 3.0306374713049622e-05, + "loss": 1.2673, + "step": 5720 + }, + { + "epoch": 6.83, + "learning_rate": 3.0262228500794633e-05, + "loss": 1.2337, + "step": 5730 + }, + { + "epoch": 6.84, + "learning_rate": 3.0218082288539644e-05, + "loss": 1.2289, + "step": 5740 + }, + { + "epoch": 6.85, + "learning_rate": 3.0173936076284652e-05, + "loss": 1.2486, + "step": 5750 + }, + { + "epoch": 6.87, + "learning_rate": 3.0129789864029667e-05, + "loss": 1.2476, + "step": 5760 + }, + { + "epoch": 6.88, + "learning_rate": 3.008564365177468e-05, + "loss": 1.2412, + "step": 5770 + }, + { + "epoch": 6.89, + "learning_rate": 3.004149743951969e-05, + "loss": 1.2322, + "step": 5780 + }, + { + "epoch": 6.9, + "learning_rate": 2.9997351227264704e-05, + "loss": 1.2127, + "step": 5790 + }, + { + "epoch": 6.91, + "learning_rate": 2.9953205015009712e-05, + "loss": 1.2725, + "step": 5800 + }, + { + "epoch": 6.92, + "learning_rate": 2.9909058802754723e-05, + "loss": 1.2347, + "step": 5810 + }, + { + "epoch": 6.94, + "learning_rate": 2.9864912590499738e-05, + "loss": 1.2351, + "step": 5820 + }, + { + "epoch": 6.95, + "learning_rate": 2.9820766378244746e-05, + "loss": 1.2328, + "step": 5830 + }, + { + "epoch": 6.96, + "learning_rate": 2.977662016598976e-05, + "loss": 1.2027, + "step": 5840 + }, + { + "epoch": 6.97, + "learning_rate": 2.9732473953734768e-05, + "loss": 1.2175, + "step": 5850 + }, + { + "epoch": 6.98, + "learning_rate": 2.9688327741479783e-05, + "loss": 1.1781, + "step": 5860 + }, + { + "epoch": 7.0, + "learning_rate": 2.9644181529224798e-05, + "loss": 1.1953, + "step": 5870 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.732347683598987, + "eval_f1": 0.7077241444760507, + "eval_loss": 1.09598970413208, + "eval_precision": 0.7217957955270089, + "eval_recall": 0.732347683598987, + "eval_runtime": 99.334, + "eval_samples_per_second": 270.32, + "eval_steps_per_second": 4.228, + "step": 5873 + }, + { + "epoch": 7.01, + "learning_rate": 2.9600035316969805e-05, + "loss": 1.181, + "step": 5880 + }, + { + "epoch": 7.02, + "learning_rate": 2.9555889104714817e-05, + "loss": 1.2091, + "step": 5890 + }, + { + "epoch": 7.03, + "learning_rate": 2.9511742892459825e-05, + "loss": 1.2003, + "step": 5900 + }, + { + "epoch": 7.04, + "learning_rate": 2.946759668020484e-05, + "loss": 1.143, + "step": 5910 + }, + { + "epoch": 7.06, + "learning_rate": 2.9423450467949854e-05, + "loss": 1.1644, + "step": 5920 + }, + { + "epoch": 7.07, + "learning_rate": 2.9379304255694862e-05, + "loss": 1.2121, + "step": 5930 + }, + { + "epoch": 7.08, + "learning_rate": 2.9335158043439876e-05, + "loss": 1.1864, + "step": 5940 + }, + { + "epoch": 7.09, + "learning_rate": 2.9291011831184884e-05, + "loss": 1.1574, + "step": 5950 + }, + { + "epoch": 7.1, + "learning_rate": 2.92468656189299e-05, + "loss": 1.1473, + "step": 5960 + }, + { + "epoch": 7.12, + "learning_rate": 2.920271940667491e-05, + "loss": 1.1431, + "step": 5970 + }, + { + "epoch": 7.13, + "learning_rate": 2.9158573194419918e-05, + "loss": 1.1197, + "step": 5980 + }, + { + "epoch": 7.14, + "learning_rate": 2.9114426982164933e-05, + "loss": 1.1862, + "step": 5990 + }, + { + "epoch": 7.15, + "learning_rate": 2.907028076990994e-05, + "loss": 1.1715, + "step": 6000 + }, + { + "epoch": 7.16, + "learning_rate": 2.9026134557654955e-05, + "loss": 1.1822, + "step": 6010 + }, + { + "epoch": 7.18, + "learning_rate": 2.898198834539997e-05, + "loss": 1.1529, + "step": 6020 + }, + { + "epoch": 7.19, + "learning_rate": 2.8937842133144978e-05, + "loss": 1.1397, + "step": 6030 + }, + { + "epoch": 7.2, + "learning_rate": 2.889369592088999e-05, + "loss": 1.1757, + "step": 6040 + }, + { + "epoch": 7.21, + "learning_rate": 2.8849549708635e-05, + "loss": 1.1489, + "step": 6050 + }, + { + "epoch": 7.22, + "learning_rate": 2.880540349638001e-05, + "loss": 1.1669, + "step": 6060 + }, + { + "epoch": 7.23, + "learning_rate": 2.8761257284125026e-05, + "loss": 1.1342, + "step": 6070 + }, + { + "epoch": 7.25, + "learning_rate": 2.8717111071870034e-05, + "loss": 1.1516, + "step": 6080 + }, + { + "epoch": 7.26, + "learning_rate": 2.867296485961505e-05, + "loss": 1.134, + "step": 6090 + }, + { + "epoch": 7.27, + "learning_rate": 2.8628818647360057e-05, + "loss": 1.1583, + "step": 6100 + }, + { + "epoch": 7.28, + "learning_rate": 2.858467243510507e-05, + "loss": 1.1387, + "step": 6110 + }, + { + "epoch": 7.29, + "learning_rate": 2.8540526222850082e-05, + "loss": 1.1366, + "step": 6120 + }, + { + "epoch": 7.31, + "learning_rate": 2.849638001059509e-05, + "loss": 1.1627, + "step": 6130 + }, + { + "epoch": 7.32, + "learning_rate": 2.8452233798340105e-05, + "loss": 1.1384, + "step": 6140 + }, + { + "epoch": 7.33, + "learning_rate": 2.8408087586085113e-05, + "loss": 1.15, + "step": 6150 + }, + { + "epoch": 7.34, + "learning_rate": 2.8363941373830128e-05, + "loss": 1.1452, + "step": 6160 + }, + { + "epoch": 7.35, + "learning_rate": 2.8319795161575135e-05, + "loss": 1.1839, + "step": 6170 + }, + { + "epoch": 7.37, + "learning_rate": 2.827564894932015e-05, + "loss": 1.1116, + "step": 6180 + }, + { + "epoch": 7.38, + "learning_rate": 2.8231502737065165e-05, + "loss": 1.1167, + "step": 6190 + }, + { + "epoch": 7.39, + "learning_rate": 2.8187356524810173e-05, + "loss": 1.1393, + "step": 6200 + }, + { + "epoch": 7.4, + "learning_rate": 2.8143210312555184e-05, + "loss": 1.1897, + "step": 6210 + }, + { + "epoch": 7.41, + "learning_rate": 2.8099064100300192e-05, + "loss": 1.0915, + "step": 6220 + }, + { + "epoch": 7.43, + "learning_rate": 2.8054917888045206e-05, + "loss": 1.1432, + "step": 6230 + }, + { + "epoch": 7.44, + "learning_rate": 2.801077167579022e-05, + "loss": 1.123, + "step": 6240 + }, + { + "epoch": 7.45, + "learning_rate": 2.796662546353523e-05, + "loss": 1.1394, + "step": 6250 + }, + { + "epoch": 7.46, + "learning_rate": 2.7922479251280244e-05, + "loss": 1.1488, + "step": 6260 + }, + { + "epoch": 7.47, + "learning_rate": 2.787833303902525e-05, + "loss": 1.1482, + "step": 6270 + }, + { + "epoch": 7.48, + "learning_rate": 2.7834186826770266e-05, + "loss": 1.1294, + "step": 6280 + }, + { + "epoch": 7.5, + "learning_rate": 2.7790040614515277e-05, + "loss": 1.1433, + "step": 6290 + }, + { + "epoch": 7.51, + "learning_rate": 2.7745894402260285e-05, + "loss": 1.0743, + "step": 6300 + }, + { + "epoch": 7.52, + "learning_rate": 2.77017481900053e-05, + "loss": 1.1177, + "step": 6310 + }, + { + "epoch": 7.53, + "learning_rate": 2.7657601977750308e-05, + "loss": 1.1416, + "step": 6320 + }, + { + "epoch": 7.54, + "learning_rate": 2.7613455765495322e-05, + "loss": 1.082, + "step": 6330 + }, + { + "epoch": 7.56, + "learning_rate": 2.7569309553240337e-05, + "loss": 1.1172, + "step": 6340 + }, + { + "epoch": 7.57, + "learning_rate": 2.7525163340985345e-05, + "loss": 1.0803, + "step": 6350 + }, + { + "epoch": 7.58, + "learning_rate": 2.7481017128730356e-05, + "loss": 1.1274, + "step": 6360 + }, + { + "epoch": 7.59, + "learning_rate": 2.7436870916475364e-05, + "loss": 1.1648, + "step": 6370 + }, + { + "epoch": 7.6, + "learning_rate": 2.739272470422038e-05, + "loss": 1.1242, + "step": 6380 + }, + { + "epoch": 7.62, + "learning_rate": 2.7348578491965393e-05, + "loss": 1.0659, + "step": 6390 + }, + { + "epoch": 7.63, + "learning_rate": 2.73044322797104e-05, + "loss": 1.0619, + "step": 6400 + }, + { + "epoch": 7.64, + "learning_rate": 2.7260286067455416e-05, + "loss": 1.1214, + "step": 6410 + }, + { + "epoch": 7.65, + "learning_rate": 2.7216139855200424e-05, + "loss": 1.1358, + "step": 6420 + }, + { + "epoch": 7.66, + "learning_rate": 2.717199364294544e-05, + "loss": 1.0954, + "step": 6430 + }, + { + "epoch": 7.68, + "learning_rate": 2.712784743069045e-05, + "loss": 1.138, + "step": 6440 + }, + { + "epoch": 7.69, + "learning_rate": 2.7083701218435457e-05, + "loss": 1.1409, + "step": 6450 + }, + { + "epoch": 7.7, + "learning_rate": 2.7039555006180472e-05, + "loss": 1.0962, + "step": 6460 + }, + { + "epoch": 7.71, + "learning_rate": 2.699540879392548e-05, + "loss": 1.1233, + "step": 6470 + }, + { + "epoch": 7.72, + "learning_rate": 2.6951262581670495e-05, + "loss": 1.0884, + "step": 6480 + }, + { + "epoch": 7.74, + "learning_rate": 2.690711636941551e-05, + "loss": 1.0763, + "step": 6490 + }, + { + "epoch": 7.75, + "learning_rate": 2.6862970157160517e-05, + "loss": 1.0832, + "step": 6500 + }, + { + "epoch": 7.76, + "learning_rate": 2.681882394490553e-05, + "loss": 1.0683, + "step": 6510 + }, + { + "epoch": 7.77, + "learning_rate": 2.677467773265054e-05, + "loss": 1.1104, + "step": 6520 + }, + { + "epoch": 7.78, + "learning_rate": 2.673053152039555e-05, + "loss": 1.125, + "step": 6530 + }, + { + "epoch": 7.79, + "learning_rate": 2.668638530814056e-05, + "loss": 1.1071, + "step": 6540 + }, + { + "epoch": 7.81, + "learning_rate": 2.6642239095885573e-05, + "loss": 1.1547, + "step": 6550 + }, + { + "epoch": 7.82, + "learning_rate": 2.6598092883630588e-05, + "loss": 1.0844, + "step": 6560 + }, + { + "epoch": 7.83, + "learning_rate": 2.6553946671375596e-05, + "loss": 1.1004, + "step": 6570 + }, + { + "epoch": 7.84, + "learning_rate": 2.650980045912061e-05, + "loss": 1.092, + "step": 6580 + }, + { + "epoch": 7.85, + "learning_rate": 2.646565424686562e-05, + "loss": 1.1057, + "step": 6590 + }, + { + "epoch": 7.87, + "learning_rate": 2.642150803461063e-05, + "loss": 1.0887, + "step": 6600 + }, + { + "epoch": 7.88, + "learning_rate": 2.6377361822355644e-05, + "loss": 1.07, + "step": 6610 + }, + { + "epoch": 7.89, + "learning_rate": 2.6333215610100652e-05, + "loss": 1.0863, + "step": 6620 + }, + { + "epoch": 7.9, + "learning_rate": 2.6289069397845667e-05, + "loss": 1.1196, + "step": 6630 + }, + { + "epoch": 7.91, + "learning_rate": 2.6244923185590675e-05, + "loss": 1.0892, + "step": 6640 + }, + { + "epoch": 7.93, + "learning_rate": 2.620077697333569e-05, + "loss": 1.1039, + "step": 6650 + }, + { + "epoch": 7.94, + "learning_rate": 2.6156630761080704e-05, + "loss": 1.0494, + "step": 6660 + }, + { + "epoch": 7.95, + "learning_rate": 2.6112484548825712e-05, + "loss": 1.0824, + "step": 6670 + }, + { + "epoch": 7.96, + "learning_rate": 2.6068338336570723e-05, + "loss": 1.1038, + "step": 6680 + }, + { + "epoch": 7.97, + "learning_rate": 2.602419212431573e-05, + "loss": 1.0558, + "step": 6690 + }, + { + "epoch": 7.99, + "learning_rate": 2.5980045912060746e-05, + "loss": 1.0946, + "step": 6700 + }, + { + "epoch": 8.0, + "learning_rate": 2.593589969980576e-05, + "loss": 1.058, + "step": 6710 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7548041114255921, + "eval_f1": 0.7350043558463751, + "eval_loss": 0.9828243255615234, + "eval_precision": 0.7440513899220582, + "eval_recall": 0.7548041114255921, + "eval_runtime": 100.0502, + "eval_samples_per_second": 268.385, + "eval_steps_per_second": 4.198, + "step": 6712 + }, + { + "epoch": 8.01, + "learning_rate": 2.5891753487550768e-05, + "loss": 1.042, + "step": 6720 + }, + { + "epoch": 8.02, + "learning_rate": 2.5847607275295783e-05, + "loss": 1.0727, + "step": 6730 + }, + { + "epoch": 8.03, + "learning_rate": 2.580346106304079e-05, + "loss": 1.0546, + "step": 6740 + }, + { + "epoch": 8.05, + "learning_rate": 2.5759314850785805e-05, + "loss": 1.0544, + "step": 6750 + }, + { + "epoch": 8.06, + "learning_rate": 2.5715168638530817e-05, + "loss": 1.0355, + "step": 6760 + }, + { + "epoch": 8.07, + "learning_rate": 2.5671022426275825e-05, + "loss": 0.9754, + "step": 6770 + }, + { + "epoch": 8.08, + "learning_rate": 2.562687621402084e-05, + "loss": 1.0294, + "step": 6780 + }, + { + "epoch": 8.09, + "learning_rate": 2.5582730001765847e-05, + "loss": 1.0225, + "step": 6790 + }, + { + "epoch": 8.1, + "learning_rate": 2.5538583789510862e-05, + "loss": 1.0651, + "step": 6800 + }, + { + "epoch": 8.12, + "learning_rate": 2.5494437577255876e-05, + "loss": 1.0233, + "step": 6810 + }, + { + "epoch": 8.13, + "learning_rate": 2.5450291365000884e-05, + "loss": 1.0067, + "step": 6820 + }, + { + "epoch": 8.14, + "learning_rate": 2.5406145152745896e-05, + "loss": 1.0843, + "step": 6830 + }, + { + "epoch": 8.15, + "learning_rate": 2.5361998940490907e-05, + "loss": 1.0345, + "step": 6840 + }, + { + "epoch": 8.16, + "learning_rate": 2.5317852728235918e-05, + "loss": 1.0876, + "step": 6850 + }, + { + "epoch": 8.18, + "learning_rate": 2.5273706515980933e-05, + "loss": 1.0309, + "step": 6860 + }, + { + "epoch": 8.19, + "learning_rate": 2.522956030372594e-05, + "loss": 1.0354, + "step": 6870 + }, + { + "epoch": 8.2, + "learning_rate": 2.5185414091470955e-05, + "loss": 0.9963, + "step": 6880 + }, + { + "epoch": 8.21, + "learning_rate": 2.5141267879215963e-05, + "loss": 1.0118, + "step": 6890 + }, + { + "epoch": 8.22, + "learning_rate": 2.5097121666960978e-05, + "loss": 1.0476, + "step": 6900 + }, + { + "epoch": 8.24, + "learning_rate": 2.5052975454705986e-05, + "loss": 1.079, + "step": 6910 + }, + { + "epoch": 8.25, + "learning_rate": 2.5008829242450997e-05, + "loss": 0.9995, + "step": 6920 + }, + { + "epoch": 8.26, + "learning_rate": 2.4964683030196008e-05, + "loss": 1.061, + "step": 6930 + }, + { + "epoch": 8.27, + "learning_rate": 2.4920536817941023e-05, + "loss": 1.0008, + "step": 6940 + }, + { + "epoch": 8.28, + "learning_rate": 2.4876390605686034e-05, + "loss": 1.0351, + "step": 6950 + }, + { + "epoch": 8.3, + "learning_rate": 2.4832244393431045e-05, + "loss": 1.062, + "step": 6960 + }, + { + "epoch": 8.31, + "learning_rate": 2.4788098181176057e-05, + "loss": 0.9703, + "step": 6970 + }, + { + "epoch": 8.32, + "learning_rate": 2.4743951968921068e-05, + "loss": 1.0638, + "step": 6980 + }, + { + "epoch": 8.33, + "learning_rate": 2.469980575666608e-05, + "loss": 1.0217, + "step": 6990 + }, + { + "epoch": 8.34, + "learning_rate": 2.465565954441109e-05, + "loss": 1.0515, + "step": 7000 + }, + { + "epoch": 8.36, + "learning_rate": 2.46115133321561e-05, + "loss": 0.9567, + "step": 7010 + }, + { + "epoch": 8.37, + "learning_rate": 2.4567367119901113e-05, + "loss": 1.0541, + "step": 7020 + }, + { + "epoch": 8.38, + "learning_rate": 2.4523220907646124e-05, + "loss": 1.0051, + "step": 7030 + }, + { + "epoch": 8.39, + "learning_rate": 2.4479074695391135e-05, + "loss": 0.9657, + "step": 7040 + }, + { + "epoch": 8.4, + "learning_rate": 2.443492848313615e-05, + "loss": 1.0323, + "step": 7050 + }, + { + "epoch": 8.41, + "learning_rate": 2.439078227088116e-05, + "loss": 1.0378, + "step": 7060 + }, + { + "epoch": 8.43, + "learning_rate": 2.4346636058626173e-05, + "loss": 0.974, + "step": 7070 + }, + { + "epoch": 8.44, + "learning_rate": 2.430248984637118e-05, + "loss": 1.0138, + "step": 7080 + }, + { + "epoch": 8.45, + "learning_rate": 2.4258343634116192e-05, + "loss": 1.0023, + "step": 7090 + }, + { + "epoch": 8.46, + "learning_rate": 2.4214197421861206e-05, + "loss": 1.0473, + "step": 7100 + }, + { + "epoch": 8.47, + "learning_rate": 2.4170051209606218e-05, + "loss": 0.9473, + "step": 7110 + }, + { + "epoch": 8.49, + "learning_rate": 2.412590499735123e-05, + "loss": 1.0078, + "step": 7120 + }, + { + "epoch": 8.5, + "learning_rate": 2.408175878509624e-05, + "loss": 1.0177, + "step": 7130 + }, + { + "epoch": 8.51, + "learning_rate": 2.403761257284125e-05, + "loss": 0.9631, + "step": 7140 + }, + { + "epoch": 8.52, + "learning_rate": 2.3993466360586263e-05, + "loss": 0.9581, + "step": 7150 + }, + { + "epoch": 8.53, + "learning_rate": 2.3949320148331274e-05, + "loss": 1.0014, + "step": 7160 + }, + { + "epoch": 8.55, + "learning_rate": 2.3905173936076285e-05, + "loss": 0.9788, + "step": 7170 + }, + { + "epoch": 8.56, + "learning_rate": 2.3861027723821296e-05, + "loss": 0.9712, + "step": 7180 + }, + { + "epoch": 8.57, + "learning_rate": 2.3816881511566308e-05, + "loss": 0.9748, + "step": 7190 + }, + { + "epoch": 8.58, + "learning_rate": 2.377273529931132e-05, + "loss": 0.9946, + "step": 7200 + }, + { + "epoch": 8.59, + "learning_rate": 2.3728589087056334e-05, + "loss": 0.9634, + "step": 7210 + }, + { + "epoch": 8.61, + "learning_rate": 2.3684442874801345e-05, + "loss": 1.0009, + "step": 7220 + }, + { + "epoch": 8.62, + "learning_rate": 2.3640296662546356e-05, + "loss": 0.9781, + "step": 7230 + }, + { + "epoch": 8.63, + "learning_rate": 2.3596150450291364e-05, + "loss": 0.9896, + "step": 7240 + }, + { + "epoch": 8.64, + "learning_rate": 2.3552004238036375e-05, + "loss": 0.975, + "step": 7250 + }, + { + "epoch": 8.65, + "learning_rate": 2.350785802578139e-05, + "loss": 1.0166, + "step": 7260 + }, + { + "epoch": 8.66, + "learning_rate": 2.34637118135264e-05, + "loss": 0.9855, + "step": 7270 + }, + { + "epoch": 8.68, + "learning_rate": 2.3419565601271412e-05, + "loss": 0.9839, + "step": 7280 + }, + { + "epoch": 8.69, + "learning_rate": 2.3375419389016424e-05, + "loss": 0.9924, + "step": 7290 + }, + { + "epoch": 8.7, + "learning_rate": 2.3331273176761435e-05, + "loss": 0.9736, + "step": 7300 + }, + { + "epoch": 8.71, + "learning_rate": 2.3287126964506446e-05, + "loss": 1.0229, + "step": 7310 + }, + { + "epoch": 8.72, + "learning_rate": 2.3242980752251458e-05, + "loss": 1.0075, + "step": 7320 + }, + { + "epoch": 8.74, + "learning_rate": 2.319883453999647e-05, + "loss": 1.065, + "step": 7330 + }, + { + "epoch": 8.75, + "learning_rate": 2.315468832774148e-05, + "loss": 0.9951, + "step": 7340 + }, + { + "epoch": 8.76, + "learning_rate": 2.311054211548649e-05, + "loss": 0.985, + "step": 7350 + }, + { + "epoch": 8.77, + "learning_rate": 2.3066395903231503e-05, + "loss": 1.0109, + "step": 7360 + }, + { + "epoch": 8.78, + "learning_rate": 2.3022249690976517e-05, + "loss": 0.9473, + "step": 7370 + }, + { + "epoch": 8.8, + "learning_rate": 2.297810347872153e-05, + "loss": 0.9864, + "step": 7380 + }, + { + "epoch": 8.81, + "learning_rate": 2.293395726646654e-05, + "loss": 0.9472, + "step": 7390 + }, + { + "epoch": 8.82, + "learning_rate": 2.2889811054211548e-05, + "loss": 0.9759, + "step": 7400 + }, + { + "epoch": 8.83, + "learning_rate": 2.284566484195656e-05, + "loss": 0.9271, + "step": 7410 + }, + { + "epoch": 8.84, + "learning_rate": 2.2801518629701574e-05, + "loss": 1.0013, + "step": 7420 + }, + { + "epoch": 8.86, + "learning_rate": 2.2757372417446585e-05, + "loss": 0.9603, + "step": 7430 + }, + { + "epoch": 8.87, + "learning_rate": 2.2713226205191596e-05, + "loss": 0.9903, + "step": 7440 + }, + { + "epoch": 8.88, + "learning_rate": 2.2669079992936607e-05, + "loss": 0.9983, + "step": 7450 + }, + { + "epoch": 8.89, + "learning_rate": 2.262493378068162e-05, + "loss": 0.9947, + "step": 7460 + }, + { + "epoch": 8.9, + "learning_rate": 2.258078756842663e-05, + "loss": 0.9341, + "step": 7470 + }, + { + "epoch": 8.92, + "learning_rate": 2.253664135617164e-05, + "loss": 0.9749, + "step": 7480 + }, + { + "epoch": 8.93, + "learning_rate": 2.2492495143916652e-05, + "loss": 0.9877, + "step": 7490 + }, + { + "epoch": 8.94, + "learning_rate": 2.2448348931661664e-05, + "loss": 1.0253, + "step": 7500 + }, + { + "epoch": 8.95, + "learning_rate": 2.2404202719406675e-05, + "loss": 0.9495, + "step": 7510 + }, + { + "epoch": 8.96, + "learning_rate": 2.236005650715169e-05, + "loss": 0.9616, + "step": 7520 + }, + { + "epoch": 8.97, + "learning_rate": 2.23159102948967e-05, + "loss": 0.9267, + "step": 7530 + }, + { + "epoch": 8.99, + "learning_rate": 2.2271764082641712e-05, + "loss": 0.957, + "step": 7540 + }, + { + "epoch": 9.0, + "learning_rate": 2.222761787038672e-05, + "loss": 0.9691, + "step": 7550 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7718233278712945, + "eval_f1": 0.7536014274909693, + "eval_loss": 0.9018393754959106, + "eval_precision": 0.7615836668708097, + "eval_recall": 0.7718233278712945, + "eval_runtime": 100.4679, + "eval_samples_per_second": 267.269, + "eval_steps_per_second": 4.18, + "step": 7551 + }, + { + "epoch": 9.01, + "learning_rate": 2.218347165813173e-05, + "loss": 0.9592, + "step": 7560 + }, + { + "epoch": 9.02, + "learning_rate": 2.2139325445876742e-05, + "loss": 0.8877, + "step": 7570 + }, + { + "epoch": 9.03, + "learning_rate": 2.2095179233621757e-05, + "loss": 0.9331, + "step": 7580 + }, + { + "epoch": 9.05, + "learning_rate": 2.205103302136677e-05, + "loss": 0.9374, + "step": 7590 + }, + { + "epoch": 9.06, + "learning_rate": 2.200688680911178e-05, + "loss": 0.9148, + "step": 7600 + }, + { + "epoch": 9.07, + "learning_rate": 2.196274059685679e-05, + "loss": 0.9694, + "step": 7610 + }, + { + "epoch": 9.08, + "learning_rate": 2.1918594384601802e-05, + "loss": 0.8986, + "step": 7620 + }, + { + "epoch": 9.09, + "learning_rate": 2.1874448172346813e-05, + "loss": 0.9645, + "step": 7630 + }, + { + "epoch": 9.11, + "learning_rate": 2.1830301960091825e-05, + "loss": 0.9577, + "step": 7640 + }, + { + "epoch": 9.12, + "learning_rate": 2.1786155747836836e-05, + "loss": 0.876, + "step": 7650 + }, + { + "epoch": 9.13, + "learning_rate": 2.1742009535581847e-05, + "loss": 0.9324, + "step": 7660 + }, + { + "epoch": 9.14, + "learning_rate": 2.169786332332686e-05, + "loss": 0.9094, + "step": 7670 + }, + { + "epoch": 9.15, + "learning_rate": 2.1653717111071873e-05, + "loss": 0.9311, + "step": 7680 + }, + { + "epoch": 9.17, + "learning_rate": 2.1609570898816884e-05, + "loss": 0.9111, + "step": 7690 + }, + { + "epoch": 9.18, + "learning_rate": 2.1565424686561896e-05, + "loss": 0.9487, + "step": 7700 + }, + { + "epoch": 9.19, + "learning_rate": 2.1521278474306903e-05, + "loss": 0.9526, + "step": 7710 + }, + { + "epoch": 9.2, + "learning_rate": 2.1477132262051915e-05, + "loss": 0.9594, + "step": 7720 + }, + { + "epoch": 9.21, + "learning_rate": 2.143298604979693e-05, + "loss": 1.0186, + "step": 7730 + }, + { + "epoch": 9.23, + "learning_rate": 2.138883983754194e-05, + "loss": 0.8755, + "step": 7740 + }, + { + "epoch": 9.24, + "learning_rate": 2.1344693625286952e-05, + "loss": 0.9704, + "step": 7750 + }, + { + "epoch": 9.25, + "learning_rate": 2.1300547413031963e-05, + "loss": 0.9313, + "step": 7760 + }, + { + "epoch": 9.26, + "learning_rate": 2.1256401200776974e-05, + "loss": 0.9476, + "step": 7770 + }, + { + "epoch": 9.27, + "learning_rate": 2.1212254988521986e-05, + "loss": 0.9674, + "step": 7780 + }, + { + "epoch": 9.28, + "learning_rate": 2.1168108776266997e-05, + "loss": 0.9253, + "step": 7790 + }, + { + "epoch": 9.3, + "learning_rate": 2.1123962564012008e-05, + "loss": 0.9806, + "step": 7800 + }, + { + "epoch": 9.31, + "learning_rate": 2.107981635175702e-05, + "loss": 0.9188, + "step": 7810 + }, + { + "epoch": 9.32, + "learning_rate": 2.103567013950203e-05, + "loss": 0.9386, + "step": 7820 + }, + { + "epoch": 9.33, + "learning_rate": 2.0991523927247042e-05, + "loss": 0.9111, + "step": 7830 + }, + { + "epoch": 9.34, + "learning_rate": 2.0947377714992057e-05, + "loss": 0.912, + "step": 7840 + }, + { + "epoch": 9.36, + "learning_rate": 2.0903231502737068e-05, + "loss": 0.8859, + "step": 7850 + }, + { + "epoch": 9.37, + "learning_rate": 2.085908529048208e-05, + "loss": 0.954, + "step": 7860 + }, + { + "epoch": 9.38, + "learning_rate": 2.0814939078227087e-05, + "loss": 0.9201, + "step": 7870 + }, + { + "epoch": 9.39, + "learning_rate": 2.07707928659721e-05, + "loss": 0.9238, + "step": 7880 + }, + { + "epoch": 9.4, + "learning_rate": 2.0726646653717113e-05, + "loss": 0.9186, + "step": 7890 + }, + { + "epoch": 9.42, + "learning_rate": 2.0682500441462124e-05, + "loss": 0.9529, + "step": 7900 + }, + { + "epoch": 9.43, + "learning_rate": 2.0638354229207135e-05, + "loss": 0.8942, + "step": 7910 + }, + { + "epoch": 9.44, + "learning_rate": 2.0594208016952147e-05, + "loss": 0.9486, + "step": 7920 + }, + { + "epoch": 9.45, + "learning_rate": 2.0550061804697158e-05, + "loss": 0.9249, + "step": 7930 + }, + { + "epoch": 9.46, + "learning_rate": 2.050591559244217e-05, + "loss": 0.9115, + "step": 7940 + }, + { + "epoch": 9.48, + "learning_rate": 2.046176938018718e-05, + "loss": 0.8388, + "step": 7950 + }, + { + "epoch": 9.49, + "learning_rate": 2.0417623167932192e-05, + "loss": 0.9013, + "step": 7960 + }, + { + "epoch": 9.5, + "learning_rate": 2.0373476955677203e-05, + "loss": 0.8917, + "step": 7970 + }, + { + "epoch": 9.51, + "learning_rate": 2.0329330743422214e-05, + "loss": 0.913, + "step": 7980 + }, + { + "epoch": 9.52, + "learning_rate": 2.0285184531167226e-05, + "loss": 0.9192, + "step": 7990 + }, + { + "epoch": 9.54, + "learning_rate": 2.024103831891224e-05, + "loss": 0.9011, + "step": 8000 + }, + { + "epoch": 9.55, + "learning_rate": 2.019689210665725e-05, + "loss": 0.91, + "step": 8010 + }, + { + "epoch": 9.56, + "learning_rate": 2.0152745894402263e-05, + "loss": 0.8934, + "step": 8020 + }, + { + "epoch": 9.57, + "learning_rate": 2.010859968214727e-05, + "loss": 0.9183, + "step": 8030 + }, + { + "epoch": 9.58, + "learning_rate": 2.0064453469892282e-05, + "loss": 0.8899, + "step": 8040 + }, + { + "epoch": 9.59, + "learning_rate": 2.0020307257637297e-05, + "loss": 0.8904, + "step": 8050 + }, + { + "epoch": 9.61, + "learning_rate": 1.9976161045382308e-05, + "loss": 0.8728, + "step": 8060 + }, + { + "epoch": 9.62, + "learning_rate": 1.993201483312732e-05, + "loss": 0.933, + "step": 8070 + }, + { + "epoch": 9.63, + "learning_rate": 1.988786862087233e-05, + "loss": 0.8514, + "step": 8080 + }, + { + "epoch": 9.64, + "learning_rate": 1.984372240861734e-05, + "loss": 0.9437, + "step": 8090 + }, + { + "epoch": 9.65, + "learning_rate": 1.9799576196362353e-05, + "loss": 0.9064, + "step": 8100 + }, + { + "epoch": 9.67, + "learning_rate": 1.9755429984107364e-05, + "loss": 0.8757, + "step": 8110 + }, + { + "epoch": 9.68, + "learning_rate": 1.9711283771852375e-05, + "loss": 0.863, + "step": 8120 + }, + { + "epoch": 9.69, + "learning_rate": 1.9667137559597387e-05, + "loss": 0.9145, + "step": 8130 + }, + { + "epoch": 9.7, + "learning_rate": 1.9622991347342398e-05, + "loss": 0.9044, + "step": 8140 + }, + { + "epoch": 9.71, + "learning_rate": 1.957884513508741e-05, + "loss": 0.8805, + "step": 8150 + }, + { + "epoch": 9.73, + "learning_rate": 1.9534698922832424e-05, + "loss": 0.9111, + "step": 8160 + }, + { + "epoch": 9.74, + "learning_rate": 1.9490552710577435e-05, + "loss": 0.9262, + "step": 8170 + }, + { + "epoch": 9.75, + "learning_rate": 1.9446406498322446e-05, + "loss": 0.9375, + "step": 8180 + }, + { + "epoch": 9.76, + "learning_rate": 1.9402260286067454e-05, + "loss": 0.8493, + "step": 8190 + }, + { + "epoch": 9.77, + "learning_rate": 1.9358114073812465e-05, + "loss": 0.9687, + "step": 8200 + }, + { + "epoch": 9.79, + "learning_rate": 1.931396786155748e-05, + "loss": 0.911, + "step": 8210 + }, + { + "epoch": 9.8, + "learning_rate": 1.926982164930249e-05, + "loss": 0.837, + "step": 8220 + }, + { + "epoch": 9.81, + "learning_rate": 1.9225675437047503e-05, + "loss": 0.9345, + "step": 8230 + }, + { + "epoch": 9.82, + "learning_rate": 1.9181529224792514e-05, + "loss": 0.8811, + "step": 8240 + }, + { + "epoch": 9.83, + "learning_rate": 1.9137383012537525e-05, + "loss": 0.8878, + "step": 8250 + }, + { + "epoch": 9.84, + "learning_rate": 1.9093236800282536e-05, + "loss": 0.8808, + "step": 8260 + }, + { + "epoch": 9.86, + "learning_rate": 1.9049090588027548e-05, + "loss": 0.8808, + "step": 8270 + }, + { + "epoch": 9.87, + "learning_rate": 1.900494437577256e-05, + "loss": 0.9018, + "step": 8280 + }, + { + "epoch": 9.88, + "learning_rate": 1.896079816351757e-05, + "loss": 0.9156, + "step": 8290 + }, + { + "epoch": 9.89, + "learning_rate": 1.891665195126258e-05, + "loss": 0.8381, + "step": 8300 + }, + { + "epoch": 9.9, + "learning_rate": 1.8872505739007593e-05, + "loss": 0.8914, + "step": 8310 + }, + { + "epoch": 9.92, + "learning_rate": 1.8828359526752607e-05, + "loss": 0.8733, + "step": 8320 + }, + { + "epoch": 9.93, + "learning_rate": 1.878421331449762e-05, + "loss": 0.9012, + "step": 8330 + }, + { + "epoch": 9.94, + "learning_rate": 1.874006710224263e-05, + "loss": 0.9081, + "step": 8340 + }, + { + "epoch": 9.95, + "learning_rate": 1.8695920889987638e-05, + "loss": 0.8976, + "step": 8350 + }, + { + "epoch": 9.96, + "learning_rate": 1.865177467773265e-05, + "loss": 0.9017, + "step": 8360 + }, + { + "epoch": 9.98, + "learning_rate": 1.8607628465477664e-05, + "loss": 0.8704, + "step": 8370 + }, + { + "epoch": 9.99, + "learning_rate": 1.8563482253222675e-05, + "loss": 0.8366, + "step": 8380 + }, + { + "epoch": 10.0, + "learning_rate": 1.8519336040967686e-05, + "loss": 0.8757, + "step": 8390 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7892521972292567, + "eval_f1": 0.7755970684102179, + "eval_loss": 0.838049054145813, + "eval_precision": 0.780591813364183, + "eval_recall": 0.7892521972292567, + "eval_runtime": 98.9975, + "eval_samples_per_second": 271.239, + "eval_steps_per_second": 4.243, + "step": 8390 + }, + { + "epoch": 10.01, + "learning_rate": 1.8475189828712697e-05, + "loss": 0.8931, + "step": 8400 + }, + { + "epoch": 10.02, + "learning_rate": 1.843104361645771e-05, + "loss": 0.901, + "step": 8410 + }, + { + "epoch": 10.04, + "learning_rate": 1.838689740420272e-05, + "loss": 0.8172, + "step": 8420 + }, + { + "epoch": 10.05, + "learning_rate": 1.834275119194773e-05, + "loss": 0.8767, + "step": 8430 + }, + { + "epoch": 10.06, + "learning_rate": 1.8298604979692742e-05, + "loss": 0.8376, + "step": 8440 + }, + { + "epoch": 10.07, + "learning_rate": 1.8254458767437754e-05, + "loss": 0.8581, + "step": 8450 + }, + { + "epoch": 10.08, + "learning_rate": 1.8210312555182765e-05, + "loss": 0.8818, + "step": 8460 + }, + { + "epoch": 10.1, + "learning_rate": 1.816616634292778e-05, + "loss": 0.8747, + "step": 8470 + }, + { + "epoch": 10.11, + "learning_rate": 1.812202013067279e-05, + "loss": 0.8558, + "step": 8480 + }, + { + "epoch": 10.12, + "learning_rate": 1.8077873918417802e-05, + "loss": 0.8284, + "step": 8490 + }, + { + "epoch": 10.13, + "learning_rate": 1.8033727706162813e-05, + "loss": 0.8434, + "step": 8500 + }, + { + "epoch": 10.14, + "learning_rate": 1.798958149390782e-05, + "loss": 0.83, + "step": 8510 + }, + { + "epoch": 10.15, + "learning_rate": 1.7945435281652833e-05, + "loss": 0.8254, + "step": 8520 + }, + { + "epoch": 10.17, + "learning_rate": 1.7901289069397847e-05, + "loss": 0.8608, + "step": 8530 + }, + { + "epoch": 10.18, + "learning_rate": 1.785714285714286e-05, + "loss": 0.8495, + "step": 8540 + }, + { + "epoch": 10.19, + "learning_rate": 1.781299664488787e-05, + "loss": 0.8754, + "step": 8550 + }, + { + "epoch": 10.2, + "learning_rate": 1.776885043263288e-05, + "loss": 0.8561, + "step": 8560 + }, + { + "epoch": 10.21, + "learning_rate": 1.7724704220377892e-05, + "loss": 0.8647, + "step": 8570 + }, + { + "epoch": 10.23, + "learning_rate": 1.7680558008122904e-05, + "loss": 0.8261, + "step": 8580 + }, + { + "epoch": 10.24, + "learning_rate": 1.7636411795867915e-05, + "loss": 0.8337, + "step": 8590 + }, + { + "epoch": 10.25, + "learning_rate": 1.7592265583612926e-05, + "loss": 0.8431, + "step": 8600 + }, + { + "epoch": 10.26, + "learning_rate": 1.7548119371357937e-05, + "loss": 0.8226, + "step": 8610 + }, + { + "epoch": 10.27, + "learning_rate": 1.750397315910295e-05, + "loss": 0.8261, + "step": 8620 + }, + { + "epoch": 10.29, + "learning_rate": 1.7459826946847963e-05, + "loss": 0.8508, + "step": 8630 + }, + { + "epoch": 10.3, + "learning_rate": 1.7415680734592974e-05, + "loss": 0.8536, + "step": 8640 + }, + { + "epoch": 10.31, + "learning_rate": 1.7371534522337986e-05, + "loss": 0.8555, + "step": 8650 + }, + { + "epoch": 10.32, + "learning_rate": 1.7327388310082997e-05, + "loss": 0.8368, + "step": 8660 + }, + { + "epoch": 10.33, + "learning_rate": 1.7283242097828005e-05, + "loss": 0.8585, + "step": 8670 + }, + { + "epoch": 10.35, + "learning_rate": 1.723909588557302e-05, + "loss": 0.869, + "step": 8680 + }, + { + "epoch": 10.36, + "learning_rate": 1.719494967331803e-05, + "loss": 0.8741, + "step": 8690 + }, + { + "epoch": 10.37, + "learning_rate": 1.7150803461063042e-05, + "loss": 0.7962, + "step": 8700 + }, + { + "epoch": 10.38, + "learning_rate": 1.7106657248808053e-05, + "loss": 0.8805, + "step": 8710 + }, + { + "epoch": 10.39, + "learning_rate": 1.7062511036553065e-05, + "loss": 0.8335, + "step": 8720 + }, + { + "epoch": 10.41, + "learning_rate": 1.7018364824298076e-05, + "loss": 0.8563, + "step": 8730 + }, + { + "epoch": 10.42, + "learning_rate": 1.6974218612043087e-05, + "loss": 0.8335, + "step": 8740 + }, + { + "epoch": 10.43, + "learning_rate": 1.69300723997881e-05, + "loss": 0.8624, + "step": 8750 + }, + { + "epoch": 10.44, + "learning_rate": 1.688592618753311e-05, + "loss": 0.835, + "step": 8760 + }, + { + "epoch": 10.45, + "learning_rate": 1.684177997527812e-05, + "loss": 0.8292, + "step": 8770 + }, + { + "epoch": 10.46, + "learning_rate": 1.6797633763023132e-05, + "loss": 0.8683, + "step": 8780 + }, + { + "epoch": 10.48, + "learning_rate": 1.6753487550768147e-05, + "loss": 0.8817, + "step": 8790 + }, + { + "epoch": 10.49, + "learning_rate": 1.6709341338513158e-05, + "loss": 0.8308, + "step": 8800 + }, + { + "epoch": 10.5, + "learning_rate": 1.666519512625817e-05, + "loss": 0.8214, + "step": 8810 + }, + { + "epoch": 10.51, + "learning_rate": 1.6621048914003177e-05, + "loss": 0.8095, + "step": 8820 + }, + { + "epoch": 10.52, + "learning_rate": 1.657690270174819e-05, + "loss": 0.8254, + "step": 8830 + }, + { + "epoch": 10.54, + "learning_rate": 1.6532756489493203e-05, + "loss": 0.846, + "step": 8840 + }, + { + "epoch": 10.55, + "learning_rate": 1.6488610277238214e-05, + "loss": 0.8039, + "step": 8850 + }, + { + "epoch": 10.56, + "learning_rate": 1.6444464064983226e-05, + "loss": 0.8354, + "step": 8860 + }, + { + "epoch": 10.57, + "learning_rate": 1.6400317852728237e-05, + "loss": 0.8321, + "step": 8870 + }, + { + "epoch": 10.58, + "learning_rate": 1.6356171640473248e-05, + "loss": 0.8048, + "step": 8880 + }, + { + "epoch": 10.6, + "learning_rate": 1.631202542821826e-05, + "loss": 0.8414, + "step": 8890 + }, + { + "epoch": 10.61, + "learning_rate": 1.626787921596327e-05, + "loss": 0.87, + "step": 8900 + }, + { + "epoch": 10.62, + "learning_rate": 1.6223733003708282e-05, + "loss": 0.8323, + "step": 8910 + }, + { + "epoch": 10.63, + "learning_rate": 1.6179586791453293e-05, + "loss": 0.8496, + "step": 8920 + }, + { + "epoch": 10.64, + "learning_rate": 1.6135440579198304e-05, + "loss": 0.8311, + "step": 8930 + }, + { + "epoch": 10.66, + "learning_rate": 1.6091294366943316e-05, + "loss": 0.8351, + "step": 8940 + }, + { + "epoch": 10.67, + "learning_rate": 1.604714815468833e-05, + "loss": 0.8303, + "step": 8950 + }, + { + "epoch": 10.68, + "learning_rate": 1.600300194243334e-05, + "loss": 0.8205, + "step": 8960 + }, + { + "epoch": 10.69, + "learning_rate": 1.5958855730178353e-05, + "loss": 0.8116, + "step": 8970 + }, + { + "epoch": 10.7, + "learning_rate": 1.591470951792336e-05, + "loss": 0.792, + "step": 8980 + }, + { + "epoch": 10.71, + "learning_rate": 1.5870563305668372e-05, + "loss": 0.8326, + "step": 8990 + }, + { + "epoch": 10.73, + "learning_rate": 1.5826417093413387e-05, + "loss": 0.8107, + "step": 9000 + }, + { + "epoch": 10.74, + "learning_rate": 1.5782270881158398e-05, + "loss": 0.8512, + "step": 9010 + }, + { + "epoch": 10.75, + "learning_rate": 1.573812466890341e-05, + "loss": 0.8814, + "step": 9020 + }, + { + "epoch": 10.76, + "learning_rate": 1.569397845664842e-05, + "loss": 0.8471, + "step": 9030 + }, + { + "epoch": 10.77, + "learning_rate": 1.5649832244393432e-05, + "loss": 0.841, + "step": 9040 + }, + { + "epoch": 10.79, + "learning_rate": 1.5605686032138443e-05, + "loss": 0.8457, + "step": 9050 + }, + { + "epoch": 10.8, + "learning_rate": 1.5561539819883454e-05, + "loss": 0.8292, + "step": 9060 + }, + { + "epoch": 10.81, + "learning_rate": 1.5517393607628465e-05, + "loss": 0.8242, + "step": 9070 + }, + { + "epoch": 10.82, + "learning_rate": 1.5473247395373477e-05, + "loss": 0.8112, + "step": 9080 + }, + { + "epoch": 10.83, + "learning_rate": 1.5429101183118488e-05, + "loss": 0.8051, + "step": 9090 + }, + { + "epoch": 10.85, + "learning_rate": 1.53849549708635e-05, + "loss": 0.8124, + "step": 9100 + }, + { + "epoch": 10.86, + "learning_rate": 1.5340808758608514e-05, + "loss": 0.8449, + "step": 9110 + }, + { + "epoch": 10.87, + "learning_rate": 1.5296662546353525e-05, + "loss": 0.8209, + "step": 9120 + }, + { + "epoch": 10.88, + "learning_rate": 1.5252516334098535e-05, + "loss": 0.7882, + "step": 9130 + }, + { + "epoch": 10.89, + "learning_rate": 1.5208370121843546e-05, + "loss": 0.8287, + "step": 9140 + }, + { + "epoch": 10.91, + "learning_rate": 1.5164223909588557e-05, + "loss": 0.8281, + "step": 9150 + }, + { + "epoch": 10.92, + "learning_rate": 1.512007769733357e-05, + "loss": 0.8646, + "step": 9160 + }, + { + "epoch": 10.93, + "learning_rate": 1.5075931485078581e-05, + "loss": 0.8217, + "step": 9170 + }, + { + "epoch": 10.94, + "learning_rate": 1.5031785272823593e-05, + "loss": 0.8445, + "step": 9180 + }, + { + "epoch": 10.95, + "learning_rate": 1.4987639060568604e-05, + "loss": 0.8246, + "step": 9190 + }, + { + "epoch": 10.97, + "learning_rate": 1.4943492848313614e-05, + "loss": 0.8449, + "step": 9200 + }, + { + "epoch": 10.98, + "learning_rate": 1.4899346636058628e-05, + "loss": 0.8551, + "step": 9210 + }, + { + "epoch": 10.99, + "learning_rate": 1.485520042380364e-05, + "loss": 0.8446, + "step": 9220 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.7981528377774467, + "eval_f1": 0.7859063096300944, + "eval_loss": 0.7904874682426453, + "eval_precision": 0.7913122656849716, + "eval_recall": 0.7981528377774467, + "eval_runtime": 98.969, + "eval_samples_per_second": 271.317, + "eval_steps_per_second": 4.244, + "step": 9229 + }, + { + "epoch": 11.0, + "learning_rate": 1.481105421154865e-05, + "loss": 0.8497, + "step": 9230 + }, + { + "epoch": 11.01, + "learning_rate": 1.476690799929366e-05, + "loss": 0.7354, + "step": 9240 + }, + { + "epoch": 11.03, + "learning_rate": 1.4722761787038672e-05, + "loss": 0.8413, + "step": 9250 + }, + { + "epoch": 11.04, + "learning_rate": 1.4678615574783683e-05, + "loss": 0.842, + "step": 9260 + }, + { + "epoch": 11.05, + "learning_rate": 1.4634469362528696e-05, + "loss": 0.8079, + "step": 9270 + }, + { + "epoch": 11.06, + "learning_rate": 1.4590323150273707e-05, + "loss": 0.8281, + "step": 9280 + }, + { + "epoch": 11.07, + "learning_rate": 1.4546176938018718e-05, + "loss": 0.7798, + "step": 9290 + }, + { + "epoch": 11.08, + "learning_rate": 1.450203072576373e-05, + "loss": 0.7583, + "step": 9300 + }, + { + "epoch": 11.1, + "learning_rate": 1.445788451350874e-05, + "loss": 0.7882, + "step": 9310 + }, + { + "epoch": 11.11, + "learning_rate": 1.4413738301253754e-05, + "loss": 0.7858, + "step": 9320 + }, + { + "epoch": 11.12, + "learning_rate": 1.4369592088998765e-05, + "loss": 0.7928, + "step": 9330 + }, + { + "epoch": 11.13, + "learning_rate": 1.4325445876743776e-05, + "loss": 0.7811, + "step": 9340 + }, + { + "epoch": 11.14, + "learning_rate": 1.4281299664488788e-05, + "loss": 0.7588, + "step": 9350 + }, + { + "epoch": 11.16, + "learning_rate": 1.4237153452233797e-05, + "loss": 0.8435, + "step": 9360 + }, + { + "epoch": 11.17, + "learning_rate": 1.4193007239978812e-05, + "loss": 0.7808, + "step": 9370 + }, + { + "epoch": 11.18, + "learning_rate": 1.4148861027723823e-05, + "loss": 0.7983, + "step": 9380 + }, + { + "epoch": 11.19, + "learning_rate": 1.4104714815468834e-05, + "loss": 0.7767, + "step": 9390 + }, + { + "epoch": 11.2, + "learning_rate": 1.4060568603213844e-05, + "loss": 0.7829, + "step": 9400 + }, + { + "epoch": 11.22, + "learning_rate": 1.4016422390958855e-05, + "loss": 0.7967, + "step": 9410 + }, + { + "epoch": 11.23, + "learning_rate": 1.397227617870387e-05, + "loss": 0.7639, + "step": 9420 + }, + { + "epoch": 11.24, + "learning_rate": 1.392812996644888e-05, + "loss": 0.8109, + "step": 9430 + }, + { + "epoch": 11.25, + "learning_rate": 1.388398375419389e-05, + "loss": 0.8006, + "step": 9440 + }, + { + "epoch": 11.26, + "learning_rate": 1.3839837541938902e-05, + "loss": 0.7926, + "step": 9450 + }, + { + "epoch": 11.28, + "learning_rate": 1.3795691329683913e-05, + "loss": 0.8429, + "step": 9460 + }, + { + "epoch": 11.29, + "learning_rate": 1.3751545117428924e-05, + "loss": 0.8168, + "step": 9470 + }, + { + "epoch": 11.3, + "learning_rate": 1.3707398905173937e-05, + "loss": 0.7793, + "step": 9480 + }, + { + "epoch": 11.31, + "learning_rate": 1.3663252692918949e-05, + "loss": 0.8215, + "step": 9490 + }, + { + "epoch": 11.32, + "learning_rate": 1.361910648066396e-05, + "loss": 0.7918, + "step": 9500 + }, + { + "epoch": 11.33, + "learning_rate": 1.3574960268408971e-05, + "loss": 0.814, + "step": 9510 + }, + { + "epoch": 11.35, + "learning_rate": 1.353081405615398e-05, + "loss": 0.79, + "step": 9520 + }, + { + "epoch": 11.36, + "learning_rate": 1.3486667843898995e-05, + "loss": 0.8204, + "step": 9530 + }, + { + "epoch": 11.37, + "learning_rate": 1.3442521631644007e-05, + "loss": 0.7532, + "step": 9540 + }, + { + "epoch": 11.38, + "learning_rate": 1.3398375419389018e-05, + "loss": 0.7819, + "step": 9550 + }, + { + "epoch": 11.39, + "learning_rate": 1.3354229207134027e-05, + "loss": 0.8243, + "step": 9560 + }, + { + "epoch": 11.41, + "learning_rate": 1.3310082994879039e-05, + "loss": 0.7817, + "step": 9570 + }, + { + "epoch": 11.42, + "learning_rate": 1.3265936782624053e-05, + "loss": 0.8109, + "step": 9580 + }, + { + "epoch": 11.43, + "learning_rate": 1.3221790570369063e-05, + "loss": 0.8164, + "step": 9590 + }, + { + "epoch": 11.44, + "learning_rate": 1.3177644358114074e-05, + "loss": 0.7479, + "step": 9600 + }, + { + "epoch": 11.45, + "learning_rate": 1.3133498145859085e-05, + "loss": 0.8017, + "step": 9610 + }, + { + "epoch": 11.47, + "learning_rate": 1.3089351933604097e-05, + "loss": 0.7898, + "step": 9620 + }, + { + "epoch": 11.48, + "learning_rate": 1.304520572134911e-05, + "loss": 0.7693, + "step": 9630 + }, + { + "epoch": 11.49, + "learning_rate": 1.3001059509094121e-05, + "loss": 0.7851, + "step": 9640 + }, + { + "epoch": 11.5, + "learning_rate": 1.2956913296839132e-05, + "loss": 0.7936, + "step": 9650 + }, + { + "epoch": 11.51, + "learning_rate": 1.2912767084584143e-05, + "loss": 0.7208, + "step": 9660 + }, + { + "epoch": 11.53, + "learning_rate": 1.2868620872329155e-05, + "loss": 0.7852, + "step": 9670 + }, + { + "epoch": 11.54, + "learning_rate": 1.2824474660074164e-05, + "loss": 0.766, + "step": 9680 + }, + { + "epoch": 11.55, + "learning_rate": 1.2780328447819179e-05, + "loss": 0.7612, + "step": 9690 + }, + { + "epoch": 11.56, + "learning_rate": 1.273618223556419e-05, + "loss": 0.7778, + "step": 9700 + }, + { + "epoch": 11.57, + "learning_rate": 1.2692036023309201e-05, + "loss": 0.8051, + "step": 9710 + }, + { + "epoch": 11.59, + "learning_rate": 1.2647889811054211e-05, + "loss": 0.8111, + "step": 9720 + }, + { + "epoch": 11.6, + "learning_rate": 1.2603743598799222e-05, + "loss": 0.7677, + "step": 9730 + }, + { + "epoch": 11.61, + "learning_rate": 1.2559597386544237e-05, + "loss": 0.7886, + "step": 9740 + }, + { + "epoch": 11.62, + "learning_rate": 1.2515451174289246e-05, + "loss": 0.7708, + "step": 9750 + }, + { + "epoch": 11.63, + "learning_rate": 1.2471304962034258e-05, + "loss": 0.7954, + "step": 9760 + }, + { + "epoch": 11.64, + "learning_rate": 1.2427158749779269e-05, + "loss": 0.8294, + "step": 9770 + }, + { + "epoch": 11.66, + "learning_rate": 1.2383012537524282e-05, + "loss": 0.7807, + "step": 9780 + }, + { + "epoch": 11.67, + "learning_rate": 1.2338866325269292e-05, + "loss": 0.7939, + "step": 9790 + }, + { + "epoch": 11.68, + "learning_rate": 1.2294720113014303e-05, + "loss": 0.8255, + "step": 9800 + }, + { + "epoch": 11.69, + "learning_rate": 1.2250573900759316e-05, + "loss": 0.7947, + "step": 9810 + }, + { + "epoch": 11.7, + "learning_rate": 1.2206427688504327e-05, + "loss": 0.7623, + "step": 9820 + }, + { + "epoch": 11.72, + "learning_rate": 1.2162281476249338e-05, + "loss": 0.7654, + "step": 9830 + }, + { + "epoch": 11.73, + "learning_rate": 1.211813526399435e-05, + "loss": 0.8064, + "step": 9840 + }, + { + "epoch": 11.74, + "learning_rate": 1.207398905173936e-05, + "loss": 0.8024, + "step": 9850 + }, + { + "epoch": 11.75, + "learning_rate": 1.2029842839484374e-05, + "loss": 0.7557, + "step": 9860 + }, + { + "epoch": 11.76, + "learning_rate": 1.1985696627229383e-05, + "loss": 0.7451, + "step": 9870 + }, + { + "epoch": 11.78, + "learning_rate": 1.1941550414974395e-05, + "loss": 0.8012, + "step": 9880 + }, + { + "epoch": 11.79, + "learning_rate": 1.1897404202719408e-05, + "loss": 0.7658, + "step": 9890 + }, + { + "epoch": 11.8, + "learning_rate": 1.1853257990464419e-05, + "loss": 0.8007, + "step": 9900 + }, + { + "epoch": 11.81, + "learning_rate": 1.180911177820943e-05, + "loss": 0.7832, + "step": 9910 + }, + { + "epoch": 11.82, + "learning_rate": 1.1764965565954441e-05, + "loss": 0.7768, + "step": 9920 + }, + { + "epoch": 11.84, + "learning_rate": 1.1720819353699453e-05, + "loss": 0.7761, + "step": 9930 + }, + { + "epoch": 11.85, + "learning_rate": 1.1676673141444466e-05, + "loss": 0.7613, + "step": 9940 + }, + { + "epoch": 11.86, + "learning_rate": 1.1632526929189475e-05, + "loss": 0.7475, + "step": 9950 + }, + { + "epoch": 11.87, + "learning_rate": 1.1588380716934488e-05, + "loss": 0.7412, + "step": 9960 + }, + { + "epoch": 11.88, + "learning_rate": 1.15442345046795e-05, + "loss": 0.7915, + "step": 9970 + }, + { + "epoch": 11.89, + "learning_rate": 1.150008829242451e-05, + "loss": 0.7724, + "step": 9980 + }, + { + "epoch": 11.91, + "learning_rate": 1.1455942080169522e-05, + "loss": 0.8079, + "step": 9990 + }, + { + "epoch": 11.92, + "learning_rate": 1.1411795867914533e-05, + "loss": 0.8112, + "step": 10000 + }, + { + "epoch": 11.93, + "learning_rate": 1.1367649655659544e-05, + "loss": 0.8072, + "step": 10010 + }, + { + "epoch": 11.94, + "learning_rate": 1.1323503443404557e-05, + "loss": 0.7606, + "step": 10020 + }, + { + "epoch": 11.95, + "learning_rate": 1.1279357231149567e-05, + "loss": 0.739, + "step": 10030 + }, + { + "epoch": 11.97, + "learning_rate": 1.123521101889458e-05, + "loss": 0.7504, + "step": 10040 + }, + { + "epoch": 11.98, + "learning_rate": 1.1191064806639591e-05, + "loss": 0.7826, + "step": 10050 + }, + { + "epoch": 11.99, + "learning_rate": 1.1146918594384602e-05, + "loss": 0.7711, + "step": 10060 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8069045136302696, + "eval_f1": 0.794973833568864, + "eval_loss": 0.7523674964904785, + "eval_precision": 0.7995192234884096, + "eval_recall": 0.8069045136302696, + "eval_runtime": 99.9574, + "eval_samples_per_second": 268.634, + "eval_steps_per_second": 4.202, + "step": 10068 + }, + { + "epoch": 12.0, + "learning_rate": 1.1102772382129614e-05, + "loss": 0.7988, + "step": 10070 + }, + { + "epoch": 12.01, + "learning_rate": 1.1058626169874625e-05, + "loss": 0.7829, + "step": 10080 + }, + { + "epoch": 12.03, + "learning_rate": 1.1014479957619636e-05, + "loss": 0.703, + "step": 10090 + }, + { + "epoch": 12.04, + "learning_rate": 1.0970333745364649e-05, + "loss": 0.7694, + "step": 10100 + }, + { + "epoch": 12.05, + "learning_rate": 1.0926187533109659e-05, + "loss": 0.7603, + "step": 10110 + }, + { + "epoch": 12.06, + "learning_rate": 1.0882041320854672e-05, + "loss": 0.7369, + "step": 10120 + }, + { + "epoch": 12.07, + "learning_rate": 1.0837895108599683e-05, + "loss": 0.7168, + "step": 10130 + }, + { + "epoch": 12.09, + "learning_rate": 1.0793748896344694e-05, + "loss": 0.7568, + "step": 10140 + }, + { + "epoch": 12.1, + "learning_rate": 1.0749602684089705e-05, + "loss": 0.7458, + "step": 10150 + }, + { + "epoch": 12.11, + "learning_rate": 1.0705456471834717e-05, + "loss": 0.7414, + "step": 10160 + }, + { + "epoch": 12.12, + "learning_rate": 1.0661310259579728e-05, + "loss": 0.7439, + "step": 10170 + }, + { + "epoch": 12.13, + "learning_rate": 1.0617164047324741e-05, + "loss": 0.7865, + "step": 10180 + }, + { + "epoch": 12.15, + "learning_rate": 1.057301783506975e-05, + "loss": 0.7622, + "step": 10190 + }, + { + "epoch": 12.16, + "learning_rate": 1.0528871622814763e-05, + "loss": 0.7578, + "step": 10200 + }, + { + "epoch": 12.17, + "learning_rate": 1.0484725410559775e-05, + "loss": 0.7623, + "step": 10210 + }, + { + "epoch": 12.18, + "learning_rate": 1.0440579198304786e-05, + "loss": 0.7498, + "step": 10220 + }, + { + "epoch": 12.19, + "learning_rate": 1.0396432986049797e-05, + "loss": 0.7892, + "step": 10230 + }, + { + "epoch": 12.2, + "learning_rate": 1.0352286773794808e-05, + "loss": 0.7462, + "step": 10240 + }, + { + "epoch": 12.22, + "learning_rate": 1.0308140561539821e-05, + "loss": 0.7069, + "step": 10250 + }, + { + "epoch": 12.23, + "learning_rate": 1.0263994349284833e-05, + "loss": 0.7679, + "step": 10260 + }, + { + "epoch": 12.24, + "learning_rate": 1.0219848137029842e-05, + "loss": 0.7507, + "step": 10270 + }, + { + "epoch": 12.25, + "learning_rate": 1.0175701924774855e-05, + "loss": 0.7748, + "step": 10280 + }, + { + "epoch": 12.26, + "learning_rate": 1.0131555712519866e-05, + "loss": 0.7781, + "step": 10290 + }, + { + "epoch": 12.28, + "learning_rate": 1.0087409500264878e-05, + "loss": 0.6939, + "step": 10300 + }, + { + "epoch": 12.29, + "learning_rate": 1.0043263288009889e-05, + "loss": 0.6655, + "step": 10310 + }, + { + "epoch": 12.3, + "learning_rate": 9.9991170757549e-06, + "loss": 0.7509, + "step": 10320 + }, + { + "epoch": 12.31, + "learning_rate": 9.954970863499913e-06, + "loss": 0.7902, + "step": 10330 + }, + { + "epoch": 12.32, + "learning_rate": 9.910824651244924e-06, + "loss": 0.7459, + "step": 10340 + }, + { + "epoch": 12.34, + "learning_rate": 9.866678438989934e-06, + "loss": 0.7505, + "step": 10350 + }, + { + "epoch": 12.35, + "learning_rate": 9.822532226734947e-06, + "loss": 0.7476, + "step": 10360 + }, + { + "epoch": 12.36, + "learning_rate": 9.778386014479958e-06, + "loss": 0.6797, + "step": 10370 + }, + { + "epoch": 12.37, + "learning_rate": 9.73423980222497e-06, + "loss": 0.7846, + "step": 10380 + }, + { + "epoch": 12.38, + "learning_rate": 9.69009358996998e-06, + "loss": 0.7465, + "step": 10390 + }, + { + "epoch": 12.4, + "learning_rate": 9.645947377714992e-06, + "loss": 0.7493, + "step": 10400 + }, + { + "epoch": 12.41, + "learning_rate": 9.601801165460005e-06, + "loss": 0.7691, + "step": 10410 + }, + { + "epoch": 12.42, + "learning_rate": 9.557654953205016e-06, + "loss": 0.7668, + "step": 10420 + }, + { + "epoch": 12.43, + "learning_rate": 9.513508740950026e-06, + "loss": 0.743, + "step": 10430 + }, + { + "epoch": 12.44, + "learning_rate": 9.469362528695039e-06, + "loss": 0.7609, + "step": 10440 + }, + { + "epoch": 12.46, + "learning_rate": 9.42521631644005e-06, + "loss": 0.7925, + "step": 10450 + }, + { + "epoch": 12.47, + "learning_rate": 9.381070104185061e-06, + "loss": 0.7793, + "step": 10460 + }, + { + "epoch": 12.48, + "learning_rate": 9.336923891930073e-06, + "loss": 0.7548, + "step": 10470 + }, + { + "epoch": 12.49, + "learning_rate": 9.292777679675084e-06, + "loss": 0.7509, + "step": 10480 + }, + { + "epoch": 12.5, + "learning_rate": 9.248631467420097e-06, + "loss": 0.6699, + "step": 10490 + }, + { + "epoch": 12.51, + "learning_rate": 9.204485255165108e-06, + "loss": 0.7488, + "step": 10500 + }, + { + "epoch": 12.53, + "learning_rate": 9.160339042910118e-06, + "loss": 0.7262, + "step": 10510 + }, + { + "epoch": 12.54, + "learning_rate": 9.11619283065513e-06, + "loss": 0.7565, + "step": 10520 + }, + { + "epoch": 12.55, + "learning_rate": 9.072046618400142e-06, + "loss": 0.7549, + "step": 10530 + }, + { + "epoch": 12.56, + "learning_rate": 9.027900406145153e-06, + "loss": 0.7766, + "step": 10540 + }, + { + "epoch": 12.57, + "learning_rate": 8.983754193890164e-06, + "loss": 0.7764, + "step": 10550 + }, + { + "epoch": 12.59, + "learning_rate": 8.939607981635176e-06, + "loss": 0.7215, + "step": 10560 + }, + { + "epoch": 12.6, + "learning_rate": 8.895461769380189e-06, + "loss": 0.7217, + "step": 10570 + }, + { + "epoch": 12.61, + "learning_rate": 8.8513155571252e-06, + "loss": 0.747, + "step": 10580 + }, + { + "epoch": 12.62, + "learning_rate": 8.80716934487021e-06, + "loss": 0.7748, + "step": 10590 + }, + { + "epoch": 12.63, + "learning_rate": 8.763023132615222e-06, + "loss": 0.7711, + "step": 10600 + }, + { + "epoch": 12.65, + "learning_rate": 8.718876920360234e-06, + "loss": 0.743, + "step": 10610 + }, + { + "epoch": 12.66, + "learning_rate": 8.674730708105245e-06, + "loss": 0.7345, + "step": 10620 + }, + { + "epoch": 12.67, + "learning_rate": 8.630584495850256e-06, + "loss": 0.7406, + "step": 10630 + }, + { + "epoch": 12.68, + "learning_rate": 8.586438283595267e-06, + "loss": 0.732, + "step": 10640 + }, + { + "epoch": 12.69, + "learning_rate": 8.54229207134028e-06, + "loss": 0.729, + "step": 10650 + }, + { + "epoch": 12.71, + "learning_rate": 8.498145859085292e-06, + "loss": 0.7323, + "step": 10660 + }, + { + "epoch": 12.72, + "learning_rate": 8.453999646830301e-06, + "loss": 0.7796, + "step": 10670 + }, + { + "epoch": 12.73, + "learning_rate": 8.409853434575314e-06, + "loss": 0.7401, + "step": 10680 + }, + { + "epoch": 12.74, + "learning_rate": 8.365707222320325e-06, + "loss": 0.7403, + "step": 10690 + }, + { + "epoch": 12.75, + "learning_rate": 8.321561010065337e-06, + "loss": 0.7433, + "step": 10700 + }, + { + "epoch": 12.76, + "learning_rate": 8.277414797810348e-06, + "loss": 0.7227, + "step": 10710 + }, + { + "epoch": 12.78, + "learning_rate": 8.233268585555359e-06, + "loss": 0.7929, + "step": 10720 + }, + { + "epoch": 12.79, + "learning_rate": 8.189122373300372e-06, + "loss": 0.7602, + "step": 10730 + }, + { + "epoch": 12.8, + "learning_rate": 8.144976161045383e-06, + "loss": 0.7554, + "step": 10740 + }, + { + "epoch": 12.81, + "learning_rate": 8.100829948790393e-06, + "loss": 0.7635, + "step": 10750 + }, + { + "epoch": 12.82, + "learning_rate": 8.056683736535406e-06, + "loss": 0.7451, + "step": 10760 + }, + { + "epoch": 12.84, + "learning_rate": 8.012537524280417e-06, + "loss": 0.7906, + "step": 10770 + }, + { + "epoch": 12.85, + "learning_rate": 7.968391312025428e-06, + "loss": 0.7279, + "step": 10780 + }, + { + "epoch": 12.86, + "learning_rate": 7.92424509977044e-06, + "loss": 0.7292, + "step": 10790 + }, + { + "epoch": 12.87, + "learning_rate": 7.880098887515451e-06, + "loss": 0.7269, + "step": 10800 + }, + { + "epoch": 12.88, + "learning_rate": 7.835952675260464e-06, + "loss": 0.705, + "step": 10810 + }, + { + "epoch": 12.9, + "learning_rate": 7.791806463005475e-06, + "loss": 0.7554, + "step": 10820 + }, + { + "epoch": 12.91, + "learning_rate": 7.747660250750485e-06, + "loss": 0.7186, + "step": 10830 + }, + { + "epoch": 12.92, + "learning_rate": 7.703514038495498e-06, + "loss": 0.7928, + "step": 10840 + }, + { + "epoch": 12.93, + "learning_rate": 7.659367826240509e-06, + "loss": 0.7391, + "step": 10850 + }, + { + "epoch": 12.94, + "learning_rate": 7.615221613985521e-06, + "loss": 0.728, + "step": 10860 + }, + { + "epoch": 12.96, + "learning_rate": 7.5710754017305314e-06, + "loss": 0.7191, + "step": 10870 + }, + { + "epoch": 12.97, + "learning_rate": 7.526929189475543e-06, + "loss": 0.7436, + "step": 10880 + }, + { + "epoch": 12.98, + "learning_rate": 7.482782977220555e-06, + "loss": 0.71, + "step": 10890 + }, + { + "epoch": 12.99, + "learning_rate": 7.438636764965566e-06, + "loss": 0.7689, + "step": 10900 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8123417250111723, + "eval_f1": 0.8008904007472688, + "eval_loss": 0.7283065915107727, + "eval_precision": 0.8043071439972418, + "eval_recall": 0.8123417250111723, + "eval_runtime": 99.1806, + "eval_samples_per_second": 270.739, + "eval_steps_per_second": 4.235, + "step": 10907 + }, + { + "epoch": 13.0, + "learning_rate": 7.394490552710578e-06, + "loss": 0.7298, + "step": 10910 + }, + { + "epoch": 13.02, + "learning_rate": 7.3503443404555894e-06, + "loss": 0.7718, + "step": 10920 + }, + { + "epoch": 13.03, + "learning_rate": 7.306198128200601e-06, + "loss": 0.6979, + "step": 10930 + }, + { + "epoch": 13.04, + "learning_rate": 7.262051915945613e-06, + "loss": 0.7406, + "step": 10940 + }, + { + "epoch": 13.05, + "learning_rate": 7.217905703690623e-06, + "loss": 0.7342, + "step": 10950 + }, + { + "epoch": 13.06, + "learning_rate": 7.1737594914356345e-06, + "loss": 0.7568, + "step": 10960 + }, + { + "epoch": 13.08, + "learning_rate": 7.129613279180647e-06, + "loss": 0.7185, + "step": 10970 + }, + { + "epoch": 13.09, + "learning_rate": 7.085467066925658e-06, + "loss": 0.7223, + "step": 10980 + }, + { + "epoch": 13.1, + "learning_rate": 7.04132085467067e-06, + "loss": 0.7146, + "step": 10990 + }, + { + "epoch": 13.11, + "learning_rate": 6.997174642415681e-06, + "loss": 0.679, + "step": 11000 + }, + { + "epoch": 13.12, + "learning_rate": 6.9530284301606925e-06, + "loss": 0.7124, + "step": 11010 + }, + { + "epoch": 13.13, + "learning_rate": 6.908882217905705e-06, + "loss": 0.7503, + "step": 11020 + }, + { + "epoch": 13.15, + "learning_rate": 6.864736005650715e-06, + "loss": 0.7214, + "step": 11030 + }, + { + "epoch": 13.16, + "learning_rate": 6.820589793395726e-06, + "loss": 0.7273, + "step": 11040 + }, + { + "epoch": 13.17, + "learning_rate": 6.776443581140738e-06, + "loss": 0.7334, + "step": 11050 + }, + { + "epoch": 13.18, + "learning_rate": 6.73229736888575e-06, + "loss": 0.7317, + "step": 11060 + }, + { + "epoch": 13.19, + "learning_rate": 6.688151156630762e-06, + "loss": 0.7214, + "step": 11070 + }, + { + "epoch": 13.21, + "learning_rate": 6.644004944375773e-06, + "loss": 0.7343, + "step": 11080 + }, + { + "epoch": 13.22, + "learning_rate": 6.599858732120784e-06, + "loss": 0.7117, + "step": 11090 + }, + { + "epoch": 13.23, + "learning_rate": 6.555712519865796e-06, + "loss": 0.7101, + "step": 11100 + }, + { + "epoch": 13.24, + "learning_rate": 6.511566307610807e-06, + "loss": 0.67, + "step": 11110 + }, + { + "epoch": 13.25, + "learning_rate": 6.467420095355818e-06, + "loss": 0.7423, + "step": 11120 + }, + { + "epoch": 13.27, + "learning_rate": 6.42327388310083e-06, + "loss": 0.7379, + "step": 11130 + }, + { + "epoch": 13.28, + "learning_rate": 6.3791276708458414e-06, + "loss": 0.7012, + "step": 11140 + }, + { + "epoch": 13.29, + "learning_rate": 6.3349814585908535e-06, + "loss": 0.7246, + "step": 11150 + }, + { + "epoch": 13.3, + "learning_rate": 6.290835246335865e-06, + "loss": 0.7448, + "step": 11160 + }, + { + "epoch": 13.31, + "learning_rate": 6.246689034080876e-06, + "loss": 0.6997, + "step": 11170 + }, + { + "epoch": 13.33, + "learning_rate": 6.202542821825888e-06, + "loss": 0.7124, + "step": 11180 + }, + { + "epoch": 13.34, + "learning_rate": 6.158396609570899e-06, + "loss": 0.7324, + "step": 11190 + }, + { + "epoch": 13.35, + "learning_rate": 6.114250397315911e-06, + "loss": 0.7154, + "step": 11200 + }, + { + "epoch": 13.36, + "learning_rate": 6.070104185060922e-06, + "loss": 0.7044, + "step": 11210 + }, + { + "epoch": 13.37, + "learning_rate": 6.025957972805934e-06, + "loss": 0.73, + "step": 11220 + }, + { + "epoch": 13.38, + "learning_rate": 5.9818117605509445e-06, + "loss": 0.72, + "step": 11230 + }, + { + "epoch": 13.4, + "learning_rate": 5.937665548295957e-06, + "loss": 0.7784, + "step": 11240 + }, + { + "epoch": 13.41, + "learning_rate": 5.893519336040968e-06, + "loss": 0.6891, + "step": 11250 + }, + { + "epoch": 13.42, + "learning_rate": 5.84937312378598e-06, + "loss": 0.7191, + "step": 11260 + }, + { + "epoch": 13.43, + "learning_rate": 5.80522691153099e-06, + "loss": 0.7387, + "step": 11270 + }, + { + "epoch": 13.44, + "learning_rate": 5.7610806992760025e-06, + "loss": 0.7136, + "step": 11280 + }, + { + "epoch": 13.46, + "learning_rate": 5.716934487021014e-06, + "loss": 0.732, + "step": 11290 + }, + { + "epoch": 13.47, + "learning_rate": 5.672788274766026e-06, + "loss": 0.7582, + "step": 11300 + }, + { + "epoch": 13.48, + "learning_rate": 5.628642062511036e-06, + "loss": 0.7532, + "step": 11310 + }, + { + "epoch": 13.49, + "learning_rate": 5.584495850256048e-06, + "loss": 0.7333, + "step": 11320 + }, + { + "epoch": 13.5, + "learning_rate": 5.54034963800106e-06, + "loss": 0.7313, + "step": 11330 + }, + { + "epoch": 13.52, + "learning_rate": 5.496203425746072e-06, + "loss": 0.7207, + "step": 11340 + }, + { + "epoch": 13.53, + "learning_rate": 5.452057213491082e-06, + "loss": 0.7069, + "step": 11350 + }, + { + "epoch": 13.54, + "learning_rate": 5.407911001236094e-06, + "loss": 0.6881, + "step": 11360 + }, + { + "epoch": 13.55, + "learning_rate": 5.3637647889811055e-06, + "loss": 0.7396, + "step": 11370 + }, + { + "epoch": 13.56, + "learning_rate": 5.319618576726118e-06, + "loss": 0.6671, + "step": 11380 + }, + { + "epoch": 13.58, + "learning_rate": 5.275472364471128e-06, + "loss": 0.7244, + "step": 11390 + }, + { + "epoch": 13.59, + "learning_rate": 5.23132615221614e-06, + "loss": 0.7028, + "step": 11400 + }, + { + "epoch": 13.6, + "learning_rate": 5.187179939961151e-06, + "loss": 0.7315, + "step": 11410 + }, + { + "epoch": 13.61, + "learning_rate": 5.1430337277061635e-06, + "loss": 0.7555, + "step": 11420 + }, + { + "epoch": 13.62, + "learning_rate": 5.098887515451174e-06, + "loss": 0.7226, + "step": 11430 + }, + { + "epoch": 13.64, + "learning_rate": 5.054741303196186e-06, + "loss": 0.7307, + "step": 11440 + }, + { + "epoch": 13.65, + "learning_rate": 5.010595090941197e-06, + "loss": 0.7348, + "step": 11450 + }, + { + "epoch": 13.66, + "learning_rate": 4.966448878686209e-06, + "loss": 0.6986, + "step": 11460 + }, + { + "epoch": 13.67, + "learning_rate": 4.92230266643122e-06, + "loss": 0.746, + "step": 11470 + }, + { + "epoch": 13.68, + "learning_rate": 4.878156454176232e-06, + "loss": 0.7028, + "step": 11480 + }, + { + "epoch": 13.69, + "learning_rate": 4.834010241921243e-06, + "loss": 0.7215, + "step": 11490 + }, + { + "epoch": 13.71, + "learning_rate": 4.789864029666255e-06, + "loss": 0.7616, + "step": 11500 + }, + { + "epoch": 13.72, + "learning_rate": 4.7457178174112666e-06, + "loss": 0.7073, + "step": 11510 + }, + { + "epoch": 13.73, + "learning_rate": 4.701571605156278e-06, + "loss": 0.6928, + "step": 11520 + }, + { + "epoch": 13.74, + "learning_rate": 4.657425392901289e-06, + "loss": 0.6846, + "step": 11530 + }, + { + "epoch": 13.75, + "learning_rate": 4.613279180646301e-06, + "loss": 0.716, + "step": 11540 + }, + { + "epoch": 13.77, + "learning_rate": 4.5691329683913125e-06, + "loss": 0.7124, + "step": 11550 + }, + { + "epoch": 13.78, + "learning_rate": 4.524986756136324e-06, + "loss": 0.7151, + "step": 11560 + }, + { + "epoch": 13.79, + "learning_rate": 4.480840543881335e-06, + "loss": 0.7018, + "step": 11570 + }, + { + "epoch": 13.8, + "learning_rate": 4.436694331626347e-06, + "loss": 0.7275, + "step": 11580 + }, + { + "epoch": 13.81, + "learning_rate": 4.392548119371358e-06, + "loss": 0.7311, + "step": 11590 + }, + { + "epoch": 13.83, + "learning_rate": 4.34840190711637e-06, + "loss": 0.6931, + "step": 11600 + }, + { + "epoch": 13.84, + "learning_rate": 4.304255694861381e-06, + "loss": 0.7124, + "step": 11610 + }, + { + "epoch": 13.85, + "learning_rate": 4.260109482606393e-06, + "loss": 0.7396, + "step": 11620 + }, + { + "epoch": 13.86, + "learning_rate": 4.215963270351404e-06, + "loss": 0.7241, + "step": 11630 + }, + { + "epoch": 13.87, + "learning_rate": 4.1718170580964155e-06, + "loss": 0.7272, + "step": 11640 + }, + { + "epoch": 13.89, + "learning_rate": 4.127670845841427e-06, + "loss": 0.6968, + "step": 11650 + }, + { + "epoch": 13.9, + "learning_rate": 4.083524633586439e-06, + "loss": 0.6634, + "step": 11660 + }, + { + "epoch": 13.91, + "learning_rate": 4.03937842133145e-06, + "loss": 0.7018, + "step": 11670 + }, + { + "epoch": 13.92, + "learning_rate": 3.995232209076461e-06, + "loss": 0.7265, + "step": 11680 + }, + { + "epoch": 13.93, + "learning_rate": 3.951085996821473e-06, + "loss": 0.6852, + "step": 11690 + }, + { + "epoch": 13.94, + "learning_rate": 3.906939784566485e-06, + "loss": 0.7071, + "step": 11700 + }, + { + "epoch": 13.96, + "learning_rate": 3.862793572311496e-06, + "loss": 0.7031, + "step": 11710 + }, + { + "epoch": 13.97, + "learning_rate": 3.818647360056507e-06, + "loss": 0.7017, + "step": 11720 + }, + { + "epoch": 13.98, + "learning_rate": 3.7745011478015185e-06, + "loss": 0.7198, + "step": 11730 + }, + { + "epoch": 13.99, + "learning_rate": 3.7303549355465302e-06, + "loss": 0.6919, + "step": 11740 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8147996424847311, + "eval_f1": 0.8036085852328231, + "eval_loss": 0.7133387327194214, + "eval_precision": 0.8060988123614719, + "eval_recall": 0.8147996424847311, + "eval_runtime": 99.251, + "eval_samples_per_second": 270.546, + "eval_steps_per_second": 4.232, + "step": 11746 + }, + { + "epoch": 14.0, + "learning_rate": 3.686208723291542e-06, + "loss": 0.7503, + "step": 11750 + }, + { + "epoch": 14.02, + "learning_rate": 3.6420625110365527e-06, + "loss": 0.6886, + "step": 11760 + }, + { + "epoch": 14.03, + "learning_rate": 3.5979162987815644e-06, + "loss": 0.7366, + "step": 11770 + }, + { + "epoch": 14.04, + "learning_rate": 3.553770086526576e-06, + "loss": 0.6969, + "step": 11780 + }, + { + "epoch": 14.05, + "learning_rate": 3.509623874271588e-06, + "loss": 0.7285, + "step": 11790 + }, + { + "epoch": 14.06, + "learning_rate": 3.4654776620165995e-06, + "loss": 0.6919, + "step": 11800 + }, + { + "epoch": 14.08, + "learning_rate": 3.4213314497616103e-06, + "loss": 0.6904, + "step": 11810 + }, + { + "epoch": 14.09, + "learning_rate": 3.377185237506622e-06, + "loss": 0.6896, + "step": 11820 + }, + { + "epoch": 14.1, + "learning_rate": 3.3330390252516337e-06, + "loss": 0.6798, + "step": 11830 + }, + { + "epoch": 14.11, + "learning_rate": 3.2888928129966454e-06, + "loss": 0.7035, + "step": 11840 + }, + { + "epoch": 14.12, + "learning_rate": 3.2447466007416562e-06, + "loss": 0.6692, + "step": 11850 + }, + { + "epoch": 14.14, + "learning_rate": 3.200600388486668e-06, + "loss": 0.7203, + "step": 11860 + }, + { + "epoch": 14.15, + "learning_rate": 3.1564541762316796e-06, + "loss": 0.7057, + "step": 11870 + }, + { + "epoch": 14.16, + "learning_rate": 3.112307963976691e-06, + "loss": 0.7154, + "step": 11880 + }, + { + "epoch": 14.17, + "learning_rate": 3.0681617517217025e-06, + "loss": 0.6911, + "step": 11890 + }, + { + "epoch": 14.18, + "learning_rate": 3.024015539466714e-06, + "loss": 0.6745, + "step": 11900 + }, + { + "epoch": 14.2, + "learning_rate": 2.9798693272117255e-06, + "loss": 0.7365, + "step": 11910 + }, + { + "epoch": 14.21, + "learning_rate": 2.9357231149567367e-06, + "loss": 0.7185, + "step": 11920 + }, + { + "epoch": 14.22, + "learning_rate": 2.8915769027017484e-06, + "loss": 0.654, + "step": 11930 + }, + { + "epoch": 14.23, + "learning_rate": 2.8474306904467597e-06, + "loss": 0.7046, + "step": 11940 + }, + { + "epoch": 14.24, + "learning_rate": 2.8032844781917714e-06, + "loss": 0.7372, + "step": 11950 + }, + { + "epoch": 14.25, + "learning_rate": 2.7591382659367826e-06, + "loss": 0.7017, + "step": 11960 + }, + { + "epoch": 14.27, + "learning_rate": 2.7149920536817943e-06, + "loss": 0.6843, + "step": 11970 + }, + { + "epoch": 14.28, + "learning_rate": 2.6708458414268056e-06, + "loss": 0.6891, + "step": 11980 + }, + { + "epoch": 14.29, + "learning_rate": 2.6266996291718173e-06, + "loss": 0.6984, + "step": 11990 + }, + { + "epoch": 14.3, + "learning_rate": 2.5825534169168285e-06, + "loss": 0.7012, + "step": 12000 + }, + { + "epoch": 14.31, + "learning_rate": 2.53840720466184e-06, + "loss": 0.7015, + "step": 12010 + }, + { + "epoch": 14.33, + "learning_rate": 2.4942609924068515e-06, + "loss": 0.7127, + "step": 12020 + }, + { + "epoch": 14.34, + "learning_rate": 2.450114780151863e-06, + "loss": 0.7021, + "step": 12030 + }, + { + "epoch": 14.35, + "learning_rate": 2.4059685678968744e-06, + "loss": 0.6599, + "step": 12040 + }, + { + "epoch": 14.36, + "learning_rate": 2.361822355641886e-06, + "loss": 0.7266, + "step": 12050 + }, + { + "epoch": 14.37, + "learning_rate": 2.3176761433868974e-06, + "loss": 0.6562, + "step": 12060 + }, + { + "epoch": 14.39, + "learning_rate": 2.273529931131909e-06, + "loss": 0.7178, + "step": 12070 + }, + { + "epoch": 14.4, + "learning_rate": 2.2293837188769203e-06, + "loss": 0.683, + "step": 12080 + }, + { + "epoch": 14.41, + "learning_rate": 2.185237506621932e-06, + "loss": 0.7003, + "step": 12090 + }, + { + "epoch": 14.42, + "learning_rate": 2.1410912943669432e-06, + "loss": 0.6672, + "step": 12100 + }, + { + "epoch": 14.43, + "learning_rate": 2.096945082111955e-06, + "loss": 0.7333, + "step": 12110 + }, + { + "epoch": 14.45, + "learning_rate": 2.0527988698569666e-06, + "loss": 0.7226, + "step": 12120 + }, + { + "epoch": 14.46, + "learning_rate": 2.008652657601978e-06, + "loss": 0.7062, + "step": 12130 + }, + { + "epoch": 14.47, + "learning_rate": 1.9645064453469896e-06, + "loss": 0.6825, + "step": 12140 + }, + { + "epoch": 14.48, + "learning_rate": 1.920360233092001e-06, + "loss": 0.7219, + "step": 12150 + }, + { + "epoch": 14.49, + "learning_rate": 1.8762140208370125e-06, + "loss": 0.7151, + "step": 12160 + }, + { + "epoch": 14.51, + "learning_rate": 1.8320678085820238e-06, + "loss": 0.6999, + "step": 12170 + }, + { + "epoch": 14.52, + "learning_rate": 1.7879215963270355e-06, + "loss": 0.7182, + "step": 12180 + }, + { + "epoch": 14.53, + "learning_rate": 1.7437753840720467e-06, + "loss": 0.7063, + "step": 12190 + }, + { + "epoch": 14.54, + "learning_rate": 1.6996291718170584e-06, + "loss": 0.7208, + "step": 12200 + }, + { + "epoch": 14.55, + "learning_rate": 1.6554829595620697e-06, + "loss": 0.7346, + "step": 12210 + }, + { + "epoch": 14.56, + "learning_rate": 1.6113367473070813e-06, + "loss": 0.6791, + "step": 12220 + }, + { + "epoch": 14.58, + "learning_rate": 1.5671905350520926e-06, + "loss": 0.7421, + "step": 12230 + }, + { + "epoch": 14.59, + "learning_rate": 1.523044322797104e-06, + "loss": 0.6476, + "step": 12240 + }, + { + "epoch": 14.6, + "learning_rate": 1.4788981105421156e-06, + "loss": 0.6935, + "step": 12250 + }, + { + "epoch": 14.61, + "learning_rate": 1.434751898287127e-06, + "loss": 0.6992, + "step": 12260 + }, + { + "epoch": 14.62, + "learning_rate": 1.3906056860321385e-06, + "loss": 0.6843, + "step": 12270 + }, + { + "epoch": 14.64, + "learning_rate": 1.34645947377715e-06, + "loss": 0.6774, + "step": 12280 + }, + { + "epoch": 14.65, + "learning_rate": 1.3023132615221614e-06, + "loss": 0.6726, + "step": 12290 + }, + { + "epoch": 14.66, + "learning_rate": 1.258167049267173e-06, + "loss": 0.6871, + "step": 12300 + }, + { + "epoch": 14.67, + "learning_rate": 1.2140208370121844e-06, + "loss": 0.7278, + "step": 12310 + }, + { + "epoch": 14.68, + "learning_rate": 1.1698746247571959e-06, + "loss": 0.6631, + "step": 12320 + }, + { + "epoch": 14.7, + "learning_rate": 1.1257284125022073e-06, + "loss": 0.6951, + "step": 12330 + }, + { + "epoch": 14.71, + "learning_rate": 1.0815822002472188e-06, + "loss": 0.7023, + "step": 12340 + }, + { + "epoch": 14.72, + "learning_rate": 1.0374359879922303e-06, + "loss": 0.7297, + "step": 12350 + }, + { + "epoch": 14.73, + "learning_rate": 9.932897757372418e-07, + "loss": 0.6868, + "step": 12360 + }, + { + "epoch": 14.74, + "learning_rate": 9.491435634822533e-07, + "loss": 0.6741, + "step": 12370 + }, + { + "epoch": 14.76, + "learning_rate": 9.049973512272648e-07, + "loss": 0.6589, + "step": 12380 + }, + { + "epoch": 14.77, + "learning_rate": 8.608511389722763e-07, + "loss": 0.7029, + "step": 12390 + }, + { + "epoch": 14.78, + "learning_rate": 8.167049267172878e-07, + "loss": 0.7267, + "step": 12400 + }, + { + "epoch": 14.79, + "learning_rate": 7.725587144622992e-07, + "loss": 0.6523, + "step": 12410 + }, + { + "epoch": 14.8, + "learning_rate": 7.284125022073107e-07, + "loss": 0.7113, + "step": 12420 + }, + { + "epoch": 14.82, + "learning_rate": 6.842662899523222e-07, + "loss": 0.6823, + "step": 12430 + }, + { + "epoch": 14.83, + "learning_rate": 6.401200776973336e-07, + "loss": 0.7095, + "step": 12440 + }, + { + "epoch": 14.84, + "learning_rate": 5.95973865442345e-07, + "loss": 0.6959, + "step": 12450 + }, + { + "epoch": 14.85, + "learning_rate": 5.518276531873565e-07, + "loss": 0.7162, + "step": 12460 + }, + { + "epoch": 14.86, + "learning_rate": 5.07681440932368e-07, + "loss": 0.683, + "step": 12470 + }, + { + "epoch": 14.87, + "learning_rate": 4.635352286773795e-07, + "loss": 0.7184, + "step": 12480 + }, + { + "epoch": 14.89, + "learning_rate": 4.1938901642239095e-07, + "loss": 0.6654, + "step": 12490 + }, + { + "epoch": 14.9, + "learning_rate": 3.752428041674025e-07, + "loss": 0.6955, + "step": 12500 + }, + { + "epoch": 14.91, + "learning_rate": 3.3109659191241395e-07, + "loss": 0.6912, + "step": 12510 + }, + { + "epoch": 14.92, + "learning_rate": 2.8695037965742537e-07, + "loss": 0.7228, + "step": 12520 + }, + { + "epoch": 14.93, + "learning_rate": 2.4280416740243685e-07, + "loss": 0.6885, + "step": 12530 + }, + { + "epoch": 14.95, + "learning_rate": 1.9865795514744835e-07, + "loss": 0.7197, + "step": 12540 + }, + { + "epoch": 14.96, + "learning_rate": 1.5451174289245984e-07, + "loss": 0.7271, + "step": 12550 + }, + { + "epoch": 14.97, + "learning_rate": 1.103655306374713e-07, + "loss": 0.709, + "step": 12560 + }, + { + "epoch": 14.98, + "learning_rate": 6.621931838248278e-08, + "loss": 0.7227, + "step": 12570 + }, + { + "epoch": 14.99, + "learning_rate": 2.2073106127494264e-08, + "loss": 0.694, + "step": 12580 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8176672128705497, + "eval_f1": 0.8067463077825504, + "eval_loss": 0.7063722014427185, + "eval_precision": 0.808932447502446, + "eval_recall": 0.8176672128705497, + "eval_runtime": 99.9807, + "eval_samples_per_second": 268.572, + "eval_steps_per_second": 4.201, + "step": 12585 + }, + { + "epoch": 15.0, + "step": 12585, + "total_flos": 8.179980758810437e+19, + "train_loss": 1.7412154973476806, + "train_runtime": 20817.4419, + "train_samples_per_second": 154.783, + "train_steps_per_second": 0.605 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8244604584473866, + "eval_f1": 0.8143373319248239, + "eval_loss": 0.6935074329376221, + "eval_precision": 0.8209253030287482, + "eval_recall": 0.8244604584473866, + "eval_runtime": 847.887, + "eval_samples_per_second": 253.35, + "eval_steps_per_second": 3.959, + "step": 12585 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8176672128705497, + "eval_f1": 0.8067463077825504, + "eval_loss": 0.7063722014427185, + "eval_precision": 0.808932447502446, + "eval_recall": 0.8176672128705497, + "eval_runtime": 100.5004, + "eval_samples_per_second": 267.183, + "eval_steps_per_second": 4.179, + "step": 12585 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8172575599582899, + "eval_f1": 0.8057061753323418, + "eval_loss": 0.7021384835243225, + "eval_precision": 0.8093542627058361, + "eval_recall": 0.8172575599582899, + "eval_runtime": 100.0753, + "eval_samples_per_second": 268.318, + "eval_steps_per_second": 4.197, + "step": 12585 + } + ], + "max_steps": 12585, + "num_train_epochs": 15, + "total_flos": 8.179980758810437e+19, + "trial_name": null, + "trial_params": null +}