venetis's picture
End of training
aad7306
{
"best_metric": 0.8176672128705497,
"best_model_checkpoint": "convnext-tiny-224_album_vitVMMRdb_make_model_album_pred/checkpoint-12585",
"epoch": 14.999702114983616,
"global_step": 12585,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.971405877680699e-07,
"loss": 5.9416,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 7.942811755361398e-07,
"loss": 5.9452,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 1.1914217633042098e-06,
"loss": 5.9409,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 1.5885623510722797e-06,
"loss": 5.9378,
"step": 40
},
{
"epoch": 0.06,
"learning_rate": 1.9857029388403496e-06,
"loss": 5.9394,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 2.3828435266084195e-06,
"loss": 5.9288,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 2.7799841143764895e-06,
"loss": 5.9298,
"step": 70
},
{
"epoch": 0.1,
"learning_rate": 3.1771247021445594e-06,
"loss": 5.9304,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 3.5742652899126297e-06,
"loss": 5.9255,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 3.971405877680699e-06,
"loss": 5.9241,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 4.368546465448769e-06,
"loss": 5.9103,
"step": 110
},
{
"epoch": 0.14,
"learning_rate": 4.765687053216839e-06,
"loss": 5.9143,
"step": 120
},
{
"epoch": 0.15,
"learning_rate": 5.162827640984909e-06,
"loss": 5.9092,
"step": 130
},
{
"epoch": 0.17,
"learning_rate": 5.559968228752979e-06,
"loss": 5.8914,
"step": 140
},
{
"epoch": 0.18,
"learning_rate": 5.957108816521049e-06,
"loss": 5.8836,
"step": 150
},
{
"epoch": 0.19,
"learning_rate": 6.354249404289119e-06,
"loss": 5.8774,
"step": 160
},
{
"epoch": 0.2,
"learning_rate": 6.751389992057189e-06,
"loss": 5.87,
"step": 170
},
{
"epoch": 0.21,
"learning_rate": 7.1485305798252594e-06,
"loss": 5.8626,
"step": 180
},
{
"epoch": 0.23,
"learning_rate": 7.5456711675933285e-06,
"loss": 5.8501,
"step": 190
},
{
"epoch": 0.24,
"learning_rate": 7.942811755361398e-06,
"loss": 5.8369,
"step": 200
},
{
"epoch": 0.25,
"learning_rate": 8.33995234312947e-06,
"loss": 5.8174,
"step": 210
},
{
"epoch": 0.26,
"learning_rate": 8.737092930897538e-06,
"loss": 5.8033,
"step": 220
},
{
"epoch": 0.27,
"learning_rate": 9.134233518665607e-06,
"loss": 5.7707,
"step": 230
},
{
"epoch": 0.29,
"learning_rate": 9.531374106433678e-06,
"loss": 5.765,
"step": 240
},
{
"epoch": 0.3,
"learning_rate": 9.928514694201747e-06,
"loss": 5.7387,
"step": 250
},
{
"epoch": 0.31,
"learning_rate": 1.0325655281969818e-05,
"loss": 5.7149,
"step": 260
},
{
"epoch": 0.32,
"learning_rate": 1.0722795869737889e-05,
"loss": 5.6734,
"step": 270
},
{
"epoch": 0.33,
"learning_rate": 1.1119936457505958e-05,
"loss": 5.6429,
"step": 280
},
{
"epoch": 0.35,
"learning_rate": 1.1517077045274027e-05,
"loss": 5.62,
"step": 290
},
{
"epoch": 0.36,
"learning_rate": 1.1914217633042098e-05,
"loss": 5.5929,
"step": 300
},
{
"epoch": 0.37,
"learning_rate": 1.2311358220810168e-05,
"loss": 5.565,
"step": 310
},
{
"epoch": 0.38,
"learning_rate": 1.2708498808578237e-05,
"loss": 5.5256,
"step": 320
},
{
"epoch": 0.39,
"learning_rate": 1.3105639396346307e-05,
"loss": 5.5036,
"step": 330
},
{
"epoch": 0.41,
"learning_rate": 1.3502779984114377e-05,
"loss": 5.4775,
"step": 340
},
{
"epoch": 0.42,
"learning_rate": 1.3899920571882446e-05,
"loss": 5.4254,
"step": 350
},
{
"epoch": 0.43,
"learning_rate": 1.4297061159650519e-05,
"loss": 5.4182,
"step": 360
},
{
"epoch": 0.44,
"learning_rate": 1.4694201747418588e-05,
"loss": 5.39,
"step": 370
},
{
"epoch": 0.45,
"learning_rate": 1.5091342335186657e-05,
"loss": 5.3802,
"step": 380
},
{
"epoch": 0.46,
"learning_rate": 1.548848292295473e-05,
"loss": 5.3357,
"step": 390
},
{
"epoch": 0.48,
"learning_rate": 1.5885623510722797e-05,
"loss": 5.3103,
"step": 400
},
{
"epoch": 0.49,
"learning_rate": 1.6282764098490864e-05,
"loss": 5.3291,
"step": 410
},
{
"epoch": 0.5,
"learning_rate": 1.667990468625894e-05,
"loss": 5.2987,
"step": 420
},
{
"epoch": 0.51,
"learning_rate": 1.7077045274027006e-05,
"loss": 5.2742,
"step": 430
},
{
"epoch": 0.52,
"learning_rate": 1.7474185861795077e-05,
"loss": 5.281,
"step": 440
},
{
"epoch": 0.54,
"learning_rate": 1.7871326449563147e-05,
"loss": 5.2443,
"step": 450
},
{
"epoch": 0.55,
"learning_rate": 1.8268467037331215e-05,
"loss": 5.2933,
"step": 460
},
{
"epoch": 0.56,
"learning_rate": 1.8665607625099285e-05,
"loss": 5.2445,
"step": 470
},
{
"epoch": 0.57,
"learning_rate": 1.9062748212867356e-05,
"loss": 5.2277,
"step": 480
},
{
"epoch": 0.58,
"learning_rate": 1.9459888800635427e-05,
"loss": 5.2166,
"step": 490
},
{
"epoch": 0.6,
"learning_rate": 1.9857029388403494e-05,
"loss": 5.217,
"step": 500
},
{
"epoch": 0.61,
"learning_rate": 2.0254169976171565e-05,
"loss": 5.2192,
"step": 510
},
{
"epoch": 0.62,
"learning_rate": 2.0651310563939636e-05,
"loss": 5.1772,
"step": 520
},
{
"epoch": 0.63,
"learning_rate": 2.1048451151707703e-05,
"loss": 5.1665,
"step": 530
},
{
"epoch": 0.64,
"learning_rate": 2.1445591739475777e-05,
"loss": 5.123,
"step": 540
},
{
"epoch": 0.66,
"learning_rate": 2.1842732327243845e-05,
"loss": 5.1441,
"step": 550
},
{
"epoch": 0.67,
"learning_rate": 2.2239872915011916e-05,
"loss": 5.1312,
"step": 560
},
{
"epoch": 0.68,
"learning_rate": 2.2637013502779986e-05,
"loss": 5.0611,
"step": 570
},
{
"epoch": 0.69,
"learning_rate": 2.3034154090548054e-05,
"loss": 5.0637,
"step": 580
},
{
"epoch": 0.7,
"learning_rate": 2.3431294678316128e-05,
"loss": 5.0366,
"step": 590
},
{
"epoch": 0.71,
"learning_rate": 2.3828435266084195e-05,
"loss": 5.0064,
"step": 600
},
{
"epoch": 0.73,
"learning_rate": 2.4225575853852263e-05,
"loss": 4.9881,
"step": 610
},
{
"epoch": 0.74,
"learning_rate": 2.4622716441620337e-05,
"loss": 4.9946,
"step": 620
},
{
"epoch": 0.75,
"learning_rate": 2.5019857029388404e-05,
"loss": 4.9487,
"step": 630
},
{
"epoch": 0.76,
"learning_rate": 2.5416997617156475e-05,
"loss": 4.9289,
"step": 640
},
{
"epoch": 0.77,
"learning_rate": 2.5814138204924542e-05,
"loss": 4.8561,
"step": 650
},
{
"epoch": 0.79,
"learning_rate": 2.6211278792692613e-05,
"loss": 4.9333,
"step": 660
},
{
"epoch": 0.8,
"learning_rate": 2.6608419380460687e-05,
"loss": 4.8653,
"step": 670
},
{
"epoch": 0.81,
"learning_rate": 2.7005559968228755e-05,
"loss": 4.8375,
"step": 680
},
{
"epoch": 0.82,
"learning_rate": 2.7402700555996825e-05,
"loss": 4.848,
"step": 690
},
{
"epoch": 0.83,
"learning_rate": 2.7799841143764893e-05,
"loss": 4.7894,
"step": 700
},
{
"epoch": 0.85,
"learning_rate": 2.8196981731532964e-05,
"loss": 4.7765,
"step": 710
},
{
"epoch": 0.86,
"learning_rate": 2.8594122319301038e-05,
"loss": 4.7187,
"step": 720
},
{
"epoch": 0.87,
"learning_rate": 2.8991262907069105e-05,
"loss": 4.7285,
"step": 730
},
{
"epoch": 0.88,
"learning_rate": 2.9388403494837176e-05,
"loss": 4.7499,
"step": 740
},
{
"epoch": 0.89,
"learning_rate": 2.9785544082605243e-05,
"loss": 4.7581,
"step": 750
},
{
"epoch": 0.91,
"learning_rate": 3.0182684670373314e-05,
"loss": 4.712,
"step": 760
},
{
"epoch": 0.92,
"learning_rate": 3.057982525814138e-05,
"loss": 4.747,
"step": 770
},
{
"epoch": 0.93,
"learning_rate": 3.097696584590946e-05,
"loss": 4.6972,
"step": 780
},
{
"epoch": 0.94,
"learning_rate": 3.1374106433677526e-05,
"loss": 4.7124,
"step": 790
},
{
"epoch": 0.95,
"learning_rate": 3.1771247021445594e-05,
"loss": 4.6658,
"step": 800
},
{
"epoch": 0.97,
"learning_rate": 3.216838760921366e-05,
"loss": 4.6369,
"step": 810
},
{
"epoch": 0.98,
"learning_rate": 3.256552819698173e-05,
"loss": 4.6082,
"step": 820
},
{
"epoch": 0.99,
"learning_rate": 3.29626687847498e-05,
"loss": 4.6105,
"step": 830
},
{
"epoch": 1.0,
"eval_accuracy": 0.10967525696409951,
"eval_f1": 0.04032957193561421,
"eval_loss": 4.524773597717285,
"eval_precision": 0.05793827286588486,
"eval_recall": 0.10967525696409951,
"eval_runtime": 98.7079,
"eval_samples_per_second": 272.035,
"eval_steps_per_second": 4.255,
"step": 839
},
{
"epoch": 1.0,
"learning_rate": 3.335980937251788e-05,
"loss": 4.7548,
"step": 840
},
{
"epoch": 1.01,
"learning_rate": 3.3756949960285944e-05,
"loss": 4.5241,
"step": 850
},
{
"epoch": 1.03,
"learning_rate": 3.415409054805401e-05,
"loss": 4.5317,
"step": 860
},
{
"epoch": 1.04,
"learning_rate": 3.455123113582208e-05,
"loss": 4.5292,
"step": 870
},
{
"epoch": 1.05,
"learning_rate": 3.494837172359015e-05,
"loss": 4.4944,
"step": 880
},
{
"epoch": 1.06,
"learning_rate": 3.534551231135822e-05,
"loss": 4.4926,
"step": 890
},
{
"epoch": 1.07,
"learning_rate": 3.5742652899126295e-05,
"loss": 4.4883,
"step": 900
},
{
"epoch": 1.08,
"learning_rate": 3.613979348689436e-05,
"loss": 4.4853,
"step": 910
},
{
"epoch": 1.1,
"learning_rate": 3.653693407466243e-05,
"loss": 4.4041,
"step": 920
},
{
"epoch": 1.11,
"learning_rate": 3.6934074662430504e-05,
"loss": 4.4882,
"step": 930
},
{
"epoch": 1.12,
"learning_rate": 3.733121525019857e-05,
"loss": 4.3713,
"step": 940
},
{
"epoch": 1.13,
"learning_rate": 3.7728355837966645e-05,
"loss": 4.4213,
"step": 950
},
{
"epoch": 1.14,
"learning_rate": 3.812549642573471e-05,
"loss": 4.4329,
"step": 960
},
{
"epoch": 1.16,
"learning_rate": 3.852263701350278e-05,
"loss": 4.3754,
"step": 970
},
{
"epoch": 1.17,
"learning_rate": 3.8919777601270854e-05,
"loss": 4.3711,
"step": 980
},
{
"epoch": 1.18,
"learning_rate": 3.931691818903892e-05,
"loss": 4.3894,
"step": 990
},
{
"epoch": 1.19,
"learning_rate": 3.971405877680699e-05,
"loss": 4.3531,
"step": 1000
},
{
"epoch": 1.2,
"learning_rate": 4.011119936457506e-05,
"loss": 4.3371,
"step": 1010
},
{
"epoch": 1.22,
"learning_rate": 4.050833995234313e-05,
"loss": 4.26,
"step": 1020
},
{
"epoch": 1.23,
"learning_rate": 4.0905480540111204e-05,
"loss": 4.3503,
"step": 1030
},
{
"epoch": 1.24,
"learning_rate": 4.130262112787927e-05,
"loss": 4.254,
"step": 1040
},
{
"epoch": 1.25,
"learning_rate": 4.169976171564734e-05,
"loss": 4.309,
"step": 1050
},
{
"epoch": 1.26,
"learning_rate": 4.209690230341541e-05,
"loss": 4.2973,
"step": 1060
},
{
"epoch": 1.28,
"learning_rate": 4.249404289118348e-05,
"loss": 4.274,
"step": 1070
},
{
"epoch": 1.29,
"learning_rate": 4.2891183478951555e-05,
"loss": 4.2529,
"step": 1080
},
{
"epoch": 1.3,
"learning_rate": 4.328832406671962e-05,
"loss": 4.1877,
"step": 1090
},
{
"epoch": 1.31,
"learning_rate": 4.368546465448769e-05,
"loss": 4.2293,
"step": 1100
},
{
"epoch": 1.32,
"learning_rate": 4.408260524225576e-05,
"loss": 4.1833,
"step": 1110
},
{
"epoch": 1.33,
"learning_rate": 4.447974583002383e-05,
"loss": 4.1317,
"step": 1120
},
{
"epoch": 1.35,
"learning_rate": 4.4876886417791905e-05,
"loss": 4.1578,
"step": 1130
},
{
"epoch": 1.36,
"learning_rate": 4.527402700555997e-05,
"loss": 4.1565,
"step": 1140
},
{
"epoch": 1.37,
"learning_rate": 4.567116759332804e-05,
"loss": 4.1517,
"step": 1150
},
{
"epoch": 1.38,
"learning_rate": 4.606830818109611e-05,
"loss": 4.1748,
"step": 1160
},
{
"epoch": 1.39,
"learning_rate": 4.6465448768864175e-05,
"loss": 4.118,
"step": 1170
},
{
"epoch": 1.41,
"learning_rate": 4.6862589356632256e-05,
"loss": 4.1787,
"step": 1180
},
{
"epoch": 1.42,
"learning_rate": 4.725972994440032e-05,
"loss": 4.0838,
"step": 1190
},
{
"epoch": 1.43,
"learning_rate": 4.765687053216839e-05,
"loss": 4.1386,
"step": 1200
},
{
"epoch": 1.44,
"learning_rate": 4.805401111993646e-05,
"loss": 4.032,
"step": 1210
},
{
"epoch": 1.45,
"learning_rate": 4.8451151707704525e-05,
"loss": 4.0394,
"step": 1220
},
{
"epoch": 1.47,
"learning_rate": 4.88482922954726e-05,
"loss": 4.0293,
"step": 1230
},
{
"epoch": 1.48,
"learning_rate": 4.9245432883240674e-05,
"loss": 3.9968,
"step": 1240
},
{
"epoch": 1.49,
"learning_rate": 4.964257347100874e-05,
"loss": 4.0275,
"step": 1250
},
{
"epoch": 1.5,
"learning_rate": 4.99955853787745e-05,
"loss": 3.9625,
"step": 1260
},
{
"epoch": 1.51,
"learning_rate": 4.995143916651951e-05,
"loss": 4.0545,
"step": 1270
},
{
"epoch": 1.53,
"learning_rate": 4.990729295426453e-05,
"loss": 3.9338,
"step": 1280
},
{
"epoch": 1.54,
"learning_rate": 4.9863146742009535e-05,
"loss": 3.9301,
"step": 1290
},
{
"epoch": 1.55,
"learning_rate": 4.981900052975455e-05,
"loss": 3.953,
"step": 1300
},
{
"epoch": 1.56,
"learning_rate": 4.977485431749956e-05,
"loss": 3.9407,
"step": 1310
},
{
"epoch": 1.57,
"learning_rate": 4.973070810524457e-05,
"loss": 3.9255,
"step": 1320
},
{
"epoch": 1.59,
"learning_rate": 4.968656189298959e-05,
"loss": 3.9394,
"step": 1330
},
{
"epoch": 1.6,
"learning_rate": 4.9642415680734595e-05,
"loss": 3.9143,
"step": 1340
},
{
"epoch": 1.61,
"learning_rate": 4.959826946847961e-05,
"loss": 3.8728,
"step": 1350
},
{
"epoch": 1.62,
"learning_rate": 4.955412325622462e-05,
"loss": 3.8672,
"step": 1360
},
{
"epoch": 1.63,
"learning_rate": 4.950997704396963e-05,
"loss": 3.8059,
"step": 1370
},
{
"epoch": 1.64,
"learning_rate": 4.9465830831714646e-05,
"loss": 3.8209,
"step": 1380
},
{
"epoch": 1.66,
"learning_rate": 4.9421684619459654e-05,
"loss": 3.7519,
"step": 1390
},
{
"epoch": 1.67,
"learning_rate": 4.937753840720466e-05,
"loss": 3.7815,
"step": 1400
},
{
"epoch": 1.68,
"learning_rate": 4.933339219494968e-05,
"loss": 3.8434,
"step": 1410
},
{
"epoch": 1.69,
"learning_rate": 4.9289245982694685e-05,
"loss": 3.7892,
"step": 1420
},
{
"epoch": 1.7,
"learning_rate": 4.92450997704397e-05,
"loss": 3.7054,
"step": 1430
},
{
"epoch": 1.72,
"learning_rate": 4.920095355818471e-05,
"loss": 3.8061,
"step": 1440
},
{
"epoch": 1.73,
"learning_rate": 4.915680734592972e-05,
"loss": 3.7292,
"step": 1450
},
{
"epoch": 1.74,
"learning_rate": 4.911266113367473e-05,
"loss": 3.7181,
"step": 1460
},
{
"epoch": 1.75,
"learning_rate": 4.9068514921419744e-05,
"loss": 3.6944,
"step": 1470
},
{
"epoch": 1.76,
"learning_rate": 4.902436870916476e-05,
"loss": 3.7,
"step": 1480
},
{
"epoch": 1.78,
"learning_rate": 4.898022249690977e-05,
"loss": 3.6496,
"step": 1490
},
{
"epoch": 1.79,
"learning_rate": 4.893607628465478e-05,
"loss": 3.6937,
"step": 1500
},
{
"epoch": 1.8,
"learning_rate": 4.889193007239979e-05,
"loss": 3.6367,
"step": 1510
},
{
"epoch": 1.81,
"learning_rate": 4.8847783860144804e-05,
"loss": 3.656,
"step": 1520
},
{
"epoch": 1.82,
"learning_rate": 4.880363764788982e-05,
"loss": 3.6302,
"step": 1530
},
{
"epoch": 1.84,
"learning_rate": 4.875949143563483e-05,
"loss": 3.6144,
"step": 1540
},
{
"epoch": 1.85,
"learning_rate": 4.871534522337984e-05,
"loss": 3.6227,
"step": 1550
},
{
"epoch": 1.86,
"learning_rate": 4.867119901112485e-05,
"loss": 3.6088,
"step": 1560
},
{
"epoch": 1.87,
"learning_rate": 4.862705279886986e-05,
"loss": 3.5556,
"step": 1570
},
{
"epoch": 1.88,
"learning_rate": 4.8582906586614865e-05,
"loss": 3.5657,
"step": 1580
},
{
"epoch": 1.89,
"learning_rate": 4.853876037435988e-05,
"loss": 3.5833,
"step": 1590
},
{
"epoch": 1.91,
"learning_rate": 4.8494614162104894e-05,
"loss": 3.517,
"step": 1600
},
{
"epoch": 1.92,
"learning_rate": 4.84504679498499e-05,
"loss": 3.4891,
"step": 1610
},
{
"epoch": 1.93,
"learning_rate": 4.840632173759492e-05,
"loss": 3.4938,
"step": 1620
},
{
"epoch": 1.94,
"learning_rate": 4.8362175525339925e-05,
"loss": 3.4694,
"step": 1630
},
{
"epoch": 1.95,
"learning_rate": 4.831802931308494e-05,
"loss": 3.5104,
"step": 1640
},
{
"epoch": 1.97,
"learning_rate": 4.8273883100829954e-05,
"loss": 3.5442,
"step": 1650
},
{
"epoch": 1.98,
"learning_rate": 4.822973688857496e-05,
"loss": 3.5078,
"step": 1660
},
{
"epoch": 1.99,
"learning_rate": 4.8185590676319976e-05,
"loss": 3.4711,
"step": 1670
},
{
"epoch": 2.0,
"eval_accuracy": 0.2999776552956949,
"eval_f1": 0.20968639879082807,
"eval_loss": 3.316246509552002,
"eval_precision": 0.23018076164128848,
"eval_recall": 0.2999776552956949,
"eval_runtime": 98.3168,
"eval_samples_per_second": 273.117,
"eval_steps_per_second": 4.272,
"step": 1678
},
{
"epoch": 2.0,
"learning_rate": 4.8141444464064984e-05,
"loss": 3.5429,
"step": 1680
},
{
"epoch": 2.01,
"learning_rate": 4.809729825181e-05,
"loss": 3.4651,
"step": 1690
},
{
"epoch": 2.03,
"learning_rate": 4.8053152039555014e-05,
"loss": 3.3769,
"step": 1700
},
{
"epoch": 2.04,
"learning_rate": 4.800900582730002e-05,
"loss": 3.3783,
"step": 1710
},
{
"epoch": 2.05,
"learning_rate": 4.796485961504503e-05,
"loss": 3.4053,
"step": 1720
},
{
"epoch": 2.06,
"learning_rate": 4.7920713402790044e-05,
"loss": 3.3744,
"step": 1730
},
{
"epoch": 2.07,
"learning_rate": 4.787656719053505e-05,
"loss": 3.3675,
"step": 1740
},
{
"epoch": 2.09,
"learning_rate": 4.7832420978280066e-05,
"loss": 3.3607,
"step": 1750
},
{
"epoch": 2.1,
"learning_rate": 4.7788274766025074e-05,
"loss": 3.4022,
"step": 1760
},
{
"epoch": 2.11,
"learning_rate": 4.774412855377009e-05,
"loss": 3.2748,
"step": 1770
},
{
"epoch": 2.12,
"learning_rate": 4.76999823415151e-05,
"loss": 3.3338,
"step": 1780
},
{
"epoch": 2.13,
"learning_rate": 4.765583612926011e-05,
"loss": 3.3384,
"step": 1790
},
{
"epoch": 2.15,
"learning_rate": 4.7611689917005126e-05,
"loss": 3.2855,
"step": 1800
},
{
"epoch": 2.16,
"learning_rate": 4.7567543704750134e-05,
"loss": 3.2991,
"step": 1810
},
{
"epoch": 2.17,
"learning_rate": 4.752339749249515e-05,
"loss": 3.3359,
"step": 1820
},
{
"epoch": 2.18,
"learning_rate": 4.7479251280240157e-05,
"loss": 3.2541,
"step": 1830
},
{
"epoch": 2.19,
"learning_rate": 4.743510506798517e-05,
"loss": 3.2962,
"step": 1840
},
{
"epoch": 2.2,
"learning_rate": 4.7390958855730186e-05,
"loss": 3.2443,
"step": 1850
},
{
"epoch": 2.22,
"learning_rate": 4.7346812643475194e-05,
"loss": 3.2785,
"step": 1860
},
{
"epoch": 2.23,
"learning_rate": 4.730266643122021e-05,
"loss": 3.1409,
"step": 1870
},
{
"epoch": 2.24,
"learning_rate": 4.7258520218965216e-05,
"loss": 3.2309,
"step": 1880
},
{
"epoch": 2.25,
"learning_rate": 4.7214374006710224e-05,
"loss": 3.1422,
"step": 1890
},
{
"epoch": 2.26,
"learning_rate": 4.717022779445524e-05,
"loss": 3.1799,
"step": 1900
},
{
"epoch": 2.28,
"learning_rate": 4.712608158220025e-05,
"loss": 3.163,
"step": 1910
},
{
"epoch": 2.29,
"learning_rate": 4.708193536994526e-05,
"loss": 3.1483,
"step": 1920
},
{
"epoch": 2.3,
"learning_rate": 4.703778915769027e-05,
"loss": 3.2082,
"step": 1930
},
{
"epoch": 2.31,
"learning_rate": 4.6993642945435284e-05,
"loss": 3.1304,
"step": 1940
},
{
"epoch": 2.32,
"learning_rate": 4.694949673318029e-05,
"loss": 3.1022,
"step": 1950
},
{
"epoch": 2.34,
"learning_rate": 4.6905350520925306e-05,
"loss": 3.174,
"step": 1960
},
{
"epoch": 2.35,
"learning_rate": 4.686120430867032e-05,
"loss": 3.0886,
"step": 1970
},
{
"epoch": 2.36,
"learning_rate": 4.681705809641533e-05,
"loss": 3.0919,
"step": 1980
},
{
"epoch": 2.37,
"learning_rate": 4.6772911884160344e-05,
"loss": 3.1014,
"step": 1990
},
{
"epoch": 2.38,
"learning_rate": 4.672876567190535e-05,
"loss": 3.0974,
"step": 2000
},
{
"epoch": 2.4,
"learning_rate": 4.6684619459650366e-05,
"loss": 3.0405,
"step": 2010
},
{
"epoch": 2.41,
"learning_rate": 4.664047324739538e-05,
"loss": 3.1053,
"step": 2020
},
{
"epoch": 2.42,
"learning_rate": 4.659632703514039e-05,
"loss": 3.0749,
"step": 2030
},
{
"epoch": 2.43,
"learning_rate": 4.6552180822885396e-05,
"loss": 3.0606,
"step": 2040
},
{
"epoch": 2.44,
"learning_rate": 4.6508034610630404e-05,
"loss": 3.0498,
"step": 2050
},
{
"epoch": 2.46,
"learning_rate": 4.646388839837542e-05,
"loss": 3.0625,
"step": 2060
},
{
"epoch": 2.47,
"learning_rate": 4.6419742186120434e-05,
"loss": 3.0172,
"step": 2070
},
{
"epoch": 2.48,
"learning_rate": 4.637559597386544e-05,
"loss": 2.9855,
"step": 2080
},
{
"epoch": 2.49,
"learning_rate": 4.6331449761610456e-05,
"loss": 2.9864,
"step": 2090
},
{
"epoch": 2.5,
"learning_rate": 4.6287303549355464e-05,
"loss": 2.9722,
"step": 2100
},
{
"epoch": 2.51,
"learning_rate": 4.624315733710048e-05,
"loss": 2.89,
"step": 2110
},
{
"epoch": 2.53,
"learning_rate": 4.619901112484549e-05,
"loss": 3.0054,
"step": 2120
},
{
"epoch": 2.54,
"learning_rate": 4.61548649125905e-05,
"loss": 2.9484,
"step": 2130
},
{
"epoch": 2.55,
"learning_rate": 4.6110718700335516e-05,
"loss": 2.8977,
"step": 2140
},
{
"epoch": 2.56,
"learning_rate": 4.6066572488080524e-05,
"loss": 2.9717,
"step": 2150
},
{
"epoch": 2.57,
"learning_rate": 4.602242627582554e-05,
"loss": 2.9702,
"step": 2160
},
{
"epoch": 2.59,
"learning_rate": 4.597828006357055e-05,
"loss": 2.9394,
"step": 2170
},
{
"epoch": 2.6,
"learning_rate": 4.593413385131556e-05,
"loss": 2.9298,
"step": 2180
},
{
"epoch": 2.61,
"learning_rate": 4.588998763906057e-05,
"loss": 2.863,
"step": 2190
},
{
"epoch": 2.62,
"learning_rate": 4.5845841426805583e-05,
"loss": 2.8856,
"step": 2200
},
{
"epoch": 2.63,
"learning_rate": 4.580169521455059e-05,
"loss": 2.8582,
"step": 2210
},
{
"epoch": 2.65,
"learning_rate": 4.5757549002295606e-05,
"loss": 2.9151,
"step": 2220
},
{
"epoch": 2.66,
"learning_rate": 4.5713402790040614e-05,
"loss": 2.8773,
"step": 2230
},
{
"epoch": 2.67,
"learning_rate": 4.566925657778563e-05,
"loss": 2.8786,
"step": 2240
},
{
"epoch": 2.68,
"learning_rate": 4.5625110365530636e-05,
"loss": 2.8755,
"step": 2250
},
{
"epoch": 2.69,
"learning_rate": 4.558096415327565e-05,
"loss": 2.854,
"step": 2260
},
{
"epoch": 2.71,
"learning_rate": 4.5536817941020666e-05,
"loss": 2.8562,
"step": 2270
},
{
"epoch": 2.72,
"learning_rate": 4.5492671728765674e-05,
"loss": 2.8179,
"step": 2280
},
{
"epoch": 2.73,
"learning_rate": 4.544852551651069e-05,
"loss": 2.8217,
"step": 2290
},
{
"epoch": 2.74,
"learning_rate": 4.5404379304255696e-05,
"loss": 2.7928,
"step": 2300
},
{
"epoch": 2.75,
"learning_rate": 4.536023309200071e-05,
"loss": 2.804,
"step": 2310
},
{
"epoch": 2.76,
"learning_rate": 4.5316086879745725e-05,
"loss": 2.7497,
"step": 2320
},
{
"epoch": 2.78,
"learning_rate": 4.527194066749073e-05,
"loss": 2.7463,
"step": 2330
},
{
"epoch": 2.79,
"learning_rate": 4.522779445523575e-05,
"loss": 2.7182,
"step": 2340
},
{
"epoch": 2.8,
"learning_rate": 4.5183648242980756e-05,
"loss": 2.7447,
"step": 2350
},
{
"epoch": 2.81,
"learning_rate": 4.5139502030725764e-05,
"loss": 2.7624,
"step": 2360
},
{
"epoch": 2.82,
"learning_rate": 4.509535581847077e-05,
"loss": 2.7295,
"step": 2370
},
{
"epoch": 2.84,
"learning_rate": 4.5051209606215786e-05,
"loss": 2.69,
"step": 2380
},
{
"epoch": 2.85,
"learning_rate": 4.50070633939608e-05,
"loss": 2.6543,
"step": 2390
},
{
"epoch": 2.86,
"learning_rate": 4.496291718170581e-05,
"loss": 2.7529,
"step": 2400
},
{
"epoch": 2.87,
"learning_rate": 4.491877096945082e-05,
"loss": 2.74,
"step": 2410
},
{
"epoch": 2.88,
"learning_rate": 4.487462475719583e-05,
"loss": 2.6917,
"step": 2420
},
{
"epoch": 2.9,
"learning_rate": 4.4830478544940846e-05,
"loss": 2.6828,
"step": 2430
},
{
"epoch": 2.91,
"learning_rate": 4.478633233268586e-05,
"loss": 2.6397,
"step": 2440
},
{
"epoch": 2.92,
"learning_rate": 4.474218612043087e-05,
"loss": 2.7281,
"step": 2450
},
{
"epoch": 2.93,
"learning_rate": 4.469803990817588e-05,
"loss": 2.656,
"step": 2460
},
{
"epoch": 2.94,
"learning_rate": 4.465389369592089e-05,
"loss": 2.6689,
"step": 2470
},
{
"epoch": 2.96,
"learning_rate": 4.4609747483665905e-05,
"loss": 2.6667,
"step": 2480
},
{
"epoch": 2.97,
"learning_rate": 4.456560127141092e-05,
"loss": 2.6502,
"step": 2490
},
{
"epoch": 2.98,
"learning_rate": 4.452145505915593e-05,
"loss": 2.6483,
"step": 2500
},
{
"epoch": 2.99,
"learning_rate": 4.4477308846900936e-05,
"loss": 2.6202,
"step": 2510
},
{
"epoch": 3.0,
"eval_accuracy": 0.4708774020557128,
"eval_f1": 0.3938664278992793,
"eval_loss": 2.444504499435425,
"eval_precision": 0.41196094659916344,
"eval_recall": 0.4708774020557128,
"eval_runtime": 98.7509,
"eval_samples_per_second": 271.916,
"eval_steps_per_second": 4.253,
"step": 2517
},
{
"epoch": 3.0,
"learning_rate": 4.443316263464595e-05,
"loss": 2.6666,
"step": 2520
},
{
"epoch": 3.02,
"learning_rate": 4.438901642239096e-05,
"loss": 2.522,
"step": 2530
},
{
"epoch": 3.03,
"learning_rate": 4.434487021013597e-05,
"loss": 2.5387,
"step": 2540
},
{
"epoch": 3.04,
"learning_rate": 4.430072399788098e-05,
"loss": 2.588,
"step": 2550
},
{
"epoch": 3.05,
"learning_rate": 4.4256577785625996e-05,
"loss": 2.5588,
"step": 2560
},
{
"epoch": 3.06,
"learning_rate": 4.4212431573371003e-05,
"loss": 2.5374,
"step": 2570
},
{
"epoch": 3.08,
"learning_rate": 4.416828536111602e-05,
"loss": 2.6003,
"step": 2580
},
{
"epoch": 3.09,
"learning_rate": 4.412413914886103e-05,
"loss": 2.4761,
"step": 2590
},
{
"epoch": 3.1,
"learning_rate": 4.407999293660604e-05,
"loss": 2.5166,
"step": 2600
},
{
"epoch": 3.11,
"learning_rate": 4.4035846724351055e-05,
"loss": 2.5305,
"step": 2610
},
{
"epoch": 3.12,
"learning_rate": 4.399170051209606e-05,
"loss": 2.5378,
"step": 2620
},
{
"epoch": 3.13,
"learning_rate": 4.394755429984108e-05,
"loss": 2.5443,
"step": 2630
},
{
"epoch": 3.15,
"learning_rate": 4.390340808758609e-05,
"loss": 2.5134,
"step": 2640
},
{
"epoch": 3.16,
"learning_rate": 4.38592618753311e-05,
"loss": 2.5105,
"step": 2650
},
{
"epoch": 3.17,
"learning_rate": 4.3815115663076115e-05,
"loss": 2.5155,
"step": 2660
},
{
"epoch": 3.18,
"learning_rate": 4.377096945082112e-05,
"loss": 2.5207,
"step": 2670
},
{
"epoch": 3.19,
"learning_rate": 4.372682323856613e-05,
"loss": 2.4432,
"step": 2680
},
{
"epoch": 3.21,
"learning_rate": 4.3682677026311145e-05,
"loss": 2.4199,
"step": 2690
},
{
"epoch": 3.22,
"learning_rate": 4.363853081405615e-05,
"loss": 2.4083,
"step": 2700
},
{
"epoch": 3.23,
"learning_rate": 4.359438460180117e-05,
"loss": 2.5031,
"step": 2710
},
{
"epoch": 3.24,
"learning_rate": 4.3550238389546176e-05,
"loss": 2.421,
"step": 2720
},
{
"epoch": 3.25,
"learning_rate": 4.350609217729119e-05,
"loss": 2.4005,
"step": 2730
},
{
"epoch": 3.27,
"learning_rate": 4.34619459650362e-05,
"loss": 2.4384,
"step": 2740
},
{
"epoch": 3.28,
"learning_rate": 4.341779975278121e-05,
"loss": 2.3936,
"step": 2750
},
{
"epoch": 3.29,
"learning_rate": 4.337365354052623e-05,
"loss": 2.3977,
"step": 2760
},
{
"epoch": 3.3,
"learning_rate": 4.3329507328271235e-05,
"loss": 2.4,
"step": 2770
},
{
"epoch": 3.31,
"learning_rate": 4.328536111601625e-05,
"loss": 2.3995,
"step": 2780
},
{
"epoch": 3.33,
"learning_rate": 4.324121490376126e-05,
"loss": 2.3718,
"step": 2790
},
{
"epoch": 3.34,
"learning_rate": 4.319706869150627e-05,
"loss": 2.3718,
"step": 2800
},
{
"epoch": 3.35,
"learning_rate": 4.315292247925129e-05,
"loss": 2.3832,
"step": 2810
},
{
"epoch": 3.36,
"learning_rate": 4.3108776266996295e-05,
"loss": 2.4285,
"step": 2820
},
{
"epoch": 3.37,
"learning_rate": 4.30646300547413e-05,
"loss": 2.3217,
"step": 2830
},
{
"epoch": 3.38,
"learning_rate": 4.302048384248632e-05,
"loss": 2.3315,
"step": 2840
},
{
"epoch": 3.4,
"learning_rate": 4.2976337630231326e-05,
"loss": 2.3445,
"step": 2850
},
{
"epoch": 3.41,
"learning_rate": 4.293219141797634e-05,
"loss": 2.34,
"step": 2860
},
{
"epoch": 3.42,
"learning_rate": 4.288804520572135e-05,
"loss": 2.3481,
"step": 2870
},
{
"epoch": 3.43,
"learning_rate": 4.284389899346636e-05,
"loss": 2.312,
"step": 2880
},
{
"epoch": 3.44,
"learning_rate": 4.279975278121137e-05,
"loss": 2.306,
"step": 2890
},
{
"epoch": 3.46,
"learning_rate": 4.2755606568956385e-05,
"loss": 2.3284,
"step": 2900
},
{
"epoch": 3.47,
"learning_rate": 4.27114603567014e-05,
"loss": 2.3372,
"step": 2910
},
{
"epoch": 3.48,
"learning_rate": 4.266731414444641e-05,
"loss": 2.2843,
"step": 2920
},
{
"epoch": 3.49,
"learning_rate": 4.262316793219142e-05,
"loss": 2.2336,
"step": 2930
},
{
"epoch": 3.5,
"learning_rate": 4.257902171993643e-05,
"loss": 2.2738,
"step": 2940
},
{
"epoch": 3.52,
"learning_rate": 4.2534875507681445e-05,
"loss": 2.2537,
"step": 2950
},
{
"epoch": 3.53,
"learning_rate": 4.249072929542646e-05,
"loss": 2.3713,
"step": 2960
},
{
"epoch": 3.54,
"learning_rate": 4.244658308317147e-05,
"loss": 2.3185,
"step": 2970
},
{
"epoch": 3.55,
"learning_rate": 4.240243687091648e-05,
"loss": 2.2767,
"step": 2980
},
{
"epoch": 3.56,
"learning_rate": 4.235829065866149e-05,
"loss": 2.2159,
"step": 2990
},
{
"epoch": 3.58,
"learning_rate": 4.23141444464065e-05,
"loss": 2.2576,
"step": 3000
},
{
"epoch": 3.59,
"learning_rate": 4.226999823415151e-05,
"loss": 2.2698,
"step": 3010
},
{
"epoch": 3.6,
"learning_rate": 4.222585202189652e-05,
"loss": 2.2491,
"step": 3020
},
{
"epoch": 3.61,
"learning_rate": 4.2181705809641535e-05,
"loss": 2.2619,
"step": 3030
},
{
"epoch": 3.62,
"learning_rate": 4.213755959738654e-05,
"loss": 2.2013,
"step": 3040
},
{
"epoch": 3.64,
"learning_rate": 4.209341338513156e-05,
"loss": 2.2161,
"step": 3050
},
{
"epoch": 3.65,
"learning_rate": 4.204926717287657e-05,
"loss": 2.2614,
"step": 3060
},
{
"epoch": 3.66,
"learning_rate": 4.200512096062158e-05,
"loss": 2.1594,
"step": 3070
},
{
"epoch": 3.67,
"learning_rate": 4.1960974748366595e-05,
"loss": 2.2023,
"step": 3080
},
{
"epoch": 3.68,
"learning_rate": 4.19168285361116e-05,
"loss": 2.2037,
"step": 3090
},
{
"epoch": 3.69,
"learning_rate": 4.187268232385662e-05,
"loss": 2.2178,
"step": 3100
},
{
"epoch": 3.71,
"learning_rate": 4.1828536111601625e-05,
"loss": 2.1852,
"step": 3110
},
{
"epoch": 3.72,
"learning_rate": 4.178438989934664e-05,
"loss": 2.118,
"step": 3120
},
{
"epoch": 3.73,
"learning_rate": 4.1740243687091654e-05,
"loss": 2.1234,
"step": 3130
},
{
"epoch": 3.74,
"learning_rate": 4.169609747483666e-05,
"loss": 2.1954,
"step": 3140
},
{
"epoch": 3.75,
"learning_rate": 4.165195126258167e-05,
"loss": 2.2305,
"step": 3150
},
{
"epoch": 3.77,
"learning_rate": 4.160780505032668e-05,
"loss": 2.1876,
"step": 3160
},
{
"epoch": 3.78,
"learning_rate": 4.156365883807169e-05,
"loss": 2.208,
"step": 3170
},
{
"epoch": 3.79,
"learning_rate": 4.151951262581671e-05,
"loss": 2.1111,
"step": 3180
},
{
"epoch": 3.8,
"learning_rate": 4.1475366413561715e-05,
"loss": 2.1647,
"step": 3190
},
{
"epoch": 3.81,
"learning_rate": 4.143122020130673e-05,
"loss": 2.1881,
"step": 3200
},
{
"epoch": 3.83,
"learning_rate": 4.138707398905174e-05,
"loss": 2.1371,
"step": 3210
},
{
"epoch": 3.84,
"learning_rate": 4.134292777679675e-05,
"loss": 2.1696,
"step": 3220
},
{
"epoch": 3.85,
"learning_rate": 4.129878156454177e-05,
"loss": 2.1368,
"step": 3230
},
{
"epoch": 3.86,
"learning_rate": 4.1254635352286775e-05,
"loss": 2.0675,
"step": 3240
},
{
"epoch": 3.87,
"learning_rate": 4.121048914003179e-05,
"loss": 2.1286,
"step": 3250
},
{
"epoch": 3.89,
"learning_rate": 4.11663429277768e-05,
"loss": 2.0987,
"step": 3260
},
{
"epoch": 3.9,
"learning_rate": 4.112219671552181e-05,
"loss": 2.1006,
"step": 3270
},
{
"epoch": 3.91,
"learning_rate": 4.107805050326683e-05,
"loss": 2.0706,
"step": 3280
},
{
"epoch": 3.92,
"learning_rate": 4.1033904291011835e-05,
"loss": 2.0452,
"step": 3290
},
{
"epoch": 3.93,
"learning_rate": 4.098975807875684e-05,
"loss": 2.0613,
"step": 3300
},
{
"epoch": 3.94,
"learning_rate": 4.094561186650186e-05,
"loss": 2.0598,
"step": 3310
},
{
"epoch": 3.96,
"learning_rate": 4.0901465654246865e-05,
"loss": 2.0953,
"step": 3320
},
{
"epoch": 3.97,
"learning_rate": 4.085731944199188e-05,
"loss": 2.1055,
"step": 3330
},
{
"epoch": 3.98,
"learning_rate": 4.081317322973689e-05,
"loss": 2.1009,
"step": 3340
},
{
"epoch": 3.99,
"learning_rate": 4.07690270174819e-05,
"loss": 2.0614,
"step": 3350
},
{
"epoch": 4.0,
"eval_accuracy": 0.5741844182928646,
"eval_f1": 0.5167663284927785,
"eval_loss": 1.8839434385299683,
"eval_precision": 0.5388953620960228,
"eval_recall": 0.5741844182928646,
"eval_runtime": 98.9624,
"eval_samples_per_second": 271.335,
"eval_steps_per_second": 4.244,
"step": 3356
},
{
"epoch": 4.0,
"learning_rate": 4.072488080522691e-05,
"loss": 2.0478,
"step": 3360
},
{
"epoch": 4.02,
"learning_rate": 4.0680734592971925e-05,
"loss": 2.0201,
"step": 3370
},
{
"epoch": 4.03,
"learning_rate": 4.063658838071694e-05,
"loss": 2.0014,
"step": 3380
},
{
"epoch": 4.04,
"learning_rate": 4.059244216846195e-05,
"loss": 2.0108,
"step": 3390
},
{
"epoch": 4.05,
"learning_rate": 4.054829595620696e-05,
"loss": 2.0173,
"step": 3400
},
{
"epoch": 4.06,
"learning_rate": 4.050414974395197e-05,
"loss": 1.9934,
"step": 3410
},
{
"epoch": 4.08,
"learning_rate": 4.0460003531696984e-05,
"loss": 1.9927,
"step": 3420
},
{
"epoch": 4.09,
"learning_rate": 4.0415857319442e-05,
"loss": 2.0354,
"step": 3430
},
{
"epoch": 4.1,
"learning_rate": 4.037171110718701e-05,
"loss": 1.9639,
"step": 3440
},
{
"epoch": 4.11,
"learning_rate": 4.032756489493202e-05,
"loss": 1.9947,
"step": 3450
},
{
"epoch": 4.12,
"learning_rate": 4.028341868267703e-05,
"loss": 1.9709,
"step": 3460
},
{
"epoch": 4.14,
"learning_rate": 4.023927247042204e-05,
"loss": 1.9729,
"step": 3470
},
{
"epoch": 4.15,
"learning_rate": 4.0195126258167045e-05,
"loss": 1.9609,
"step": 3480
},
{
"epoch": 4.16,
"learning_rate": 4.015098004591206e-05,
"loss": 1.9675,
"step": 3490
},
{
"epoch": 4.17,
"learning_rate": 4.0106833833657074e-05,
"loss": 1.9618,
"step": 3500
},
{
"epoch": 4.18,
"learning_rate": 4.006268762140208e-05,
"loss": 1.9164,
"step": 3510
},
{
"epoch": 4.2,
"learning_rate": 4.00185414091471e-05,
"loss": 1.949,
"step": 3520
},
{
"epoch": 4.21,
"learning_rate": 3.9974395196892105e-05,
"loss": 1.9324,
"step": 3530
},
{
"epoch": 4.22,
"learning_rate": 3.993024898463712e-05,
"loss": 1.9665,
"step": 3540
},
{
"epoch": 4.23,
"learning_rate": 3.9886102772382134e-05,
"loss": 2.0005,
"step": 3550
},
{
"epoch": 4.24,
"learning_rate": 3.984195656012714e-05,
"loss": 1.9234,
"step": 3560
},
{
"epoch": 4.25,
"learning_rate": 3.979781034787216e-05,
"loss": 1.9317,
"step": 3570
},
{
"epoch": 4.27,
"learning_rate": 3.9753664135617165e-05,
"loss": 1.9583,
"step": 3580
},
{
"epoch": 4.28,
"learning_rate": 3.970951792336218e-05,
"loss": 1.869,
"step": 3590
},
{
"epoch": 4.29,
"learning_rate": 3.9665371711107194e-05,
"loss": 1.9407,
"step": 3600
},
{
"epoch": 4.3,
"learning_rate": 3.96212254988522e-05,
"loss": 1.8964,
"step": 3610
},
{
"epoch": 4.31,
"learning_rate": 3.957707928659721e-05,
"loss": 1.8768,
"step": 3620
},
{
"epoch": 4.33,
"learning_rate": 3.9532933074342224e-05,
"loss": 1.915,
"step": 3630
},
{
"epoch": 4.34,
"learning_rate": 3.948878686208723e-05,
"loss": 1.8625,
"step": 3640
},
{
"epoch": 4.35,
"learning_rate": 3.944464064983225e-05,
"loss": 1.9401,
"step": 3650
},
{
"epoch": 4.36,
"learning_rate": 3.9400494437577255e-05,
"loss": 1.9032,
"step": 3660
},
{
"epoch": 4.37,
"learning_rate": 3.935634822532227e-05,
"loss": 1.9214,
"step": 3670
},
{
"epoch": 4.39,
"learning_rate": 3.931220201306728e-05,
"loss": 1.8298,
"step": 3680
},
{
"epoch": 4.4,
"learning_rate": 3.926805580081229e-05,
"loss": 1.9058,
"step": 3690
},
{
"epoch": 4.41,
"learning_rate": 3.9223909588557306e-05,
"loss": 1.8939,
"step": 3700
},
{
"epoch": 4.42,
"learning_rate": 3.9179763376302314e-05,
"loss": 1.8385,
"step": 3710
},
{
"epoch": 4.43,
"learning_rate": 3.913561716404733e-05,
"loss": 1.8823,
"step": 3720
},
{
"epoch": 4.45,
"learning_rate": 3.909147095179234e-05,
"loss": 1.8207,
"step": 3730
},
{
"epoch": 4.46,
"learning_rate": 3.904732473953735e-05,
"loss": 1.8259,
"step": 3740
},
{
"epoch": 4.47,
"learning_rate": 3.9003178527282366e-05,
"loss": 1.8235,
"step": 3750
},
{
"epoch": 4.48,
"learning_rate": 3.8959032315027374e-05,
"loss": 1.8281,
"step": 3760
},
{
"epoch": 4.49,
"learning_rate": 3.891488610277239e-05,
"loss": 1.8559,
"step": 3770
},
{
"epoch": 4.51,
"learning_rate": 3.8870739890517397e-05,
"loss": 1.8934,
"step": 3780
},
{
"epoch": 4.52,
"learning_rate": 3.8826593678262404e-05,
"loss": 1.8855,
"step": 3790
},
{
"epoch": 4.53,
"learning_rate": 3.878244746600742e-05,
"loss": 1.7935,
"step": 3800
},
{
"epoch": 4.54,
"learning_rate": 3.873830125375243e-05,
"loss": 1.8148,
"step": 3810
},
{
"epoch": 4.55,
"learning_rate": 3.869415504149744e-05,
"loss": 1.7876,
"step": 3820
},
{
"epoch": 4.56,
"learning_rate": 3.865000882924245e-05,
"loss": 1.7597,
"step": 3830
},
{
"epoch": 4.58,
"learning_rate": 3.8605862616987464e-05,
"loss": 1.8237,
"step": 3840
},
{
"epoch": 4.59,
"learning_rate": 3.856171640473247e-05,
"loss": 1.8599,
"step": 3850
},
{
"epoch": 4.6,
"learning_rate": 3.851757019247749e-05,
"loss": 1.7719,
"step": 3860
},
{
"epoch": 4.61,
"learning_rate": 3.84734239802225e-05,
"loss": 1.8413,
"step": 3870
},
{
"epoch": 4.62,
"learning_rate": 3.842927776796751e-05,
"loss": 1.7742,
"step": 3880
},
{
"epoch": 4.64,
"learning_rate": 3.8385131555712524e-05,
"loss": 1.8499,
"step": 3890
},
{
"epoch": 4.65,
"learning_rate": 3.834098534345753e-05,
"loss": 1.7576,
"step": 3900
},
{
"epoch": 4.66,
"learning_rate": 3.8296839131202546e-05,
"loss": 1.7575,
"step": 3910
},
{
"epoch": 4.67,
"learning_rate": 3.825269291894756e-05,
"loss": 1.8016,
"step": 3920
},
{
"epoch": 4.68,
"learning_rate": 3.820854670669257e-05,
"loss": 1.7705,
"step": 3930
},
{
"epoch": 4.7,
"learning_rate": 3.816440049443758e-05,
"loss": 1.7676,
"step": 3940
},
{
"epoch": 4.71,
"learning_rate": 3.812025428218259e-05,
"loss": 1.7615,
"step": 3950
},
{
"epoch": 4.72,
"learning_rate": 3.80761080699276e-05,
"loss": 1.7477,
"step": 3960
},
{
"epoch": 4.73,
"learning_rate": 3.8031961857672614e-05,
"loss": 1.7753,
"step": 3970
},
{
"epoch": 4.74,
"learning_rate": 3.798781564541762e-05,
"loss": 1.7486,
"step": 3980
},
{
"epoch": 4.76,
"learning_rate": 3.7943669433162636e-05,
"loss": 1.7403,
"step": 3990
},
{
"epoch": 4.77,
"learning_rate": 3.7899523220907644e-05,
"loss": 1.6858,
"step": 4000
},
{
"epoch": 4.78,
"learning_rate": 3.785537700865266e-05,
"loss": 1.7315,
"step": 4010
},
{
"epoch": 4.79,
"learning_rate": 3.7811230796397674e-05,
"loss": 1.7583,
"step": 4020
},
{
"epoch": 4.8,
"learning_rate": 3.776708458414268e-05,
"loss": 1.7067,
"step": 4030
},
{
"epoch": 4.82,
"learning_rate": 3.7722938371887696e-05,
"loss": 1.7346,
"step": 4040
},
{
"epoch": 4.83,
"learning_rate": 3.7678792159632704e-05,
"loss": 1.7225,
"step": 4050
},
{
"epoch": 4.84,
"learning_rate": 3.763464594737772e-05,
"loss": 1.7022,
"step": 4060
},
{
"epoch": 4.85,
"learning_rate": 3.759049973512273e-05,
"loss": 1.7202,
"step": 4070
},
{
"epoch": 4.86,
"learning_rate": 3.754635352286774e-05,
"loss": 1.7266,
"step": 4080
},
{
"epoch": 4.87,
"learning_rate": 3.7502207310612756e-05,
"loss": 1.6832,
"step": 4090
},
{
"epoch": 4.89,
"learning_rate": 3.7458061098357764e-05,
"loss": 1.6745,
"step": 4100
},
{
"epoch": 4.9,
"learning_rate": 3.741391488610277e-05,
"loss": 1.6977,
"step": 4110
},
{
"epoch": 4.91,
"learning_rate": 3.7369768673847786e-05,
"loss": 1.6997,
"step": 4120
},
{
"epoch": 4.92,
"learning_rate": 3.7325622461592794e-05,
"loss": 1.724,
"step": 4130
},
{
"epoch": 4.93,
"learning_rate": 3.728147624933781e-05,
"loss": 1.6449,
"step": 4140
},
{
"epoch": 4.95,
"learning_rate": 3.7237330037082817e-05,
"loss": 1.6715,
"step": 4150
},
{
"epoch": 4.96,
"learning_rate": 3.719318382482783e-05,
"loss": 1.6833,
"step": 4160
},
{
"epoch": 4.97,
"learning_rate": 3.7149037612572846e-05,
"loss": 1.7102,
"step": 4170
},
{
"epoch": 4.98,
"learning_rate": 3.7104891400317854e-05,
"loss": 1.7087,
"step": 4180
},
{
"epoch": 4.99,
"learning_rate": 3.706074518806287e-05,
"loss": 1.7026,
"step": 4190
},
{
"epoch": 5.0,
"eval_accuracy": 0.6436019663339788,
"eval_f1": 0.6012686796803567,
"eval_loss": 1.5246539115905762,
"eval_precision": 0.6179627814058779,
"eval_recall": 0.6436019663339788,
"eval_runtime": 98.9599,
"eval_samples_per_second": 271.342,
"eval_steps_per_second": 4.244,
"step": 4195
},
{
"epoch": 5.01,
"learning_rate": 3.7016598975807876e-05,
"loss": 1.7138,
"step": 4200
},
{
"epoch": 5.02,
"learning_rate": 3.697245276355289e-05,
"loss": 1.5906,
"step": 4210
},
{
"epoch": 5.03,
"learning_rate": 3.6928306551297906e-05,
"loss": 1.6526,
"step": 4220
},
{
"epoch": 5.04,
"learning_rate": 3.6884160339042913e-05,
"loss": 1.568,
"step": 4230
},
{
"epoch": 5.05,
"learning_rate": 3.684001412678793e-05,
"loss": 1.6482,
"step": 4240
},
{
"epoch": 5.07,
"learning_rate": 3.6795867914532936e-05,
"loss": 1.6047,
"step": 4250
},
{
"epoch": 5.08,
"learning_rate": 3.6751721702277944e-05,
"loss": 1.6349,
"step": 4260
},
{
"epoch": 5.09,
"learning_rate": 3.670757549002296e-05,
"loss": 1.643,
"step": 4270
},
{
"epoch": 5.1,
"learning_rate": 3.6663429277767966e-05,
"loss": 1.6335,
"step": 4280
},
{
"epoch": 5.11,
"learning_rate": 3.661928306551298e-05,
"loss": 1.616,
"step": 4290
},
{
"epoch": 5.13,
"learning_rate": 3.657513685325799e-05,
"loss": 1.5813,
"step": 4300
},
{
"epoch": 5.14,
"learning_rate": 3.6530990641003004e-05,
"loss": 1.5871,
"step": 4310
},
{
"epoch": 5.15,
"learning_rate": 3.648684442874801e-05,
"loss": 1.6759,
"step": 4320
},
{
"epoch": 5.16,
"learning_rate": 3.6442698216493026e-05,
"loss": 1.5666,
"step": 4330
},
{
"epoch": 5.17,
"learning_rate": 3.639855200423804e-05,
"loss": 1.6558,
"step": 4340
},
{
"epoch": 5.18,
"learning_rate": 3.635440579198305e-05,
"loss": 1.5853,
"step": 4350
},
{
"epoch": 5.2,
"learning_rate": 3.631025957972806e-05,
"loss": 1.641,
"step": 4360
},
{
"epoch": 5.21,
"learning_rate": 3.626611336747307e-05,
"loss": 1.5875,
"step": 4370
},
{
"epoch": 5.22,
"learning_rate": 3.6221967155218086e-05,
"loss": 1.6086,
"step": 4380
},
{
"epoch": 5.23,
"learning_rate": 3.61778209429631e-05,
"loss": 1.5143,
"step": 4390
},
{
"epoch": 5.24,
"learning_rate": 3.613367473070811e-05,
"loss": 1.5948,
"step": 4400
},
{
"epoch": 5.26,
"learning_rate": 3.608952851845312e-05,
"loss": 1.6024,
"step": 4410
},
{
"epoch": 5.27,
"learning_rate": 3.604538230619813e-05,
"loss": 1.5302,
"step": 4420
},
{
"epoch": 5.28,
"learning_rate": 3.600123609394314e-05,
"loss": 1.5198,
"step": 4430
},
{
"epoch": 5.29,
"learning_rate": 3.595708988168815e-05,
"loss": 1.6065,
"step": 4440
},
{
"epoch": 5.3,
"learning_rate": 3.591294366943316e-05,
"loss": 1.568,
"step": 4450
},
{
"epoch": 5.32,
"learning_rate": 3.5868797457178176e-05,
"loss": 1.6374,
"step": 4460
},
{
"epoch": 5.33,
"learning_rate": 3.5824651244923184e-05,
"loss": 1.5732,
"step": 4470
},
{
"epoch": 5.34,
"learning_rate": 3.57805050326682e-05,
"loss": 1.5805,
"step": 4480
},
{
"epoch": 5.35,
"learning_rate": 3.573635882041321e-05,
"loss": 1.623,
"step": 4490
},
{
"epoch": 5.36,
"learning_rate": 3.569221260815822e-05,
"loss": 1.5536,
"step": 4500
},
{
"epoch": 5.38,
"learning_rate": 3.5648066395903236e-05,
"loss": 1.5806,
"step": 4510
},
{
"epoch": 5.39,
"learning_rate": 3.5603920183648243e-05,
"loss": 1.5798,
"step": 4520
},
{
"epoch": 5.4,
"learning_rate": 3.555977397139326e-05,
"loss": 1.5538,
"step": 4530
},
{
"epoch": 5.41,
"learning_rate": 3.551562775913827e-05,
"loss": 1.5041,
"step": 4540
},
{
"epoch": 5.42,
"learning_rate": 3.547148154688328e-05,
"loss": 1.5274,
"step": 4550
},
{
"epoch": 5.43,
"learning_rate": 3.5427335334628295e-05,
"loss": 1.5284,
"step": 4560
},
{
"epoch": 5.45,
"learning_rate": 3.53831891223733e-05,
"loss": 1.4944,
"step": 4570
},
{
"epoch": 5.46,
"learning_rate": 3.533904291011831e-05,
"loss": 1.5106,
"step": 4580
},
{
"epoch": 5.47,
"learning_rate": 3.5294896697863326e-05,
"loss": 1.549,
"step": 4590
},
{
"epoch": 5.48,
"learning_rate": 3.5250750485608334e-05,
"loss": 1.5478,
"step": 4600
},
{
"epoch": 5.49,
"learning_rate": 3.520660427335335e-05,
"loss": 1.5143,
"step": 4610
},
{
"epoch": 5.51,
"learning_rate": 3.5162458061098356e-05,
"loss": 1.5077,
"step": 4620
},
{
"epoch": 5.52,
"learning_rate": 3.511831184884337e-05,
"loss": 1.5095,
"step": 4630
},
{
"epoch": 5.53,
"learning_rate": 3.507416563658838e-05,
"loss": 1.4635,
"step": 4640
},
{
"epoch": 5.54,
"learning_rate": 3.503001942433339e-05,
"loss": 1.4623,
"step": 4650
},
{
"epoch": 5.55,
"learning_rate": 3.498587321207841e-05,
"loss": 1.5481,
"step": 4660
},
{
"epoch": 5.57,
"learning_rate": 3.4941726999823416e-05,
"loss": 1.4816,
"step": 4670
},
{
"epoch": 5.58,
"learning_rate": 3.489758078756843e-05,
"loss": 1.4665,
"step": 4680
},
{
"epoch": 5.59,
"learning_rate": 3.485343457531344e-05,
"loss": 1.4978,
"step": 4690
},
{
"epoch": 5.6,
"learning_rate": 3.480928836305845e-05,
"loss": 1.4649,
"step": 4700
},
{
"epoch": 5.61,
"learning_rate": 3.476514215080347e-05,
"loss": 1.4676,
"step": 4710
},
{
"epoch": 5.63,
"learning_rate": 3.4720995938548475e-05,
"loss": 1.4891,
"step": 4720
},
{
"epoch": 5.64,
"learning_rate": 3.467684972629348e-05,
"loss": 1.4662,
"step": 4730
},
{
"epoch": 5.65,
"learning_rate": 3.46327035140385e-05,
"loss": 1.5112,
"step": 4740
},
{
"epoch": 5.66,
"learning_rate": 3.4588557301783506e-05,
"loss": 1.4777,
"step": 4750
},
{
"epoch": 5.67,
"learning_rate": 3.454441108952852e-05,
"loss": 1.4861,
"step": 4760
},
{
"epoch": 5.69,
"learning_rate": 3.450026487727353e-05,
"loss": 1.4659,
"step": 4770
},
{
"epoch": 5.7,
"learning_rate": 3.445611866501854e-05,
"loss": 1.4698,
"step": 4780
},
{
"epoch": 5.71,
"learning_rate": 3.441197245276355e-05,
"loss": 1.4517,
"step": 4790
},
{
"epoch": 5.72,
"learning_rate": 3.4367826240508566e-05,
"loss": 1.4347,
"step": 4800
},
{
"epoch": 5.73,
"learning_rate": 3.432368002825358e-05,
"loss": 1.4547,
"step": 4810
},
{
"epoch": 5.74,
"learning_rate": 3.427953381599859e-05,
"loss": 1.4362,
"step": 4820
},
{
"epoch": 5.76,
"learning_rate": 3.42353876037436e-05,
"loss": 1.4627,
"step": 4830
},
{
"epoch": 5.77,
"learning_rate": 3.419124139148861e-05,
"loss": 1.4835,
"step": 4840
},
{
"epoch": 5.78,
"learning_rate": 3.4147095179233625e-05,
"loss": 1.4554,
"step": 4850
},
{
"epoch": 5.79,
"learning_rate": 3.410294896697864e-05,
"loss": 1.4479,
"step": 4860
},
{
"epoch": 5.8,
"learning_rate": 3.405880275472365e-05,
"loss": 1.4177,
"step": 4870
},
{
"epoch": 5.82,
"learning_rate": 3.401465654246866e-05,
"loss": 1.4393,
"step": 4880
},
{
"epoch": 5.83,
"learning_rate": 3.397051033021367e-05,
"loss": 1.4812,
"step": 4890
},
{
"epoch": 5.84,
"learning_rate": 3.392636411795868e-05,
"loss": 1.4318,
"step": 4900
},
{
"epoch": 5.85,
"learning_rate": 3.388221790570369e-05,
"loss": 1.4818,
"step": 4910
},
{
"epoch": 5.86,
"learning_rate": 3.38380716934487e-05,
"loss": 1.472,
"step": 4920
},
{
"epoch": 5.88,
"learning_rate": 3.3793925481193715e-05,
"loss": 1.4164,
"step": 4930
},
{
"epoch": 5.89,
"learning_rate": 3.374977926893872e-05,
"loss": 1.4173,
"step": 4940
},
{
"epoch": 5.9,
"learning_rate": 3.370563305668374e-05,
"loss": 1.4133,
"step": 4950
},
{
"epoch": 5.91,
"learning_rate": 3.366148684442875e-05,
"loss": 1.4526,
"step": 4960
},
{
"epoch": 5.92,
"learning_rate": 3.361734063217376e-05,
"loss": 1.418,
"step": 4970
},
{
"epoch": 5.94,
"learning_rate": 3.3573194419918775e-05,
"loss": 1.4083,
"step": 4980
},
{
"epoch": 5.95,
"learning_rate": 3.352904820766378e-05,
"loss": 1.4273,
"step": 4990
},
{
"epoch": 5.96,
"learning_rate": 3.34849019954088e-05,
"loss": 1.3889,
"step": 5000
},
{
"epoch": 5.97,
"learning_rate": 3.3440755783153805e-05,
"loss": 1.3927,
"step": 5010
},
{
"epoch": 5.98,
"learning_rate": 3.339660957089882e-05,
"loss": 1.3998,
"step": 5020
},
{
"epoch": 5.99,
"learning_rate": 3.3352463358643835e-05,
"loss": 1.4288,
"step": 5030
},
{
"epoch": 6.0,
"eval_accuracy": 0.6978995977953225,
"eval_f1": 0.6686418903782712,
"eval_loss": 1.2768018245697021,
"eval_precision": 0.6810067175098012,
"eval_recall": 0.6978995977953225,
"eval_runtime": 99.1055,
"eval_samples_per_second": 270.944,
"eval_steps_per_second": 4.238,
"step": 5034
},
{
"epoch": 6.01,
"learning_rate": 3.330831714638884e-05,
"loss": 1.4183,
"step": 5040
},
{
"epoch": 6.02,
"learning_rate": 3.326417093413385e-05,
"loss": 1.3548,
"step": 5050
},
{
"epoch": 6.03,
"learning_rate": 3.3220024721878865e-05,
"loss": 1.352,
"step": 5060
},
{
"epoch": 6.04,
"learning_rate": 3.317587850962387e-05,
"loss": 1.3463,
"step": 5070
},
{
"epoch": 6.05,
"learning_rate": 3.313173229736889e-05,
"loss": 1.3743,
"step": 5080
},
{
"epoch": 6.07,
"learning_rate": 3.3087586085113895e-05,
"loss": 1.3347,
"step": 5090
},
{
"epoch": 6.08,
"learning_rate": 3.304343987285891e-05,
"loss": 1.3853,
"step": 5100
},
{
"epoch": 6.09,
"learning_rate": 3.299929366060392e-05,
"loss": 1.3626,
"step": 5110
},
{
"epoch": 6.1,
"learning_rate": 3.295514744834893e-05,
"loss": 1.3255,
"step": 5120
},
{
"epoch": 6.11,
"learning_rate": 3.291100123609395e-05,
"loss": 1.3577,
"step": 5130
},
{
"epoch": 6.13,
"learning_rate": 3.2866855023838955e-05,
"loss": 1.3738,
"step": 5140
},
{
"epoch": 6.14,
"learning_rate": 3.282270881158397e-05,
"loss": 1.3802,
"step": 5150
},
{
"epoch": 6.15,
"learning_rate": 3.277856259932898e-05,
"loss": 1.3946,
"step": 5160
},
{
"epoch": 6.16,
"learning_rate": 3.273441638707399e-05,
"loss": 1.3322,
"step": 5170
},
{
"epoch": 6.17,
"learning_rate": 3.269027017481901e-05,
"loss": 1.403,
"step": 5180
},
{
"epoch": 6.19,
"learning_rate": 3.2646123962564015e-05,
"loss": 1.3271,
"step": 5190
},
{
"epoch": 6.2,
"learning_rate": 3.260197775030903e-05,
"loss": 1.3338,
"step": 5200
},
{
"epoch": 6.21,
"learning_rate": 3.255783153805404e-05,
"loss": 1.3484,
"step": 5210
},
{
"epoch": 6.22,
"learning_rate": 3.2513685325799045e-05,
"loss": 1.3764,
"step": 5220
},
{
"epoch": 6.23,
"learning_rate": 3.246953911354406e-05,
"loss": 1.2798,
"step": 5230
},
{
"epoch": 6.25,
"learning_rate": 3.242539290128907e-05,
"loss": 1.3485,
"step": 5240
},
{
"epoch": 6.26,
"learning_rate": 3.238124668903408e-05,
"loss": 1.3634,
"step": 5250
},
{
"epoch": 6.27,
"learning_rate": 3.233710047677909e-05,
"loss": 1.3283,
"step": 5260
},
{
"epoch": 6.28,
"learning_rate": 3.2292954264524105e-05,
"loss": 1.2951,
"step": 5270
},
{
"epoch": 6.29,
"learning_rate": 3.224880805226912e-05,
"loss": 1.3562,
"step": 5280
},
{
"epoch": 6.31,
"learning_rate": 3.220466184001413e-05,
"loss": 1.3264,
"step": 5290
},
{
"epoch": 6.32,
"learning_rate": 3.216051562775914e-05,
"loss": 1.2662,
"step": 5300
},
{
"epoch": 6.33,
"learning_rate": 3.211636941550415e-05,
"loss": 1.3093,
"step": 5310
},
{
"epoch": 6.34,
"learning_rate": 3.2072223203249165e-05,
"loss": 1.3059,
"step": 5320
},
{
"epoch": 6.35,
"learning_rate": 3.202807699099418e-05,
"loss": 1.3101,
"step": 5330
},
{
"epoch": 6.36,
"learning_rate": 3.198393077873919e-05,
"loss": 1.2837,
"step": 5340
},
{
"epoch": 6.38,
"learning_rate": 3.19397845664842e-05,
"loss": 1.3089,
"step": 5350
},
{
"epoch": 6.39,
"learning_rate": 3.189563835422921e-05,
"loss": 1.2935,
"step": 5360
},
{
"epoch": 6.4,
"learning_rate": 3.185149214197422e-05,
"loss": 1.2767,
"step": 5370
},
{
"epoch": 6.41,
"learning_rate": 3.180734592971923e-05,
"loss": 1.3082,
"step": 5380
},
{
"epoch": 6.42,
"learning_rate": 3.176319971746424e-05,
"loss": 1.2936,
"step": 5390
},
{
"epoch": 6.44,
"learning_rate": 3.1719053505209255e-05,
"loss": 1.2872,
"step": 5400
},
{
"epoch": 6.45,
"learning_rate": 3.167490729295426e-05,
"loss": 1.4089,
"step": 5410
},
{
"epoch": 6.46,
"learning_rate": 3.163076108069928e-05,
"loss": 1.3171,
"step": 5420
},
{
"epoch": 6.47,
"learning_rate": 3.1586614868444285e-05,
"loss": 1.2926,
"step": 5430
},
{
"epoch": 6.48,
"learning_rate": 3.15424686561893e-05,
"loss": 1.2932,
"step": 5440
},
{
"epoch": 6.5,
"learning_rate": 3.1498322443934314e-05,
"loss": 1.3524,
"step": 5450
},
{
"epoch": 6.51,
"learning_rate": 3.145417623167932e-05,
"loss": 1.3105,
"step": 5460
},
{
"epoch": 6.52,
"learning_rate": 3.141003001942434e-05,
"loss": 1.2973,
"step": 5470
},
{
"epoch": 6.53,
"learning_rate": 3.1365883807169345e-05,
"loss": 1.2757,
"step": 5480
},
{
"epoch": 6.54,
"learning_rate": 3.132173759491436e-05,
"loss": 1.2243,
"step": 5490
},
{
"epoch": 6.56,
"learning_rate": 3.1277591382659374e-05,
"loss": 1.323,
"step": 5500
},
{
"epoch": 6.57,
"learning_rate": 3.123344517040438e-05,
"loss": 1.2593,
"step": 5510
},
{
"epoch": 6.58,
"learning_rate": 3.11892989581494e-05,
"loss": 1.3141,
"step": 5520
},
{
"epoch": 6.59,
"learning_rate": 3.1145152745894405e-05,
"loss": 1.2202,
"step": 5530
},
{
"epoch": 6.6,
"learning_rate": 3.110100653363941e-05,
"loss": 1.2719,
"step": 5540
},
{
"epoch": 6.61,
"learning_rate": 3.105686032138443e-05,
"loss": 1.3169,
"step": 5550
},
{
"epoch": 6.63,
"learning_rate": 3.1012714109129435e-05,
"loss": 1.3548,
"step": 5560
},
{
"epoch": 6.64,
"learning_rate": 3.096856789687445e-05,
"loss": 1.3031,
"step": 5570
},
{
"epoch": 6.65,
"learning_rate": 3.092442168461946e-05,
"loss": 1.2254,
"step": 5580
},
{
"epoch": 6.66,
"learning_rate": 3.088027547236447e-05,
"loss": 1.2795,
"step": 5590
},
{
"epoch": 6.67,
"learning_rate": 3.083612926010949e-05,
"loss": 1.2555,
"step": 5600
},
{
"epoch": 6.69,
"learning_rate": 3.0791983047854495e-05,
"loss": 1.2079,
"step": 5610
},
{
"epoch": 6.7,
"learning_rate": 3.074783683559951e-05,
"loss": 1.2155,
"step": 5620
},
{
"epoch": 6.71,
"learning_rate": 3.070369062334452e-05,
"loss": 1.254,
"step": 5630
},
{
"epoch": 6.72,
"learning_rate": 3.065954441108953e-05,
"loss": 1.2222,
"step": 5640
},
{
"epoch": 6.73,
"learning_rate": 3.0615398198834546e-05,
"loss": 1.2446,
"step": 5650
},
{
"epoch": 6.75,
"learning_rate": 3.0571251986579554e-05,
"loss": 1.2586,
"step": 5660
},
{
"epoch": 6.76,
"learning_rate": 3.052710577432457e-05,
"loss": 1.2234,
"step": 5670
},
{
"epoch": 6.77,
"learning_rate": 3.0482959562069573e-05,
"loss": 1.2368,
"step": 5680
},
{
"epoch": 6.78,
"learning_rate": 3.0438813349814588e-05,
"loss": 1.2462,
"step": 5690
},
{
"epoch": 6.79,
"learning_rate": 3.0394667137559603e-05,
"loss": 1.2774,
"step": 5700
},
{
"epoch": 6.81,
"learning_rate": 3.035052092530461e-05,
"loss": 1.2202,
"step": 5710
},
{
"epoch": 6.82,
"learning_rate": 3.0306374713049622e-05,
"loss": 1.2673,
"step": 5720
},
{
"epoch": 6.83,
"learning_rate": 3.0262228500794633e-05,
"loss": 1.2337,
"step": 5730
},
{
"epoch": 6.84,
"learning_rate": 3.0218082288539644e-05,
"loss": 1.2289,
"step": 5740
},
{
"epoch": 6.85,
"learning_rate": 3.0173936076284652e-05,
"loss": 1.2486,
"step": 5750
},
{
"epoch": 6.87,
"learning_rate": 3.0129789864029667e-05,
"loss": 1.2476,
"step": 5760
},
{
"epoch": 6.88,
"learning_rate": 3.008564365177468e-05,
"loss": 1.2412,
"step": 5770
},
{
"epoch": 6.89,
"learning_rate": 3.004149743951969e-05,
"loss": 1.2322,
"step": 5780
},
{
"epoch": 6.9,
"learning_rate": 2.9997351227264704e-05,
"loss": 1.2127,
"step": 5790
},
{
"epoch": 6.91,
"learning_rate": 2.9953205015009712e-05,
"loss": 1.2725,
"step": 5800
},
{
"epoch": 6.92,
"learning_rate": 2.9909058802754723e-05,
"loss": 1.2347,
"step": 5810
},
{
"epoch": 6.94,
"learning_rate": 2.9864912590499738e-05,
"loss": 1.2351,
"step": 5820
},
{
"epoch": 6.95,
"learning_rate": 2.9820766378244746e-05,
"loss": 1.2328,
"step": 5830
},
{
"epoch": 6.96,
"learning_rate": 2.977662016598976e-05,
"loss": 1.2027,
"step": 5840
},
{
"epoch": 6.97,
"learning_rate": 2.9732473953734768e-05,
"loss": 1.2175,
"step": 5850
},
{
"epoch": 6.98,
"learning_rate": 2.9688327741479783e-05,
"loss": 1.1781,
"step": 5860
},
{
"epoch": 7.0,
"learning_rate": 2.9644181529224798e-05,
"loss": 1.1953,
"step": 5870
},
{
"epoch": 7.0,
"eval_accuracy": 0.732347683598987,
"eval_f1": 0.7077241444760507,
"eval_loss": 1.09598970413208,
"eval_precision": 0.7217957955270089,
"eval_recall": 0.732347683598987,
"eval_runtime": 99.334,
"eval_samples_per_second": 270.32,
"eval_steps_per_second": 4.228,
"step": 5873
},
{
"epoch": 7.01,
"learning_rate": 2.9600035316969805e-05,
"loss": 1.181,
"step": 5880
},
{
"epoch": 7.02,
"learning_rate": 2.9555889104714817e-05,
"loss": 1.2091,
"step": 5890
},
{
"epoch": 7.03,
"learning_rate": 2.9511742892459825e-05,
"loss": 1.2003,
"step": 5900
},
{
"epoch": 7.04,
"learning_rate": 2.946759668020484e-05,
"loss": 1.143,
"step": 5910
},
{
"epoch": 7.06,
"learning_rate": 2.9423450467949854e-05,
"loss": 1.1644,
"step": 5920
},
{
"epoch": 7.07,
"learning_rate": 2.9379304255694862e-05,
"loss": 1.2121,
"step": 5930
},
{
"epoch": 7.08,
"learning_rate": 2.9335158043439876e-05,
"loss": 1.1864,
"step": 5940
},
{
"epoch": 7.09,
"learning_rate": 2.9291011831184884e-05,
"loss": 1.1574,
"step": 5950
},
{
"epoch": 7.1,
"learning_rate": 2.92468656189299e-05,
"loss": 1.1473,
"step": 5960
},
{
"epoch": 7.12,
"learning_rate": 2.920271940667491e-05,
"loss": 1.1431,
"step": 5970
},
{
"epoch": 7.13,
"learning_rate": 2.9158573194419918e-05,
"loss": 1.1197,
"step": 5980
},
{
"epoch": 7.14,
"learning_rate": 2.9114426982164933e-05,
"loss": 1.1862,
"step": 5990
},
{
"epoch": 7.15,
"learning_rate": 2.907028076990994e-05,
"loss": 1.1715,
"step": 6000
},
{
"epoch": 7.16,
"learning_rate": 2.9026134557654955e-05,
"loss": 1.1822,
"step": 6010
},
{
"epoch": 7.18,
"learning_rate": 2.898198834539997e-05,
"loss": 1.1529,
"step": 6020
},
{
"epoch": 7.19,
"learning_rate": 2.8937842133144978e-05,
"loss": 1.1397,
"step": 6030
},
{
"epoch": 7.2,
"learning_rate": 2.889369592088999e-05,
"loss": 1.1757,
"step": 6040
},
{
"epoch": 7.21,
"learning_rate": 2.8849549708635e-05,
"loss": 1.1489,
"step": 6050
},
{
"epoch": 7.22,
"learning_rate": 2.880540349638001e-05,
"loss": 1.1669,
"step": 6060
},
{
"epoch": 7.23,
"learning_rate": 2.8761257284125026e-05,
"loss": 1.1342,
"step": 6070
},
{
"epoch": 7.25,
"learning_rate": 2.8717111071870034e-05,
"loss": 1.1516,
"step": 6080
},
{
"epoch": 7.26,
"learning_rate": 2.867296485961505e-05,
"loss": 1.134,
"step": 6090
},
{
"epoch": 7.27,
"learning_rate": 2.8628818647360057e-05,
"loss": 1.1583,
"step": 6100
},
{
"epoch": 7.28,
"learning_rate": 2.858467243510507e-05,
"loss": 1.1387,
"step": 6110
},
{
"epoch": 7.29,
"learning_rate": 2.8540526222850082e-05,
"loss": 1.1366,
"step": 6120
},
{
"epoch": 7.31,
"learning_rate": 2.849638001059509e-05,
"loss": 1.1627,
"step": 6130
},
{
"epoch": 7.32,
"learning_rate": 2.8452233798340105e-05,
"loss": 1.1384,
"step": 6140
},
{
"epoch": 7.33,
"learning_rate": 2.8408087586085113e-05,
"loss": 1.15,
"step": 6150
},
{
"epoch": 7.34,
"learning_rate": 2.8363941373830128e-05,
"loss": 1.1452,
"step": 6160
},
{
"epoch": 7.35,
"learning_rate": 2.8319795161575135e-05,
"loss": 1.1839,
"step": 6170
},
{
"epoch": 7.37,
"learning_rate": 2.827564894932015e-05,
"loss": 1.1116,
"step": 6180
},
{
"epoch": 7.38,
"learning_rate": 2.8231502737065165e-05,
"loss": 1.1167,
"step": 6190
},
{
"epoch": 7.39,
"learning_rate": 2.8187356524810173e-05,
"loss": 1.1393,
"step": 6200
},
{
"epoch": 7.4,
"learning_rate": 2.8143210312555184e-05,
"loss": 1.1897,
"step": 6210
},
{
"epoch": 7.41,
"learning_rate": 2.8099064100300192e-05,
"loss": 1.0915,
"step": 6220
},
{
"epoch": 7.43,
"learning_rate": 2.8054917888045206e-05,
"loss": 1.1432,
"step": 6230
},
{
"epoch": 7.44,
"learning_rate": 2.801077167579022e-05,
"loss": 1.123,
"step": 6240
},
{
"epoch": 7.45,
"learning_rate": 2.796662546353523e-05,
"loss": 1.1394,
"step": 6250
},
{
"epoch": 7.46,
"learning_rate": 2.7922479251280244e-05,
"loss": 1.1488,
"step": 6260
},
{
"epoch": 7.47,
"learning_rate": 2.787833303902525e-05,
"loss": 1.1482,
"step": 6270
},
{
"epoch": 7.48,
"learning_rate": 2.7834186826770266e-05,
"loss": 1.1294,
"step": 6280
},
{
"epoch": 7.5,
"learning_rate": 2.7790040614515277e-05,
"loss": 1.1433,
"step": 6290
},
{
"epoch": 7.51,
"learning_rate": 2.7745894402260285e-05,
"loss": 1.0743,
"step": 6300
},
{
"epoch": 7.52,
"learning_rate": 2.77017481900053e-05,
"loss": 1.1177,
"step": 6310
},
{
"epoch": 7.53,
"learning_rate": 2.7657601977750308e-05,
"loss": 1.1416,
"step": 6320
},
{
"epoch": 7.54,
"learning_rate": 2.7613455765495322e-05,
"loss": 1.082,
"step": 6330
},
{
"epoch": 7.56,
"learning_rate": 2.7569309553240337e-05,
"loss": 1.1172,
"step": 6340
},
{
"epoch": 7.57,
"learning_rate": 2.7525163340985345e-05,
"loss": 1.0803,
"step": 6350
},
{
"epoch": 7.58,
"learning_rate": 2.7481017128730356e-05,
"loss": 1.1274,
"step": 6360
},
{
"epoch": 7.59,
"learning_rate": 2.7436870916475364e-05,
"loss": 1.1648,
"step": 6370
},
{
"epoch": 7.6,
"learning_rate": 2.739272470422038e-05,
"loss": 1.1242,
"step": 6380
},
{
"epoch": 7.62,
"learning_rate": 2.7348578491965393e-05,
"loss": 1.0659,
"step": 6390
},
{
"epoch": 7.63,
"learning_rate": 2.73044322797104e-05,
"loss": 1.0619,
"step": 6400
},
{
"epoch": 7.64,
"learning_rate": 2.7260286067455416e-05,
"loss": 1.1214,
"step": 6410
},
{
"epoch": 7.65,
"learning_rate": 2.7216139855200424e-05,
"loss": 1.1358,
"step": 6420
},
{
"epoch": 7.66,
"learning_rate": 2.717199364294544e-05,
"loss": 1.0954,
"step": 6430
},
{
"epoch": 7.68,
"learning_rate": 2.712784743069045e-05,
"loss": 1.138,
"step": 6440
},
{
"epoch": 7.69,
"learning_rate": 2.7083701218435457e-05,
"loss": 1.1409,
"step": 6450
},
{
"epoch": 7.7,
"learning_rate": 2.7039555006180472e-05,
"loss": 1.0962,
"step": 6460
},
{
"epoch": 7.71,
"learning_rate": 2.699540879392548e-05,
"loss": 1.1233,
"step": 6470
},
{
"epoch": 7.72,
"learning_rate": 2.6951262581670495e-05,
"loss": 1.0884,
"step": 6480
},
{
"epoch": 7.74,
"learning_rate": 2.690711636941551e-05,
"loss": 1.0763,
"step": 6490
},
{
"epoch": 7.75,
"learning_rate": 2.6862970157160517e-05,
"loss": 1.0832,
"step": 6500
},
{
"epoch": 7.76,
"learning_rate": 2.681882394490553e-05,
"loss": 1.0683,
"step": 6510
},
{
"epoch": 7.77,
"learning_rate": 2.677467773265054e-05,
"loss": 1.1104,
"step": 6520
},
{
"epoch": 7.78,
"learning_rate": 2.673053152039555e-05,
"loss": 1.125,
"step": 6530
},
{
"epoch": 7.79,
"learning_rate": 2.668638530814056e-05,
"loss": 1.1071,
"step": 6540
},
{
"epoch": 7.81,
"learning_rate": 2.6642239095885573e-05,
"loss": 1.1547,
"step": 6550
},
{
"epoch": 7.82,
"learning_rate": 2.6598092883630588e-05,
"loss": 1.0844,
"step": 6560
},
{
"epoch": 7.83,
"learning_rate": 2.6553946671375596e-05,
"loss": 1.1004,
"step": 6570
},
{
"epoch": 7.84,
"learning_rate": 2.650980045912061e-05,
"loss": 1.092,
"step": 6580
},
{
"epoch": 7.85,
"learning_rate": 2.646565424686562e-05,
"loss": 1.1057,
"step": 6590
},
{
"epoch": 7.87,
"learning_rate": 2.642150803461063e-05,
"loss": 1.0887,
"step": 6600
},
{
"epoch": 7.88,
"learning_rate": 2.6377361822355644e-05,
"loss": 1.07,
"step": 6610
},
{
"epoch": 7.89,
"learning_rate": 2.6333215610100652e-05,
"loss": 1.0863,
"step": 6620
},
{
"epoch": 7.9,
"learning_rate": 2.6289069397845667e-05,
"loss": 1.1196,
"step": 6630
},
{
"epoch": 7.91,
"learning_rate": 2.6244923185590675e-05,
"loss": 1.0892,
"step": 6640
},
{
"epoch": 7.93,
"learning_rate": 2.620077697333569e-05,
"loss": 1.1039,
"step": 6650
},
{
"epoch": 7.94,
"learning_rate": 2.6156630761080704e-05,
"loss": 1.0494,
"step": 6660
},
{
"epoch": 7.95,
"learning_rate": 2.6112484548825712e-05,
"loss": 1.0824,
"step": 6670
},
{
"epoch": 7.96,
"learning_rate": 2.6068338336570723e-05,
"loss": 1.1038,
"step": 6680
},
{
"epoch": 7.97,
"learning_rate": 2.602419212431573e-05,
"loss": 1.0558,
"step": 6690
},
{
"epoch": 7.99,
"learning_rate": 2.5980045912060746e-05,
"loss": 1.0946,
"step": 6700
},
{
"epoch": 8.0,
"learning_rate": 2.593589969980576e-05,
"loss": 1.058,
"step": 6710
},
{
"epoch": 8.0,
"eval_accuracy": 0.7548041114255921,
"eval_f1": 0.7350043558463751,
"eval_loss": 0.9828243255615234,
"eval_precision": 0.7440513899220582,
"eval_recall": 0.7548041114255921,
"eval_runtime": 100.0502,
"eval_samples_per_second": 268.385,
"eval_steps_per_second": 4.198,
"step": 6712
},
{
"epoch": 8.01,
"learning_rate": 2.5891753487550768e-05,
"loss": 1.042,
"step": 6720
},
{
"epoch": 8.02,
"learning_rate": 2.5847607275295783e-05,
"loss": 1.0727,
"step": 6730
},
{
"epoch": 8.03,
"learning_rate": 2.580346106304079e-05,
"loss": 1.0546,
"step": 6740
},
{
"epoch": 8.05,
"learning_rate": 2.5759314850785805e-05,
"loss": 1.0544,
"step": 6750
},
{
"epoch": 8.06,
"learning_rate": 2.5715168638530817e-05,
"loss": 1.0355,
"step": 6760
},
{
"epoch": 8.07,
"learning_rate": 2.5671022426275825e-05,
"loss": 0.9754,
"step": 6770
},
{
"epoch": 8.08,
"learning_rate": 2.562687621402084e-05,
"loss": 1.0294,
"step": 6780
},
{
"epoch": 8.09,
"learning_rate": 2.5582730001765847e-05,
"loss": 1.0225,
"step": 6790
},
{
"epoch": 8.1,
"learning_rate": 2.5538583789510862e-05,
"loss": 1.0651,
"step": 6800
},
{
"epoch": 8.12,
"learning_rate": 2.5494437577255876e-05,
"loss": 1.0233,
"step": 6810
},
{
"epoch": 8.13,
"learning_rate": 2.5450291365000884e-05,
"loss": 1.0067,
"step": 6820
},
{
"epoch": 8.14,
"learning_rate": 2.5406145152745896e-05,
"loss": 1.0843,
"step": 6830
},
{
"epoch": 8.15,
"learning_rate": 2.5361998940490907e-05,
"loss": 1.0345,
"step": 6840
},
{
"epoch": 8.16,
"learning_rate": 2.5317852728235918e-05,
"loss": 1.0876,
"step": 6850
},
{
"epoch": 8.18,
"learning_rate": 2.5273706515980933e-05,
"loss": 1.0309,
"step": 6860
},
{
"epoch": 8.19,
"learning_rate": 2.522956030372594e-05,
"loss": 1.0354,
"step": 6870
},
{
"epoch": 8.2,
"learning_rate": 2.5185414091470955e-05,
"loss": 0.9963,
"step": 6880
},
{
"epoch": 8.21,
"learning_rate": 2.5141267879215963e-05,
"loss": 1.0118,
"step": 6890
},
{
"epoch": 8.22,
"learning_rate": 2.5097121666960978e-05,
"loss": 1.0476,
"step": 6900
},
{
"epoch": 8.24,
"learning_rate": 2.5052975454705986e-05,
"loss": 1.079,
"step": 6910
},
{
"epoch": 8.25,
"learning_rate": 2.5008829242450997e-05,
"loss": 0.9995,
"step": 6920
},
{
"epoch": 8.26,
"learning_rate": 2.4964683030196008e-05,
"loss": 1.061,
"step": 6930
},
{
"epoch": 8.27,
"learning_rate": 2.4920536817941023e-05,
"loss": 1.0008,
"step": 6940
},
{
"epoch": 8.28,
"learning_rate": 2.4876390605686034e-05,
"loss": 1.0351,
"step": 6950
},
{
"epoch": 8.3,
"learning_rate": 2.4832244393431045e-05,
"loss": 1.062,
"step": 6960
},
{
"epoch": 8.31,
"learning_rate": 2.4788098181176057e-05,
"loss": 0.9703,
"step": 6970
},
{
"epoch": 8.32,
"learning_rate": 2.4743951968921068e-05,
"loss": 1.0638,
"step": 6980
},
{
"epoch": 8.33,
"learning_rate": 2.469980575666608e-05,
"loss": 1.0217,
"step": 6990
},
{
"epoch": 8.34,
"learning_rate": 2.465565954441109e-05,
"loss": 1.0515,
"step": 7000
},
{
"epoch": 8.36,
"learning_rate": 2.46115133321561e-05,
"loss": 0.9567,
"step": 7010
},
{
"epoch": 8.37,
"learning_rate": 2.4567367119901113e-05,
"loss": 1.0541,
"step": 7020
},
{
"epoch": 8.38,
"learning_rate": 2.4523220907646124e-05,
"loss": 1.0051,
"step": 7030
},
{
"epoch": 8.39,
"learning_rate": 2.4479074695391135e-05,
"loss": 0.9657,
"step": 7040
},
{
"epoch": 8.4,
"learning_rate": 2.443492848313615e-05,
"loss": 1.0323,
"step": 7050
},
{
"epoch": 8.41,
"learning_rate": 2.439078227088116e-05,
"loss": 1.0378,
"step": 7060
},
{
"epoch": 8.43,
"learning_rate": 2.4346636058626173e-05,
"loss": 0.974,
"step": 7070
},
{
"epoch": 8.44,
"learning_rate": 2.430248984637118e-05,
"loss": 1.0138,
"step": 7080
},
{
"epoch": 8.45,
"learning_rate": 2.4258343634116192e-05,
"loss": 1.0023,
"step": 7090
},
{
"epoch": 8.46,
"learning_rate": 2.4214197421861206e-05,
"loss": 1.0473,
"step": 7100
},
{
"epoch": 8.47,
"learning_rate": 2.4170051209606218e-05,
"loss": 0.9473,
"step": 7110
},
{
"epoch": 8.49,
"learning_rate": 2.412590499735123e-05,
"loss": 1.0078,
"step": 7120
},
{
"epoch": 8.5,
"learning_rate": 2.408175878509624e-05,
"loss": 1.0177,
"step": 7130
},
{
"epoch": 8.51,
"learning_rate": 2.403761257284125e-05,
"loss": 0.9631,
"step": 7140
},
{
"epoch": 8.52,
"learning_rate": 2.3993466360586263e-05,
"loss": 0.9581,
"step": 7150
},
{
"epoch": 8.53,
"learning_rate": 2.3949320148331274e-05,
"loss": 1.0014,
"step": 7160
},
{
"epoch": 8.55,
"learning_rate": 2.3905173936076285e-05,
"loss": 0.9788,
"step": 7170
},
{
"epoch": 8.56,
"learning_rate": 2.3861027723821296e-05,
"loss": 0.9712,
"step": 7180
},
{
"epoch": 8.57,
"learning_rate": 2.3816881511566308e-05,
"loss": 0.9748,
"step": 7190
},
{
"epoch": 8.58,
"learning_rate": 2.377273529931132e-05,
"loss": 0.9946,
"step": 7200
},
{
"epoch": 8.59,
"learning_rate": 2.3728589087056334e-05,
"loss": 0.9634,
"step": 7210
},
{
"epoch": 8.61,
"learning_rate": 2.3684442874801345e-05,
"loss": 1.0009,
"step": 7220
},
{
"epoch": 8.62,
"learning_rate": 2.3640296662546356e-05,
"loss": 0.9781,
"step": 7230
},
{
"epoch": 8.63,
"learning_rate": 2.3596150450291364e-05,
"loss": 0.9896,
"step": 7240
},
{
"epoch": 8.64,
"learning_rate": 2.3552004238036375e-05,
"loss": 0.975,
"step": 7250
},
{
"epoch": 8.65,
"learning_rate": 2.350785802578139e-05,
"loss": 1.0166,
"step": 7260
},
{
"epoch": 8.66,
"learning_rate": 2.34637118135264e-05,
"loss": 0.9855,
"step": 7270
},
{
"epoch": 8.68,
"learning_rate": 2.3419565601271412e-05,
"loss": 0.9839,
"step": 7280
},
{
"epoch": 8.69,
"learning_rate": 2.3375419389016424e-05,
"loss": 0.9924,
"step": 7290
},
{
"epoch": 8.7,
"learning_rate": 2.3331273176761435e-05,
"loss": 0.9736,
"step": 7300
},
{
"epoch": 8.71,
"learning_rate": 2.3287126964506446e-05,
"loss": 1.0229,
"step": 7310
},
{
"epoch": 8.72,
"learning_rate": 2.3242980752251458e-05,
"loss": 1.0075,
"step": 7320
},
{
"epoch": 8.74,
"learning_rate": 2.319883453999647e-05,
"loss": 1.065,
"step": 7330
},
{
"epoch": 8.75,
"learning_rate": 2.315468832774148e-05,
"loss": 0.9951,
"step": 7340
},
{
"epoch": 8.76,
"learning_rate": 2.311054211548649e-05,
"loss": 0.985,
"step": 7350
},
{
"epoch": 8.77,
"learning_rate": 2.3066395903231503e-05,
"loss": 1.0109,
"step": 7360
},
{
"epoch": 8.78,
"learning_rate": 2.3022249690976517e-05,
"loss": 0.9473,
"step": 7370
},
{
"epoch": 8.8,
"learning_rate": 2.297810347872153e-05,
"loss": 0.9864,
"step": 7380
},
{
"epoch": 8.81,
"learning_rate": 2.293395726646654e-05,
"loss": 0.9472,
"step": 7390
},
{
"epoch": 8.82,
"learning_rate": 2.2889811054211548e-05,
"loss": 0.9759,
"step": 7400
},
{
"epoch": 8.83,
"learning_rate": 2.284566484195656e-05,
"loss": 0.9271,
"step": 7410
},
{
"epoch": 8.84,
"learning_rate": 2.2801518629701574e-05,
"loss": 1.0013,
"step": 7420
},
{
"epoch": 8.86,
"learning_rate": 2.2757372417446585e-05,
"loss": 0.9603,
"step": 7430
},
{
"epoch": 8.87,
"learning_rate": 2.2713226205191596e-05,
"loss": 0.9903,
"step": 7440
},
{
"epoch": 8.88,
"learning_rate": 2.2669079992936607e-05,
"loss": 0.9983,
"step": 7450
},
{
"epoch": 8.89,
"learning_rate": 2.262493378068162e-05,
"loss": 0.9947,
"step": 7460
},
{
"epoch": 8.9,
"learning_rate": 2.258078756842663e-05,
"loss": 0.9341,
"step": 7470
},
{
"epoch": 8.92,
"learning_rate": 2.253664135617164e-05,
"loss": 0.9749,
"step": 7480
},
{
"epoch": 8.93,
"learning_rate": 2.2492495143916652e-05,
"loss": 0.9877,
"step": 7490
},
{
"epoch": 8.94,
"learning_rate": 2.2448348931661664e-05,
"loss": 1.0253,
"step": 7500
},
{
"epoch": 8.95,
"learning_rate": 2.2404202719406675e-05,
"loss": 0.9495,
"step": 7510
},
{
"epoch": 8.96,
"learning_rate": 2.236005650715169e-05,
"loss": 0.9616,
"step": 7520
},
{
"epoch": 8.97,
"learning_rate": 2.23159102948967e-05,
"loss": 0.9267,
"step": 7530
},
{
"epoch": 8.99,
"learning_rate": 2.2271764082641712e-05,
"loss": 0.957,
"step": 7540
},
{
"epoch": 9.0,
"learning_rate": 2.222761787038672e-05,
"loss": 0.9691,
"step": 7550
},
{
"epoch": 9.0,
"eval_accuracy": 0.7718233278712945,
"eval_f1": 0.7536014274909693,
"eval_loss": 0.9018393754959106,
"eval_precision": 0.7615836668708097,
"eval_recall": 0.7718233278712945,
"eval_runtime": 100.4679,
"eval_samples_per_second": 267.269,
"eval_steps_per_second": 4.18,
"step": 7551
},
{
"epoch": 9.01,
"learning_rate": 2.218347165813173e-05,
"loss": 0.9592,
"step": 7560
},
{
"epoch": 9.02,
"learning_rate": 2.2139325445876742e-05,
"loss": 0.8877,
"step": 7570
},
{
"epoch": 9.03,
"learning_rate": 2.2095179233621757e-05,
"loss": 0.9331,
"step": 7580
},
{
"epoch": 9.05,
"learning_rate": 2.205103302136677e-05,
"loss": 0.9374,
"step": 7590
},
{
"epoch": 9.06,
"learning_rate": 2.200688680911178e-05,
"loss": 0.9148,
"step": 7600
},
{
"epoch": 9.07,
"learning_rate": 2.196274059685679e-05,
"loss": 0.9694,
"step": 7610
},
{
"epoch": 9.08,
"learning_rate": 2.1918594384601802e-05,
"loss": 0.8986,
"step": 7620
},
{
"epoch": 9.09,
"learning_rate": 2.1874448172346813e-05,
"loss": 0.9645,
"step": 7630
},
{
"epoch": 9.11,
"learning_rate": 2.1830301960091825e-05,
"loss": 0.9577,
"step": 7640
},
{
"epoch": 9.12,
"learning_rate": 2.1786155747836836e-05,
"loss": 0.876,
"step": 7650
},
{
"epoch": 9.13,
"learning_rate": 2.1742009535581847e-05,
"loss": 0.9324,
"step": 7660
},
{
"epoch": 9.14,
"learning_rate": 2.169786332332686e-05,
"loss": 0.9094,
"step": 7670
},
{
"epoch": 9.15,
"learning_rate": 2.1653717111071873e-05,
"loss": 0.9311,
"step": 7680
},
{
"epoch": 9.17,
"learning_rate": 2.1609570898816884e-05,
"loss": 0.9111,
"step": 7690
},
{
"epoch": 9.18,
"learning_rate": 2.1565424686561896e-05,
"loss": 0.9487,
"step": 7700
},
{
"epoch": 9.19,
"learning_rate": 2.1521278474306903e-05,
"loss": 0.9526,
"step": 7710
},
{
"epoch": 9.2,
"learning_rate": 2.1477132262051915e-05,
"loss": 0.9594,
"step": 7720
},
{
"epoch": 9.21,
"learning_rate": 2.143298604979693e-05,
"loss": 1.0186,
"step": 7730
},
{
"epoch": 9.23,
"learning_rate": 2.138883983754194e-05,
"loss": 0.8755,
"step": 7740
},
{
"epoch": 9.24,
"learning_rate": 2.1344693625286952e-05,
"loss": 0.9704,
"step": 7750
},
{
"epoch": 9.25,
"learning_rate": 2.1300547413031963e-05,
"loss": 0.9313,
"step": 7760
},
{
"epoch": 9.26,
"learning_rate": 2.1256401200776974e-05,
"loss": 0.9476,
"step": 7770
},
{
"epoch": 9.27,
"learning_rate": 2.1212254988521986e-05,
"loss": 0.9674,
"step": 7780
},
{
"epoch": 9.28,
"learning_rate": 2.1168108776266997e-05,
"loss": 0.9253,
"step": 7790
},
{
"epoch": 9.3,
"learning_rate": 2.1123962564012008e-05,
"loss": 0.9806,
"step": 7800
},
{
"epoch": 9.31,
"learning_rate": 2.107981635175702e-05,
"loss": 0.9188,
"step": 7810
},
{
"epoch": 9.32,
"learning_rate": 2.103567013950203e-05,
"loss": 0.9386,
"step": 7820
},
{
"epoch": 9.33,
"learning_rate": 2.0991523927247042e-05,
"loss": 0.9111,
"step": 7830
},
{
"epoch": 9.34,
"learning_rate": 2.0947377714992057e-05,
"loss": 0.912,
"step": 7840
},
{
"epoch": 9.36,
"learning_rate": 2.0903231502737068e-05,
"loss": 0.8859,
"step": 7850
},
{
"epoch": 9.37,
"learning_rate": 2.085908529048208e-05,
"loss": 0.954,
"step": 7860
},
{
"epoch": 9.38,
"learning_rate": 2.0814939078227087e-05,
"loss": 0.9201,
"step": 7870
},
{
"epoch": 9.39,
"learning_rate": 2.07707928659721e-05,
"loss": 0.9238,
"step": 7880
},
{
"epoch": 9.4,
"learning_rate": 2.0726646653717113e-05,
"loss": 0.9186,
"step": 7890
},
{
"epoch": 9.42,
"learning_rate": 2.0682500441462124e-05,
"loss": 0.9529,
"step": 7900
},
{
"epoch": 9.43,
"learning_rate": 2.0638354229207135e-05,
"loss": 0.8942,
"step": 7910
},
{
"epoch": 9.44,
"learning_rate": 2.0594208016952147e-05,
"loss": 0.9486,
"step": 7920
},
{
"epoch": 9.45,
"learning_rate": 2.0550061804697158e-05,
"loss": 0.9249,
"step": 7930
},
{
"epoch": 9.46,
"learning_rate": 2.050591559244217e-05,
"loss": 0.9115,
"step": 7940
},
{
"epoch": 9.48,
"learning_rate": 2.046176938018718e-05,
"loss": 0.8388,
"step": 7950
},
{
"epoch": 9.49,
"learning_rate": 2.0417623167932192e-05,
"loss": 0.9013,
"step": 7960
},
{
"epoch": 9.5,
"learning_rate": 2.0373476955677203e-05,
"loss": 0.8917,
"step": 7970
},
{
"epoch": 9.51,
"learning_rate": 2.0329330743422214e-05,
"loss": 0.913,
"step": 7980
},
{
"epoch": 9.52,
"learning_rate": 2.0285184531167226e-05,
"loss": 0.9192,
"step": 7990
},
{
"epoch": 9.54,
"learning_rate": 2.024103831891224e-05,
"loss": 0.9011,
"step": 8000
},
{
"epoch": 9.55,
"learning_rate": 2.019689210665725e-05,
"loss": 0.91,
"step": 8010
},
{
"epoch": 9.56,
"learning_rate": 2.0152745894402263e-05,
"loss": 0.8934,
"step": 8020
},
{
"epoch": 9.57,
"learning_rate": 2.010859968214727e-05,
"loss": 0.9183,
"step": 8030
},
{
"epoch": 9.58,
"learning_rate": 2.0064453469892282e-05,
"loss": 0.8899,
"step": 8040
},
{
"epoch": 9.59,
"learning_rate": 2.0020307257637297e-05,
"loss": 0.8904,
"step": 8050
},
{
"epoch": 9.61,
"learning_rate": 1.9976161045382308e-05,
"loss": 0.8728,
"step": 8060
},
{
"epoch": 9.62,
"learning_rate": 1.993201483312732e-05,
"loss": 0.933,
"step": 8070
},
{
"epoch": 9.63,
"learning_rate": 1.988786862087233e-05,
"loss": 0.8514,
"step": 8080
},
{
"epoch": 9.64,
"learning_rate": 1.984372240861734e-05,
"loss": 0.9437,
"step": 8090
},
{
"epoch": 9.65,
"learning_rate": 1.9799576196362353e-05,
"loss": 0.9064,
"step": 8100
},
{
"epoch": 9.67,
"learning_rate": 1.9755429984107364e-05,
"loss": 0.8757,
"step": 8110
},
{
"epoch": 9.68,
"learning_rate": 1.9711283771852375e-05,
"loss": 0.863,
"step": 8120
},
{
"epoch": 9.69,
"learning_rate": 1.9667137559597387e-05,
"loss": 0.9145,
"step": 8130
},
{
"epoch": 9.7,
"learning_rate": 1.9622991347342398e-05,
"loss": 0.9044,
"step": 8140
},
{
"epoch": 9.71,
"learning_rate": 1.957884513508741e-05,
"loss": 0.8805,
"step": 8150
},
{
"epoch": 9.73,
"learning_rate": 1.9534698922832424e-05,
"loss": 0.9111,
"step": 8160
},
{
"epoch": 9.74,
"learning_rate": 1.9490552710577435e-05,
"loss": 0.9262,
"step": 8170
},
{
"epoch": 9.75,
"learning_rate": 1.9446406498322446e-05,
"loss": 0.9375,
"step": 8180
},
{
"epoch": 9.76,
"learning_rate": 1.9402260286067454e-05,
"loss": 0.8493,
"step": 8190
},
{
"epoch": 9.77,
"learning_rate": 1.9358114073812465e-05,
"loss": 0.9687,
"step": 8200
},
{
"epoch": 9.79,
"learning_rate": 1.931396786155748e-05,
"loss": 0.911,
"step": 8210
},
{
"epoch": 9.8,
"learning_rate": 1.926982164930249e-05,
"loss": 0.837,
"step": 8220
},
{
"epoch": 9.81,
"learning_rate": 1.9225675437047503e-05,
"loss": 0.9345,
"step": 8230
},
{
"epoch": 9.82,
"learning_rate": 1.9181529224792514e-05,
"loss": 0.8811,
"step": 8240
},
{
"epoch": 9.83,
"learning_rate": 1.9137383012537525e-05,
"loss": 0.8878,
"step": 8250
},
{
"epoch": 9.84,
"learning_rate": 1.9093236800282536e-05,
"loss": 0.8808,
"step": 8260
},
{
"epoch": 9.86,
"learning_rate": 1.9049090588027548e-05,
"loss": 0.8808,
"step": 8270
},
{
"epoch": 9.87,
"learning_rate": 1.900494437577256e-05,
"loss": 0.9018,
"step": 8280
},
{
"epoch": 9.88,
"learning_rate": 1.896079816351757e-05,
"loss": 0.9156,
"step": 8290
},
{
"epoch": 9.89,
"learning_rate": 1.891665195126258e-05,
"loss": 0.8381,
"step": 8300
},
{
"epoch": 9.9,
"learning_rate": 1.8872505739007593e-05,
"loss": 0.8914,
"step": 8310
},
{
"epoch": 9.92,
"learning_rate": 1.8828359526752607e-05,
"loss": 0.8733,
"step": 8320
},
{
"epoch": 9.93,
"learning_rate": 1.878421331449762e-05,
"loss": 0.9012,
"step": 8330
},
{
"epoch": 9.94,
"learning_rate": 1.874006710224263e-05,
"loss": 0.9081,
"step": 8340
},
{
"epoch": 9.95,
"learning_rate": 1.8695920889987638e-05,
"loss": 0.8976,
"step": 8350
},
{
"epoch": 9.96,
"learning_rate": 1.865177467773265e-05,
"loss": 0.9017,
"step": 8360
},
{
"epoch": 9.98,
"learning_rate": 1.8607628465477664e-05,
"loss": 0.8704,
"step": 8370
},
{
"epoch": 9.99,
"learning_rate": 1.8563482253222675e-05,
"loss": 0.8366,
"step": 8380
},
{
"epoch": 10.0,
"learning_rate": 1.8519336040967686e-05,
"loss": 0.8757,
"step": 8390
},
{
"epoch": 10.0,
"eval_accuracy": 0.7892521972292567,
"eval_f1": 0.7755970684102179,
"eval_loss": 0.838049054145813,
"eval_precision": 0.780591813364183,
"eval_recall": 0.7892521972292567,
"eval_runtime": 98.9975,
"eval_samples_per_second": 271.239,
"eval_steps_per_second": 4.243,
"step": 8390
},
{
"epoch": 10.01,
"learning_rate": 1.8475189828712697e-05,
"loss": 0.8931,
"step": 8400
},
{
"epoch": 10.02,
"learning_rate": 1.843104361645771e-05,
"loss": 0.901,
"step": 8410
},
{
"epoch": 10.04,
"learning_rate": 1.838689740420272e-05,
"loss": 0.8172,
"step": 8420
},
{
"epoch": 10.05,
"learning_rate": 1.834275119194773e-05,
"loss": 0.8767,
"step": 8430
},
{
"epoch": 10.06,
"learning_rate": 1.8298604979692742e-05,
"loss": 0.8376,
"step": 8440
},
{
"epoch": 10.07,
"learning_rate": 1.8254458767437754e-05,
"loss": 0.8581,
"step": 8450
},
{
"epoch": 10.08,
"learning_rate": 1.8210312555182765e-05,
"loss": 0.8818,
"step": 8460
},
{
"epoch": 10.1,
"learning_rate": 1.816616634292778e-05,
"loss": 0.8747,
"step": 8470
},
{
"epoch": 10.11,
"learning_rate": 1.812202013067279e-05,
"loss": 0.8558,
"step": 8480
},
{
"epoch": 10.12,
"learning_rate": 1.8077873918417802e-05,
"loss": 0.8284,
"step": 8490
},
{
"epoch": 10.13,
"learning_rate": 1.8033727706162813e-05,
"loss": 0.8434,
"step": 8500
},
{
"epoch": 10.14,
"learning_rate": 1.798958149390782e-05,
"loss": 0.83,
"step": 8510
},
{
"epoch": 10.15,
"learning_rate": 1.7945435281652833e-05,
"loss": 0.8254,
"step": 8520
},
{
"epoch": 10.17,
"learning_rate": 1.7901289069397847e-05,
"loss": 0.8608,
"step": 8530
},
{
"epoch": 10.18,
"learning_rate": 1.785714285714286e-05,
"loss": 0.8495,
"step": 8540
},
{
"epoch": 10.19,
"learning_rate": 1.781299664488787e-05,
"loss": 0.8754,
"step": 8550
},
{
"epoch": 10.2,
"learning_rate": 1.776885043263288e-05,
"loss": 0.8561,
"step": 8560
},
{
"epoch": 10.21,
"learning_rate": 1.7724704220377892e-05,
"loss": 0.8647,
"step": 8570
},
{
"epoch": 10.23,
"learning_rate": 1.7680558008122904e-05,
"loss": 0.8261,
"step": 8580
},
{
"epoch": 10.24,
"learning_rate": 1.7636411795867915e-05,
"loss": 0.8337,
"step": 8590
},
{
"epoch": 10.25,
"learning_rate": 1.7592265583612926e-05,
"loss": 0.8431,
"step": 8600
},
{
"epoch": 10.26,
"learning_rate": 1.7548119371357937e-05,
"loss": 0.8226,
"step": 8610
},
{
"epoch": 10.27,
"learning_rate": 1.750397315910295e-05,
"loss": 0.8261,
"step": 8620
},
{
"epoch": 10.29,
"learning_rate": 1.7459826946847963e-05,
"loss": 0.8508,
"step": 8630
},
{
"epoch": 10.3,
"learning_rate": 1.7415680734592974e-05,
"loss": 0.8536,
"step": 8640
},
{
"epoch": 10.31,
"learning_rate": 1.7371534522337986e-05,
"loss": 0.8555,
"step": 8650
},
{
"epoch": 10.32,
"learning_rate": 1.7327388310082997e-05,
"loss": 0.8368,
"step": 8660
},
{
"epoch": 10.33,
"learning_rate": 1.7283242097828005e-05,
"loss": 0.8585,
"step": 8670
},
{
"epoch": 10.35,
"learning_rate": 1.723909588557302e-05,
"loss": 0.869,
"step": 8680
},
{
"epoch": 10.36,
"learning_rate": 1.719494967331803e-05,
"loss": 0.8741,
"step": 8690
},
{
"epoch": 10.37,
"learning_rate": 1.7150803461063042e-05,
"loss": 0.7962,
"step": 8700
},
{
"epoch": 10.38,
"learning_rate": 1.7106657248808053e-05,
"loss": 0.8805,
"step": 8710
},
{
"epoch": 10.39,
"learning_rate": 1.7062511036553065e-05,
"loss": 0.8335,
"step": 8720
},
{
"epoch": 10.41,
"learning_rate": 1.7018364824298076e-05,
"loss": 0.8563,
"step": 8730
},
{
"epoch": 10.42,
"learning_rate": 1.6974218612043087e-05,
"loss": 0.8335,
"step": 8740
},
{
"epoch": 10.43,
"learning_rate": 1.69300723997881e-05,
"loss": 0.8624,
"step": 8750
},
{
"epoch": 10.44,
"learning_rate": 1.688592618753311e-05,
"loss": 0.835,
"step": 8760
},
{
"epoch": 10.45,
"learning_rate": 1.684177997527812e-05,
"loss": 0.8292,
"step": 8770
},
{
"epoch": 10.46,
"learning_rate": 1.6797633763023132e-05,
"loss": 0.8683,
"step": 8780
},
{
"epoch": 10.48,
"learning_rate": 1.6753487550768147e-05,
"loss": 0.8817,
"step": 8790
},
{
"epoch": 10.49,
"learning_rate": 1.6709341338513158e-05,
"loss": 0.8308,
"step": 8800
},
{
"epoch": 10.5,
"learning_rate": 1.666519512625817e-05,
"loss": 0.8214,
"step": 8810
},
{
"epoch": 10.51,
"learning_rate": 1.6621048914003177e-05,
"loss": 0.8095,
"step": 8820
},
{
"epoch": 10.52,
"learning_rate": 1.657690270174819e-05,
"loss": 0.8254,
"step": 8830
},
{
"epoch": 10.54,
"learning_rate": 1.6532756489493203e-05,
"loss": 0.846,
"step": 8840
},
{
"epoch": 10.55,
"learning_rate": 1.6488610277238214e-05,
"loss": 0.8039,
"step": 8850
},
{
"epoch": 10.56,
"learning_rate": 1.6444464064983226e-05,
"loss": 0.8354,
"step": 8860
},
{
"epoch": 10.57,
"learning_rate": 1.6400317852728237e-05,
"loss": 0.8321,
"step": 8870
},
{
"epoch": 10.58,
"learning_rate": 1.6356171640473248e-05,
"loss": 0.8048,
"step": 8880
},
{
"epoch": 10.6,
"learning_rate": 1.631202542821826e-05,
"loss": 0.8414,
"step": 8890
},
{
"epoch": 10.61,
"learning_rate": 1.626787921596327e-05,
"loss": 0.87,
"step": 8900
},
{
"epoch": 10.62,
"learning_rate": 1.6223733003708282e-05,
"loss": 0.8323,
"step": 8910
},
{
"epoch": 10.63,
"learning_rate": 1.6179586791453293e-05,
"loss": 0.8496,
"step": 8920
},
{
"epoch": 10.64,
"learning_rate": 1.6135440579198304e-05,
"loss": 0.8311,
"step": 8930
},
{
"epoch": 10.66,
"learning_rate": 1.6091294366943316e-05,
"loss": 0.8351,
"step": 8940
},
{
"epoch": 10.67,
"learning_rate": 1.604714815468833e-05,
"loss": 0.8303,
"step": 8950
},
{
"epoch": 10.68,
"learning_rate": 1.600300194243334e-05,
"loss": 0.8205,
"step": 8960
},
{
"epoch": 10.69,
"learning_rate": 1.5958855730178353e-05,
"loss": 0.8116,
"step": 8970
},
{
"epoch": 10.7,
"learning_rate": 1.591470951792336e-05,
"loss": 0.792,
"step": 8980
},
{
"epoch": 10.71,
"learning_rate": 1.5870563305668372e-05,
"loss": 0.8326,
"step": 8990
},
{
"epoch": 10.73,
"learning_rate": 1.5826417093413387e-05,
"loss": 0.8107,
"step": 9000
},
{
"epoch": 10.74,
"learning_rate": 1.5782270881158398e-05,
"loss": 0.8512,
"step": 9010
},
{
"epoch": 10.75,
"learning_rate": 1.573812466890341e-05,
"loss": 0.8814,
"step": 9020
},
{
"epoch": 10.76,
"learning_rate": 1.569397845664842e-05,
"loss": 0.8471,
"step": 9030
},
{
"epoch": 10.77,
"learning_rate": 1.5649832244393432e-05,
"loss": 0.841,
"step": 9040
},
{
"epoch": 10.79,
"learning_rate": 1.5605686032138443e-05,
"loss": 0.8457,
"step": 9050
},
{
"epoch": 10.8,
"learning_rate": 1.5561539819883454e-05,
"loss": 0.8292,
"step": 9060
},
{
"epoch": 10.81,
"learning_rate": 1.5517393607628465e-05,
"loss": 0.8242,
"step": 9070
},
{
"epoch": 10.82,
"learning_rate": 1.5473247395373477e-05,
"loss": 0.8112,
"step": 9080
},
{
"epoch": 10.83,
"learning_rate": 1.5429101183118488e-05,
"loss": 0.8051,
"step": 9090
},
{
"epoch": 10.85,
"learning_rate": 1.53849549708635e-05,
"loss": 0.8124,
"step": 9100
},
{
"epoch": 10.86,
"learning_rate": 1.5340808758608514e-05,
"loss": 0.8449,
"step": 9110
},
{
"epoch": 10.87,
"learning_rate": 1.5296662546353525e-05,
"loss": 0.8209,
"step": 9120
},
{
"epoch": 10.88,
"learning_rate": 1.5252516334098535e-05,
"loss": 0.7882,
"step": 9130
},
{
"epoch": 10.89,
"learning_rate": 1.5208370121843546e-05,
"loss": 0.8287,
"step": 9140
},
{
"epoch": 10.91,
"learning_rate": 1.5164223909588557e-05,
"loss": 0.8281,
"step": 9150
},
{
"epoch": 10.92,
"learning_rate": 1.512007769733357e-05,
"loss": 0.8646,
"step": 9160
},
{
"epoch": 10.93,
"learning_rate": 1.5075931485078581e-05,
"loss": 0.8217,
"step": 9170
},
{
"epoch": 10.94,
"learning_rate": 1.5031785272823593e-05,
"loss": 0.8445,
"step": 9180
},
{
"epoch": 10.95,
"learning_rate": 1.4987639060568604e-05,
"loss": 0.8246,
"step": 9190
},
{
"epoch": 10.97,
"learning_rate": 1.4943492848313614e-05,
"loss": 0.8449,
"step": 9200
},
{
"epoch": 10.98,
"learning_rate": 1.4899346636058628e-05,
"loss": 0.8551,
"step": 9210
},
{
"epoch": 10.99,
"learning_rate": 1.485520042380364e-05,
"loss": 0.8446,
"step": 9220
},
{
"epoch": 11.0,
"eval_accuracy": 0.7981528377774467,
"eval_f1": 0.7859063096300944,
"eval_loss": 0.7904874682426453,
"eval_precision": 0.7913122656849716,
"eval_recall": 0.7981528377774467,
"eval_runtime": 98.969,
"eval_samples_per_second": 271.317,
"eval_steps_per_second": 4.244,
"step": 9229
},
{
"epoch": 11.0,
"learning_rate": 1.481105421154865e-05,
"loss": 0.8497,
"step": 9230
},
{
"epoch": 11.01,
"learning_rate": 1.476690799929366e-05,
"loss": 0.7354,
"step": 9240
},
{
"epoch": 11.03,
"learning_rate": 1.4722761787038672e-05,
"loss": 0.8413,
"step": 9250
},
{
"epoch": 11.04,
"learning_rate": 1.4678615574783683e-05,
"loss": 0.842,
"step": 9260
},
{
"epoch": 11.05,
"learning_rate": 1.4634469362528696e-05,
"loss": 0.8079,
"step": 9270
},
{
"epoch": 11.06,
"learning_rate": 1.4590323150273707e-05,
"loss": 0.8281,
"step": 9280
},
{
"epoch": 11.07,
"learning_rate": 1.4546176938018718e-05,
"loss": 0.7798,
"step": 9290
},
{
"epoch": 11.08,
"learning_rate": 1.450203072576373e-05,
"loss": 0.7583,
"step": 9300
},
{
"epoch": 11.1,
"learning_rate": 1.445788451350874e-05,
"loss": 0.7882,
"step": 9310
},
{
"epoch": 11.11,
"learning_rate": 1.4413738301253754e-05,
"loss": 0.7858,
"step": 9320
},
{
"epoch": 11.12,
"learning_rate": 1.4369592088998765e-05,
"loss": 0.7928,
"step": 9330
},
{
"epoch": 11.13,
"learning_rate": 1.4325445876743776e-05,
"loss": 0.7811,
"step": 9340
},
{
"epoch": 11.14,
"learning_rate": 1.4281299664488788e-05,
"loss": 0.7588,
"step": 9350
},
{
"epoch": 11.16,
"learning_rate": 1.4237153452233797e-05,
"loss": 0.8435,
"step": 9360
},
{
"epoch": 11.17,
"learning_rate": 1.4193007239978812e-05,
"loss": 0.7808,
"step": 9370
},
{
"epoch": 11.18,
"learning_rate": 1.4148861027723823e-05,
"loss": 0.7983,
"step": 9380
},
{
"epoch": 11.19,
"learning_rate": 1.4104714815468834e-05,
"loss": 0.7767,
"step": 9390
},
{
"epoch": 11.2,
"learning_rate": 1.4060568603213844e-05,
"loss": 0.7829,
"step": 9400
},
{
"epoch": 11.22,
"learning_rate": 1.4016422390958855e-05,
"loss": 0.7967,
"step": 9410
},
{
"epoch": 11.23,
"learning_rate": 1.397227617870387e-05,
"loss": 0.7639,
"step": 9420
},
{
"epoch": 11.24,
"learning_rate": 1.392812996644888e-05,
"loss": 0.8109,
"step": 9430
},
{
"epoch": 11.25,
"learning_rate": 1.388398375419389e-05,
"loss": 0.8006,
"step": 9440
},
{
"epoch": 11.26,
"learning_rate": 1.3839837541938902e-05,
"loss": 0.7926,
"step": 9450
},
{
"epoch": 11.28,
"learning_rate": 1.3795691329683913e-05,
"loss": 0.8429,
"step": 9460
},
{
"epoch": 11.29,
"learning_rate": 1.3751545117428924e-05,
"loss": 0.8168,
"step": 9470
},
{
"epoch": 11.3,
"learning_rate": 1.3707398905173937e-05,
"loss": 0.7793,
"step": 9480
},
{
"epoch": 11.31,
"learning_rate": 1.3663252692918949e-05,
"loss": 0.8215,
"step": 9490
},
{
"epoch": 11.32,
"learning_rate": 1.361910648066396e-05,
"loss": 0.7918,
"step": 9500
},
{
"epoch": 11.33,
"learning_rate": 1.3574960268408971e-05,
"loss": 0.814,
"step": 9510
},
{
"epoch": 11.35,
"learning_rate": 1.353081405615398e-05,
"loss": 0.79,
"step": 9520
},
{
"epoch": 11.36,
"learning_rate": 1.3486667843898995e-05,
"loss": 0.8204,
"step": 9530
},
{
"epoch": 11.37,
"learning_rate": 1.3442521631644007e-05,
"loss": 0.7532,
"step": 9540
},
{
"epoch": 11.38,
"learning_rate": 1.3398375419389018e-05,
"loss": 0.7819,
"step": 9550
},
{
"epoch": 11.39,
"learning_rate": 1.3354229207134027e-05,
"loss": 0.8243,
"step": 9560
},
{
"epoch": 11.41,
"learning_rate": 1.3310082994879039e-05,
"loss": 0.7817,
"step": 9570
},
{
"epoch": 11.42,
"learning_rate": 1.3265936782624053e-05,
"loss": 0.8109,
"step": 9580
},
{
"epoch": 11.43,
"learning_rate": 1.3221790570369063e-05,
"loss": 0.8164,
"step": 9590
},
{
"epoch": 11.44,
"learning_rate": 1.3177644358114074e-05,
"loss": 0.7479,
"step": 9600
},
{
"epoch": 11.45,
"learning_rate": 1.3133498145859085e-05,
"loss": 0.8017,
"step": 9610
},
{
"epoch": 11.47,
"learning_rate": 1.3089351933604097e-05,
"loss": 0.7898,
"step": 9620
},
{
"epoch": 11.48,
"learning_rate": 1.304520572134911e-05,
"loss": 0.7693,
"step": 9630
},
{
"epoch": 11.49,
"learning_rate": 1.3001059509094121e-05,
"loss": 0.7851,
"step": 9640
},
{
"epoch": 11.5,
"learning_rate": 1.2956913296839132e-05,
"loss": 0.7936,
"step": 9650
},
{
"epoch": 11.51,
"learning_rate": 1.2912767084584143e-05,
"loss": 0.7208,
"step": 9660
},
{
"epoch": 11.53,
"learning_rate": 1.2868620872329155e-05,
"loss": 0.7852,
"step": 9670
},
{
"epoch": 11.54,
"learning_rate": 1.2824474660074164e-05,
"loss": 0.766,
"step": 9680
},
{
"epoch": 11.55,
"learning_rate": 1.2780328447819179e-05,
"loss": 0.7612,
"step": 9690
},
{
"epoch": 11.56,
"learning_rate": 1.273618223556419e-05,
"loss": 0.7778,
"step": 9700
},
{
"epoch": 11.57,
"learning_rate": 1.2692036023309201e-05,
"loss": 0.8051,
"step": 9710
},
{
"epoch": 11.59,
"learning_rate": 1.2647889811054211e-05,
"loss": 0.8111,
"step": 9720
},
{
"epoch": 11.6,
"learning_rate": 1.2603743598799222e-05,
"loss": 0.7677,
"step": 9730
},
{
"epoch": 11.61,
"learning_rate": 1.2559597386544237e-05,
"loss": 0.7886,
"step": 9740
},
{
"epoch": 11.62,
"learning_rate": 1.2515451174289246e-05,
"loss": 0.7708,
"step": 9750
},
{
"epoch": 11.63,
"learning_rate": 1.2471304962034258e-05,
"loss": 0.7954,
"step": 9760
},
{
"epoch": 11.64,
"learning_rate": 1.2427158749779269e-05,
"loss": 0.8294,
"step": 9770
},
{
"epoch": 11.66,
"learning_rate": 1.2383012537524282e-05,
"loss": 0.7807,
"step": 9780
},
{
"epoch": 11.67,
"learning_rate": 1.2338866325269292e-05,
"loss": 0.7939,
"step": 9790
},
{
"epoch": 11.68,
"learning_rate": 1.2294720113014303e-05,
"loss": 0.8255,
"step": 9800
},
{
"epoch": 11.69,
"learning_rate": 1.2250573900759316e-05,
"loss": 0.7947,
"step": 9810
},
{
"epoch": 11.7,
"learning_rate": 1.2206427688504327e-05,
"loss": 0.7623,
"step": 9820
},
{
"epoch": 11.72,
"learning_rate": 1.2162281476249338e-05,
"loss": 0.7654,
"step": 9830
},
{
"epoch": 11.73,
"learning_rate": 1.211813526399435e-05,
"loss": 0.8064,
"step": 9840
},
{
"epoch": 11.74,
"learning_rate": 1.207398905173936e-05,
"loss": 0.8024,
"step": 9850
},
{
"epoch": 11.75,
"learning_rate": 1.2029842839484374e-05,
"loss": 0.7557,
"step": 9860
},
{
"epoch": 11.76,
"learning_rate": 1.1985696627229383e-05,
"loss": 0.7451,
"step": 9870
},
{
"epoch": 11.78,
"learning_rate": 1.1941550414974395e-05,
"loss": 0.8012,
"step": 9880
},
{
"epoch": 11.79,
"learning_rate": 1.1897404202719408e-05,
"loss": 0.7658,
"step": 9890
},
{
"epoch": 11.8,
"learning_rate": 1.1853257990464419e-05,
"loss": 0.8007,
"step": 9900
},
{
"epoch": 11.81,
"learning_rate": 1.180911177820943e-05,
"loss": 0.7832,
"step": 9910
},
{
"epoch": 11.82,
"learning_rate": 1.1764965565954441e-05,
"loss": 0.7768,
"step": 9920
},
{
"epoch": 11.84,
"learning_rate": 1.1720819353699453e-05,
"loss": 0.7761,
"step": 9930
},
{
"epoch": 11.85,
"learning_rate": 1.1676673141444466e-05,
"loss": 0.7613,
"step": 9940
},
{
"epoch": 11.86,
"learning_rate": 1.1632526929189475e-05,
"loss": 0.7475,
"step": 9950
},
{
"epoch": 11.87,
"learning_rate": 1.1588380716934488e-05,
"loss": 0.7412,
"step": 9960
},
{
"epoch": 11.88,
"learning_rate": 1.15442345046795e-05,
"loss": 0.7915,
"step": 9970
},
{
"epoch": 11.89,
"learning_rate": 1.150008829242451e-05,
"loss": 0.7724,
"step": 9980
},
{
"epoch": 11.91,
"learning_rate": 1.1455942080169522e-05,
"loss": 0.8079,
"step": 9990
},
{
"epoch": 11.92,
"learning_rate": 1.1411795867914533e-05,
"loss": 0.8112,
"step": 10000
},
{
"epoch": 11.93,
"learning_rate": 1.1367649655659544e-05,
"loss": 0.8072,
"step": 10010
},
{
"epoch": 11.94,
"learning_rate": 1.1323503443404557e-05,
"loss": 0.7606,
"step": 10020
},
{
"epoch": 11.95,
"learning_rate": 1.1279357231149567e-05,
"loss": 0.739,
"step": 10030
},
{
"epoch": 11.97,
"learning_rate": 1.123521101889458e-05,
"loss": 0.7504,
"step": 10040
},
{
"epoch": 11.98,
"learning_rate": 1.1191064806639591e-05,
"loss": 0.7826,
"step": 10050
},
{
"epoch": 11.99,
"learning_rate": 1.1146918594384602e-05,
"loss": 0.7711,
"step": 10060
},
{
"epoch": 12.0,
"eval_accuracy": 0.8069045136302696,
"eval_f1": 0.794973833568864,
"eval_loss": 0.7523674964904785,
"eval_precision": 0.7995192234884096,
"eval_recall": 0.8069045136302696,
"eval_runtime": 99.9574,
"eval_samples_per_second": 268.634,
"eval_steps_per_second": 4.202,
"step": 10068
},
{
"epoch": 12.0,
"learning_rate": 1.1102772382129614e-05,
"loss": 0.7988,
"step": 10070
},
{
"epoch": 12.01,
"learning_rate": 1.1058626169874625e-05,
"loss": 0.7829,
"step": 10080
},
{
"epoch": 12.03,
"learning_rate": 1.1014479957619636e-05,
"loss": 0.703,
"step": 10090
},
{
"epoch": 12.04,
"learning_rate": 1.0970333745364649e-05,
"loss": 0.7694,
"step": 10100
},
{
"epoch": 12.05,
"learning_rate": 1.0926187533109659e-05,
"loss": 0.7603,
"step": 10110
},
{
"epoch": 12.06,
"learning_rate": 1.0882041320854672e-05,
"loss": 0.7369,
"step": 10120
},
{
"epoch": 12.07,
"learning_rate": 1.0837895108599683e-05,
"loss": 0.7168,
"step": 10130
},
{
"epoch": 12.09,
"learning_rate": 1.0793748896344694e-05,
"loss": 0.7568,
"step": 10140
},
{
"epoch": 12.1,
"learning_rate": 1.0749602684089705e-05,
"loss": 0.7458,
"step": 10150
},
{
"epoch": 12.11,
"learning_rate": 1.0705456471834717e-05,
"loss": 0.7414,
"step": 10160
},
{
"epoch": 12.12,
"learning_rate": 1.0661310259579728e-05,
"loss": 0.7439,
"step": 10170
},
{
"epoch": 12.13,
"learning_rate": 1.0617164047324741e-05,
"loss": 0.7865,
"step": 10180
},
{
"epoch": 12.15,
"learning_rate": 1.057301783506975e-05,
"loss": 0.7622,
"step": 10190
},
{
"epoch": 12.16,
"learning_rate": 1.0528871622814763e-05,
"loss": 0.7578,
"step": 10200
},
{
"epoch": 12.17,
"learning_rate": 1.0484725410559775e-05,
"loss": 0.7623,
"step": 10210
},
{
"epoch": 12.18,
"learning_rate": 1.0440579198304786e-05,
"loss": 0.7498,
"step": 10220
},
{
"epoch": 12.19,
"learning_rate": 1.0396432986049797e-05,
"loss": 0.7892,
"step": 10230
},
{
"epoch": 12.2,
"learning_rate": 1.0352286773794808e-05,
"loss": 0.7462,
"step": 10240
},
{
"epoch": 12.22,
"learning_rate": 1.0308140561539821e-05,
"loss": 0.7069,
"step": 10250
},
{
"epoch": 12.23,
"learning_rate": 1.0263994349284833e-05,
"loss": 0.7679,
"step": 10260
},
{
"epoch": 12.24,
"learning_rate": 1.0219848137029842e-05,
"loss": 0.7507,
"step": 10270
},
{
"epoch": 12.25,
"learning_rate": 1.0175701924774855e-05,
"loss": 0.7748,
"step": 10280
},
{
"epoch": 12.26,
"learning_rate": 1.0131555712519866e-05,
"loss": 0.7781,
"step": 10290
},
{
"epoch": 12.28,
"learning_rate": 1.0087409500264878e-05,
"loss": 0.6939,
"step": 10300
},
{
"epoch": 12.29,
"learning_rate": 1.0043263288009889e-05,
"loss": 0.6655,
"step": 10310
},
{
"epoch": 12.3,
"learning_rate": 9.9991170757549e-06,
"loss": 0.7509,
"step": 10320
},
{
"epoch": 12.31,
"learning_rate": 9.954970863499913e-06,
"loss": 0.7902,
"step": 10330
},
{
"epoch": 12.32,
"learning_rate": 9.910824651244924e-06,
"loss": 0.7459,
"step": 10340
},
{
"epoch": 12.34,
"learning_rate": 9.866678438989934e-06,
"loss": 0.7505,
"step": 10350
},
{
"epoch": 12.35,
"learning_rate": 9.822532226734947e-06,
"loss": 0.7476,
"step": 10360
},
{
"epoch": 12.36,
"learning_rate": 9.778386014479958e-06,
"loss": 0.6797,
"step": 10370
},
{
"epoch": 12.37,
"learning_rate": 9.73423980222497e-06,
"loss": 0.7846,
"step": 10380
},
{
"epoch": 12.38,
"learning_rate": 9.69009358996998e-06,
"loss": 0.7465,
"step": 10390
},
{
"epoch": 12.4,
"learning_rate": 9.645947377714992e-06,
"loss": 0.7493,
"step": 10400
},
{
"epoch": 12.41,
"learning_rate": 9.601801165460005e-06,
"loss": 0.7691,
"step": 10410
},
{
"epoch": 12.42,
"learning_rate": 9.557654953205016e-06,
"loss": 0.7668,
"step": 10420
},
{
"epoch": 12.43,
"learning_rate": 9.513508740950026e-06,
"loss": 0.743,
"step": 10430
},
{
"epoch": 12.44,
"learning_rate": 9.469362528695039e-06,
"loss": 0.7609,
"step": 10440
},
{
"epoch": 12.46,
"learning_rate": 9.42521631644005e-06,
"loss": 0.7925,
"step": 10450
},
{
"epoch": 12.47,
"learning_rate": 9.381070104185061e-06,
"loss": 0.7793,
"step": 10460
},
{
"epoch": 12.48,
"learning_rate": 9.336923891930073e-06,
"loss": 0.7548,
"step": 10470
},
{
"epoch": 12.49,
"learning_rate": 9.292777679675084e-06,
"loss": 0.7509,
"step": 10480
},
{
"epoch": 12.5,
"learning_rate": 9.248631467420097e-06,
"loss": 0.6699,
"step": 10490
},
{
"epoch": 12.51,
"learning_rate": 9.204485255165108e-06,
"loss": 0.7488,
"step": 10500
},
{
"epoch": 12.53,
"learning_rate": 9.160339042910118e-06,
"loss": 0.7262,
"step": 10510
},
{
"epoch": 12.54,
"learning_rate": 9.11619283065513e-06,
"loss": 0.7565,
"step": 10520
},
{
"epoch": 12.55,
"learning_rate": 9.072046618400142e-06,
"loss": 0.7549,
"step": 10530
},
{
"epoch": 12.56,
"learning_rate": 9.027900406145153e-06,
"loss": 0.7766,
"step": 10540
},
{
"epoch": 12.57,
"learning_rate": 8.983754193890164e-06,
"loss": 0.7764,
"step": 10550
},
{
"epoch": 12.59,
"learning_rate": 8.939607981635176e-06,
"loss": 0.7215,
"step": 10560
},
{
"epoch": 12.6,
"learning_rate": 8.895461769380189e-06,
"loss": 0.7217,
"step": 10570
},
{
"epoch": 12.61,
"learning_rate": 8.8513155571252e-06,
"loss": 0.747,
"step": 10580
},
{
"epoch": 12.62,
"learning_rate": 8.80716934487021e-06,
"loss": 0.7748,
"step": 10590
},
{
"epoch": 12.63,
"learning_rate": 8.763023132615222e-06,
"loss": 0.7711,
"step": 10600
},
{
"epoch": 12.65,
"learning_rate": 8.718876920360234e-06,
"loss": 0.743,
"step": 10610
},
{
"epoch": 12.66,
"learning_rate": 8.674730708105245e-06,
"loss": 0.7345,
"step": 10620
},
{
"epoch": 12.67,
"learning_rate": 8.630584495850256e-06,
"loss": 0.7406,
"step": 10630
},
{
"epoch": 12.68,
"learning_rate": 8.586438283595267e-06,
"loss": 0.732,
"step": 10640
},
{
"epoch": 12.69,
"learning_rate": 8.54229207134028e-06,
"loss": 0.729,
"step": 10650
},
{
"epoch": 12.71,
"learning_rate": 8.498145859085292e-06,
"loss": 0.7323,
"step": 10660
},
{
"epoch": 12.72,
"learning_rate": 8.453999646830301e-06,
"loss": 0.7796,
"step": 10670
},
{
"epoch": 12.73,
"learning_rate": 8.409853434575314e-06,
"loss": 0.7401,
"step": 10680
},
{
"epoch": 12.74,
"learning_rate": 8.365707222320325e-06,
"loss": 0.7403,
"step": 10690
},
{
"epoch": 12.75,
"learning_rate": 8.321561010065337e-06,
"loss": 0.7433,
"step": 10700
},
{
"epoch": 12.76,
"learning_rate": 8.277414797810348e-06,
"loss": 0.7227,
"step": 10710
},
{
"epoch": 12.78,
"learning_rate": 8.233268585555359e-06,
"loss": 0.7929,
"step": 10720
},
{
"epoch": 12.79,
"learning_rate": 8.189122373300372e-06,
"loss": 0.7602,
"step": 10730
},
{
"epoch": 12.8,
"learning_rate": 8.144976161045383e-06,
"loss": 0.7554,
"step": 10740
},
{
"epoch": 12.81,
"learning_rate": 8.100829948790393e-06,
"loss": 0.7635,
"step": 10750
},
{
"epoch": 12.82,
"learning_rate": 8.056683736535406e-06,
"loss": 0.7451,
"step": 10760
},
{
"epoch": 12.84,
"learning_rate": 8.012537524280417e-06,
"loss": 0.7906,
"step": 10770
},
{
"epoch": 12.85,
"learning_rate": 7.968391312025428e-06,
"loss": 0.7279,
"step": 10780
},
{
"epoch": 12.86,
"learning_rate": 7.92424509977044e-06,
"loss": 0.7292,
"step": 10790
},
{
"epoch": 12.87,
"learning_rate": 7.880098887515451e-06,
"loss": 0.7269,
"step": 10800
},
{
"epoch": 12.88,
"learning_rate": 7.835952675260464e-06,
"loss": 0.705,
"step": 10810
},
{
"epoch": 12.9,
"learning_rate": 7.791806463005475e-06,
"loss": 0.7554,
"step": 10820
},
{
"epoch": 12.91,
"learning_rate": 7.747660250750485e-06,
"loss": 0.7186,
"step": 10830
},
{
"epoch": 12.92,
"learning_rate": 7.703514038495498e-06,
"loss": 0.7928,
"step": 10840
},
{
"epoch": 12.93,
"learning_rate": 7.659367826240509e-06,
"loss": 0.7391,
"step": 10850
},
{
"epoch": 12.94,
"learning_rate": 7.615221613985521e-06,
"loss": 0.728,
"step": 10860
},
{
"epoch": 12.96,
"learning_rate": 7.5710754017305314e-06,
"loss": 0.7191,
"step": 10870
},
{
"epoch": 12.97,
"learning_rate": 7.526929189475543e-06,
"loss": 0.7436,
"step": 10880
},
{
"epoch": 12.98,
"learning_rate": 7.482782977220555e-06,
"loss": 0.71,
"step": 10890
},
{
"epoch": 12.99,
"learning_rate": 7.438636764965566e-06,
"loss": 0.7689,
"step": 10900
},
{
"epoch": 13.0,
"eval_accuracy": 0.8123417250111723,
"eval_f1": 0.8008904007472688,
"eval_loss": 0.7283065915107727,
"eval_precision": 0.8043071439972418,
"eval_recall": 0.8123417250111723,
"eval_runtime": 99.1806,
"eval_samples_per_second": 270.739,
"eval_steps_per_second": 4.235,
"step": 10907
},
{
"epoch": 13.0,
"learning_rate": 7.394490552710578e-06,
"loss": 0.7298,
"step": 10910
},
{
"epoch": 13.02,
"learning_rate": 7.3503443404555894e-06,
"loss": 0.7718,
"step": 10920
},
{
"epoch": 13.03,
"learning_rate": 7.306198128200601e-06,
"loss": 0.6979,
"step": 10930
},
{
"epoch": 13.04,
"learning_rate": 7.262051915945613e-06,
"loss": 0.7406,
"step": 10940
},
{
"epoch": 13.05,
"learning_rate": 7.217905703690623e-06,
"loss": 0.7342,
"step": 10950
},
{
"epoch": 13.06,
"learning_rate": 7.1737594914356345e-06,
"loss": 0.7568,
"step": 10960
},
{
"epoch": 13.08,
"learning_rate": 7.129613279180647e-06,
"loss": 0.7185,
"step": 10970
},
{
"epoch": 13.09,
"learning_rate": 7.085467066925658e-06,
"loss": 0.7223,
"step": 10980
},
{
"epoch": 13.1,
"learning_rate": 7.04132085467067e-06,
"loss": 0.7146,
"step": 10990
},
{
"epoch": 13.11,
"learning_rate": 6.997174642415681e-06,
"loss": 0.679,
"step": 11000
},
{
"epoch": 13.12,
"learning_rate": 6.9530284301606925e-06,
"loss": 0.7124,
"step": 11010
},
{
"epoch": 13.13,
"learning_rate": 6.908882217905705e-06,
"loss": 0.7503,
"step": 11020
},
{
"epoch": 13.15,
"learning_rate": 6.864736005650715e-06,
"loss": 0.7214,
"step": 11030
},
{
"epoch": 13.16,
"learning_rate": 6.820589793395726e-06,
"loss": 0.7273,
"step": 11040
},
{
"epoch": 13.17,
"learning_rate": 6.776443581140738e-06,
"loss": 0.7334,
"step": 11050
},
{
"epoch": 13.18,
"learning_rate": 6.73229736888575e-06,
"loss": 0.7317,
"step": 11060
},
{
"epoch": 13.19,
"learning_rate": 6.688151156630762e-06,
"loss": 0.7214,
"step": 11070
},
{
"epoch": 13.21,
"learning_rate": 6.644004944375773e-06,
"loss": 0.7343,
"step": 11080
},
{
"epoch": 13.22,
"learning_rate": 6.599858732120784e-06,
"loss": 0.7117,
"step": 11090
},
{
"epoch": 13.23,
"learning_rate": 6.555712519865796e-06,
"loss": 0.7101,
"step": 11100
},
{
"epoch": 13.24,
"learning_rate": 6.511566307610807e-06,
"loss": 0.67,
"step": 11110
},
{
"epoch": 13.25,
"learning_rate": 6.467420095355818e-06,
"loss": 0.7423,
"step": 11120
},
{
"epoch": 13.27,
"learning_rate": 6.42327388310083e-06,
"loss": 0.7379,
"step": 11130
},
{
"epoch": 13.28,
"learning_rate": 6.3791276708458414e-06,
"loss": 0.7012,
"step": 11140
},
{
"epoch": 13.29,
"learning_rate": 6.3349814585908535e-06,
"loss": 0.7246,
"step": 11150
},
{
"epoch": 13.3,
"learning_rate": 6.290835246335865e-06,
"loss": 0.7448,
"step": 11160
},
{
"epoch": 13.31,
"learning_rate": 6.246689034080876e-06,
"loss": 0.6997,
"step": 11170
},
{
"epoch": 13.33,
"learning_rate": 6.202542821825888e-06,
"loss": 0.7124,
"step": 11180
},
{
"epoch": 13.34,
"learning_rate": 6.158396609570899e-06,
"loss": 0.7324,
"step": 11190
},
{
"epoch": 13.35,
"learning_rate": 6.114250397315911e-06,
"loss": 0.7154,
"step": 11200
},
{
"epoch": 13.36,
"learning_rate": 6.070104185060922e-06,
"loss": 0.7044,
"step": 11210
},
{
"epoch": 13.37,
"learning_rate": 6.025957972805934e-06,
"loss": 0.73,
"step": 11220
},
{
"epoch": 13.38,
"learning_rate": 5.9818117605509445e-06,
"loss": 0.72,
"step": 11230
},
{
"epoch": 13.4,
"learning_rate": 5.937665548295957e-06,
"loss": 0.7784,
"step": 11240
},
{
"epoch": 13.41,
"learning_rate": 5.893519336040968e-06,
"loss": 0.6891,
"step": 11250
},
{
"epoch": 13.42,
"learning_rate": 5.84937312378598e-06,
"loss": 0.7191,
"step": 11260
},
{
"epoch": 13.43,
"learning_rate": 5.80522691153099e-06,
"loss": 0.7387,
"step": 11270
},
{
"epoch": 13.44,
"learning_rate": 5.7610806992760025e-06,
"loss": 0.7136,
"step": 11280
},
{
"epoch": 13.46,
"learning_rate": 5.716934487021014e-06,
"loss": 0.732,
"step": 11290
},
{
"epoch": 13.47,
"learning_rate": 5.672788274766026e-06,
"loss": 0.7582,
"step": 11300
},
{
"epoch": 13.48,
"learning_rate": 5.628642062511036e-06,
"loss": 0.7532,
"step": 11310
},
{
"epoch": 13.49,
"learning_rate": 5.584495850256048e-06,
"loss": 0.7333,
"step": 11320
},
{
"epoch": 13.5,
"learning_rate": 5.54034963800106e-06,
"loss": 0.7313,
"step": 11330
},
{
"epoch": 13.52,
"learning_rate": 5.496203425746072e-06,
"loss": 0.7207,
"step": 11340
},
{
"epoch": 13.53,
"learning_rate": 5.452057213491082e-06,
"loss": 0.7069,
"step": 11350
},
{
"epoch": 13.54,
"learning_rate": 5.407911001236094e-06,
"loss": 0.6881,
"step": 11360
},
{
"epoch": 13.55,
"learning_rate": 5.3637647889811055e-06,
"loss": 0.7396,
"step": 11370
},
{
"epoch": 13.56,
"learning_rate": 5.319618576726118e-06,
"loss": 0.6671,
"step": 11380
},
{
"epoch": 13.58,
"learning_rate": 5.275472364471128e-06,
"loss": 0.7244,
"step": 11390
},
{
"epoch": 13.59,
"learning_rate": 5.23132615221614e-06,
"loss": 0.7028,
"step": 11400
},
{
"epoch": 13.6,
"learning_rate": 5.187179939961151e-06,
"loss": 0.7315,
"step": 11410
},
{
"epoch": 13.61,
"learning_rate": 5.1430337277061635e-06,
"loss": 0.7555,
"step": 11420
},
{
"epoch": 13.62,
"learning_rate": 5.098887515451174e-06,
"loss": 0.7226,
"step": 11430
},
{
"epoch": 13.64,
"learning_rate": 5.054741303196186e-06,
"loss": 0.7307,
"step": 11440
},
{
"epoch": 13.65,
"learning_rate": 5.010595090941197e-06,
"loss": 0.7348,
"step": 11450
},
{
"epoch": 13.66,
"learning_rate": 4.966448878686209e-06,
"loss": 0.6986,
"step": 11460
},
{
"epoch": 13.67,
"learning_rate": 4.92230266643122e-06,
"loss": 0.746,
"step": 11470
},
{
"epoch": 13.68,
"learning_rate": 4.878156454176232e-06,
"loss": 0.7028,
"step": 11480
},
{
"epoch": 13.69,
"learning_rate": 4.834010241921243e-06,
"loss": 0.7215,
"step": 11490
},
{
"epoch": 13.71,
"learning_rate": 4.789864029666255e-06,
"loss": 0.7616,
"step": 11500
},
{
"epoch": 13.72,
"learning_rate": 4.7457178174112666e-06,
"loss": 0.7073,
"step": 11510
},
{
"epoch": 13.73,
"learning_rate": 4.701571605156278e-06,
"loss": 0.6928,
"step": 11520
},
{
"epoch": 13.74,
"learning_rate": 4.657425392901289e-06,
"loss": 0.6846,
"step": 11530
},
{
"epoch": 13.75,
"learning_rate": 4.613279180646301e-06,
"loss": 0.716,
"step": 11540
},
{
"epoch": 13.77,
"learning_rate": 4.5691329683913125e-06,
"loss": 0.7124,
"step": 11550
},
{
"epoch": 13.78,
"learning_rate": 4.524986756136324e-06,
"loss": 0.7151,
"step": 11560
},
{
"epoch": 13.79,
"learning_rate": 4.480840543881335e-06,
"loss": 0.7018,
"step": 11570
},
{
"epoch": 13.8,
"learning_rate": 4.436694331626347e-06,
"loss": 0.7275,
"step": 11580
},
{
"epoch": 13.81,
"learning_rate": 4.392548119371358e-06,
"loss": 0.7311,
"step": 11590
},
{
"epoch": 13.83,
"learning_rate": 4.34840190711637e-06,
"loss": 0.6931,
"step": 11600
},
{
"epoch": 13.84,
"learning_rate": 4.304255694861381e-06,
"loss": 0.7124,
"step": 11610
},
{
"epoch": 13.85,
"learning_rate": 4.260109482606393e-06,
"loss": 0.7396,
"step": 11620
},
{
"epoch": 13.86,
"learning_rate": 4.215963270351404e-06,
"loss": 0.7241,
"step": 11630
},
{
"epoch": 13.87,
"learning_rate": 4.1718170580964155e-06,
"loss": 0.7272,
"step": 11640
},
{
"epoch": 13.89,
"learning_rate": 4.127670845841427e-06,
"loss": 0.6968,
"step": 11650
},
{
"epoch": 13.9,
"learning_rate": 4.083524633586439e-06,
"loss": 0.6634,
"step": 11660
},
{
"epoch": 13.91,
"learning_rate": 4.03937842133145e-06,
"loss": 0.7018,
"step": 11670
},
{
"epoch": 13.92,
"learning_rate": 3.995232209076461e-06,
"loss": 0.7265,
"step": 11680
},
{
"epoch": 13.93,
"learning_rate": 3.951085996821473e-06,
"loss": 0.6852,
"step": 11690
},
{
"epoch": 13.94,
"learning_rate": 3.906939784566485e-06,
"loss": 0.7071,
"step": 11700
},
{
"epoch": 13.96,
"learning_rate": 3.862793572311496e-06,
"loss": 0.7031,
"step": 11710
},
{
"epoch": 13.97,
"learning_rate": 3.818647360056507e-06,
"loss": 0.7017,
"step": 11720
},
{
"epoch": 13.98,
"learning_rate": 3.7745011478015185e-06,
"loss": 0.7198,
"step": 11730
},
{
"epoch": 13.99,
"learning_rate": 3.7303549355465302e-06,
"loss": 0.6919,
"step": 11740
},
{
"epoch": 14.0,
"eval_accuracy": 0.8147996424847311,
"eval_f1": 0.8036085852328231,
"eval_loss": 0.7133387327194214,
"eval_precision": 0.8060988123614719,
"eval_recall": 0.8147996424847311,
"eval_runtime": 99.251,
"eval_samples_per_second": 270.546,
"eval_steps_per_second": 4.232,
"step": 11746
},
{
"epoch": 14.0,
"learning_rate": 3.686208723291542e-06,
"loss": 0.7503,
"step": 11750
},
{
"epoch": 14.02,
"learning_rate": 3.6420625110365527e-06,
"loss": 0.6886,
"step": 11760
},
{
"epoch": 14.03,
"learning_rate": 3.5979162987815644e-06,
"loss": 0.7366,
"step": 11770
},
{
"epoch": 14.04,
"learning_rate": 3.553770086526576e-06,
"loss": 0.6969,
"step": 11780
},
{
"epoch": 14.05,
"learning_rate": 3.509623874271588e-06,
"loss": 0.7285,
"step": 11790
},
{
"epoch": 14.06,
"learning_rate": 3.4654776620165995e-06,
"loss": 0.6919,
"step": 11800
},
{
"epoch": 14.08,
"learning_rate": 3.4213314497616103e-06,
"loss": 0.6904,
"step": 11810
},
{
"epoch": 14.09,
"learning_rate": 3.377185237506622e-06,
"loss": 0.6896,
"step": 11820
},
{
"epoch": 14.1,
"learning_rate": 3.3330390252516337e-06,
"loss": 0.6798,
"step": 11830
},
{
"epoch": 14.11,
"learning_rate": 3.2888928129966454e-06,
"loss": 0.7035,
"step": 11840
},
{
"epoch": 14.12,
"learning_rate": 3.2447466007416562e-06,
"loss": 0.6692,
"step": 11850
},
{
"epoch": 14.14,
"learning_rate": 3.200600388486668e-06,
"loss": 0.7203,
"step": 11860
},
{
"epoch": 14.15,
"learning_rate": 3.1564541762316796e-06,
"loss": 0.7057,
"step": 11870
},
{
"epoch": 14.16,
"learning_rate": 3.112307963976691e-06,
"loss": 0.7154,
"step": 11880
},
{
"epoch": 14.17,
"learning_rate": 3.0681617517217025e-06,
"loss": 0.6911,
"step": 11890
},
{
"epoch": 14.18,
"learning_rate": 3.024015539466714e-06,
"loss": 0.6745,
"step": 11900
},
{
"epoch": 14.2,
"learning_rate": 2.9798693272117255e-06,
"loss": 0.7365,
"step": 11910
},
{
"epoch": 14.21,
"learning_rate": 2.9357231149567367e-06,
"loss": 0.7185,
"step": 11920
},
{
"epoch": 14.22,
"learning_rate": 2.8915769027017484e-06,
"loss": 0.654,
"step": 11930
},
{
"epoch": 14.23,
"learning_rate": 2.8474306904467597e-06,
"loss": 0.7046,
"step": 11940
},
{
"epoch": 14.24,
"learning_rate": 2.8032844781917714e-06,
"loss": 0.7372,
"step": 11950
},
{
"epoch": 14.25,
"learning_rate": 2.7591382659367826e-06,
"loss": 0.7017,
"step": 11960
},
{
"epoch": 14.27,
"learning_rate": 2.7149920536817943e-06,
"loss": 0.6843,
"step": 11970
},
{
"epoch": 14.28,
"learning_rate": 2.6708458414268056e-06,
"loss": 0.6891,
"step": 11980
},
{
"epoch": 14.29,
"learning_rate": 2.6266996291718173e-06,
"loss": 0.6984,
"step": 11990
},
{
"epoch": 14.3,
"learning_rate": 2.5825534169168285e-06,
"loss": 0.7012,
"step": 12000
},
{
"epoch": 14.31,
"learning_rate": 2.53840720466184e-06,
"loss": 0.7015,
"step": 12010
},
{
"epoch": 14.33,
"learning_rate": 2.4942609924068515e-06,
"loss": 0.7127,
"step": 12020
},
{
"epoch": 14.34,
"learning_rate": 2.450114780151863e-06,
"loss": 0.7021,
"step": 12030
},
{
"epoch": 14.35,
"learning_rate": 2.4059685678968744e-06,
"loss": 0.6599,
"step": 12040
},
{
"epoch": 14.36,
"learning_rate": 2.361822355641886e-06,
"loss": 0.7266,
"step": 12050
},
{
"epoch": 14.37,
"learning_rate": 2.3176761433868974e-06,
"loss": 0.6562,
"step": 12060
},
{
"epoch": 14.39,
"learning_rate": 2.273529931131909e-06,
"loss": 0.7178,
"step": 12070
},
{
"epoch": 14.4,
"learning_rate": 2.2293837188769203e-06,
"loss": 0.683,
"step": 12080
},
{
"epoch": 14.41,
"learning_rate": 2.185237506621932e-06,
"loss": 0.7003,
"step": 12090
},
{
"epoch": 14.42,
"learning_rate": 2.1410912943669432e-06,
"loss": 0.6672,
"step": 12100
},
{
"epoch": 14.43,
"learning_rate": 2.096945082111955e-06,
"loss": 0.7333,
"step": 12110
},
{
"epoch": 14.45,
"learning_rate": 2.0527988698569666e-06,
"loss": 0.7226,
"step": 12120
},
{
"epoch": 14.46,
"learning_rate": 2.008652657601978e-06,
"loss": 0.7062,
"step": 12130
},
{
"epoch": 14.47,
"learning_rate": 1.9645064453469896e-06,
"loss": 0.6825,
"step": 12140
},
{
"epoch": 14.48,
"learning_rate": 1.920360233092001e-06,
"loss": 0.7219,
"step": 12150
},
{
"epoch": 14.49,
"learning_rate": 1.8762140208370125e-06,
"loss": 0.7151,
"step": 12160
},
{
"epoch": 14.51,
"learning_rate": 1.8320678085820238e-06,
"loss": 0.6999,
"step": 12170
},
{
"epoch": 14.52,
"learning_rate": 1.7879215963270355e-06,
"loss": 0.7182,
"step": 12180
},
{
"epoch": 14.53,
"learning_rate": 1.7437753840720467e-06,
"loss": 0.7063,
"step": 12190
},
{
"epoch": 14.54,
"learning_rate": 1.6996291718170584e-06,
"loss": 0.7208,
"step": 12200
},
{
"epoch": 14.55,
"learning_rate": 1.6554829595620697e-06,
"loss": 0.7346,
"step": 12210
},
{
"epoch": 14.56,
"learning_rate": 1.6113367473070813e-06,
"loss": 0.6791,
"step": 12220
},
{
"epoch": 14.58,
"learning_rate": 1.5671905350520926e-06,
"loss": 0.7421,
"step": 12230
},
{
"epoch": 14.59,
"learning_rate": 1.523044322797104e-06,
"loss": 0.6476,
"step": 12240
},
{
"epoch": 14.6,
"learning_rate": 1.4788981105421156e-06,
"loss": 0.6935,
"step": 12250
},
{
"epoch": 14.61,
"learning_rate": 1.434751898287127e-06,
"loss": 0.6992,
"step": 12260
},
{
"epoch": 14.62,
"learning_rate": 1.3906056860321385e-06,
"loss": 0.6843,
"step": 12270
},
{
"epoch": 14.64,
"learning_rate": 1.34645947377715e-06,
"loss": 0.6774,
"step": 12280
},
{
"epoch": 14.65,
"learning_rate": 1.3023132615221614e-06,
"loss": 0.6726,
"step": 12290
},
{
"epoch": 14.66,
"learning_rate": 1.258167049267173e-06,
"loss": 0.6871,
"step": 12300
},
{
"epoch": 14.67,
"learning_rate": 1.2140208370121844e-06,
"loss": 0.7278,
"step": 12310
},
{
"epoch": 14.68,
"learning_rate": 1.1698746247571959e-06,
"loss": 0.6631,
"step": 12320
},
{
"epoch": 14.7,
"learning_rate": 1.1257284125022073e-06,
"loss": 0.6951,
"step": 12330
},
{
"epoch": 14.71,
"learning_rate": 1.0815822002472188e-06,
"loss": 0.7023,
"step": 12340
},
{
"epoch": 14.72,
"learning_rate": 1.0374359879922303e-06,
"loss": 0.7297,
"step": 12350
},
{
"epoch": 14.73,
"learning_rate": 9.932897757372418e-07,
"loss": 0.6868,
"step": 12360
},
{
"epoch": 14.74,
"learning_rate": 9.491435634822533e-07,
"loss": 0.6741,
"step": 12370
},
{
"epoch": 14.76,
"learning_rate": 9.049973512272648e-07,
"loss": 0.6589,
"step": 12380
},
{
"epoch": 14.77,
"learning_rate": 8.608511389722763e-07,
"loss": 0.7029,
"step": 12390
},
{
"epoch": 14.78,
"learning_rate": 8.167049267172878e-07,
"loss": 0.7267,
"step": 12400
},
{
"epoch": 14.79,
"learning_rate": 7.725587144622992e-07,
"loss": 0.6523,
"step": 12410
},
{
"epoch": 14.8,
"learning_rate": 7.284125022073107e-07,
"loss": 0.7113,
"step": 12420
},
{
"epoch": 14.82,
"learning_rate": 6.842662899523222e-07,
"loss": 0.6823,
"step": 12430
},
{
"epoch": 14.83,
"learning_rate": 6.401200776973336e-07,
"loss": 0.7095,
"step": 12440
},
{
"epoch": 14.84,
"learning_rate": 5.95973865442345e-07,
"loss": 0.6959,
"step": 12450
},
{
"epoch": 14.85,
"learning_rate": 5.518276531873565e-07,
"loss": 0.7162,
"step": 12460
},
{
"epoch": 14.86,
"learning_rate": 5.07681440932368e-07,
"loss": 0.683,
"step": 12470
},
{
"epoch": 14.87,
"learning_rate": 4.635352286773795e-07,
"loss": 0.7184,
"step": 12480
},
{
"epoch": 14.89,
"learning_rate": 4.1938901642239095e-07,
"loss": 0.6654,
"step": 12490
},
{
"epoch": 14.9,
"learning_rate": 3.752428041674025e-07,
"loss": 0.6955,
"step": 12500
},
{
"epoch": 14.91,
"learning_rate": 3.3109659191241395e-07,
"loss": 0.6912,
"step": 12510
},
{
"epoch": 14.92,
"learning_rate": 2.8695037965742537e-07,
"loss": 0.7228,
"step": 12520
},
{
"epoch": 14.93,
"learning_rate": 2.4280416740243685e-07,
"loss": 0.6885,
"step": 12530
},
{
"epoch": 14.95,
"learning_rate": 1.9865795514744835e-07,
"loss": 0.7197,
"step": 12540
},
{
"epoch": 14.96,
"learning_rate": 1.5451174289245984e-07,
"loss": 0.7271,
"step": 12550
},
{
"epoch": 14.97,
"learning_rate": 1.103655306374713e-07,
"loss": 0.709,
"step": 12560
},
{
"epoch": 14.98,
"learning_rate": 6.621931838248278e-08,
"loss": 0.7227,
"step": 12570
},
{
"epoch": 14.99,
"learning_rate": 2.2073106127494264e-08,
"loss": 0.694,
"step": 12580
},
{
"epoch": 15.0,
"eval_accuracy": 0.8176672128705497,
"eval_f1": 0.8067463077825504,
"eval_loss": 0.7063722014427185,
"eval_precision": 0.808932447502446,
"eval_recall": 0.8176672128705497,
"eval_runtime": 99.9807,
"eval_samples_per_second": 268.572,
"eval_steps_per_second": 4.201,
"step": 12585
},
{
"epoch": 15.0,
"step": 12585,
"total_flos": 8.179980758810437e+19,
"train_loss": 1.7412154973476806,
"train_runtime": 20817.4419,
"train_samples_per_second": 154.783,
"train_steps_per_second": 0.605
},
{
"epoch": 15.0,
"eval_accuracy": 0.8244604584473866,
"eval_f1": 0.8143373319248239,
"eval_loss": 0.6935074329376221,
"eval_precision": 0.8209253030287482,
"eval_recall": 0.8244604584473866,
"eval_runtime": 847.887,
"eval_samples_per_second": 253.35,
"eval_steps_per_second": 3.959,
"step": 12585
},
{
"epoch": 15.0,
"eval_accuracy": 0.8176672128705497,
"eval_f1": 0.8067463077825504,
"eval_loss": 0.7063722014427185,
"eval_precision": 0.808932447502446,
"eval_recall": 0.8176672128705497,
"eval_runtime": 100.5004,
"eval_samples_per_second": 267.183,
"eval_steps_per_second": 4.179,
"step": 12585
},
{
"epoch": 15.0,
"eval_accuracy": 0.8172575599582899,
"eval_f1": 0.8057061753323418,
"eval_loss": 0.7021384835243225,
"eval_precision": 0.8093542627058361,
"eval_recall": 0.8172575599582899,
"eval_runtime": 100.0753,
"eval_samples_per_second": 268.318,
"eval_steps_per_second": 4.197,
"step": 12585
}
],
"max_steps": 12585,
"num_train_epochs": 15,
"total_flos": 8.179980758810437e+19,
"trial_name": null,
"trial_params": null
}