{
"best_metric": 1.2045246362686157,
"best_model_checkpoint": "TrustPilot-balanced-location-roberta/checkpoint-6477",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01157943492357573,
"grad_norm": 4.521674633026123,
"learning_rate": 1.9290123456790124e-06,
"loss": 1.4326,
"step": 25
},
{
"epoch": 0.02315886984715146,
"grad_norm": 3.7206783294677734,
"learning_rate": 3.858024691358025e-06,
"loss": 1.3881,
"step": 50
},
{
"epoch": 0.03473830477072719,
"grad_norm": 5.430587291717529,
"learning_rate": 5.787037037037038e-06,
"loss": 1.3384,
"step": 75
},
{
"epoch": 0.04631773969430292,
"grad_norm": 5.371946334838867,
"learning_rate": 7.63888888888889e-06,
"loss": 1.2747,
"step": 100
},
{
"epoch": 0.05789717461787865,
"grad_norm": 7.290340423583984,
"learning_rate": 9.5679012345679e-06,
"loss": 1.1889,
"step": 125
},
{
"epoch": 0.06947660954145438,
"grad_norm": 7.079736232757568,
"learning_rate": 1.1496913580246914e-05,
"loss": 1.2006,
"step": 150
},
{
"epoch": 0.0810560444650301,
"grad_norm": 8.647546768188477,
"learning_rate": 1.3425925925925928e-05,
"loss": 1.1498,
"step": 175
},
{
"epoch": 0.09263547938860583,
"grad_norm": 12.410360336303711,
"learning_rate": 1.5354938271604938e-05,
"loss": 1.195,
"step": 200
},
{
"epoch": 0.10421491431218156,
"grad_norm": 6.746768951416016,
"learning_rate": 1.728395061728395e-05,
"loss": 1.1531,
"step": 225
},
{
"epoch": 0.1157943492357573,
"grad_norm": 4.067171573638916,
"learning_rate": 1.91358024691358e-05,
"loss": 1.2655,
"step": 250
},
{
"epoch": 0.12737378415933304,
"grad_norm": 8.56696891784668,
"learning_rate": 2.1064814814814816e-05,
"loss": 1.1576,
"step": 275
},
{
"epoch": 0.13895321908290875,
"grad_norm": 5.337764263153076,
"learning_rate": 2.2916666666666667e-05,
"loss": 1.1628,
"step": 300
},
{
"epoch": 0.1505326540064845,
"grad_norm": 3.743994951248169,
"learning_rate": 2.484567901234568e-05,
"loss": 1.2353,
"step": 325
},
{
"epoch": 0.1621120889300602,
"grad_norm": 7.327773571014404,
"learning_rate": 2.6774691358024694e-05,
"loss": 1.1858,
"step": 350
},
{
"epoch": 0.17369152385363595,
"grad_norm": 4.512418270111084,
"learning_rate": 2.8703703703703706e-05,
"loss": 1.1889,
"step": 375
},
{
"epoch": 0.18527095877721167,
"grad_norm": 5.212406635284424,
"learning_rate": 3.063271604938271e-05,
"loss": 1.2394,
"step": 400
},
{
"epoch": 0.1968503937007874,
"grad_norm": 2.575005531311035,
"learning_rate": 3.256172839506173e-05,
"loss": 1.3288,
"step": 425
},
{
"epoch": 0.20842982862436313,
"grad_norm": 5.339956283569336,
"learning_rate": 3.449074074074074e-05,
"loss": 1.2452,
"step": 450
},
{
"epoch": 0.22000926354793887,
"grad_norm": 5.540539741516113,
"learning_rate": 3.6419753086419754e-05,
"loss": 1.1581,
"step": 475
},
{
"epoch": 0.2315886984715146,
"grad_norm": 4.318514347076416,
"learning_rate": 3.8348765432098766e-05,
"loss": 1.1733,
"step": 500
},
{
"epoch": 0.24316813339509033,
"grad_norm": 2.906097173690796,
"learning_rate": 4.027777777777778e-05,
"loss": 1.1512,
"step": 525
},
{
"epoch": 0.2547475683186661,
"grad_norm": 1.404222011566162,
"learning_rate": 4.220679012345679e-05,
"loss": 1.3561,
"step": 550
},
{
"epoch": 0.2663270032422418,
"grad_norm": 2.1675493717193604,
"learning_rate": 4.413580246913581e-05,
"loss": 1.1446,
"step": 575
},
{
"epoch": 0.2779064381658175,
"grad_norm": 4.99737548828125,
"learning_rate": 4.6064814814814814e-05,
"loss": 1.2864,
"step": 600
},
{
"epoch": 0.2894858730893932,
"grad_norm": 1.7532799243927002,
"learning_rate": 4.799382716049383e-05,
"loss": 1.269,
"step": 625
},
{
"epoch": 0.301065308012969,
"grad_norm": 3.0470633506774902,
"learning_rate": 4.9922839506172845e-05,
"loss": 1.2261,
"step": 650
},
{
"epoch": 0.3126447429365447,
"grad_norm": 6.678360939025879,
"learning_rate": 4.9794132784354094e-05,
"loss": 1.1333,
"step": 675
},
{
"epoch": 0.3242241778601204,
"grad_norm": 4.9130449295043945,
"learning_rate": 4.957968776805627e-05,
"loss": 1.1869,
"step": 700
},
{
"epoch": 0.33580361278369614,
"grad_norm": 4.097753047943115,
"learning_rate": 4.936524275175845e-05,
"loss": 1.2102,
"step": 725
},
{
"epoch": 0.3473830477072719,
"grad_norm": 3.7552871704101562,
"learning_rate": 4.915079773546063e-05,
"loss": 1.1769,
"step": 750
},
{
"epoch": 0.3589624826308476,
"grad_norm": 5.447041988372803,
"learning_rate": 4.893635271916281e-05,
"loss": 1.2407,
"step": 775
},
{
"epoch": 0.37054191755442334,
"grad_norm": 3.041606903076172,
"learning_rate": 4.872190770286499e-05,
"loss": 1.2184,
"step": 800
},
{
"epoch": 0.38212135247799905,
"grad_norm": 2.11730694770813,
"learning_rate": 4.850746268656717e-05,
"loss": 1.107,
"step": 825
},
{
"epoch": 0.3937007874015748,
"grad_norm": 4.180285453796387,
"learning_rate": 4.829301767026935e-05,
"loss": 1.2928,
"step": 850
},
{
"epoch": 0.40528022232515054,
"grad_norm": 11.423721313476562,
"learning_rate": 4.807857265397153e-05,
"loss": 1.1236,
"step": 875
},
{
"epoch": 0.41685965724872626,
"grad_norm": 3.7874417304992676,
"learning_rate": 4.78641276376737e-05,
"loss": 1.1486,
"step": 900
},
{
"epoch": 0.42843909217230197,
"grad_norm": 3.0819077491760254,
"learning_rate": 4.764968262137588e-05,
"loss": 1.2213,
"step": 925
},
{
"epoch": 0.44001852709587774,
"grad_norm": 2.008617401123047,
"learning_rate": 4.743523760507806e-05,
"loss": 1.1765,
"step": 950
},
{
"epoch": 0.45159796201945346,
"grad_norm": 2.2871665954589844,
"learning_rate": 4.722079258878024e-05,
"loss": 1.1775,
"step": 975
},
{
"epoch": 0.4631773969430292,
"grad_norm": 3.751568555831909,
"learning_rate": 4.7006347572482416e-05,
"loss": 1.153,
"step": 1000
},
{
"epoch": 0.4747568318666049,
"grad_norm": 2.8901615142822266,
"learning_rate": 4.6791902556184595e-05,
"loss": 1.2544,
"step": 1025
},
{
"epoch": 0.48633626679018066,
"grad_norm": 2.7572152614593506,
"learning_rate": 4.6577457539886774e-05,
"loss": 1.1789,
"step": 1050
},
{
"epoch": 0.4979157017137564,
"grad_norm": 2.2316782474517822,
"learning_rate": 4.6363012523588953e-05,
"loss": 1.2035,
"step": 1075
},
{
"epoch": 0.5094951366373321,
"grad_norm": 2.4344851970672607,
"learning_rate": 4.614856750729113e-05,
"loss": 1.1438,
"step": 1100
},
{
"epoch": 0.5210745715609079,
"grad_norm": 2.271672010421753,
"learning_rate": 4.593412249099331e-05,
"loss": 1.1742,
"step": 1125
},
{
"epoch": 0.5326540064844836,
"grad_norm": 4.836185932159424,
"learning_rate": 4.571967747469549e-05,
"loss": 1.1543,
"step": 1150
},
{
"epoch": 0.5442334414080593,
"grad_norm": 3.8218131065368652,
"learning_rate": 4.550523245839767e-05,
"loss": 1.1536,
"step": 1175
},
{
"epoch": 0.555812876331635,
"grad_norm": 2.6469738483428955,
"learning_rate": 4.529078744209985e-05,
"loss": 1.1915,
"step": 1200
},
{
"epoch": 0.5673923112552107,
"grad_norm": 5.130224227905273,
"learning_rate": 4.507634242580203e-05,
"loss": 1.177,
"step": 1225
},
{
"epoch": 0.5789717461787864,
"grad_norm": 3.587254047393799,
"learning_rate": 4.486189740950421e-05,
"loss": 1.2532,
"step": 1250
},
{
"epoch": 0.5905511811023622,
"grad_norm": 1.91807222366333,
"learning_rate": 4.464745239320639e-05,
"loss": 1.2081,
"step": 1275
},
{
"epoch": 0.602130616025938,
"grad_norm": 2.0937275886535645,
"learning_rate": 4.4433007376908566e-05,
"loss": 1.247,
"step": 1300
},
{
"epoch": 0.6137100509495137,
"grad_norm": 4.973937511444092,
"learning_rate": 4.4218562360610745e-05,
"loss": 1.1864,
"step": 1325
},
{
"epoch": 0.6252894858730894,
"grad_norm": 4.225080490112305,
"learning_rate": 4.4004117344312924e-05,
"loss": 1.3265,
"step": 1350
},
{
"epoch": 0.6368689207966651,
"grad_norm": 3.563711166381836,
"learning_rate": 4.3789672328015096e-05,
"loss": 1.2219,
"step": 1375
},
{
"epoch": 0.6484483557202408,
"grad_norm": 2.596768856048584,
"learning_rate": 4.3575227311717276e-05,
"loss": 1.2472,
"step": 1400
},
{
"epoch": 0.6600277906438166,
"grad_norm": 2.263674020767212,
"learning_rate": 4.3360782295419455e-05,
"loss": 1.1993,
"step": 1425
},
{
"epoch": 0.6716072255673923,
"grad_norm": 2.2922544479370117,
"learning_rate": 4.3146337279121634e-05,
"loss": 1.1379,
"step": 1450
},
{
"epoch": 0.683186660490968,
"grad_norm": 5.394362449645996,
"learning_rate": 4.293189226282381e-05,
"loss": 1.1085,
"step": 1475
},
{
"epoch": 0.6947660954145438,
"grad_norm": 5.802165508270264,
"learning_rate": 4.271744724652599e-05,
"loss": 1.1417,
"step": 1500
},
{
"epoch": 0.7063455303381195,
"grad_norm": 3.079671621322632,
"learning_rate": 4.250300223022817e-05,
"loss": 1.2111,
"step": 1525
},
{
"epoch": 0.7179249652616952,
"grad_norm": 2.836214303970337,
"learning_rate": 4.228855721393035e-05,
"loss": 1.2457,
"step": 1550
},
{
"epoch": 0.729504400185271,
"grad_norm": 6.700901985168457,
"learning_rate": 4.207411219763253e-05,
"loss": 1.1931,
"step": 1575
},
{
"epoch": 0.7410838351088467,
"grad_norm": 4.292962551116943,
"learning_rate": 4.18596671813347e-05,
"loss": 1.2196,
"step": 1600
},
{
"epoch": 0.7526632700324224,
"grad_norm": 2.4369819164276123,
"learning_rate": 4.164522216503689e-05,
"loss": 1.1819,
"step": 1625
},
{
"epoch": 0.7642427049559981,
"grad_norm": 5.2853474617004395,
"learning_rate": 4.143077714873907e-05,
"loss": 1.1874,
"step": 1650
},
{
"epoch": 0.7758221398795738,
"grad_norm": 1.9918153285980225,
"learning_rate": 4.1216332132441246e-05,
"loss": 1.1866,
"step": 1675
},
{
"epoch": 0.7874015748031497,
"grad_norm": 5.2242231369018555,
"learning_rate": 4.1001887116143425e-05,
"loss": 1.1775,
"step": 1700
},
{
"epoch": 0.7989810097267254,
"grad_norm": 2.511507272720337,
"learning_rate": 4.0787442099845605e-05,
"loss": 1.2418,
"step": 1725
},
{
"epoch": 0.8105604446503011,
"grad_norm": 2.0094120502471924,
"learning_rate": 4.0572997083547784e-05,
"loss": 1.2168,
"step": 1750
},
{
"epoch": 0.8221398795738768,
"grad_norm": 2.8366715908050537,
"learning_rate": 4.035855206724996e-05,
"loss": 1.217,
"step": 1775
},
{
"epoch": 0.8337193144974525,
"grad_norm": 4.902674674987793,
"learning_rate": 4.014410705095214e-05,
"loss": 1.1925,
"step": 1800
},
{
"epoch": 0.8452987494210282,
"grad_norm": 2.4211857318878174,
"learning_rate": 3.992966203465432e-05,
"loss": 1.1716,
"step": 1825
},
{
"epoch": 0.8568781843446039,
"grad_norm": 4.9972381591796875,
"learning_rate": 3.9715217018356493e-05,
"loss": 1.1871,
"step": 1850
},
{
"epoch": 0.8684576192681797,
"grad_norm": 3.486520290374756,
"learning_rate": 3.950077200205867e-05,
"loss": 1.2001,
"step": 1875
},
{
"epoch": 0.8800370541917555,
"grad_norm": 2.2144150733947754,
"learning_rate": 3.928632698576085e-05,
"loss": 1.232,
"step": 1900
},
{
"epoch": 0.8916164891153312,
"grad_norm": 6.714953899383545,
"learning_rate": 3.907188196946303e-05,
"loss": 1.1767,
"step": 1925
},
{
"epoch": 0.9031959240389069,
"grad_norm": 6.166855812072754,
"learning_rate": 3.885743695316521e-05,
"loss": 1.1167,
"step": 1950
},
{
"epoch": 0.9147753589624826,
"grad_norm": 3.6272430419921875,
"learning_rate": 3.864299193686739e-05,
"loss": 1.2003,
"step": 1975
},
{
"epoch": 0.9263547938860583,
"grad_norm": 5.192286014556885,
"learning_rate": 3.842854692056957e-05,
"loss": 1.2661,
"step": 2000
},
{
"epoch": 0.9379342288096341,
"grad_norm": 3.80322003364563,
"learning_rate": 3.821410190427175e-05,
"loss": 1.2087,
"step": 2025
},
{
"epoch": 0.9495136637332098,
"grad_norm": 2.330951690673828,
"learning_rate": 3.799965688797393e-05,
"loss": 1.2075,
"step": 2050
},
{
"epoch": 0.9610930986567855,
"grad_norm": 3.1722116470336914,
"learning_rate": 3.77852118716761e-05,
"loss": 1.1741,
"step": 2075
},
{
"epoch": 0.9726725335803613,
"grad_norm": 3.6307098865509033,
"learning_rate": 3.757076685537828e-05,
"loss": 1.1772,
"step": 2100
},
{
"epoch": 0.984251968503937,
"grad_norm": 2.525423765182495,
"learning_rate": 3.735632183908046e-05,
"loss": 1.1213,
"step": 2125
},
{
"epoch": 0.9958314034275128,
"grad_norm": 2.655104160308838,
"learning_rate": 3.7141876822782637e-05,
"loss": 1.1762,
"step": 2150
},
{
"epoch": 1.0,
"eval_accuracy": 0.39710843373493976,
"eval_f1_macro": 0.14211797171438428,
"eval_f1_micro": 0.39710843373493976,
"eval_f1_weighted": 0.22574498061234247,
"eval_loss": 1.2174618244171143,
"eval_precision_macro": 0.09927710843373494,
"eval_precision_micro": 0.39710843373493976,
"eval_precision_weighted": 0.15769510814341703,
"eval_recall_macro": 0.25,
"eval_recall_micro": 0.39710843373493976,
"eval_recall_weighted": 0.39710843373493976,
"eval_runtime": 4.9841,
"eval_samples_per_second": 416.326,
"eval_steps_per_second": 26.083,
"step": 2159
},
{
"epoch": 1.0074108383510885,
"grad_norm": 2.2906293869018555,
"learning_rate": 3.6927431806484816e-05,
"loss": 1.2513,
"step": 2175
},
{
"epoch": 1.0189902732746643,
"grad_norm": 4.764555931091309,
"learning_rate": 3.6712986790187e-05,
"loss": 1.2133,
"step": 2200
},
{
"epoch": 1.03056970819824,
"grad_norm": 2.511648178100586,
"learning_rate": 3.649854177388918e-05,
"loss": 1.1227,
"step": 2225
},
{
"epoch": 1.0421491431218157,
"grad_norm": 5.154523849487305,
"learning_rate": 3.628409675759136e-05,
"loss": 1.1251,
"step": 2250
},
{
"epoch": 1.0537285780453913,
"grad_norm": 2.393103837966919,
"learning_rate": 3.606965174129354e-05,
"loss": 1.2593,
"step": 2275
},
{
"epoch": 1.0653080129689672,
"grad_norm": 2.7954907417297363,
"learning_rate": 3.585520672499571e-05,
"loss": 1.2476,
"step": 2300
},
{
"epoch": 1.0768874478925428,
"grad_norm": 4.258531093597412,
"learning_rate": 3.564076170869789e-05,
"loss": 1.2369,
"step": 2325
},
{
"epoch": 1.0884668828161186,
"grad_norm": 4.744079113006592,
"learning_rate": 3.542631669240007e-05,
"loss": 1.175,
"step": 2350
},
{
"epoch": 1.1000463177396944,
"grad_norm": 4.467709541320801,
"learning_rate": 3.521187167610225e-05,
"loss": 1.1174,
"step": 2375
},
{
"epoch": 1.11162575266327,
"grad_norm": 2.7314538955688477,
"learning_rate": 3.499742665980443e-05,
"loss": 1.2007,
"step": 2400
},
{
"epoch": 1.1232051875868458,
"grad_norm": 1.8456259965896606,
"learning_rate": 3.478298164350661e-05,
"loss": 1.2232,
"step": 2425
},
{
"epoch": 1.1347846225104214,
"grad_norm": 3.8557677268981934,
"learning_rate": 3.4568536627208786e-05,
"loss": 1.2338,
"step": 2450
},
{
"epoch": 1.1463640574339973,
"grad_norm": 3.338961124420166,
"learning_rate": 3.4354091610910965e-05,
"loss": 1.2004,
"step": 2475
},
{
"epoch": 1.1579434923575729,
"grad_norm": 2.3821332454681396,
"learning_rate": 3.4139646594613145e-05,
"loss": 1.1967,
"step": 2500
},
{
"epoch": 1.1695229272811487,
"grad_norm": 2.296182155609131,
"learning_rate": 3.3925201578315324e-05,
"loss": 1.1825,
"step": 2525
},
{
"epoch": 1.1811023622047245,
"grad_norm": 2.287925958633423,
"learning_rate": 3.3710756562017496e-05,
"loss": 1.1661,
"step": 2550
},
{
"epoch": 1.1926817971283001,
"grad_norm": 3.0742363929748535,
"learning_rate": 3.3496311545719675e-05,
"loss": 1.1855,
"step": 2575
},
{
"epoch": 1.204261232051876,
"grad_norm": 2.94059157371521,
"learning_rate": 3.3281866529421854e-05,
"loss": 1.186,
"step": 2600
},
{
"epoch": 1.2158406669754516,
"grad_norm": 5.658060073852539,
"learning_rate": 3.3067421513124034e-05,
"loss": 1.2018,
"step": 2625
},
{
"epoch": 1.2274201018990274,
"grad_norm": 4.225418567657471,
"learning_rate": 3.285297649682621e-05,
"loss": 1.1913,
"step": 2650
},
{
"epoch": 1.238999536822603,
"grad_norm": 3.121039867401123,
"learning_rate": 3.263853148052839e-05,
"loss": 1.1655,
"step": 2675
},
{
"epoch": 1.2505789717461788,
"grad_norm": 2.750720977783203,
"learning_rate": 3.242408646423057e-05,
"loss": 1.1818,
"step": 2700
},
{
"epoch": 1.2621584066697547,
"grad_norm": 3.299870491027832,
"learning_rate": 3.220964144793275e-05,
"loss": 1.2333,
"step": 2725
},
{
"epoch": 1.2737378415933303,
"grad_norm": 1.8936024904251099,
"learning_rate": 3.1995196431634936e-05,
"loss": 1.2091,
"step": 2750
},
{
"epoch": 1.2853172765169059,
"grad_norm": 4.938189506530762,
"learning_rate": 3.178075141533711e-05,
"loss": 1.2436,
"step": 2775
},
{
"epoch": 1.2968967114404817,
"grad_norm": 3.0422909259796143,
"learning_rate": 3.156630639903929e-05,
"loss": 1.2071,
"step": 2800
},
{
"epoch": 1.3084761463640575,
"grad_norm": 3.3571670055389404,
"learning_rate": 3.135186138274147e-05,
"loss": 1.1012,
"step": 2825
},
{
"epoch": 1.3200555812876331,
"grad_norm": 5.697854518890381,
"learning_rate": 3.1137416366443646e-05,
"loss": 1.1837,
"step": 2850
},
{
"epoch": 1.331635016211209,
"grad_norm": 2.8652396202087402,
"learning_rate": 3.0922971350145825e-05,
"loss": 1.2351,
"step": 2875
},
{
"epoch": 1.3432144511347845,
"grad_norm": 2.0512943267822266,
"learning_rate": 3.0708526333848004e-05,
"loss": 1.1621,
"step": 2900
},
{
"epoch": 1.3547938860583604,
"grad_norm": 3.4354703426361084,
"learning_rate": 3.0494081317550183e-05,
"loss": 1.1627,
"step": 2925
},
{
"epoch": 1.366373320981936,
"grad_norm": 2.0285403728485107,
"learning_rate": 3.0279636301252362e-05,
"loss": 1.2202,
"step": 2950
},
{
"epoch": 1.3779527559055118,
"grad_norm": 4.55291223526001,
"learning_rate": 3.006519128495454e-05,
"loss": 1.1935,
"step": 2975
},
{
"epoch": 1.3895321908290876,
"grad_norm": 3.867063045501709,
"learning_rate": 2.9850746268656714e-05,
"loss": 1.0448,
"step": 3000
},
{
"epoch": 1.4011116257526632,
"grad_norm": 3.6873021125793457,
"learning_rate": 2.9636301252358893e-05,
"loss": 1.2339,
"step": 3025
},
{
"epoch": 1.412691060676239,
"grad_norm": 2.2147438526153564,
"learning_rate": 2.9421856236061072e-05,
"loss": 1.1809,
"step": 3050
},
{
"epoch": 1.4242704955998147,
"grad_norm": 2.6401538848876953,
"learning_rate": 2.9207411219763255e-05,
"loss": 1.1291,
"step": 3075
},
{
"epoch": 1.4358499305233905,
"grad_norm": 2.2739460468292236,
"learning_rate": 2.8992966203465434e-05,
"loss": 1.1953,
"step": 3100
},
{
"epoch": 1.447429365446966,
"grad_norm": 1.2269738912582397,
"learning_rate": 2.8778521187167613e-05,
"loss": 1.2693,
"step": 3125
},
{
"epoch": 1.459008800370542,
"grad_norm": 4.429539680480957,
"learning_rate": 2.8564076170869792e-05,
"loss": 1.2717,
"step": 3150
},
{
"epoch": 1.4705882352941178,
"grad_norm": 3.294246196746826,
"learning_rate": 2.834963115457197e-05,
"loss": 1.1788,
"step": 3175
},
{
"epoch": 1.4821676702176934,
"grad_norm": 5.130248546600342,
"learning_rate": 2.813518613827415e-05,
"loss": 1.1758,
"step": 3200
},
{
"epoch": 1.4937471051412692,
"grad_norm": 2.9172611236572266,
"learning_rate": 2.792074112197633e-05,
"loss": 1.1474,
"step": 3225
},
{
"epoch": 1.5053265400648448,
"grad_norm": 2.7223055362701416,
"learning_rate": 2.7706296105678502e-05,
"loss": 1.2265,
"step": 3250
},
{
"epoch": 1.5169059749884206,
"grad_norm": 3.7259788513183594,
"learning_rate": 2.749185108938068e-05,
"loss": 1.2508,
"step": 3275
},
{
"epoch": 1.5284854099119962,
"grad_norm": 1.7198467254638672,
"learning_rate": 2.727740607308286e-05,
"loss": 1.1782,
"step": 3300
},
{
"epoch": 1.540064844835572,
"grad_norm": 1.9382416009902954,
"learning_rate": 2.706296105678504e-05,
"loss": 1.2074,
"step": 3325
},
{
"epoch": 1.5516442797591479,
"grad_norm": 2.2858059406280518,
"learning_rate": 2.6848516040487222e-05,
"loss": 1.1473,
"step": 3350
},
{
"epoch": 1.5632237146827235,
"grad_norm": 1.461945652961731,
"learning_rate": 2.66340710241894e-05,
"loss": 1.1901,
"step": 3375
},
{
"epoch": 1.574803149606299,
"grad_norm": 5.549622058868408,
"learning_rate": 2.641962600789158e-05,
"loss": 1.1618,
"step": 3400
},
{
"epoch": 1.586382584529875,
"grad_norm": 2.907963991165161,
"learning_rate": 2.620518099159376e-05,
"loss": 1.1767,
"step": 3425
},
{
"epoch": 1.5979620194534507,
"grad_norm": 1.6633723974227905,
"learning_rate": 2.599073597529594e-05,
"loss": 1.1997,
"step": 3450
},
{
"epoch": 1.6095414543770263,
"grad_norm": 5.5080742835998535,
"learning_rate": 2.577629095899811e-05,
"loss": 1.1881,
"step": 3475
},
{
"epoch": 1.6211208893006022,
"grad_norm": 2.5473108291625977,
"learning_rate": 2.556184594270029e-05,
"loss": 1.15,
"step": 3500
},
{
"epoch": 1.632700324224178,
"grad_norm": 2.0483057498931885,
"learning_rate": 2.534740092640247e-05,
"loss": 1.2271,
"step": 3525
},
{
"epoch": 1.6442797591477536,
"grad_norm": 3.6027846336364746,
"learning_rate": 2.513295591010465e-05,
"loss": 1.2121,
"step": 3550
},
{
"epoch": 1.6558591940713292,
"grad_norm": 3.154784917831421,
"learning_rate": 2.4918510893806828e-05,
"loss": 1.2706,
"step": 3575
},
{
"epoch": 1.667438628994905,
"grad_norm": 5.780117511749268,
"learning_rate": 2.4704065877509007e-05,
"loss": 1.1916,
"step": 3600
},
{
"epoch": 1.6790180639184809,
"grad_norm": 4.522841930389404,
"learning_rate": 2.448962086121119e-05,
"loss": 1.1344,
"step": 3625
},
{
"epoch": 1.6905974988420565,
"grad_norm": 2.302856922149658,
"learning_rate": 2.4275175844913365e-05,
"loss": 1.1316,
"step": 3650
},
{
"epoch": 1.7021769337656323,
"grad_norm": 3.6142489910125732,
"learning_rate": 2.4060730828615544e-05,
"loss": 1.1464,
"step": 3675
},
{
"epoch": 1.713756368689208,
"grad_norm": 3.417003870010376,
"learning_rate": 2.3846285812317723e-05,
"loss": 1.2767,
"step": 3700
},
{
"epoch": 1.7253358036127837,
"grad_norm": 1.8820807933807373,
"learning_rate": 2.3631840796019903e-05,
"loss": 1.1759,
"step": 3725
},
{
"epoch": 1.7369152385363593,
"grad_norm": 2.0070981979370117,
"learning_rate": 2.341739577972208e-05,
"loss": 1.2357,
"step": 3750
},
{
"epoch": 1.7484946734599351,
"grad_norm": 1.9160246849060059,
"learning_rate": 2.3202950763424257e-05,
"loss": 1.148,
"step": 3775
},
{
"epoch": 1.760074108383511,
"grad_norm": 2.4420526027679443,
"learning_rate": 2.2988505747126437e-05,
"loss": 1.239,
"step": 3800
},
{
"epoch": 1.7716535433070866,
"grad_norm": 1.5695481300354004,
"learning_rate": 2.2774060730828616e-05,
"loss": 1.1936,
"step": 3825
},
{
"epoch": 1.7832329782306622,
"grad_norm": 2.090928077697754,
"learning_rate": 2.2559615714530795e-05,
"loss": 1.176,
"step": 3850
},
{
"epoch": 1.7948124131542382,
"grad_norm": 2.7507429122924805,
"learning_rate": 2.2345170698232974e-05,
"loss": 1.1847,
"step": 3875
},
{
"epoch": 1.8063918480778138,
"grad_norm": 2.7657129764556885,
"learning_rate": 2.2130725681935153e-05,
"loss": 1.1522,
"step": 3900
},
{
"epoch": 1.8179712830013894,
"grad_norm": 4.012863636016846,
"learning_rate": 2.1916280665637332e-05,
"loss": 1.1398,
"step": 3925
},
{
"epoch": 1.8295507179249653,
"grad_norm": 2.7316641807556152,
"learning_rate": 2.170183564933951e-05,
"loss": 1.1766,
"step": 3950
},
{
"epoch": 1.841130152848541,
"grad_norm": 3.0468456745147705,
"learning_rate": 2.148739063304169e-05,
"loss": 1.2055,
"step": 3975
},
{
"epoch": 1.8527095877721167,
"grad_norm": 2.0280911922454834,
"learning_rate": 2.1272945616743866e-05,
"loss": 1.184,
"step": 4000
},
{
"epoch": 1.8642890226956923,
"grad_norm": 2.5638182163238525,
"learning_rate": 2.1058500600446046e-05,
"loss": 1.2357,
"step": 4025
},
{
"epoch": 1.8758684576192681,
"grad_norm": 2.283189535140991,
"learning_rate": 2.0844055584148225e-05,
"loss": 1.1874,
"step": 4050
},
{
"epoch": 1.887447892542844,
"grad_norm": 1.3770339488983154,
"learning_rate": 2.0629610567850404e-05,
"loss": 1.2674,
"step": 4075
},
{
"epoch": 1.8990273274664196,
"grad_norm": 1.9555165767669678,
"learning_rate": 2.0415165551552583e-05,
"loss": 1.1922,
"step": 4100
},
{
"epoch": 1.9106067623899954,
"grad_norm": 3.256969928741455,
"learning_rate": 2.0200720535254762e-05,
"loss": 1.1587,
"step": 4125
},
{
"epoch": 1.9221861973135712,
"grad_norm": 1.853826642036438,
"learning_rate": 1.998627551895694e-05,
"loss": 1.1769,
"step": 4150
},
{
"epoch": 1.9337656322371468,
"grad_norm": 2.319624662399292,
"learning_rate": 1.977183050265912e-05,
"loss": 1.1328,
"step": 4175
},
{
"epoch": 1.9453450671607224,
"grad_norm": 2.720109701156616,
"learning_rate": 1.95573854863613e-05,
"loss": 1.1331,
"step": 4200
},
{
"epoch": 1.9569245020842982,
"grad_norm": 4.079843044281006,
"learning_rate": 1.9342940470063475e-05,
"loss": 1.1479,
"step": 4225
},
{
"epoch": 1.968503937007874,
"grad_norm": 3.2058353424072266,
"learning_rate": 1.9128495453765654e-05,
"loss": 1.1891,
"step": 4250
},
{
"epoch": 1.9800833719314497,
"grad_norm": 2.1098670959472656,
"learning_rate": 1.8914050437467834e-05,
"loss": 1.1548,
"step": 4275
},
{
"epoch": 1.9916628068550255,
"grad_norm": 2.4204399585723877,
"learning_rate": 1.8699605421170013e-05,
"loss": 1.2393,
"step": 4300
},
{
"epoch": 2.0,
"eval_accuracy": 0.39710843373493976,
"eval_f1_macro": 0.14211797171438428,
"eval_f1_micro": 0.39710843373493976,
"eval_f1_weighted": 0.22574498061234247,
"eval_loss": 1.2068006992340088,
"eval_precision_macro": 0.09927710843373494,
"eval_precision_micro": 0.39710843373493976,
"eval_precision_weighted": 0.15769510814341703,
"eval_recall_macro": 0.25,
"eval_recall_micro": 0.39710843373493976,
"eval_recall_weighted": 0.39710843373493976,
"eval_runtime": 5.002,
"eval_samples_per_second": 414.833,
"eval_steps_per_second": 25.99,
"step": 4318
},
{
"epoch": 2.0032422417786013,
"grad_norm": 4.668676376342773,
"learning_rate": 1.8485160404872192e-05,
"loss": 1.2158,
"step": 4325
},
{
"epoch": 2.014821676702177,
"grad_norm": 2.2886784076690674,
"learning_rate": 1.8270715388574368e-05,
"loss": 1.1545,
"step": 4350
},
{
"epoch": 2.0264011116257525,
"grad_norm": 4.7184038162231445,
"learning_rate": 1.805627037227655e-05,
"loss": 1.2628,
"step": 4375
},
{
"epoch": 2.0379805465493286,
"grad_norm": 1.9082050323486328,
"learning_rate": 1.784182535597873e-05,
"loss": 1.1398,
"step": 4400
},
{
"epoch": 2.049559981472904,
"grad_norm": 2.6623945236206055,
"learning_rate": 1.762738033968091e-05,
"loss": 1.182,
"step": 4425
},
{
"epoch": 2.06113941639648,
"grad_norm": 5.472179412841797,
"learning_rate": 1.7412935323383088e-05,
"loss": 1.2342,
"step": 4450
},
{
"epoch": 2.0727188513200554,
"grad_norm": 2.2513480186462402,
"learning_rate": 1.7198490307085263e-05,
"loss": 1.219,
"step": 4475
},
{
"epoch": 2.0842982862436314,
"grad_norm": 2.979966878890991,
"learning_rate": 1.6984045290787443e-05,
"loss": 1.2084,
"step": 4500
},
{
"epoch": 2.095877721167207,
"grad_norm": 4.656105041503906,
"learning_rate": 1.6769600274489622e-05,
"loss": 1.2289,
"step": 4525
},
{
"epoch": 2.1074571560907827,
"grad_norm": 8.104940414428711,
"learning_rate": 1.65551552581918e-05,
"loss": 1.2331,
"step": 4550
},
{
"epoch": 2.1190365910143587,
"grad_norm": 6.20884370803833,
"learning_rate": 1.6340710241893977e-05,
"loss": 1.2045,
"step": 4575
},
{
"epoch": 2.1306160259379343,
"grad_norm": 3.645780086517334,
"learning_rate": 1.6126265225596156e-05,
"loss": 1.1754,
"step": 4600
},
{
"epoch": 2.14219546086151,
"grad_norm": 2.960564136505127,
"learning_rate": 1.5911820209298335e-05,
"loss": 1.195,
"step": 4625
},
{
"epoch": 2.1537748957850855,
"grad_norm": 2.6137890815734863,
"learning_rate": 1.5697375193000517e-05,
"loss": 1.099,
"step": 4650
},
{
"epoch": 2.1653543307086616,
"grad_norm": 4.492011547088623,
"learning_rate": 1.5482930176702697e-05,
"loss": 1.2088,
"step": 4675
},
{
"epoch": 2.176933765632237,
"grad_norm": 3.653522253036499,
"learning_rate": 1.5268485160404872e-05,
"loss": 1.2309,
"step": 4700
},
{
"epoch": 2.1885132005558128,
"grad_norm": 2.6041195392608643,
"learning_rate": 1.5054040144107052e-05,
"loss": 1.2099,
"step": 4725
},
{
"epoch": 2.200092635479389,
"grad_norm": 5.426154613494873,
"learning_rate": 1.483959512780923e-05,
"loss": 1.203,
"step": 4750
},
{
"epoch": 2.2116720704029644,
"grad_norm": 2.009709358215332,
"learning_rate": 1.462515011151141e-05,
"loss": 1.1193,
"step": 4775
},
{
"epoch": 2.22325150532654,
"grad_norm": 2.586091995239258,
"learning_rate": 1.4410705095213589e-05,
"loss": 1.1109,
"step": 4800
},
{
"epoch": 2.2348309402501156,
"grad_norm": 1.9446204900741577,
"learning_rate": 1.4196260078915766e-05,
"loss": 1.2251,
"step": 4825
},
{
"epoch": 2.2464103751736917,
"grad_norm": 2.2268826961517334,
"learning_rate": 1.3981815062617946e-05,
"loss": 1.1447,
"step": 4850
},
{
"epoch": 2.2579898100972673,
"grad_norm": 4.135994911193848,
"learning_rate": 1.3767370046320125e-05,
"loss": 1.1326,
"step": 4875
},
{
"epoch": 2.269569245020843,
"grad_norm": 1.3713667392730713,
"learning_rate": 1.3552925030022304e-05,
"loss": 1.2048,
"step": 4900
},
{
"epoch": 2.281148679944419,
"grad_norm": 4.801929473876953,
"learning_rate": 1.333848001372448e-05,
"loss": 1.1726,
"step": 4925
},
{
"epoch": 2.2927281148679945,
"grad_norm": 1.5106154680252075,
"learning_rate": 1.312403499742666e-05,
"loss": 1.2191,
"step": 4950
},
{
"epoch": 2.30430754979157,
"grad_norm": 1.9938125610351562,
"learning_rate": 1.290958998112884e-05,
"loss": 1.181,
"step": 4975
},
{
"epoch": 2.3158869847151458,
"grad_norm": 5.004785060882568,
"learning_rate": 1.2695144964831019e-05,
"loss": 1.1746,
"step": 5000
},
{
"epoch": 2.327466419638722,
"grad_norm": 2.9320216178894043,
"learning_rate": 1.2480699948533196e-05,
"loss": 1.2399,
"step": 5025
},
{
"epoch": 2.3390458545622974,
"grad_norm": 4.154562473297119,
"learning_rate": 1.2266254932235375e-05,
"loss": 1.2275,
"step": 5050
},
{
"epoch": 2.350625289485873,
"grad_norm": 2.5340206623077393,
"learning_rate": 1.2051809915937553e-05,
"loss": 1.1873,
"step": 5075
},
{
"epoch": 2.362204724409449,
"grad_norm": 2.7467281818389893,
"learning_rate": 1.1837364899639734e-05,
"loss": 1.2125,
"step": 5100
},
{
"epoch": 2.3737841593330247,
"grad_norm": 2.1378886699676514,
"learning_rate": 1.1622919883341913e-05,
"loss": 1.1484,
"step": 5125
},
{
"epoch": 2.3853635942566003,
"grad_norm": 1.7250367403030396,
"learning_rate": 1.140847486704409e-05,
"loss": 1.1293,
"step": 5150
},
{
"epoch": 2.396943029180176,
"grad_norm": 4.303859233856201,
"learning_rate": 1.119402985074627e-05,
"loss": 1.2065,
"step": 5175
},
{
"epoch": 2.408522464103752,
"grad_norm": 4.186789035797119,
"learning_rate": 1.0979584834448447e-05,
"loss": 1.1573,
"step": 5200
},
{
"epoch": 2.4201018990273275,
"grad_norm": 2.763376474380493,
"learning_rate": 1.0765139818150626e-05,
"loss": 1.2093,
"step": 5225
},
{
"epoch": 2.431681333950903,
"grad_norm": 4.167290210723877,
"learning_rate": 1.0550694801852805e-05,
"loss": 1.2073,
"step": 5250
},
{
"epoch": 2.4432607688744787,
"grad_norm": 4.77427864074707,
"learning_rate": 1.0336249785554984e-05,
"loss": 1.2074,
"step": 5275
},
{
"epoch": 2.454840203798055,
"grad_norm": 2.2546989917755127,
"learning_rate": 1.0121804769257163e-05,
"loss": 1.2089,
"step": 5300
},
{
"epoch": 2.4664196387216304,
"grad_norm": 2.603929281234741,
"learning_rate": 9.907359752959341e-06,
"loss": 1.1774,
"step": 5325
},
{
"epoch": 2.477999073645206,
"grad_norm": 2.2647411823272705,
"learning_rate": 9.69291473666152e-06,
"loss": 1.1509,
"step": 5350
},
{
"epoch": 2.4895785085687816,
"grad_norm": 2.784689426422119,
"learning_rate": 9.4784697203637e-06,
"loss": 1.1545,
"step": 5375
},
{
"epoch": 2.5011579434923576,
"grad_norm": 3.1189873218536377,
"learning_rate": 9.264024704065878e-06,
"loss": 1.1353,
"step": 5400
},
{
"epoch": 2.5127373784159333,
"grad_norm": 3.5311825275421143,
"learning_rate": 9.049579687768056e-06,
"loss": 1.2599,
"step": 5425
},
{
"epoch": 2.5243168133395093,
"grad_norm": 1.4731173515319824,
"learning_rate": 8.835134671470235e-06,
"loss": 1.127,
"step": 5450
},
{
"epoch": 2.535896248263085,
"grad_norm": 2.797048807144165,
"learning_rate": 8.620689655172414e-06,
"loss": 1.0877,
"step": 5475
},
{
"epoch": 2.5474756831866605,
"grad_norm": 3.5394978523254395,
"learning_rate": 8.406244638874593e-06,
"loss": 1.1834,
"step": 5500
},
{
"epoch": 2.559055118110236,
"grad_norm": 4.206399440765381,
"learning_rate": 8.191799622576772e-06,
"loss": 1.1239,
"step": 5525
},
{
"epoch": 2.5706345530338117,
"grad_norm": 2.8601016998291016,
"learning_rate": 7.97735460627895e-06,
"loss": 1.217,
"step": 5550
},
{
"epoch": 2.5822139879573878,
"grad_norm": 3.3993771076202393,
"learning_rate": 7.762909589981129e-06,
"loss": 1.1876,
"step": 5575
},
{
"epoch": 2.5937934228809634,
"grad_norm": 2.0492095947265625,
"learning_rate": 7.548464573683307e-06,
"loss": 1.1923,
"step": 5600
},
{
"epoch": 2.605372857804539,
"grad_norm": 3.045842170715332,
"learning_rate": 7.3340195573854865e-06,
"loss": 1.2492,
"step": 5625
},
{
"epoch": 2.616952292728115,
"grad_norm": 1.7433972358703613,
"learning_rate": 7.119574541087666e-06,
"loss": 1.2652,
"step": 5650
},
{
"epoch": 2.6285317276516906,
"grad_norm": 2.3767240047454834,
"learning_rate": 6.905129524789844e-06,
"loss": 1.2284,
"step": 5675
},
{
"epoch": 2.6401111625752662,
"grad_norm": 4.228554725646973,
"learning_rate": 6.690684508492023e-06,
"loss": 1.1705,
"step": 5700
},
{
"epoch": 2.651690597498842,
"grad_norm": 4.027316570281982,
"learning_rate": 6.476239492194201e-06,
"loss": 1.1751,
"step": 5725
},
{
"epoch": 2.663270032422418,
"grad_norm": 2.5308732986450195,
"learning_rate": 6.2617944758963805e-06,
"loss": 1.2038,
"step": 5750
},
{
"epoch": 2.6748494673459935,
"grad_norm": 2.849998712539673,
"learning_rate": 6.04734945959856e-06,
"loss": 1.13,
"step": 5775
},
{
"epoch": 2.686428902269569,
"grad_norm": 1.7784459590911865,
"learning_rate": 5.832904443300738e-06,
"loss": 1.2206,
"step": 5800
},
{
"epoch": 2.698008337193145,
"grad_norm": 5.856213569641113,
"learning_rate": 5.618459427002916e-06,
"loss": 1.1501,
"step": 5825
},
{
"epoch": 2.7095877721167207,
"grad_norm": 2.401578664779663,
"learning_rate": 5.4040144107050954e-06,
"loss": 1.2501,
"step": 5850
},
{
"epoch": 2.7211672070402964,
"grad_norm": 2.7738897800445557,
"learning_rate": 5.189569394407274e-06,
"loss": 1.1535,
"step": 5875
},
{
"epoch": 2.732746641963872,
"grad_norm": 7.125967979431152,
"learning_rate": 4.975124378109453e-06,
"loss": 1.2025,
"step": 5900
},
{
"epoch": 2.744326076887448,
"grad_norm": 1.4013216495513916,
"learning_rate": 4.760679361811632e-06,
"loss": 1.2028,
"step": 5925
},
{
"epoch": 2.7559055118110236,
"grad_norm": 6.033567905426025,
"learning_rate": 4.54623434551381e-06,
"loss": 1.1888,
"step": 5950
},
{
"epoch": 2.767484946734599,
"grad_norm": 2.6083405017852783,
"learning_rate": 4.3317893292159895e-06,
"loss": 1.1546,
"step": 5975
},
{
"epoch": 2.7790643816581753,
"grad_norm": 3.179180860519409,
"learning_rate": 4.117344312918168e-06,
"loss": 1.2136,
"step": 6000
},
{
"epoch": 2.790643816581751,
"grad_norm": 1.8782720565795898,
"learning_rate": 3.902899296620346e-06,
"loss": 1.116,
"step": 6025
},
{
"epoch": 2.8022232515053265,
"grad_norm": 1.5270411968231201,
"learning_rate": 3.6884542803225257e-06,
"loss": 1.1756,
"step": 6050
},
{
"epoch": 2.813802686428902,
"grad_norm": 2.8853325843811035,
"learning_rate": 3.4740092640247044e-06,
"loss": 1.1418,
"step": 6075
},
{
"epoch": 2.825382121352478,
"grad_norm": 4.495181560516357,
"learning_rate": 3.259564247726883e-06,
"loss": 1.2573,
"step": 6100
},
{
"epoch": 2.8369615562760537,
"grad_norm": 3.329115390777588,
"learning_rate": 3.045119231429062e-06,
"loss": 1.1984,
"step": 6125
},
{
"epoch": 2.8485409911996293,
"grad_norm": 2.650596857070923,
"learning_rate": 2.8306742151312406e-06,
"loss": 1.1734,
"step": 6150
},
{
"epoch": 2.8601204261232054,
"grad_norm": 2.172297239303589,
"learning_rate": 2.616229198833419e-06,
"loss": 1.1968,
"step": 6175
},
{
"epoch": 2.871699861046781,
"grad_norm": 2.388245105743408,
"learning_rate": 2.401784182535598e-06,
"loss": 1.1787,
"step": 6200
},
{
"epoch": 2.8832792959703566,
"grad_norm": 2.751389265060425,
"learning_rate": 2.1873391662377767e-06,
"loss": 1.1884,
"step": 6225
},
{
"epoch": 2.894858730893932,
"grad_norm": 3.0782809257507324,
"learning_rate": 1.9728941499399555e-06,
"loss": 1.1554,
"step": 6250
},
{
"epoch": 2.9064381658175082,
"grad_norm": 2.73215913772583,
"learning_rate": 1.7584491336421344e-06,
"loss": 1.2045,
"step": 6275
},
{
"epoch": 2.918017600741084,
"grad_norm": 2.4149420261383057,
"learning_rate": 1.544004117344313e-06,
"loss": 1.1549,
"step": 6300
},
{
"epoch": 2.9295970356646595,
"grad_norm": 2.5072360038757324,
"learning_rate": 1.3295591010464916e-06,
"loss": 1.1785,
"step": 6325
},
{
"epoch": 2.9411764705882355,
"grad_norm": 3.0238680839538574,
"learning_rate": 1.1151140847486706e-06,
"loss": 1.1882,
"step": 6350
},
{
"epoch": 2.952755905511811,
"grad_norm": 1.7855486869812012,
"learning_rate": 9.006690684508493e-07,
"loss": 1.1809,
"step": 6375
},
{
"epoch": 2.9643353404353867,
"grad_norm": 5.080151557922363,
"learning_rate": 6.862240521530279e-07,
"loss": 1.2288,
"step": 6400
},
{
"epoch": 2.9759147753589623,
"grad_norm": 2.6682169437408447,
"learning_rate": 4.717790358552067e-07,
"loss": 1.2158,
"step": 6425
},
{
"epoch": 2.9874942102825384,
"grad_norm": 1.6711657047271729,
"learning_rate": 2.573340195573855e-07,
"loss": 1.1705,
"step": 6450
},
{
"epoch": 2.999073645206114,
"grad_norm": 3.766089916229248,
"learning_rate": 4.2889003259564246e-08,
"loss": 1.1834,
"step": 6475
},
{
"epoch": 3.0,
"eval_accuracy": 0.39710843373493976,
"eval_f1_macro": 0.14211797171438428,
"eval_f1_micro": 0.39710843373493976,
"eval_f1_weighted": 0.22574498061234247,
"eval_loss": 1.2045246362686157,
"eval_precision_macro": 0.09927710843373494,
"eval_precision_micro": 0.39710843373493976,
"eval_precision_weighted": 0.15769510814341703,
"eval_recall_macro": 0.25,
"eval_recall_micro": 0.39710843373493976,
"eval_recall_weighted": 0.39710843373493976,
"eval_runtime": 4.9488,
"eval_samples_per_second": 419.296,
"eval_steps_per_second": 26.269,
"step": 6477
}
],
"logging_steps": 25,
"max_steps": 6477,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3407612468023296.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}