{
  "best_metric": 1.2045246362686157,
  "best_model_checkpoint": "TrustPilot-balanced-location-roberta/checkpoint-6477",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 6477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01157943492357573,
      "grad_norm": 4.521674633026123,
      "learning_rate": 1.9290123456790124e-06,
      "loss": 1.4326,
      "step": 25
    },
    {
      "epoch": 0.02315886984715146,
      "grad_norm": 3.7206783294677734,
      "learning_rate": 3.858024691358025e-06,
      "loss": 1.3881,
      "step": 50
    },
    {
      "epoch": 0.03473830477072719,
      "grad_norm": 5.430587291717529,
      "learning_rate": 5.787037037037038e-06,
      "loss": 1.3384,
      "step": 75
    },
    {
      "epoch": 0.04631773969430292,
      "grad_norm": 5.371946334838867,
      "learning_rate": 7.63888888888889e-06,
      "loss": 1.2747,
      "step": 100
    },
    {
      "epoch": 0.05789717461787865,
      "grad_norm": 7.290340423583984,
      "learning_rate": 9.5679012345679e-06,
      "loss": 1.1889,
      "step": 125
    },
    {
      "epoch": 0.06947660954145438,
      "grad_norm": 7.079736232757568,
      "learning_rate": 1.1496913580246914e-05,
      "loss": 1.2006,
      "step": 150
    },
    {
      "epoch": 0.0810560444650301,
      "grad_norm": 8.647546768188477,
      "learning_rate": 1.3425925925925928e-05,
      "loss": 1.1498,
      "step": 175
    },
    {
      "epoch": 0.09263547938860583,
      "grad_norm": 12.410360336303711,
      "learning_rate": 1.5354938271604938e-05,
      "loss": 1.195,
      "step": 200
    },
    {
      "epoch": 0.10421491431218156,
      "grad_norm": 6.746768951416016,
      "learning_rate": 1.728395061728395e-05,
      "loss": 1.1531,
      "step": 225
    },
    {
      "epoch": 0.1157943492357573,
      "grad_norm": 4.067171573638916,
      "learning_rate": 1.91358024691358e-05,
      "loss": 1.2655,
      "step": 250
    },
    {
      "epoch": 0.12737378415933304,
      "grad_norm": 8.56696891784668,
      "learning_rate": 2.1064814814814816e-05,
      "loss": 1.1576,
      "step": 275
    },
    {
      "epoch": 0.13895321908290875,
      "grad_norm": 5.337764263153076,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 1.1628,
      "step": 300
    },
    {
      "epoch": 0.1505326540064845,
      "grad_norm": 3.743994951248169,
      "learning_rate": 2.484567901234568e-05,
      "loss": 1.2353,
      "step": 325
    },
    {
      "epoch": 0.1621120889300602,
      "grad_norm": 7.327773571014404,
      "learning_rate": 2.6774691358024694e-05,
      "loss": 1.1858,
      "step": 350
    },
    {
      "epoch": 0.17369152385363595,
      "grad_norm": 4.512418270111084,
      "learning_rate": 2.8703703703703706e-05,
      "loss": 1.1889,
      "step": 375
    },
    {
      "epoch": 0.18527095877721167,
      "grad_norm": 5.212406635284424,
      "learning_rate": 3.063271604938271e-05,
      "loss": 1.2394,
      "step": 400
    },
    {
      "epoch": 0.1968503937007874,
      "grad_norm": 2.575005531311035,
      "learning_rate": 3.256172839506173e-05,
      "loss": 1.3288,
      "step": 425
    },
    {
      "epoch": 0.20842982862436313,
      "grad_norm": 5.339956283569336,
      "learning_rate": 3.449074074074074e-05,
      "loss": 1.2452,
      "step": 450
    },
    {
      "epoch": 0.22000926354793887,
      "grad_norm": 5.540539741516113,
      "learning_rate": 3.6419753086419754e-05,
      "loss": 1.1581,
      "step": 475
    },
    {
      "epoch": 0.2315886984715146,
      "grad_norm": 4.318514347076416,
      "learning_rate": 3.8348765432098766e-05,
      "loss": 1.1733,
      "step": 500
    },
    {
      "epoch": 0.24316813339509033,
      "grad_norm": 2.906097173690796,
      "learning_rate": 4.027777777777778e-05,
      "loss": 1.1512,
      "step": 525
    },
    {
      "epoch": 0.2547475683186661,
      "grad_norm": 1.404222011566162,
      "learning_rate": 4.220679012345679e-05,
      "loss": 1.3561,
      "step": 550
    },
    {
      "epoch": 0.2663270032422418,
      "grad_norm": 2.1675493717193604,
      "learning_rate": 4.413580246913581e-05,
      "loss": 1.1446,
      "step": 575
    },
    {
      "epoch": 0.2779064381658175,
      "grad_norm": 4.99737548828125,
      "learning_rate": 4.6064814814814814e-05,
      "loss": 1.2864,
      "step": 600
    },
    {
      "epoch": 0.2894858730893932,
      "grad_norm": 1.7532799243927002,
      "learning_rate": 4.799382716049383e-05,
      "loss": 1.269,
      "step": 625
    },
    {
      "epoch": 0.301065308012969,
      "grad_norm": 3.0470633506774902,
      "learning_rate": 4.9922839506172845e-05,
      "loss": 1.2261,
      "step": 650
    },
    {
      "epoch": 0.3126447429365447,
      "grad_norm": 6.678360939025879,
      "learning_rate": 4.9794132784354094e-05,
      "loss": 1.1333,
      "step": 675
    },
    {
      "epoch": 0.3242241778601204,
      "grad_norm": 4.9130449295043945,
      "learning_rate": 4.957968776805627e-05,
      "loss": 1.1869,
      "step": 700
    },
    {
      "epoch": 0.33580361278369614,
      "grad_norm": 4.097753047943115,
      "learning_rate": 4.936524275175845e-05,
      "loss": 1.2102,
      "step": 725
    },
    {
      "epoch": 0.3473830477072719,
      "grad_norm": 3.7552871704101562,
      "learning_rate": 4.915079773546063e-05,
      "loss": 1.1769,
      "step": 750
    },
    {
      "epoch": 0.3589624826308476,
      "grad_norm": 5.447041988372803,
      "learning_rate": 4.893635271916281e-05,
      "loss": 1.2407,
      "step": 775
    },
    {
      "epoch": 0.37054191755442334,
      "grad_norm": 3.041606903076172,
      "learning_rate": 4.872190770286499e-05,
      "loss": 1.2184,
      "step": 800
    },
    {
      "epoch": 0.38212135247799905,
      "grad_norm": 2.11730694770813,
      "learning_rate": 4.850746268656717e-05,
      "loss": 1.107,
      "step": 825
    },
    {
      "epoch": 0.3937007874015748,
      "grad_norm": 4.180285453796387,
      "learning_rate": 4.829301767026935e-05,
      "loss": 1.2928,
      "step": 850
    },
    {
      "epoch": 0.40528022232515054,
      "grad_norm": 11.423721313476562,
      "learning_rate": 4.807857265397153e-05,
      "loss": 1.1236,
      "step": 875
    },
    {
      "epoch": 0.41685965724872626,
      "grad_norm": 3.7874417304992676,
      "learning_rate": 4.78641276376737e-05,
      "loss": 1.1486,
      "step": 900
    },
    {
      "epoch": 0.42843909217230197,
      "grad_norm": 3.0819077491760254,
      "learning_rate": 4.764968262137588e-05,
      "loss": 1.2213,
      "step": 925
    },
    {
      "epoch": 0.44001852709587774,
      "grad_norm": 2.008617401123047,
      "learning_rate": 4.743523760507806e-05,
      "loss": 1.1765,
      "step": 950
    },
    {
      "epoch": 0.45159796201945346,
      "grad_norm": 2.2871665954589844,
      "learning_rate": 4.722079258878024e-05,
      "loss": 1.1775,
      "step": 975
    },
    {
      "epoch": 0.4631773969430292,
      "grad_norm": 3.751568555831909,
      "learning_rate": 4.7006347572482416e-05,
      "loss": 1.153,
      "step": 1000
    },
    {
      "epoch": 0.4747568318666049,
      "grad_norm": 2.8901615142822266,
      "learning_rate": 4.6791902556184595e-05,
      "loss": 1.2544,
      "step": 1025
    },
    {
      "epoch": 0.48633626679018066,
      "grad_norm": 2.7572152614593506,
      "learning_rate": 4.6577457539886774e-05,
      "loss": 1.1789,
      "step": 1050
    },
    {
      "epoch": 0.4979157017137564,
      "grad_norm": 2.2316782474517822,
      "learning_rate": 4.6363012523588953e-05,
      "loss": 1.2035,
      "step": 1075
    },
    {
      "epoch": 0.5094951366373321,
      "grad_norm": 2.4344851970672607,
      "learning_rate": 4.614856750729113e-05,
      "loss": 1.1438,
      "step": 1100
    },
    {
      "epoch": 0.5210745715609079,
      "grad_norm": 2.271672010421753,
      "learning_rate": 4.593412249099331e-05,
      "loss": 1.1742,
      "step": 1125
    },
    {
      "epoch": 0.5326540064844836,
      "grad_norm": 4.836185932159424,
      "learning_rate": 4.571967747469549e-05,
      "loss": 1.1543,
      "step": 1150
    },
    {
      "epoch": 0.5442334414080593,
      "grad_norm": 3.8218131065368652,
      "learning_rate": 4.550523245839767e-05,
      "loss": 1.1536,
      "step": 1175
    },
    {
      "epoch": 0.555812876331635,
      "grad_norm": 2.6469738483428955,
      "learning_rate": 4.529078744209985e-05,
      "loss": 1.1915,
      "step": 1200
    },
    {
      "epoch": 0.5673923112552107,
      "grad_norm": 5.130224227905273,
      "learning_rate": 4.507634242580203e-05,
      "loss": 1.177,
      "step": 1225
    },
    {
      "epoch": 0.5789717461787864,
      "grad_norm": 3.587254047393799,
      "learning_rate": 4.486189740950421e-05,
      "loss": 1.2532,
      "step": 1250
    },
    {
      "epoch": 0.5905511811023622,
      "grad_norm": 1.91807222366333,
      "learning_rate": 4.464745239320639e-05,
      "loss": 1.2081,
      "step": 1275
    },
    {
      "epoch": 0.602130616025938,
      "grad_norm": 2.0937275886535645,
      "learning_rate": 4.4433007376908566e-05,
      "loss": 1.247,
      "step": 1300
    },
    {
      "epoch": 0.6137100509495137,
      "grad_norm": 4.973937511444092,
      "learning_rate": 4.4218562360610745e-05,
      "loss": 1.1864,
      "step": 1325
    },
    {
      "epoch": 0.6252894858730894,
      "grad_norm": 4.225080490112305,
      "learning_rate": 4.4004117344312924e-05,
      "loss": 1.3265,
      "step": 1350
    },
    {
      "epoch": 0.6368689207966651,
      "grad_norm": 3.563711166381836,
      "learning_rate": 4.3789672328015096e-05,
      "loss": 1.2219,
      "step": 1375
    },
    {
      "epoch": 0.6484483557202408,
      "grad_norm": 2.596768856048584,
      "learning_rate": 4.3575227311717276e-05,
      "loss": 1.2472,
      "step": 1400
    },
    {
      "epoch": 0.6600277906438166,
      "grad_norm": 2.263674020767212,
      "learning_rate": 4.3360782295419455e-05,
      "loss": 1.1993,
      "step": 1425
    },
    {
      "epoch": 0.6716072255673923,
      "grad_norm": 2.2922544479370117,
      "learning_rate": 4.3146337279121634e-05,
      "loss": 1.1379,
      "step": 1450
    },
    {
      "epoch": 0.683186660490968,
      "grad_norm": 5.394362449645996,
      "learning_rate": 4.293189226282381e-05,
      "loss": 1.1085,
      "step": 1475
    },
    {
      "epoch": 0.6947660954145438,
      "grad_norm": 5.802165508270264,
      "learning_rate": 4.271744724652599e-05,
      "loss": 1.1417,
      "step": 1500
    },
    {
      "epoch": 0.7063455303381195,
      "grad_norm": 3.079671621322632,
      "learning_rate": 4.250300223022817e-05,
      "loss": 1.2111,
      "step": 1525
    },
    {
      "epoch": 0.7179249652616952,
      "grad_norm": 2.836214303970337,
      "learning_rate": 4.228855721393035e-05,
      "loss": 1.2457,
      "step": 1550
    },
    {
      "epoch": 0.729504400185271,
      "grad_norm": 6.700901985168457,
      "learning_rate": 4.207411219763253e-05,
      "loss": 1.1931,
      "step": 1575
    },
    {
      "epoch": 0.7410838351088467,
      "grad_norm": 4.292962551116943,
      "learning_rate": 4.18596671813347e-05,
      "loss": 1.2196,
      "step": 1600
    },
    {
      "epoch": 0.7526632700324224,
      "grad_norm": 2.4369819164276123,
      "learning_rate": 4.164522216503689e-05,
      "loss": 1.1819,
      "step": 1625
    },
    {
      "epoch": 0.7642427049559981,
      "grad_norm": 5.2853474617004395,
      "learning_rate": 4.143077714873907e-05,
      "loss": 1.1874,
      "step": 1650
    },
    {
      "epoch": 0.7758221398795738,
      "grad_norm": 1.9918153285980225,
      "learning_rate": 4.1216332132441246e-05,
      "loss": 1.1866,
      "step": 1675
    },
    {
      "epoch": 0.7874015748031497,
      "grad_norm": 5.2242231369018555,
      "learning_rate": 4.1001887116143425e-05,
      "loss": 1.1775,
      "step": 1700
    },
    {
      "epoch": 0.7989810097267254,
      "grad_norm": 2.511507272720337,
      "learning_rate": 4.0787442099845605e-05,
      "loss": 1.2418,
      "step": 1725
    },
    {
      "epoch": 0.8105604446503011,
      "grad_norm": 2.0094120502471924,
      "learning_rate": 4.0572997083547784e-05,
      "loss": 1.2168,
      "step": 1750
    },
    {
      "epoch": 0.8221398795738768,
      "grad_norm": 2.8366715908050537,
      "learning_rate": 4.035855206724996e-05,
      "loss": 1.217,
      "step": 1775
    },
    {
      "epoch": 0.8337193144974525,
      "grad_norm": 4.902674674987793,
      "learning_rate": 4.014410705095214e-05,
      "loss": 1.1925,
      "step": 1800
    },
    {
      "epoch": 0.8452987494210282,
      "grad_norm": 2.4211857318878174,
      "learning_rate": 3.992966203465432e-05,
      "loss": 1.1716,
      "step": 1825
    },
    {
      "epoch": 0.8568781843446039,
      "grad_norm": 4.9972381591796875,
      "learning_rate": 3.9715217018356493e-05,
      "loss": 1.1871,
      "step": 1850
    },
    {
      "epoch": 0.8684576192681797,
      "grad_norm": 3.486520290374756,
      "learning_rate": 3.950077200205867e-05,
      "loss": 1.2001,
      "step": 1875
    },
    {
      "epoch": 0.8800370541917555,
      "grad_norm": 2.2144150733947754,
      "learning_rate": 3.928632698576085e-05,
      "loss": 1.232,
      "step": 1900
    },
    {
      "epoch": 0.8916164891153312,
      "grad_norm": 6.714953899383545,
      "learning_rate": 3.907188196946303e-05,
      "loss": 1.1767,
      "step": 1925
    },
    {
      "epoch": 0.9031959240389069,
      "grad_norm": 6.166855812072754,
      "learning_rate": 3.885743695316521e-05,
      "loss": 1.1167,
      "step": 1950
    },
    {
      "epoch": 0.9147753589624826,
      "grad_norm": 3.6272430419921875,
      "learning_rate": 3.864299193686739e-05,
      "loss": 1.2003,
      "step": 1975
    },
    {
      "epoch": 0.9263547938860583,
      "grad_norm": 5.192286014556885,
      "learning_rate": 3.842854692056957e-05,
      "loss": 1.2661,
      "step": 2000
    },
    {
      "epoch": 0.9379342288096341,
      "grad_norm": 3.80322003364563,
      "learning_rate": 3.821410190427175e-05,
      "loss": 1.2087,
      "step": 2025
    },
    {
      "epoch": 0.9495136637332098,
      "grad_norm": 2.330951690673828,
      "learning_rate": 3.799965688797393e-05,
      "loss": 1.2075,
      "step": 2050
    },
    {
      "epoch": 0.9610930986567855,
      "grad_norm": 3.1722116470336914,
      "learning_rate": 3.77852118716761e-05,
      "loss": 1.1741,
      "step": 2075
    },
    {
      "epoch": 0.9726725335803613,
      "grad_norm": 3.6307098865509033,
      "learning_rate": 3.757076685537828e-05,
      "loss": 1.1772,
      "step": 2100
    },
    {
      "epoch": 0.984251968503937,
      "grad_norm": 2.525423765182495,
      "learning_rate": 3.735632183908046e-05,
      "loss": 1.1213,
      "step": 2125
    },
    {
      "epoch": 0.9958314034275128,
      "grad_norm": 2.655104160308838,
      "learning_rate": 3.7141876822782637e-05,
      "loss": 1.1762,
      "step": 2150
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.39710843373493976,
      "eval_f1_macro": 0.14211797171438428,
      "eval_f1_micro": 0.39710843373493976,
      "eval_f1_weighted": 0.22574498061234247,
      "eval_loss": 1.2174618244171143,
      "eval_precision_macro": 0.09927710843373494,
      "eval_precision_micro": 0.39710843373493976,
      "eval_precision_weighted": 0.15769510814341703,
      "eval_recall_macro": 0.25,
      "eval_recall_micro": 0.39710843373493976,
      "eval_recall_weighted": 0.39710843373493976,
      "eval_runtime": 4.9841,
      "eval_samples_per_second": 416.326,
      "eval_steps_per_second": 26.083,
      "step": 2159
    },
    {
      "epoch": 1.0074108383510885,
      "grad_norm": 2.2906293869018555,
      "learning_rate": 3.6927431806484816e-05,
      "loss": 1.2513,
      "step": 2175
    },
    {
      "epoch": 1.0189902732746643,
      "grad_norm": 4.764555931091309,
      "learning_rate": 3.6712986790187e-05,
      "loss": 1.2133,
      "step": 2200
    },
    {
      "epoch": 1.03056970819824,
      "grad_norm": 2.511648178100586,
      "learning_rate": 3.649854177388918e-05,
      "loss": 1.1227,
      "step": 2225
    },
    {
      "epoch": 1.0421491431218157,
      "grad_norm": 5.154523849487305,
      "learning_rate": 3.628409675759136e-05,
      "loss": 1.1251,
      "step": 2250
    },
    {
      "epoch": 1.0537285780453913,
      "grad_norm": 2.393103837966919,
      "learning_rate": 3.606965174129354e-05,
      "loss": 1.2593,
      "step": 2275
    },
    {
      "epoch": 1.0653080129689672,
      "grad_norm": 2.7954907417297363,
      "learning_rate": 3.585520672499571e-05,
      "loss": 1.2476,
      "step": 2300
    },
    {
      "epoch": 1.0768874478925428,
      "grad_norm": 4.258531093597412,
      "learning_rate": 3.564076170869789e-05,
      "loss": 1.2369,
      "step": 2325
    },
    {
      "epoch": 1.0884668828161186,
      "grad_norm": 4.744079113006592,
      "learning_rate": 3.542631669240007e-05,
      "loss": 1.175,
      "step": 2350
    },
    {
      "epoch": 1.1000463177396944,
      "grad_norm": 4.467709541320801,
      "learning_rate": 3.521187167610225e-05,
      "loss": 1.1174,
      "step": 2375
    },
    {
      "epoch": 1.11162575266327,
      "grad_norm": 2.7314538955688477,
      "learning_rate": 3.499742665980443e-05,
      "loss": 1.2007,
      "step": 2400
    },
    {
      "epoch": 1.1232051875868458,
      "grad_norm": 1.8456259965896606,
      "learning_rate": 3.478298164350661e-05,
      "loss": 1.2232,
      "step": 2425
    },
    {
      "epoch": 1.1347846225104214,
      "grad_norm": 3.8557677268981934,
      "learning_rate": 3.4568536627208786e-05,
      "loss": 1.2338,
      "step": 2450
    },
    {
      "epoch": 1.1463640574339973,
      "grad_norm": 3.338961124420166,
      "learning_rate": 3.4354091610910965e-05,
      "loss": 1.2004,
      "step": 2475
    },
    {
      "epoch": 1.1579434923575729,
      "grad_norm": 2.3821332454681396,
      "learning_rate": 3.4139646594613145e-05,
      "loss": 1.1967,
      "step": 2500
    },
    {
      "epoch": 1.1695229272811487,
      "grad_norm": 2.296182155609131,
      "learning_rate": 3.3925201578315324e-05,
      "loss": 1.1825,
      "step": 2525
    },
    {
      "epoch": 1.1811023622047245,
      "grad_norm": 2.287925958633423,
      "learning_rate": 3.3710756562017496e-05,
      "loss": 1.1661,
      "step": 2550
    },
    {
      "epoch": 1.1926817971283001,
      "grad_norm": 3.0742363929748535,
      "learning_rate": 3.3496311545719675e-05,
      "loss": 1.1855,
      "step": 2575
    },
    {
      "epoch": 1.204261232051876,
      "grad_norm": 2.94059157371521,
      "learning_rate": 3.3281866529421854e-05,
      "loss": 1.186,
      "step": 2600
    },
    {
      "epoch": 1.2158406669754516,
      "grad_norm": 5.658060073852539,
      "learning_rate": 3.3067421513124034e-05,
      "loss": 1.2018,
      "step": 2625
    },
    {
      "epoch": 1.2274201018990274,
      "grad_norm": 4.225418567657471,
      "learning_rate": 3.285297649682621e-05,
      "loss": 1.1913,
      "step": 2650
    },
    {
      "epoch": 1.238999536822603,
      "grad_norm": 3.121039867401123,
      "learning_rate": 3.263853148052839e-05,
      "loss": 1.1655,
      "step": 2675
    },
    {
      "epoch": 1.2505789717461788,
      "grad_norm": 2.750720977783203,
      "learning_rate": 3.242408646423057e-05,
      "loss": 1.1818,
      "step": 2700
    },
    {
      "epoch": 1.2621584066697547,
      "grad_norm": 3.299870491027832,
      "learning_rate": 3.220964144793275e-05,
      "loss": 1.2333,
      "step": 2725
    },
    {
      "epoch": 1.2737378415933303,
      "grad_norm": 1.8936024904251099,
      "learning_rate": 3.1995196431634936e-05,
      "loss": 1.2091,
      "step": 2750
    },
    {
      "epoch": 1.2853172765169059,
      "grad_norm": 4.938189506530762,
      "learning_rate": 3.178075141533711e-05,
      "loss": 1.2436,
      "step": 2775
    },
    {
      "epoch": 1.2968967114404817,
      "grad_norm": 3.0422909259796143,
      "learning_rate": 3.156630639903929e-05,
      "loss": 1.2071,
      "step": 2800
    },
    {
      "epoch": 1.3084761463640575,
      "grad_norm": 3.3571670055389404,
      "learning_rate": 3.135186138274147e-05,
      "loss": 1.1012,
      "step": 2825
    },
    {
      "epoch": 1.3200555812876331,
      "grad_norm": 5.697854518890381,
      "learning_rate": 3.1137416366443646e-05,
      "loss": 1.1837,
      "step": 2850
    },
    {
      "epoch": 1.331635016211209,
      "grad_norm": 2.8652396202087402,
      "learning_rate": 3.0922971350145825e-05,
      "loss": 1.2351,
      "step": 2875
    },
    {
      "epoch": 1.3432144511347845,
      "grad_norm": 2.0512943267822266,
      "learning_rate": 3.0708526333848004e-05,
      "loss": 1.1621,
      "step": 2900
    },
    {
      "epoch": 1.3547938860583604,
      "grad_norm": 3.4354703426361084,
      "learning_rate": 3.0494081317550183e-05,
      "loss": 1.1627,
      "step": 2925
    },
    {
      "epoch": 1.366373320981936,
      "grad_norm": 2.0285403728485107,
      "learning_rate": 3.0279636301252362e-05,
      "loss": 1.2202,
      "step": 2950
    },
    {
      "epoch": 1.3779527559055118,
      "grad_norm": 4.55291223526001,
      "learning_rate": 3.006519128495454e-05,
      "loss": 1.1935,
      "step": 2975
    },
    {
      "epoch": 1.3895321908290876,
      "grad_norm": 3.867063045501709,
      "learning_rate": 2.9850746268656714e-05,
      "loss": 1.0448,
      "step": 3000
    },
    {
      "epoch": 1.4011116257526632,
      "grad_norm": 3.6873021125793457,
      "learning_rate": 2.9636301252358893e-05,
      "loss": 1.2339,
      "step": 3025
    },
    {
      "epoch": 1.412691060676239,
      "grad_norm": 2.2147438526153564,
      "learning_rate": 2.9421856236061072e-05,
      "loss": 1.1809,
      "step": 3050
    },
    {
      "epoch": 1.4242704955998147,
      "grad_norm": 2.6401538848876953,
      "learning_rate": 2.9207411219763255e-05,
      "loss": 1.1291,
      "step": 3075
    },
    {
      "epoch": 1.4358499305233905,
      "grad_norm": 2.2739460468292236,
      "learning_rate": 2.8992966203465434e-05,
      "loss": 1.1953,
      "step": 3100
    },
    {
      "epoch": 1.447429365446966,
      "grad_norm": 1.2269738912582397,
      "learning_rate": 2.8778521187167613e-05,
      "loss": 1.2693,
      "step": 3125
    },
    {
      "epoch": 1.459008800370542,
      "grad_norm": 4.429539680480957,
      "learning_rate": 2.8564076170869792e-05,
      "loss": 1.2717,
      "step": 3150
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 3.294246196746826,
      "learning_rate": 2.834963115457197e-05,
      "loss": 1.1788,
      "step": 3175
    },
    {
      "epoch": 1.4821676702176934,
      "grad_norm": 5.130248546600342,
      "learning_rate": 2.813518613827415e-05,
      "loss": 1.1758,
      "step": 3200
    },
    {
      "epoch": 1.4937471051412692,
      "grad_norm": 2.9172611236572266,
      "learning_rate": 2.792074112197633e-05,
      "loss": 1.1474,
      "step": 3225
    },
    {
      "epoch": 1.5053265400648448,
      "grad_norm": 2.7223055362701416,
      "learning_rate": 2.7706296105678502e-05,
      "loss": 1.2265,
      "step": 3250
    },
    {
      "epoch": 1.5169059749884206,
      "grad_norm": 3.7259788513183594,
      "learning_rate": 2.749185108938068e-05,
      "loss": 1.2508,
      "step": 3275
    },
    {
      "epoch": 1.5284854099119962,
      "grad_norm": 1.7198467254638672,
      "learning_rate": 2.727740607308286e-05,
      "loss": 1.1782,
      "step": 3300
    },
    {
      "epoch": 1.540064844835572,
      "grad_norm": 1.9382416009902954,
      "learning_rate": 2.706296105678504e-05,
      "loss": 1.2074,
      "step": 3325
    },
    {
      "epoch": 1.5516442797591479,
      "grad_norm": 2.2858059406280518,
      "learning_rate": 2.6848516040487222e-05,
      "loss": 1.1473,
      "step": 3350
    },
    {
      "epoch": 1.5632237146827235,
      "grad_norm": 1.461945652961731,
      "learning_rate": 2.66340710241894e-05,
      "loss": 1.1901,
      "step": 3375
    },
    {
      "epoch": 1.574803149606299,
      "grad_norm": 5.549622058868408,
      "learning_rate": 2.641962600789158e-05,
      "loss": 1.1618,
      "step": 3400
    },
    {
      "epoch": 1.586382584529875,
      "grad_norm": 2.907963991165161,
      "learning_rate": 2.620518099159376e-05,
      "loss": 1.1767,
      "step": 3425
    },
    {
      "epoch": 1.5979620194534507,
      "grad_norm": 1.6633723974227905,
      "learning_rate": 2.599073597529594e-05,
      "loss": 1.1997,
      "step": 3450
    },
    {
      "epoch": 1.6095414543770263,
      "grad_norm": 5.5080742835998535,
      "learning_rate": 2.577629095899811e-05,
      "loss": 1.1881,
      "step": 3475
    },
    {
      "epoch": 1.6211208893006022,
      "grad_norm": 2.5473108291625977,
      "learning_rate": 2.556184594270029e-05,
      "loss": 1.15,
      "step": 3500
    },
    {
      "epoch": 1.632700324224178,
      "grad_norm": 2.0483057498931885,
      "learning_rate": 2.534740092640247e-05,
      "loss": 1.2271,
      "step": 3525
    },
    {
      "epoch": 1.6442797591477536,
      "grad_norm": 3.6027846336364746,
      "learning_rate": 2.513295591010465e-05,
      "loss": 1.2121,
      "step": 3550
    },
    {
      "epoch": 1.6558591940713292,
      "grad_norm": 3.154784917831421,
      "learning_rate": 2.4918510893806828e-05,
      "loss": 1.2706,
      "step": 3575
    },
    {
      "epoch": 1.667438628994905,
      "grad_norm": 5.780117511749268,
      "learning_rate": 2.4704065877509007e-05,
      "loss": 1.1916,
      "step": 3600
    },
    {
      "epoch": 1.6790180639184809,
      "grad_norm": 4.522841930389404,
      "learning_rate": 2.448962086121119e-05,
      "loss": 1.1344,
      "step": 3625
    },
    {
      "epoch": 1.6905974988420565,
      "grad_norm": 2.302856922149658,
      "learning_rate": 2.4275175844913365e-05,
      "loss": 1.1316,
      "step": 3650
    },
    {
      "epoch": 1.7021769337656323,
      "grad_norm": 3.6142489910125732,
      "learning_rate": 2.4060730828615544e-05,
      "loss": 1.1464,
      "step": 3675
    },
    {
      "epoch": 1.713756368689208,
      "grad_norm": 3.417003870010376,
      "learning_rate": 2.3846285812317723e-05,
      "loss": 1.2767,
      "step": 3700
    },
    {
      "epoch": 1.7253358036127837,
      "grad_norm": 1.8820807933807373,
      "learning_rate": 2.3631840796019903e-05,
      "loss": 1.1759,
      "step": 3725
    },
    {
      "epoch": 1.7369152385363593,
      "grad_norm": 2.0070981979370117,
      "learning_rate": 2.341739577972208e-05,
      "loss": 1.2357,
      "step": 3750
    },
    {
      "epoch": 1.7484946734599351,
      "grad_norm": 1.9160246849060059,
      "learning_rate": 2.3202950763424257e-05,
      "loss": 1.148,
      "step": 3775
    },
    {
      "epoch": 1.760074108383511,
      "grad_norm": 2.4420526027679443,
      "learning_rate": 2.2988505747126437e-05,
      "loss": 1.239,
      "step": 3800
    },
    {
      "epoch": 1.7716535433070866,
      "grad_norm": 1.5695481300354004,
      "learning_rate": 2.2774060730828616e-05,
      "loss": 1.1936,
      "step": 3825
    },
    {
      "epoch": 1.7832329782306622,
      "grad_norm": 2.090928077697754,
      "learning_rate": 2.2559615714530795e-05,
      "loss": 1.176,
      "step": 3850
    },
    {
      "epoch": 1.7948124131542382,
      "grad_norm": 2.7507429122924805,
      "learning_rate": 2.2345170698232974e-05,
      "loss": 1.1847,
      "step": 3875
    },
    {
      "epoch": 1.8063918480778138,
      "grad_norm": 2.7657129764556885,
      "learning_rate": 2.2130725681935153e-05,
      "loss": 1.1522,
      "step": 3900
    },
    {
      "epoch": 1.8179712830013894,
      "grad_norm": 4.012863636016846,
      "learning_rate": 2.1916280665637332e-05,
      "loss": 1.1398,
      "step": 3925
    },
    {
      "epoch": 1.8295507179249653,
      "grad_norm": 2.7316641807556152,
      "learning_rate": 2.170183564933951e-05,
      "loss": 1.1766,
      "step": 3950
    },
    {
      "epoch": 1.841130152848541,
      "grad_norm": 3.0468456745147705,
      "learning_rate": 2.148739063304169e-05,
      "loss": 1.2055,
      "step": 3975
    },
    {
      "epoch": 1.8527095877721167,
      "grad_norm": 2.0280911922454834,
      "learning_rate": 2.1272945616743866e-05,
      "loss": 1.184,
      "step": 4000
    },
    {
      "epoch": 1.8642890226956923,
      "grad_norm": 2.5638182163238525,
      "learning_rate": 2.1058500600446046e-05,
      "loss": 1.2357,
      "step": 4025
    },
    {
      "epoch": 1.8758684576192681,
      "grad_norm": 2.283189535140991,
      "learning_rate": 2.0844055584148225e-05,
      "loss": 1.1874,
      "step": 4050
    },
    {
      "epoch": 1.887447892542844,
      "grad_norm": 1.3770339488983154,
      "learning_rate": 2.0629610567850404e-05,
      "loss": 1.2674,
      "step": 4075
    },
    {
      "epoch": 1.8990273274664196,
      "grad_norm": 1.9555165767669678,
      "learning_rate": 2.0415165551552583e-05,
      "loss": 1.1922,
      "step": 4100
    },
    {
      "epoch": 1.9106067623899954,
      "grad_norm": 3.256969928741455,
      "learning_rate": 2.0200720535254762e-05,
      "loss": 1.1587,
      "step": 4125
    },
    {
      "epoch": 1.9221861973135712,
      "grad_norm": 1.853826642036438,
      "learning_rate": 1.998627551895694e-05,
      "loss": 1.1769,
      "step": 4150
    },
    {
      "epoch": 1.9337656322371468,
      "grad_norm": 2.319624662399292,
      "learning_rate": 1.977183050265912e-05,
      "loss": 1.1328,
      "step": 4175
    },
    {
      "epoch": 1.9453450671607224,
      "grad_norm": 2.720109701156616,
      "learning_rate": 1.95573854863613e-05,
      "loss": 1.1331,
      "step": 4200
    },
    {
      "epoch": 1.9569245020842982,
      "grad_norm": 4.079843044281006,
      "learning_rate": 1.9342940470063475e-05,
      "loss": 1.1479,
      "step": 4225
    },
    {
      "epoch": 1.968503937007874,
      "grad_norm": 3.2058353424072266,
      "learning_rate": 1.9128495453765654e-05,
      "loss": 1.1891,
      "step": 4250
    },
    {
      "epoch": 1.9800833719314497,
      "grad_norm": 2.1098670959472656,
      "learning_rate": 1.8914050437467834e-05,
      "loss": 1.1548,
      "step": 4275
    },
    {
      "epoch": 1.9916628068550255,
      "grad_norm": 2.4204399585723877,
      "learning_rate": 1.8699605421170013e-05,
      "loss": 1.2393,
      "step": 4300
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.39710843373493976,
      "eval_f1_macro": 0.14211797171438428,
      "eval_f1_micro": 0.39710843373493976,
      "eval_f1_weighted": 0.22574498061234247,
      "eval_loss": 1.2068006992340088,
      "eval_precision_macro": 0.09927710843373494,
      "eval_precision_micro": 0.39710843373493976,
      "eval_precision_weighted": 0.15769510814341703,
      "eval_recall_macro": 0.25,
      "eval_recall_micro": 0.39710843373493976,
      "eval_recall_weighted": 0.39710843373493976,
      "eval_runtime": 5.002,
      "eval_samples_per_second": 414.833,
      "eval_steps_per_second": 25.99,
      "step": 4318
    },
    {
      "epoch": 2.0032422417786013,
      "grad_norm": 4.668676376342773,
      "learning_rate": 1.8485160404872192e-05,
      "loss": 1.2158,
      "step": 4325
    },
    {
      "epoch": 2.014821676702177,
      "grad_norm": 2.2886784076690674,
      "learning_rate": 1.8270715388574368e-05,
      "loss": 1.1545,
      "step": 4350
    },
    {
      "epoch": 2.0264011116257525,
      "grad_norm": 4.7184038162231445,
      "learning_rate": 1.805627037227655e-05,
      "loss": 1.2628,
      "step": 4375
    },
    {
      "epoch": 2.0379805465493286,
      "grad_norm": 1.9082050323486328,
      "learning_rate": 1.784182535597873e-05,
      "loss": 1.1398,
      "step": 4400
    },
    {
      "epoch": 2.049559981472904,
      "grad_norm": 2.6623945236206055,
      "learning_rate": 1.762738033968091e-05,
      "loss": 1.182,
      "step": 4425
    },
    {
      "epoch": 2.06113941639648,
      "grad_norm": 5.472179412841797,
      "learning_rate": 1.7412935323383088e-05,
      "loss": 1.2342,
      "step": 4450
    },
    {
      "epoch": 2.0727188513200554,
      "grad_norm": 2.2513480186462402,
      "learning_rate": 1.7198490307085263e-05,
      "loss": 1.219,
      "step": 4475
    },
    {
      "epoch": 2.0842982862436314,
      "grad_norm": 2.979966878890991,
      "learning_rate": 1.6984045290787443e-05,
      "loss": 1.2084,
      "step": 4500
    },
    {
      "epoch": 2.095877721167207,
      "grad_norm": 4.656105041503906,
      "learning_rate": 1.6769600274489622e-05,
      "loss": 1.2289,
      "step": 4525
    },
    {
      "epoch": 2.1074571560907827,
      "grad_norm": 8.104940414428711,
      "learning_rate": 1.65551552581918e-05,
      "loss": 1.2331,
      "step": 4550
    },
    {
      "epoch": 2.1190365910143587,
      "grad_norm": 6.20884370803833,
      "learning_rate": 1.6340710241893977e-05,
      "loss": 1.2045,
      "step": 4575
    },
    {
      "epoch": 2.1306160259379343,
      "grad_norm": 3.645780086517334,
      "learning_rate": 1.6126265225596156e-05,
      "loss": 1.1754,
      "step": 4600
    },
    {
      "epoch": 2.14219546086151,
      "grad_norm": 2.960564136505127,
      "learning_rate": 1.5911820209298335e-05,
      "loss": 1.195,
      "step": 4625
    },
    {
      "epoch": 2.1537748957850855,
      "grad_norm": 2.6137890815734863,
      "learning_rate": 1.5697375193000517e-05,
      "loss": 1.099,
      "step": 4650
    },
    {
      "epoch": 2.1653543307086616,
      "grad_norm": 4.492011547088623,
      "learning_rate": 1.5482930176702697e-05,
      "loss": 1.2088,
      "step": 4675
    },
    {
      "epoch": 2.176933765632237,
      "grad_norm": 3.653522253036499,
      "learning_rate": 1.5268485160404872e-05,
      "loss": 1.2309,
      "step": 4700
    },
    {
      "epoch": 2.1885132005558128,
      "grad_norm": 2.6041195392608643,
      "learning_rate": 1.5054040144107052e-05,
      "loss": 1.2099,
      "step": 4725
    },
    {
      "epoch": 2.200092635479389,
      "grad_norm": 5.426154613494873,
      "learning_rate": 1.483959512780923e-05,
      "loss": 1.203,
      "step": 4750
    },
    {
      "epoch": 2.2116720704029644,
      "grad_norm": 2.009709358215332,
      "learning_rate": 1.462515011151141e-05,
      "loss": 1.1193,
      "step": 4775
    },
    {
      "epoch": 2.22325150532654,
      "grad_norm": 2.586091995239258,
      "learning_rate": 1.4410705095213589e-05,
      "loss": 1.1109,
      "step": 4800
    },
    {
      "epoch": 2.2348309402501156,
      "grad_norm": 1.9446204900741577,
      "learning_rate": 1.4196260078915766e-05,
      "loss": 1.2251,
      "step": 4825
    },
    {
      "epoch": 2.2464103751736917,
      "grad_norm": 2.2268826961517334,
      "learning_rate": 1.3981815062617946e-05,
      "loss": 1.1447,
      "step": 4850
    },
    {
      "epoch": 2.2579898100972673,
      "grad_norm": 4.135994911193848,
      "learning_rate": 1.3767370046320125e-05,
      "loss": 1.1326,
      "step": 4875
    },
    {
      "epoch": 2.269569245020843,
      "grad_norm": 1.3713667392730713,
      "learning_rate": 1.3552925030022304e-05,
      "loss": 1.2048,
      "step": 4900
    },
    {
      "epoch": 2.281148679944419,
      "grad_norm": 4.801929473876953,
      "learning_rate": 1.333848001372448e-05,
      "loss": 1.1726,
      "step": 4925
    },
    {
      "epoch": 2.2927281148679945,
      "grad_norm": 1.5106154680252075,
      "learning_rate": 1.312403499742666e-05,
      "loss": 1.2191,
      "step": 4950
    },
    {
      "epoch": 2.30430754979157,
      "grad_norm": 1.9938125610351562,
      "learning_rate": 1.290958998112884e-05,
      "loss": 1.181,
      "step": 4975
    },
    {
      "epoch": 2.3158869847151458,
      "grad_norm": 5.004785060882568,
      "learning_rate": 1.2695144964831019e-05,
      "loss": 1.1746,
      "step": 5000
    },
    {
      "epoch": 2.327466419638722,
      "grad_norm": 2.9320216178894043,
      "learning_rate": 1.2480699948533196e-05,
      "loss": 1.2399,
      "step": 5025
    },
    {
      "epoch": 2.3390458545622974,
      "grad_norm": 4.154562473297119,
      "learning_rate": 1.2266254932235375e-05,
      "loss": 1.2275,
      "step": 5050
    },
    {
      "epoch": 2.350625289485873,
      "grad_norm": 2.5340206623077393,
      "learning_rate": 1.2051809915937553e-05,
      "loss": 1.1873,
      "step": 5075
    },
    {
      "epoch": 2.362204724409449,
      "grad_norm": 2.7467281818389893,
      "learning_rate": 1.1837364899639734e-05,
      "loss": 1.2125,
      "step": 5100
    },
    {
      "epoch": 2.3737841593330247,
      "grad_norm": 2.1378886699676514,
      "learning_rate": 1.1622919883341913e-05,
      "loss": 1.1484,
      "step": 5125
    },
    {
      "epoch": 2.3853635942566003,
      "grad_norm": 1.7250367403030396,
      "learning_rate": 1.140847486704409e-05,
      "loss": 1.1293,
      "step": 5150
    },
    {
      "epoch": 2.396943029180176,
      "grad_norm": 4.303859233856201,
      "learning_rate": 1.119402985074627e-05,
      "loss": 1.2065,
      "step": 5175
    },
    {
      "epoch": 2.408522464103752,
      "grad_norm": 4.186789035797119,
      "learning_rate": 1.0979584834448447e-05,
      "loss": 1.1573,
      "step": 5200
    },
    {
      "epoch": 2.4201018990273275,
      "grad_norm": 2.763376474380493,
      "learning_rate": 1.0765139818150626e-05,
      "loss": 1.2093,
      "step": 5225
    },
    {
      "epoch": 2.431681333950903,
      "grad_norm": 4.167290210723877,
      "learning_rate": 1.0550694801852805e-05,
      "loss": 1.2073,
      "step": 5250
    },
    {
      "epoch": 2.4432607688744787,
      "grad_norm": 4.77427864074707,
      "learning_rate": 1.0336249785554984e-05,
      "loss": 1.2074,
      "step": 5275
    },
    {
      "epoch": 2.454840203798055,
      "grad_norm": 2.2546989917755127,
      "learning_rate": 1.0121804769257163e-05,
      "loss": 1.2089,
      "step": 5300
    },
    {
      "epoch": 2.4664196387216304,
      "grad_norm": 2.603929281234741,
      "learning_rate": 9.907359752959341e-06,
      "loss": 1.1774,
      "step": 5325
    },
    {
      "epoch": 2.477999073645206,
      "grad_norm": 2.2647411823272705,
      "learning_rate": 9.69291473666152e-06,
      "loss": 1.1509,
      "step": 5350
    },
    {
      "epoch": 2.4895785085687816,
      "grad_norm": 2.784689426422119,
      "learning_rate": 9.4784697203637e-06,
      "loss": 1.1545,
      "step": 5375
    },
    {
      "epoch": 2.5011579434923576,
      "grad_norm": 3.1189873218536377,
      "learning_rate": 9.264024704065878e-06,
      "loss": 1.1353,
      "step": 5400
    },
    {
      "epoch": 2.5127373784159333,
      "grad_norm": 3.5311825275421143,
      "learning_rate": 9.049579687768056e-06,
      "loss": 1.2599,
      "step": 5425
    },
    {
      "epoch": 2.5243168133395093,
      "grad_norm": 1.4731173515319824,
      "learning_rate": 8.835134671470235e-06,
      "loss": 1.127,
      "step": 5450
    },
    {
      "epoch": 2.535896248263085,
      "grad_norm": 2.797048807144165,
      "learning_rate": 8.620689655172414e-06,
      "loss": 1.0877,
      "step": 5475
    },
    {
      "epoch": 2.5474756831866605,
      "grad_norm": 3.5394978523254395,
      "learning_rate": 8.406244638874593e-06,
      "loss": 1.1834,
      "step": 5500
    },
    {
      "epoch": 2.559055118110236,
      "grad_norm": 4.206399440765381,
      "learning_rate": 8.191799622576772e-06,
      "loss": 1.1239,
      "step": 5525
    },
    {
      "epoch": 2.5706345530338117,
      "grad_norm": 2.8601016998291016,
      "learning_rate": 7.97735460627895e-06,
      "loss": 1.217,
      "step": 5550
    },
    {
      "epoch": 2.5822139879573878,
      "grad_norm": 3.3993771076202393,
      "learning_rate": 7.762909589981129e-06,
      "loss": 1.1876,
      "step": 5575
    },
    {
      "epoch": 2.5937934228809634,
      "grad_norm": 2.0492095947265625,
      "learning_rate": 7.548464573683307e-06,
      "loss": 1.1923,
      "step": 5600
    },
    {
      "epoch": 2.605372857804539,
      "grad_norm": 3.045842170715332,
      "learning_rate": 7.3340195573854865e-06,
      "loss": 1.2492,
      "step": 5625
    },
    {
      "epoch": 2.616952292728115,
      "grad_norm": 1.7433972358703613,
      "learning_rate": 7.119574541087666e-06,
      "loss": 1.2652,
      "step": 5650
    },
    {
      "epoch": 2.6285317276516906,
      "grad_norm": 2.3767240047454834,
      "learning_rate": 6.905129524789844e-06,
      "loss": 1.2284,
      "step": 5675
    },
    {
      "epoch": 2.6401111625752662,
      "grad_norm": 4.228554725646973,
      "learning_rate": 6.690684508492023e-06,
      "loss": 1.1705,
      "step": 5700
    },
    {
      "epoch": 2.651690597498842,
      "grad_norm": 4.027316570281982,
      "learning_rate": 6.476239492194201e-06,
      "loss": 1.1751,
      "step": 5725
    },
    {
      "epoch": 2.663270032422418,
      "grad_norm": 2.5308732986450195,
      "learning_rate": 6.2617944758963805e-06,
      "loss": 1.2038,
      "step": 5750
    },
    {
      "epoch": 2.6748494673459935,
      "grad_norm": 2.849998712539673,
      "learning_rate": 6.04734945959856e-06,
      "loss": 1.13,
      "step": 5775
    },
    {
      "epoch": 2.686428902269569,
      "grad_norm": 1.7784459590911865,
      "learning_rate": 5.832904443300738e-06,
      "loss": 1.2206,
      "step": 5800
    },
    {
      "epoch": 2.698008337193145,
      "grad_norm": 5.856213569641113,
      "learning_rate": 5.618459427002916e-06,
      "loss": 1.1501,
      "step": 5825
    },
    {
      "epoch": 2.7095877721167207,
      "grad_norm": 2.401578664779663,
      "learning_rate": 5.4040144107050954e-06,
      "loss": 1.2501,
      "step": 5850
    },
    {
      "epoch": 2.7211672070402964,
      "grad_norm": 2.7738897800445557,
      "learning_rate": 5.189569394407274e-06,
      "loss": 1.1535,
      "step": 5875
    },
    {
      "epoch": 2.732746641963872,
      "grad_norm": 7.125967979431152,
      "learning_rate": 4.975124378109453e-06,
      "loss": 1.2025,
      "step": 5900
    },
    {
      "epoch": 2.744326076887448,
      "grad_norm": 1.4013216495513916,
      "learning_rate": 4.760679361811632e-06,
      "loss": 1.2028,
      "step": 5925
    },
    {
      "epoch": 2.7559055118110236,
      "grad_norm": 6.033567905426025,
      "learning_rate": 4.54623434551381e-06,
      "loss": 1.1888,
      "step": 5950
    },
    {
      "epoch": 2.767484946734599,
      "grad_norm": 2.6083405017852783,
      "learning_rate": 4.3317893292159895e-06,
      "loss": 1.1546,
      "step": 5975
    },
    {
      "epoch": 2.7790643816581753,
      "grad_norm": 3.179180860519409,
      "learning_rate": 4.117344312918168e-06,
      "loss": 1.2136,
      "step": 6000
    },
    {
      "epoch": 2.790643816581751,
      "grad_norm": 1.8782720565795898,
      "learning_rate": 3.902899296620346e-06,
      "loss": 1.116,
      "step": 6025
    },
    {
      "epoch": 2.8022232515053265,
      "grad_norm": 1.5270411968231201,
      "learning_rate": 3.6884542803225257e-06,
      "loss": 1.1756,
      "step": 6050
    },
    {
      "epoch": 2.813802686428902,
      "grad_norm": 2.8853325843811035,
      "learning_rate": 3.4740092640247044e-06,
      "loss": 1.1418,
      "step": 6075
    },
    {
      "epoch": 2.825382121352478,
      "grad_norm": 4.495181560516357,
      "learning_rate": 3.259564247726883e-06,
      "loss": 1.2573,
      "step": 6100
    },
    {
      "epoch": 2.8369615562760537,
      "grad_norm": 3.329115390777588,
      "learning_rate": 3.045119231429062e-06,
      "loss": 1.1984,
      "step": 6125
    },
    {
      "epoch": 2.8485409911996293,
      "grad_norm": 2.650596857070923,
      "learning_rate": 2.8306742151312406e-06,
      "loss": 1.1734,
      "step": 6150
    },
    {
      "epoch": 2.8601204261232054,
      "grad_norm": 2.172297239303589,
      "learning_rate": 2.616229198833419e-06,
      "loss": 1.1968,
      "step": 6175
    },
    {
      "epoch": 2.871699861046781,
      "grad_norm": 2.388245105743408,
      "learning_rate": 2.401784182535598e-06,
      "loss": 1.1787,
      "step": 6200
    },
    {
      "epoch": 2.8832792959703566,
      "grad_norm": 2.751389265060425,
      "learning_rate": 2.1873391662377767e-06,
      "loss": 1.1884,
      "step": 6225
    },
    {
      "epoch": 2.894858730893932,
      "grad_norm": 3.0782809257507324,
      "learning_rate": 1.9728941499399555e-06,
      "loss": 1.1554,
      "step": 6250
    },
    {
      "epoch": 2.9064381658175082,
      "grad_norm": 2.73215913772583,
      "learning_rate": 1.7584491336421344e-06,
      "loss": 1.2045,
      "step": 6275
    },
    {
      "epoch": 2.918017600741084,
      "grad_norm": 2.4149420261383057,
      "learning_rate": 1.544004117344313e-06,
      "loss": 1.1549,
      "step": 6300
    },
    {
      "epoch": 2.9295970356646595,
      "grad_norm": 2.5072360038757324,
      "learning_rate": 1.3295591010464916e-06,
      "loss": 1.1785,
      "step": 6325
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 3.0238680839538574,
      "learning_rate": 1.1151140847486706e-06,
      "loss": 1.1882,
      "step": 6350
    },
    {
      "epoch": 2.952755905511811,
      "grad_norm": 1.7855486869812012,
      "learning_rate": 9.006690684508493e-07,
      "loss": 1.1809,
      "step": 6375
    },
    {
      "epoch": 2.9643353404353867,
      "grad_norm": 5.080151557922363,
      "learning_rate": 6.862240521530279e-07,
      "loss": 1.2288,
      "step": 6400
    },
    {
      "epoch": 2.9759147753589623,
      "grad_norm": 2.6682169437408447,
      "learning_rate": 4.717790358552067e-07,
      "loss": 1.2158,
      "step": 6425
    },
    {
      "epoch": 2.9874942102825384,
      "grad_norm": 1.6711657047271729,
      "learning_rate": 2.573340195573855e-07,
      "loss": 1.1705,
      "step": 6450
    },
    {
      "epoch": 2.999073645206114,
      "grad_norm": 3.766089916229248,
      "learning_rate": 4.2889003259564246e-08,
      "loss": 1.1834,
      "step": 6475
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.39710843373493976,
      "eval_f1_macro": 0.14211797171438428,
      "eval_f1_micro": 0.39710843373493976,
      "eval_f1_weighted": 0.22574498061234247,
      "eval_loss": 1.2045246362686157,
      "eval_precision_macro": 0.09927710843373494,
      "eval_precision_micro": 0.39710843373493976,
      "eval_precision_weighted": 0.15769510814341703,
      "eval_recall_macro": 0.25,
      "eval_recall_micro": 0.39710843373493976,
      "eval_recall_weighted": 0.39710843373493976,
      "eval_runtime": 4.9488,
      "eval_samples_per_second": 419.296,
      "eval_steps_per_second": 26.269,
      "step": 6477
    }
  ],
  "logging_steps": 25,
  "max_steps": 6477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3407612468023296.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}