{
"best_metric": 0.22053596377372742,
"best_model_checkpoint": "autotrain-beit-base-patch16-224/checkpoint-14620",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 14620,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01709986320109439,
"grad_norm": 46.709171295166016,
"learning_rate": 7.523939808481532e-07,
"loss": 2.7484,
"step": 25
},
{
"epoch": 0.03419972640218878,
"grad_norm": 38.754066467285156,
"learning_rate": 1.6073871409028727e-06,
"loss": 2.4985,
"step": 50
},
{
"epoch": 0.05129958960328317,
"grad_norm": 43.13520431518555,
"learning_rate": 2.4623803009575924e-06,
"loss": 2.1311,
"step": 75
},
{
"epoch": 0.06839945280437756,
"grad_norm": 51.77237319946289,
"learning_rate": 3.3173734610123124e-06,
"loss": 1.916,
"step": 100
},
{
"epoch": 0.08549931600547196,
"grad_norm": 52.562774658203125,
"learning_rate": 4.138166894664843e-06,
"loss": 1.5007,
"step": 125
},
{
"epoch": 0.10259917920656635,
"grad_norm": 48.85651779174805,
"learning_rate": 4.993160054719562e-06,
"loss": 1.5403,
"step": 150
},
{
"epoch": 0.11969904240766074,
"grad_norm": 34.78243637084961,
"learning_rate": 5.848153214774282e-06,
"loss": 1.3732,
"step": 175
},
{
"epoch": 0.13679890560875513,
"grad_norm": 36.370094299316406,
"learning_rate": 6.7031463748290014e-06,
"loss": 1.32,
"step": 200
},
{
"epoch": 0.1538987688098495,
"grad_norm": 36.674888610839844,
"learning_rate": 7.558139534883721e-06,
"loss": 1.2517,
"step": 225
},
{
"epoch": 0.17099863201094392,
"grad_norm": 39.88427734375,
"learning_rate": 8.41313269493844e-06,
"loss": 1.1245,
"step": 250
},
{
"epoch": 0.1880984952120383,
"grad_norm": 37.10840606689453,
"learning_rate": 9.26812585499316e-06,
"loss": 1.0716,
"step": 275
},
{
"epoch": 0.2051983584131327,
"grad_norm": 40.91892623901367,
"learning_rate": 1.0123119015047879e-05,
"loss": 1.2636,
"step": 300
},
{
"epoch": 0.22229822161422708,
"grad_norm": 25.208866119384766,
"learning_rate": 1.09781121751026e-05,
"loss": 1.0415,
"step": 325
},
{
"epoch": 0.2393980848153215,
"grad_norm": 32.90418243408203,
"learning_rate": 1.183310533515732e-05,
"loss": 0.9204,
"step": 350
},
{
"epoch": 0.25649794801641584,
"grad_norm": 22.914512634277344,
"learning_rate": 1.2688098495212038e-05,
"loss": 0.8906,
"step": 375
},
{
"epoch": 0.27359781121751026,
"grad_norm": 26.361547470092773,
"learning_rate": 1.354309165526676e-05,
"loss": 0.7451,
"step": 400
},
{
"epoch": 0.29069767441860467,
"grad_norm": 6.595183849334717,
"learning_rate": 1.4398084815321477e-05,
"loss": 0.8366,
"step": 425
},
{
"epoch": 0.307797537619699,
"grad_norm": 21.60556983947754,
"learning_rate": 1.5253077975376198e-05,
"loss": 0.8602,
"step": 450
},
{
"epoch": 0.32489740082079344,
"grad_norm": 19.94957160949707,
"learning_rate": 1.6108071135430915e-05,
"loss": 0.8112,
"step": 475
},
{
"epoch": 0.34199726402188785,
"grad_norm": 18.41588020324707,
"learning_rate": 1.6963064295485636e-05,
"loss": 0.7563,
"step": 500
},
{
"epoch": 0.3590971272229822,
"grad_norm": 26.010887145996094,
"learning_rate": 1.7818057455540357e-05,
"loss": 0.745,
"step": 525
},
{
"epoch": 0.3761969904240766,
"grad_norm": 29.746938705444336,
"learning_rate": 1.8673050615595075e-05,
"loss": 0.9174,
"step": 550
},
{
"epoch": 0.393296853625171,
"grad_norm": 9.71439266204834,
"learning_rate": 1.9528043775649796e-05,
"loss": 0.7998,
"step": 575
},
{
"epoch": 0.4103967168262654,
"grad_norm": 25.851831436157227,
"learning_rate": 2.0383036935704516e-05,
"loss": 0.8316,
"step": 600
},
{
"epoch": 0.4274965800273598,
"grad_norm": 6.615222454071045,
"learning_rate": 2.1238030095759234e-05,
"loss": 0.7572,
"step": 625
},
{
"epoch": 0.44459644322845415,
"grad_norm": 21.029979705810547,
"learning_rate": 2.2093023255813955e-05,
"loss": 0.8479,
"step": 650
},
{
"epoch": 0.46169630642954856,
"grad_norm": 34.08558654785156,
"learning_rate": 2.2948016415868672e-05,
"loss": 0.9019,
"step": 675
},
{
"epoch": 0.478796169630643,
"grad_norm": 21.173648834228516,
"learning_rate": 2.3803009575923393e-05,
"loss": 0.6358,
"step": 700
},
{
"epoch": 0.49589603283173733,
"grad_norm": 28.03355598449707,
"learning_rate": 2.4658002735978114e-05,
"loss": 0.6318,
"step": 725
},
{
"epoch": 0.5129958960328317,
"grad_norm": 32.51506423950195,
"learning_rate": 2.5512995896032832e-05,
"loss": 0.7156,
"step": 750
},
{
"epoch": 0.5300957592339262,
"grad_norm": 15.213716506958008,
"learning_rate": 2.6367989056087556e-05,
"loss": 0.7459,
"step": 775
},
{
"epoch": 0.5471956224350205,
"grad_norm": 25.61234474182129,
"learning_rate": 2.7222982216142274e-05,
"loss": 0.6615,
"step": 800
},
{
"epoch": 0.5642954856361149,
"grad_norm": 6.342990875244141,
"learning_rate": 2.807797537619699e-05,
"loss": 0.6494,
"step": 825
},
{
"epoch": 0.5813953488372093,
"grad_norm": 8.253657341003418,
"learning_rate": 2.893296853625171e-05,
"loss": 0.7401,
"step": 850
},
{
"epoch": 0.5984952120383037,
"grad_norm": 20.726346969604492,
"learning_rate": 2.9787961696306433e-05,
"loss": 0.8272,
"step": 875
},
{
"epoch": 0.615595075239398,
"grad_norm": 14.493860244750977,
"learning_rate": 3.064295485636115e-05,
"loss": 0.7291,
"step": 900
},
{
"epoch": 0.6326949384404925,
"grad_norm": 26.049036026000977,
"learning_rate": 3.149794801641587e-05,
"loss": 0.7125,
"step": 925
},
{
"epoch": 0.6497948016415869,
"grad_norm": 24.505985260009766,
"learning_rate": 3.235294117647059e-05,
"loss": 0.7522,
"step": 950
},
{
"epoch": 0.6668946648426812,
"grad_norm": 16.45219612121582,
"learning_rate": 3.3207934336525306e-05,
"loss": 0.9847,
"step": 975
},
{
"epoch": 0.6839945280437757,
"grad_norm": 16.54450798034668,
"learning_rate": 3.406292749658003e-05,
"loss": 0.6838,
"step": 1000
},
{
"epoch": 0.70109439124487,
"grad_norm": 20.877397537231445,
"learning_rate": 3.488372093023256e-05,
"loss": 0.8538,
"step": 1025
},
{
"epoch": 0.7181942544459644,
"grad_norm": 16.170602798461914,
"learning_rate": 3.573871409028728e-05,
"loss": 0.5981,
"step": 1050
},
{
"epoch": 0.7352941176470589,
"grad_norm": 4.53754186630249,
"learning_rate": 3.6593707250342e-05,
"loss": 0.7112,
"step": 1075
},
{
"epoch": 0.7523939808481532,
"grad_norm": 104.9443359375,
"learning_rate": 3.741450068399453e-05,
"loss": 0.8659,
"step": 1100
},
{
"epoch": 0.7694938440492476,
"grad_norm": 9.153970718383789,
"learning_rate": 3.826949384404925e-05,
"loss": 0.7048,
"step": 1125
},
{
"epoch": 0.786593707250342,
"grad_norm": 8.457886695861816,
"learning_rate": 3.912448700410397e-05,
"loss": 0.6735,
"step": 1150
},
{
"epoch": 0.8036935704514364,
"grad_norm": 22.64398765563965,
"learning_rate": 3.997948016415869e-05,
"loss": 0.6603,
"step": 1175
},
{
"epoch": 0.8207934336525308,
"grad_norm": 8.883172988891602,
"learning_rate": 4.083447332421341e-05,
"loss": 0.6347,
"step": 1200
},
{
"epoch": 0.8378932968536251,
"grad_norm": 15.982242584228516,
"learning_rate": 4.168946648426813e-05,
"loss": 0.8553,
"step": 1225
},
{
"epoch": 0.8549931600547196,
"grad_norm": 17.371896743774414,
"learning_rate": 4.2544459644322845e-05,
"loss": 0.8338,
"step": 1250
},
{
"epoch": 0.872093023255814,
"grad_norm": 9.03439998626709,
"learning_rate": 4.3399452804377566e-05,
"loss": 0.6968,
"step": 1275
},
{
"epoch": 0.8891928864569083,
"grad_norm": 12.585426330566406,
"learning_rate": 4.425444596443229e-05,
"loss": 0.7398,
"step": 1300
},
{
"epoch": 0.9062927496580028,
"grad_norm": 24.8383731842041,
"learning_rate": 4.510943912448701e-05,
"loss": 0.74,
"step": 1325
},
{
"epoch": 0.9233926128590971,
"grad_norm": 23.374618530273438,
"learning_rate": 4.596443228454172e-05,
"loss": 0.8325,
"step": 1350
},
{
"epoch": 0.9404924760601915,
"grad_norm": 18.9708251953125,
"learning_rate": 4.681942544459644e-05,
"loss": 0.705,
"step": 1375
},
{
"epoch": 0.957592339261286,
"grad_norm": 17.654476165771484,
"learning_rate": 4.7674418604651164e-05,
"loss": 0.6591,
"step": 1400
},
{
"epoch": 0.9746922024623803,
"grad_norm": 15.2923583984375,
"learning_rate": 4.8529411764705885e-05,
"loss": 0.6672,
"step": 1425
},
{
"epoch": 0.9917920656634747,
"grad_norm": 20.557374954223633,
"learning_rate": 4.93844049247606e-05,
"loss": 0.7082,
"step": 1450
},
{
"epoch": 1.0,
"eval_accuracy": 0.7678388528508023,
"eval_f1_macro": 0.36714416708471204,
"eval_f1_micro": 0.7678388528508023,
"eval_f1_weighted": 0.7507762998391535,
"eval_loss": 0.7348673939704895,
"eval_precision_macro": 0.4581539504891226,
"eval_precision_micro": 0.7678388528508023,
"eval_precision_weighted": 0.7986681621261933,
"eval_recall_macro": 0.3826291513511869,
"eval_recall_micro": 0.7678388528508023,
"eval_recall_weighted": 0.7678388528508023,
"eval_runtime": 19.2382,
"eval_samples_per_second": 152.249,
"eval_steps_per_second": 9.564,
"step": 1462
},
{
"epoch": 1.008891928864569,
"grad_norm": 8.590130805969238,
"learning_rate": 4.99734002127983e-05,
"loss": 0.6613,
"step": 1475
},
{
"epoch": 1.0259917920656634,
"grad_norm": 7.963166236877441,
"learning_rate": 4.987840097279222e-05,
"loss": 0.6899,
"step": 1500
},
{
"epoch": 1.043091655266758,
"grad_norm": 21.036991119384766,
"learning_rate": 4.978340173278614e-05,
"loss": 0.7883,
"step": 1525
},
{
"epoch": 1.0601915184678523,
"grad_norm": 18.586833953857422,
"learning_rate": 4.968840249278006e-05,
"loss": 0.5927,
"step": 1550
},
{
"epoch": 1.0772913816689467,
"grad_norm": 10.346550941467285,
"learning_rate": 4.959340325277398e-05,
"loss": 0.7403,
"step": 1575
},
{
"epoch": 1.094391244870041,
"grad_norm": 15.1462984085083,
"learning_rate": 4.94984040127679e-05,
"loss": 0.67,
"step": 1600
},
{
"epoch": 1.1114911080711354,
"grad_norm": 29.532644271850586,
"learning_rate": 4.940340477276182e-05,
"loss": 0.724,
"step": 1625
},
{
"epoch": 1.1285909712722297,
"grad_norm": 20.413328170776367,
"learning_rate": 4.930840553275574e-05,
"loss": 0.7664,
"step": 1650
},
{
"epoch": 1.1456908344733243,
"grad_norm": 6.083666801452637,
"learning_rate": 4.921340629274966e-05,
"loss": 0.6986,
"step": 1675
},
{
"epoch": 1.1627906976744187,
"grad_norm": 10.168739318847656,
"learning_rate": 4.911840705274358e-05,
"loss": 0.6873,
"step": 1700
},
{
"epoch": 1.179890560875513,
"grad_norm": 16.213897705078125,
"learning_rate": 4.90234078127375e-05,
"loss": 0.6432,
"step": 1725
},
{
"epoch": 1.1969904240766074,
"grad_norm": 14.341080665588379,
"learning_rate": 4.892840857273142e-05,
"loss": 0.6205,
"step": 1750
},
{
"epoch": 1.2140902872777017,
"grad_norm": 8.960565567016602,
"learning_rate": 4.883340933272534e-05,
"loss": 0.7297,
"step": 1775
},
{
"epoch": 1.231190150478796,
"grad_norm": 3.6972837448120117,
"learning_rate": 4.8738410092719264e-05,
"loss": 0.5647,
"step": 1800
},
{
"epoch": 1.2482900136798905,
"grad_norm": 17.91363525390625,
"learning_rate": 4.864341085271318e-05,
"loss": 0.5821,
"step": 1825
},
{
"epoch": 1.265389876880985,
"grad_norm": 16.03091049194336,
"learning_rate": 4.8548411612707104e-05,
"loss": 0.8215,
"step": 1850
},
{
"epoch": 1.2824897400820794,
"grad_norm": 20.258285522460938,
"learning_rate": 4.845341237270102e-05,
"loss": 0.6713,
"step": 1875
},
{
"epoch": 1.2995896032831737,
"grad_norm": 21.53881072998047,
"learning_rate": 4.835841313269494e-05,
"loss": 0.6425,
"step": 1900
},
{
"epoch": 1.316689466484268,
"grad_norm": 17.40162467956543,
"learning_rate": 4.826341389268886e-05,
"loss": 0.7661,
"step": 1925
},
{
"epoch": 1.3337893296853625,
"grad_norm": 11.091559410095215,
"learning_rate": 4.816841465268278e-05,
"loss": 0.6502,
"step": 1950
},
{
"epoch": 1.350889192886457,
"grad_norm": 16.19685173034668,
"learning_rate": 4.80734154126767e-05,
"loss": 0.6064,
"step": 1975
},
{
"epoch": 1.3679890560875512,
"grad_norm": 18.266754150390625,
"learning_rate": 4.797841617267062e-05,
"loss": 0.7654,
"step": 2000
},
{
"epoch": 1.3850889192886457,
"grad_norm": 2.7492332458496094,
"learning_rate": 4.788341693266454e-05,
"loss": 0.6592,
"step": 2025
},
{
"epoch": 1.40218878248974,
"grad_norm": 6.613536357879639,
"learning_rate": 4.778841769265846e-05,
"loss": 0.8603,
"step": 2050
},
{
"epoch": 1.4192886456908345,
"grad_norm": 12.348869323730469,
"learning_rate": 4.769341845265238e-05,
"loss": 0.7457,
"step": 2075
},
{
"epoch": 1.4363885088919288,
"grad_norm": 10.183746337890625,
"learning_rate": 4.75984192126463e-05,
"loss": 0.5621,
"step": 2100
},
{
"epoch": 1.4534883720930232,
"grad_norm": 6.9152045249938965,
"learning_rate": 4.7503419972640224e-05,
"loss": 0.6217,
"step": 2125
},
{
"epoch": 1.4705882352941178,
"grad_norm": 9.911653518676758,
"learning_rate": 4.740842073263414e-05,
"loss": 0.8387,
"step": 2150
},
{
"epoch": 1.487688098495212,
"grad_norm": 19.982093811035156,
"learning_rate": 4.7313421492628064e-05,
"loss": 0.5981,
"step": 2175
},
{
"epoch": 1.5047879616963065,
"grad_norm": 11.546788215637207,
"learning_rate": 4.721842225262198e-05,
"loss": 0.6257,
"step": 2200
},
{
"epoch": 1.5218878248974008,
"grad_norm": 11.54325008392334,
"learning_rate": 4.7123423012615905e-05,
"loss": 0.6088,
"step": 2225
},
{
"epoch": 1.5389876880984952,
"grad_norm": 9.77811050415039,
"learning_rate": 4.703222374221006e-05,
"loss": 0.4845,
"step": 2250
},
{
"epoch": 1.5560875512995898,
"grad_norm": 9.156554222106934,
"learning_rate": 4.6937224502203985e-05,
"loss": 0.701,
"step": 2275
},
{
"epoch": 1.573187414500684,
"grad_norm": 9.874088287353516,
"learning_rate": 4.68422252621979e-05,
"loss": 0.7453,
"step": 2300
},
{
"epoch": 1.5902872777017785,
"grad_norm": 9.201342582702637,
"learning_rate": 4.6747226022191826e-05,
"loss": 0.5633,
"step": 2325
},
{
"epoch": 1.6073871409028728,
"grad_norm": 29.79956817626953,
"learning_rate": 4.665222678218574e-05,
"loss": 0.6438,
"step": 2350
},
{
"epoch": 1.6244870041039672,
"grad_norm": 8.855256080627441,
"learning_rate": 4.6557227542179666e-05,
"loss": 0.5698,
"step": 2375
},
{
"epoch": 1.6415868673050615,
"grad_norm": 8.34626579284668,
"learning_rate": 4.646222830217358e-05,
"loss": 0.7143,
"step": 2400
},
{
"epoch": 1.658686730506156,
"grad_norm": 18.65626335144043,
"learning_rate": 4.636722906216751e-05,
"loss": 0.6445,
"step": 2425
},
{
"epoch": 1.6757865937072505,
"grad_norm": 8.134085655212402,
"learning_rate": 4.6272229822161424e-05,
"loss": 0.4898,
"step": 2450
},
{
"epoch": 1.6928864569083446,
"grad_norm": 15.127388000488281,
"learning_rate": 4.617723058215535e-05,
"loss": 0.6497,
"step": 2475
},
{
"epoch": 1.7099863201094392,
"grad_norm": 17.599140167236328,
"learning_rate": 4.6082231342149264e-05,
"loss": 0.6605,
"step": 2500
},
{
"epoch": 1.7270861833105335,
"grad_norm": 22.064022064208984,
"learning_rate": 4.598723210214319e-05,
"loss": 0.5562,
"step": 2525
},
{
"epoch": 1.744186046511628,
"grad_norm": 8.446691513061523,
"learning_rate": 4.5892232862137105e-05,
"loss": 0.591,
"step": 2550
},
{
"epoch": 1.7612859097127223,
"grad_norm": 15.194252967834473,
"learning_rate": 4.579723362213102e-05,
"loss": 0.635,
"step": 2575
},
{
"epoch": 1.7783857729138166,
"grad_norm": 14.25900936126709,
"learning_rate": 4.5702234382124946e-05,
"loss": 0.6253,
"step": 2600
},
{
"epoch": 1.7954856361149112,
"grad_norm": 9.975160598754883,
"learning_rate": 4.560723514211886e-05,
"loss": 0.4944,
"step": 2625
},
{
"epoch": 1.8125854993160053,
"grad_norm": 12.048364639282227,
"learning_rate": 4.5512235902112786e-05,
"loss": 0.5643,
"step": 2650
},
{
"epoch": 1.8296853625171,
"grad_norm": 16.70762825012207,
"learning_rate": 4.54172366621067e-05,
"loss": 0.6445,
"step": 2675
},
{
"epoch": 1.8467852257181943,
"grad_norm": 15.203225135803223,
"learning_rate": 4.532223742210063e-05,
"loss": 0.4731,
"step": 2700
},
{
"epoch": 1.8638850889192886,
"grad_norm": 6.673511028289795,
"learning_rate": 4.5227238182094544e-05,
"loss": 0.6115,
"step": 2725
},
{
"epoch": 1.8809849521203832,
"grad_norm": 3.219144105911255,
"learning_rate": 4.513223894208847e-05,
"loss": 0.6447,
"step": 2750
},
{
"epoch": 1.8980848153214773,
"grad_norm": 10.122079849243164,
"learning_rate": 4.5037239702082384e-05,
"loss": 0.5048,
"step": 2775
},
{
"epoch": 1.915184678522572,
"grad_norm": 8.148963928222656,
"learning_rate": 4.494224046207631e-05,
"loss": 0.522,
"step": 2800
},
{
"epoch": 1.9322845417236663,
"grad_norm": 17.671016693115234,
"learning_rate": 4.4847241222070225e-05,
"loss": 0.4838,
"step": 2825
},
{
"epoch": 1.9493844049247606,
"grad_norm": 11.81804370880127,
"learning_rate": 4.475224198206415e-05,
"loss": 0.6314,
"step": 2850
},
{
"epoch": 1.966484268125855,
"grad_norm": 9.446462631225586,
"learning_rate": 4.4657242742058065e-05,
"loss": 0.6951,
"step": 2875
},
{
"epoch": 1.9835841313269493,
"grad_norm": 0.3831145167350769,
"learning_rate": 4.456224350205199e-05,
"loss": 0.5709,
"step": 2900
},
{
"epoch": 2.0,
"eval_accuracy": 0.890406282007511,
"eval_f1_macro": 0.6277004140591871,
"eval_f1_micro": 0.890406282007511,
"eval_f1_weighted": 0.8835937168276483,
"eval_loss": 0.3201202154159546,
"eval_precision_macro": 0.7489533576066802,
"eval_precision_micro": 0.890406282007511,
"eval_precision_weighted": 0.8996932956273017,
"eval_recall_macro": 0.6160822936733158,
"eval_recall_micro": 0.890406282007511,
"eval_recall_weighted": 0.890406282007511,
"eval_runtime": 19.0268,
"eval_samples_per_second": 153.941,
"eval_steps_per_second": 9.671,
"step": 2924
},
{
"epoch": 2.000683994528044,
"grad_norm": 8.932915687561035,
"learning_rate": 4.4467244262045906e-05,
"loss": 0.6213,
"step": 2925
},
{
"epoch": 2.017783857729138,
"grad_norm": 53.977447509765625,
"learning_rate": 4.437224502203983e-05,
"loss": 0.4965,
"step": 2950
},
{
"epoch": 2.0348837209302326,
"grad_norm": 5.680665969848633,
"learning_rate": 4.4277245782033746e-05,
"loss": 0.6174,
"step": 2975
},
{
"epoch": 2.0519835841313268,
"grad_norm": 14.235191345214844,
"learning_rate": 4.418224654202767e-05,
"loss": 0.5522,
"step": 3000
},
{
"epoch": 2.0690834473324213,
"grad_norm": 3.3336021900177,
"learning_rate": 4.408724730202159e-05,
"loss": 0.6311,
"step": 3025
},
{
"epoch": 2.086183310533516,
"grad_norm": 17.21300506591797,
"learning_rate": 4.3992248062015504e-05,
"loss": 0.6788,
"step": 3050
},
{
"epoch": 2.10328317373461,
"grad_norm": 8.625337600708008,
"learning_rate": 4.389724882200942e-05,
"loss": 0.4847,
"step": 3075
},
{
"epoch": 2.1203830369357046,
"grad_norm": 13.06096076965332,
"learning_rate": 4.3802249582003344e-05,
"loss": 0.5771,
"step": 3100
},
{
"epoch": 2.1374829001367988,
"grad_norm": 14.993678092956543,
"learning_rate": 4.370725034199726e-05,
"loss": 0.495,
"step": 3125
},
{
"epoch": 2.1545827633378933,
"grad_norm": 18.640869140625,
"learning_rate": 4.3612251101991185e-05,
"loss": 0.5442,
"step": 3150
},
{
"epoch": 2.1716826265389875,
"grad_norm": 13.761073112487793,
"learning_rate": 4.351725186198511e-05,
"loss": 0.563,
"step": 3175
},
{
"epoch": 2.188782489740082,
"grad_norm": 11.877754211425781,
"learning_rate": 4.3422252621979025e-05,
"loss": 0.8003,
"step": 3200
},
{
"epoch": 2.2058823529411766,
"grad_norm": 8.639129638671875,
"learning_rate": 4.332725338197295e-05,
"loss": 0.6284,
"step": 3225
},
{
"epoch": 2.2229822161422708,
"grad_norm": 6.337215900421143,
"learning_rate": 4.3232254141966866e-05,
"loss": 0.5617,
"step": 3250
},
{
"epoch": 2.2400820793433653,
"grad_norm": 1.9488357305526733,
"learning_rate": 4.313725490196079e-05,
"loss": 0.3964,
"step": 3275
},
{
"epoch": 2.2571819425444595,
"grad_norm": 4.854595184326172,
"learning_rate": 4.3042255661954706e-05,
"loss": 0.6221,
"step": 3300
},
{
"epoch": 2.274281805745554,
"grad_norm": 10.604134559631348,
"learning_rate": 4.294725642194863e-05,
"loss": 0.4416,
"step": 3325
},
{
"epoch": 2.2913816689466486,
"grad_norm": 9.8331937789917,
"learning_rate": 4.285225718194255e-05,
"loss": 0.6644,
"step": 3350
},
{
"epoch": 2.3084815321477428,
"grad_norm": 7.878199100494385,
"learning_rate": 4.275725794193647e-05,
"loss": 0.4562,
"step": 3375
},
{
"epoch": 2.3255813953488373,
"grad_norm": 5.875706195831299,
"learning_rate": 4.266225870193039e-05,
"loss": 0.6038,
"step": 3400
},
{
"epoch": 2.3426812585499315,
"grad_norm": 1.360823631286621,
"learning_rate": 4.256725946192431e-05,
"loss": 0.3918,
"step": 3425
},
{
"epoch": 2.359781121751026,
"grad_norm": 5.344891548156738,
"learning_rate": 4.247226022191823e-05,
"loss": 0.5924,
"step": 3450
},
{
"epoch": 2.37688098495212,
"grad_norm": 19.596725463867188,
"learning_rate": 4.2377260981912145e-05,
"loss": 0.6252,
"step": 3475
},
{
"epoch": 2.3939808481532148,
"grad_norm": 9.855287551879883,
"learning_rate": 4.228226174190606e-05,
"loss": 0.6215,
"step": 3500
},
{
"epoch": 2.4110807113543093,
"grad_norm": 10.44688606262207,
"learning_rate": 4.2187262501899986e-05,
"loss": 0.4234,
"step": 3525
},
{
"epoch": 2.4281805745554035,
"grad_norm": 8.25647258758545,
"learning_rate": 4.20922632618939e-05,
"loss": 0.4522,
"step": 3550
},
{
"epoch": 2.445280437756498,
"grad_norm": 18.42440414428711,
"learning_rate": 4.1997264021887826e-05,
"loss": 0.5475,
"step": 3575
},
{
"epoch": 2.462380300957592,
"grad_norm": 3.88397216796875,
"learning_rate": 4.190226478188174e-05,
"loss": 0.4464,
"step": 3600
},
{
"epoch": 2.4794801641586868,
"grad_norm": 15.069050788879395,
"learning_rate": 4.180726554187567e-05,
"loss": 0.6738,
"step": 3625
},
{
"epoch": 2.496580027359781,
"grad_norm": 5.434013366699219,
"learning_rate": 4.1712266301869584e-05,
"loss": 0.5139,
"step": 3650
},
{
"epoch": 2.5136798905608755,
"grad_norm": 6.18742036819458,
"learning_rate": 4.161726706186351e-05,
"loss": 0.6905,
"step": 3675
},
{
"epoch": 2.53077975376197,
"grad_norm": 4.691986560821533,
"learning_rate": 4.1522267821857424e-05,
"loss": 0.5514,
"step": 3700
},
{
"epoch": 2.547879616963064,
"grad_norm": 11.21522331237793,
"learning_rate": 4.142726858185135e-05,
"loss": 0.5283,
"step": 3725
},
{
"epoch": 2.5649794801641588,
"grad_norm": 18.263111114501953,
"learning_rate": 4.1332269341845265e-05,
"loss": 0.5471,
"step": 3750
},
{
"epoch": 2.582079343365253,
"grad_norm": 2.245192766189575,
"learning_rate": 4.123727010183919e-05,
"loss": 0.4889,
"step": 3775
},
{
"epoch": 2.5991792065663475,
"grad_norm": 8.650074005126953,
"learning_rate": 4.114227086183311e-05,
"loss": 0.5821,
"step": 3800
},
{
"epoch": 2.616279069767442,
"grad_norm": 8.487887382507324,
"learning_rate": 4.104727162182703e-05,
"loss": 0.4633,
"step": 3825
},
{
"epoch": 2.633378932968536,
"grad_norm": 3.491182327270508,
"learning_rate": 4.095227238182095e-05,
"loss": 0.4839,
"step": 3850
},
{
"epoch": 2.650478796169631,
"grad_norm": 15.229668617248535,
"learning_rate": 4.085727314181487e-05,
"loss": 0.4741,
"step": 3875
},
{
"epoch": 2.667578659370725,
"grad_norm": 5.991665363311768,
"learning_rate": 4.0762273901808786e-05,
"loss": 0.6269,
"step": 3900
},
{
"epoch": 2.6846785225718195,
"grad_norm": 3.6225790977478027,
"learning_rate": 4.066727466180271e-05,
"loss": 0.5778,
"step": 3925
},
{
"epoch": 2.701778385772914,
"grad_norm": 7.361936092376709,
"learning_rate": 4.057227542179663e-05,
"loss": 0.5857,
"step": 3950
},
{
"epoch": 2.718878248974008,
"grad_norm": 18.498151779174805,
"learning_rate": 4.0477276181790544e-05,
"loss": 0.599,
"step": 3975
},
{
"epoch": 2.7359781121751023,
"grad_norm": 11.898250579833984,
"learning_rate": 4.038227694178447e-05,
"loss": 0.5114,
"step": 4000
},
{
"epoch": 2.753077975376197,
"grad_norm": 5.535077095031738,
"learning_rate": 4.0287277701778384e-05,
"loss": 0.5272,
"step": 4025
},
{
"epoch": 2.7701778385772915,
"grad_norm": 2.3556160926818848,
"learning_rate": 4.019227846177231e-05,
"loss": 0.5648,
"step": 4050
},
{
"epoch": 2.7872777017783856,
"grad_norm": 11.369132041931152,
"learning_rate": 4.0097279221766225e-05,
"loss": 0.5935,
"step": 4075
},
{
"epoch": 2.80437756497948,
"grad_norm": 5.496129989624023,
"learning_rate": 4.000227998176015e-05,
"loss": 0.699,
"step": 4100
},
{
"epoch": 2.8214774281805743,
"grad_norm": 12.352839469909668,
"learning_rate": 3.9907280741754065e-05,
"loss": 0.5325,
"step": 4125
},
{
"epoch": 2.838577291381669,
"grad_norm": 2.7082407474517822,
"learning_rate": 3.981228150174799e-05,
"loss": 0.5331,
"step": 4150
},
{
"epoch": 2.8556771545827635,
"grad_norm": 12.403038024902344,
"learning_rate": 3.9717282261741906e-05,
"loss": 0.6043,
"step": 4175
},
{
"epoch": 2.8727770177838576,
"grad_norm": 12.153759002685547,
"learning_rate": 3.962228302173583e-05,
"loss": 0.4958,
"step": 4200
},
{
"epoch": 2.889876880984952,
"grad_norm": 6.992998123168945,
"learning_rate": 3.9527283781729746e-05,
"loss": 0.3868,
"step": 4225
},
{
"epoch": 2.9069767441860463,
"grad_norm": 3.785193681716919,
"learning_rate": 3.943228454172367e-05,
"loss": 0.5372,
"step": 4250
},
{
"epoch": 2.924076607387141,
"grad_norm": 22.4363956451416,
"learning_rate": 3.933728530171759e-05,
"loss": 0.5244,
"step": 4275
},
{
"epoch": 2.9411764705882355,
"grad_norm": 3.622431516647339,
"learning_rate": 3.924228606171151e-05,
"loss": 0.4722,
"step": 4300
},
{
"epoch": 2.9582763337893296,
"grad_norm": 1.2941017150878906,
"learning_rate": 3.914728682170543e-05,
"loss": 0.4208,
"step": 4325
},
{
"epoch": 2.975376196990424,
"grad_norm": 10.482751846313477,
"learning_rate": 3.905228758169935e-05,
"loss": 0.6347,
"step": 4350
},
{
"epoch": 2.9924760601915183,
"grad_norm": 4.376351356506348,
"learning_rate": 3.895728834169327e-05,
"loss": 0.6077,
"step": 4375
},
{
"epoch": 3.0,
"eval_accuracy": 0.896551724137931,
"eval_f1_macro": 0.6998434390712878,
"eval_f1_micro": 0.896551724137931,
"eval_f1_weighted": 0.8937364843753902,
"eval_loss": 0.3129188120365143,
"eval_precision_macro": 0.8102800926777759,
"eval_precision_micro": 0.896551724137931,
"eval_precision_weighted": 0.9029461578223588,
"eval_recall_macro": 0.6655916075939068,
"eval_recall_micro": 0.896551724137931,
"eval_recall_weighted": 0.896551724137931,
"eval_runtime": 18.8573,
"eval_samples_per_second": 155.324,
"eval_steps_per_second": 9.757,
"step": 4386
},
{
"epoch": 3.009575923392613,
"grad_norm": 8.401654243469238,
"learning_rate": 3.8862289101687185e-05,
"loss": 0.5739,
"step": 4400
},
{
"epoch": 3.026675786593707,
"grad_norm": 10.48408031463623,
"learning_rate": 3.876728986168111e-05,
"loss": 0.6117,
"step": 4425
},
{
"epoch": 3.0437756497948016,
"grad_norm": 19.265623092651367,
"learning_rate": 3.8672290621675026e-05,
"loss": 0.4945,
"step": 4450
},
{
"epoch": 3.060875512995896,
"grad_norm": 25.774412155151367,
"learning_rate": 3.857729138166895e-05,
"loss": 0.5458,
"step": 4475
},
{
"epoch": 3.0779753761969904,
"grad_norm": 4.172712326049805,
"learning_rate": 3.8482292141662866e-05,
"loss": 0.4408,
"step": 4500
},
{
"epoch": 3.095075239398085,
"grad_norm": 5.7756876945495605,
"learning_rate": 3.838729290165679e-05,
"loss": 0.3037,
"step": 4525
},
{
"epoch": 3.112175102599179,
"grad_norm": 12.178646087646484,
"learning_rate": 3.829229366165071e-05,
"loss": 0.6773,
"step": 4550
},
{
"epoch": 3.1292749658002736,
"grad_norm": 4.9638800621032715,
"learning_rate": 3.819729442164463e-05,
"loss": 0.4036,
"step": 4575
},
{
"epoch": 3.146374829001368,
"grad_norm": 6.199288845062256,
"learning_rate": 3.810229518163855e-05,
"loss": 0.5313,
"step": 4600
},
{
"epoch": 3.1634746922024624,
"grad_norm": 19.781579971313477,
"learning_rate": 3.800729594163247e-05,
"loss": 0.5946,
"step": 4625
},
{
"epoch": 3.180574555403557,
"grad_norm": 0.15058183670043945,
"learning_rate": 3.791229670162639e-05,
"loss": 0.516,
"step": 4650
},
{
"epoch": 3.197674418604651,
"grad_norm": 13.215787887573242,
"learning_rate": 3.781729746162031e-05,
"loss": 0.4023,
"step": 4675
},
{
"epoch": 3.2147742818057456,
"grad_norm": 5.896836757659912,
"learning_rate": 3.772229822161423e-05,
"loss": 0.4748,
"step": 4700
},
{
"epoch": 3.23187414500684,
"grad_norm": 0.36866021156311035,
"learning_rate": 3.762729898160815e-05,
"loss": 0.5704,
"step": 4725
},
{
"epoch": 3.2489740082079344,
"grad_norm": 10.511465072631836,
"learning_rate": 3.753229974160207e-05,
"loss": 0.5316,
"step": 4750
},
{
"epoch": 3.266073871409029,
"grad_norm": 3.424712896347046,
"learning_rate": 3.743730050159599e-05,
"loss": 0.4717,
"step": 4775
},
{
"epoch": 3.283173734610123,
"grad_norm": 14.572440147399902,
"learning_rate": 3.734230126158991e-05,
"loss": 0.6337,
"step": 4800
},
{
"epoch": 3.3002735978112177,
"grad_norm": 10.70576286315918,
"learning_rate": 3.724730202158383e-05,
"loss": 0.6731,
"step": 4825
},
{
"epoch": 3.317373461012312,
"grad_norm": 11.98401165008545,
"learning_rate": 3.715230278157775e-05,
"loss": 0.4016,
"step": 4850
},
{
"epoch": 3.3344733242134064,
"grad_norm": 11.411341667175293,
"learning_rate": 3.705730354157167e-05,
"loss": 0.4779,
"step": 4875
},
{
"epoch": 3.3515731874145005,
"grad_norm": 15.914603233337402,
"learning_rate": 3.6962304301565584e-05,
"loss": 0.5832,
"step": 4900
},
{
"epoch": 3.368673050615595,
"grad_norm": 3.610494613647461,
"learning_rate": 3.686730506155951e-05,
"loss": 0.5463,
"step": 4925
},
{
"epoch": 3.3857729138166897,
"grad_norm": 14.400090217590332,
"learning_rate": 3.6772305821553424e-05,
"loss": 0.5733,
"step": 4950
},
{
"epoch": 3.402872777017784,
"grad_norm": 6.468245506286621,
"learning_rate": 3.667730658154735e-05,
"loss": 0.5193,
"step": 4975
},
{
"epoch": 3.4199726402188784,
"grad_norm": 8.739253044128418,
"learning_rate": 3.658230734154127e-05,
"loss": 0.4821,
"step": 5000
},
{
"epoch": 3.4370725034199725,
"grad_norm": 0.5965850949287415,
"learning_rate": 3.648730810153519e-05,
"loss": 0.3247,
"step": 5025
},
{
"epoch": 3.454172366621067,
"grad_norm": 2.4634127616882324,
"learning_rate": 3.639230886152911e-05,
"loss": 0.5018,
"step": 5050
},
{
"epoch": 3.471272229822161,
"grad_norm": 12.17545223236084,
"learning_rate": 3.629730962152303e-05,
"loss": 0.4185,
"step": 5075
},
{
"epoch": 3.488372093023256,
"grad_norm": 10.63932991027832,
"learning_rate": 3.620231038151695e-05,
"loss": 0.7251,
"step": 5100
},
{
"epoch": 3.5054719562243504,
"grad_norm": 3.384568214416504,
"learning_rate": 3.610731114151087e-05,
"loss": 0.4883,
"step": 5125
},
{
"epoch": 3.5225718194254445,
"grad_norm": 7.895840167999268,
"learning_rate": 3.601231190150479e-05,
"loss": 0.5038,
"step": 5150
},
{
"epoch": 3.539671682626539,
"grad_norm": 7.191064834594727,
"learning_rate": 3.591731266149871e-05,
"loss": 0.467,
"step": 5175
},
{
"epoch": 3.556771545827633,
"grad_norm": 8.865562438964844,
"learning_rate": 3.5822313421492634e-05,
"loss": 0.504,
"step": 5200
},
{
"epoch": 3.573871409028728,
"grad_norm": 5.6215434074401855,
"learning_rate": 3.572731418148655e-05,
"loss": 0.6207,
"step": 5225
},
{
"epoch": 3.5909712722298224,
"grad_norm": 3.1758780479431152,
"learning_rate": 3.5632314941480474e-05,
"loss": 0.5304,
"step": 5250
},
{
"epoch": 3.6080711354309165,
"grad_norm": 6.1815056800842285,
"learning_rate": 3.553731570147439e-05,
"loss": 0.4599,
"step": 5275
},
{
"epoch": 3.625170998632011,
"grad_norm": 29.166934967041016,
"learning_rate": 3.544231646146831e-05,
"loss": 0.5521,
"step": 5300
},
{
"epoch": 3.6422708618331052,
"grad_norm": 10.150755882263184,
"learning_rate": 3.5347317221462225e-05,
"loss": 0.4214,
"step": 5325
},
{
"epoch": 3.6593707250342,
"grad_norm": 12.637552261352539,
"learning_rate": 3.525231798145615e-05,
"loss": 0.3804,
"step": 5350
},
{
"epoch": 3.6764705882352944,
"grad_norm": 5.059484481811523,
"learning_rate": 3.5157318741450066e-05,
"loss": 0.5716,
"step": 5375
},
{
"epoch": 3.6935704514363885,
"grad_norm": 0.10250476002693176,
"learning_rate": 3.506231950144399e-05,
"loss": 0.4074,
"step": 5400
},
{
"epoch": 3.7106703146374826,
"grad_norm": 8.807113647460938,
"learning_rate": 3.4967320261437906e-05,
"loss": 0.4943,
"step": 5425
},
{
"epoch": 3.7277701778385772,
"grad_norm": 11.27835750579834,
"learning_rate": 3.487232102143183e-05,
"loss": 0.52,
"step": 5450
},
{
"epoch": 3.744870041039672,
"grad_norm": 9.193815231323242,
"learning_rate": 3.477732178142575e-05,
"loss": 0.4272,
"step": 5475
},
{
"epoch": 3.761969904240766,
"grad_norm": 5.949501991271973,
"learning_rate": 3.468232254141967e-05,
"loss": 0.5627,
"step": 5500
},
{
"epoch": 3.7790697674418605,
"grad_norm": 8.378783226013184,
"learning_rate": 3.458732330141359e-05,
"loss": 0.5856,
"step": 5525
},
{
"epoch": 3.7961696306429547,
"grad_norm": 10.514230728149414,
"learning_rate": 3.449232406140751e-05,
"loss": 0.4319,
"step": 5550
},
{
"epoch": 3.8132694938440492,
"grad_norm": 9.19676399230957,
"learning_rate": 3.4397324821401435e-05,
"loss": 0.5279,
"step": 5575
},
{
"epoch": 3.830369357045144,
"grad_norm": 33.51396560668945,
"learning_rate": 3.430232558139535e-05,
"loss": 0.4681,
"step": 5600
},
{
"epoch": 3.847469220246238,
"grad_norm": 9.01288890838623,
"learning_rate": 3.4207326341389275e-05,
"loss": 0.476,
"step": 5625
},
{
"epoch": 3.8645690834473325,
"grad_norm": 8.594268798828125,
"learning_rate": 3.411232710138319e-05,
"loss": 0.3972,
"step": 5650
},
{
"epoch": 3.8816689466484267,
"grad_norm": 16.336450576782227,
"learning_rate": 3.4017327861377116e-05,
"loss": 0.556,
"step": 5675
},
{
"epoch": 3.8987688098495212,
"grad_norm": 9.880993843078613,
"learning_rate": 3.392232862137103e-05,
"loss": 0.4506,
"step": 5700
},
{
"epoch": 3.915868673050616,
"grad_norm": 17.31952476501465,
"learning_rate": 3.382732938136495e-05,
"loss": 0.5402,
"step": 5725
},
{
"epoch": 3.93296853625171,
"grad_norm": 27.180463790893555,
"learning_rate": 3.373233014135887e-05,
"loss": 0.5221,
"step": 5750
},
{
"epoch": 3.9500683994528045,
"grad_norm": 6.002215385437012,
"learning_rate": 3.363733090135279e-05,
"loss": 0.4936,
"step": 5775
},
{
"epoch": 3.9671682626538987,
"grad_norm": 18.105520248413086,
"learning_rate": 3.354233166134671e-05,
"loss": 0.4508,
"step": 5800
},
{
"epoch": 3.9842681258549932,
"grad_norm": 10.82498550415039,
"learning_rate": 3.344733242134063e-05,
"loss": 0.45,
"step": 5825
},
{
"epoch": 4.0,
"eval_accuracy": 0.8955274837828611,
"eval_f1_macro": 0.7057064371974652,
"eval_f1_micro": 0.8955274837828611,
"eval_f1_weighted": 0.8886748900635787,
"eval_loss": 0.31441542506217957,
"eval_precision_macro": 0.8491702927441283,
"eval_precision_micro": 0.8955274837828611,
"eval_precision_weighted": 0.904039527542345,
"eval_recall_macro": 0.6801244258050726,
"eval_recall_micro": 0.8955274837828611,
"eval_recall_weighted": 0.8955274837828611,
"eval_runtime": 19.2708,
"eval_samples_per_second": 151.992,
"eval_steps_per_second": 9.548,
"step": 5848
},
{
"epoch": 4.001367989056088,
"grad_norm": 11.459450721740723,
"learning_rate": 3.335233318133455e-05,
"loss": 0.3849,
"step": 5850
},
{
"epoch": 4.0184678522571815,
"grad_norm": 5.290565013885498,
"learning_rate": 3.325733394132847e-05,
"loss": 0.4288,
"step": 5875
},
{
"epoch": 4.035567715458276,
"grad_norm": 5.566415309906006,
"learning_rate": 3.316233470132239e-05,
"loss": 0.5999,
"step": 5900
},
{
"epoch": 4.052667578659371,
"grad_norm": 14.68671703338623,
"learning_rate": 3.306733546131631e-05,
"loss": 0.3921,
"step": 5925
},
{
"epoch": 4.069767441860465,
"grad_norm": 4.023522853851318,
"learning_rate": 3.297233622131023e-05,
"loss": 0.5771,
"step": 5950
},
{
"epoch": 4.08686730506156,
"grad_norm": 21.95399284362793,
"learning_rate": 3.287733698130415e-05,
"loss": 0.4062,
"step": 5975
},
{
"epoch": 4.1039671682626535,
"grad_norm": 0.2723749279975891,
"learning_rate": 3.278233774129807e-05,
"loss": 0.474,
"step": 6000
},
{
"epoch": 4.121067031463748,
"grad_norm": 2.193208694458008,
"learning_rate": 3.268733850129199e-05,
"loss": 0.3756,
"step": 6025
},
{
"epoch": 4.138166894664843,
"grad_norm": 7.093472480773926,
"learning_rate": 3.259233926128591e-05,
"loss": 0.4341,
"step": 6050
},
{
"epoch": 4.155266757865937,
"grad_norm": 15.10814380645752,
"learning_rate": 3.249734002127983e-05,
"loss": 0.6389,
"step": 6075
},
{
"epoch": 4.172366621067032,
"grad_norm": 1.5080924034118652,
"learning_rate": 3.240234078127375e-05,
"loss": 0.3716,
"step": 6100
},
{
"epoch": 4.1894664842681255,
"grad_norm": 6.386539936065674,
"learning_rate": 3.2307341541267674e-05,
"loss": 0.4362,
"step": 6125
},
{
"epoch": 4.20656634746922,
"grad_norm": 5.12455415725708,
"learning_rate": 3.221234230126159e-05,
"loss": 0.3797,
"step": 6150
},
{
"epoch": 4.223666210670315,
"grad_norm": 17.729442596435547,
"learning_rate": 3.2117343061255514e-05,
"loss": 0.5386,
"step": 6175
},
{
"epoch": 4.240766073871409,
"grad_norm": 11.959110260009766,
"learning_rate": 3.202234382124943e-05,
"loss": 0.5592,
"step": 6200
},
{
"epoch": 4.257865937072504,
"grad_norm": 8.719466209411621,
"learning_rate": 3.192734458124335e-05,
"loss": 0.5439,
"step": 6225
},
{
"epoch": 4.2749658002735975,
"grad_norm": 16.87335205078125,
"learning_rate": 3.183234534123727e-05,
"loss": 0.4024,
"step": 6250
},
{
"epoch": 4.292065663474692,
"grad_norm": 18.301565170288086,
"learning_rate": 3.173734610123119e-05,
"loss": 0.5395,
"step": 6275
},
{
"epoch": 4.309165526675787,
"grad_norm": 3.5666756629943848,
"learning_rate": 3.164234686122511e-05,
"loss": 0.4384,
"step": 6300
},
{
"epoch": 4.326265389876881,
"grad_norm": 6.758172035217285,
"learning_rate": 3.154734762121903e-05,
"loss": 0.4922,
"step": 6325
},
{
"epoch": 4.343365253077975,
"grad_norm": 10.049732208251953,
"learning_rate": 3.145234838121295e-05,
"loss": 0.5458,
"step": 6350
},
{
"epoch": 4.3604651162790695,
"grad_norm": 8.759356498718262,
"learning_rate": 3.135734914120687e-05,
"loss": 0.3634,
"step": 6375
},
{
"epoch": 4.377564979480164,
"grad_norm": 26.165199279785156,
"learning_rate": 3.1262349901200794e-05,
"loss": 0.5239,
"step": 6400
},
{
"epoch": 4.394664842681259,
"grad_norm": 9.9360990524292,
"learning_rate": 3.116735066119471e-05,
"loss": 0.3593,
"step": 6425
},
{
"epoch": 4.411764705882353,
"grad_norm": 6.546799182891846,
"learning_rate": 3.1072351421188634e-05,
"loss": 0.5414,
"step": 6450
},
{
"epoch": 4.428864569083447,
"grad_norm": 10.599846839904785,
"learning_rate": 3.097735218118255e-05,
"loss": 0.493,
"step": 6475
},
{
"epoch": 4.4459644322845415,
"grad_norm": 13.960310935974121,
"learning_rate": 3.0882352941176475e-05,
"loss": 0.3437,
"step": 6500
},
{
"epoch": 4.463064295485636,
"grad_norm": 5.401963710784912,
"learning_rate": 3.078735370117039e-05,
"loss": 0.4259,
"step": 6525
},
{
"epoch": 4.480164158686731,
"grad_norm": 4.808218955993652,
"learning_rate": 3.0692354461164315e-05,
"loss": 0.4627,
"step": 6550
},
{
"epoch": 4.497264021887825,
"grad_norm": 22.903667449951172,
"learning_rate": 3.059735522115823e-05,
"loss": 0.5063,
"step": 6575
},
{
"epoch": 4.514363885088919,
"grad_norm": 4.878890037536621,
"learning_rate": 3.0502355981152152e-05,
"loss": 0.3412,
"step": 6600
},
{
"epoch": 4.5314637482900135,
"grad_norm": 6.41884708404541,
"learning_rate": 3.040735674114607e-05,
"loss": 0.4038,
"step": 6625
},
{
"epoch": 4.548563611491108,
"grad_norm": 7.6325154304504395,
"learning_rate": 3.0312357501139993e-05,
"loss": 0.5503,
"step": 6650
},
{
"epoch": 4.565663474692203,
"grad_norm": 10.409296035766602,
"learning_rate": 3.021735826113391e-05,
"loss": 0.4306,
"step": 6675
},
{
"epoch": 4.582763337893297,
"grad_norm": 9.539959907531738,
"learning_rate": 3.0122359021127833e-05,
"loss": 0.5506,
"step": 6700
},
{
"epoch": 4.599863201094391,
"grad_norm": 15.213808059692383,
"learning_rate": 3.002735978112175e-05,
"loss": 0.4715,
"step": 6725
},
{
"epoch": 4.6169630642954855,
"grad_norm": 15.897672653198242,
"learning_rate": 2.9932360541115674e-05,
"loss": 0.4674,
"step": 6750
},
{
"epoch": 4.63406292749658,
"grad_norm": 2.023172378540039,
"learning_rate": 2.983736130110959e-05,
"loss": 0.5035,
"step": 6775
},
{
"epoch": 4.651162790697675,
"grad_norm": 9.661181449890137,
"learning_rate": 2.974236206110351e-05,
"loss": 0.4261,
"step": 6800
},
{
"epoch": 4.668262653898768,
"grad_norm": 6.808616638183594,
"learning_rate": 2.9647362821097435e-05,
"loss": 0.3963,
"step": 6825
},
{
"epoch": 4.685362517099863,
"grad_norm": 2.418628215789795,
"learning_rate": 2.9552363581091352e-05,
"loss": 0.5116,
"step": 6850
},
{
"epoch": 4.7024623803009575,
"grad_norm": 5.1149749755859375,
"learning_rate": 2.9457364341085275e-05,
"loss": 0.3861,
"step": 6875
},
{
"epoch": 4.719562243502052,
"grad_norm": 10.152314186096191,
"learning_rate": 2.9362365101079192e-05,
"loss": 0.4005,
"step": 6900
},
{
"epoch": 4.736662106703147,
"grad_norm": 11.572530746459961,
"learning_rate": 2.9267365861073116e-05,
"loss": 0.5105,
"step": 6925
},
{
"epoch": 4.75376196990424,
"grad_norm": 11.438729286193848,
"learning_rate": 2.9172366621067033e-05,
"loss": 0.3964,
"step": 6950
},
{
"epoch": 4.770861833105335,
"grad_norm": 2.2795422077178955,
"learning_rate": 2.9077367381060953e-05,
"loss": 0.4141,
"step": 6975
},
{
"epoch": 4.7879616963064295,
"grad_norm": 17.774606704711914,
"learning_rate": 2.8982368141054873e-05,
"loss": 0.4031,
"step": 7000
},
{
"epoch": 4.805061559507524,
"grad_norm": 5.122858047485352,
"learning_rate": 2.8887368901048794e-05,
"loss": 0.5089,
"step": 7025
},
{
"epoch": 4.822161422708619,
"grad_norm": 9.22169303894043,
"learning_rate": 2.879236966104271e-05,
"loss": 0.4628,
"step": 7050
},
{
"epoch": 4.839261285909712,
"grad_norm": 7.689781665802002,
"learning_rate": 2.8697370421036634e-05,
"loss": 0.435,
"step": 7075
},
{
"epoch": 4.856361149110807,
"grad_norm": 14.785922050476074,
"learning_rate": 2.860237118103055e-05,
"loss": 0.5333,
"step": 7100
},
{
"epoch": 4.8734610123119015,
"grad_norm": 9.352224349975586,
"learning_rate": 2.8507371941024475e-05,
"loss": 0.5289,
"step": 7125
},
{
"epoch": 4.890560875512996,
"grad_norm": 13.73246955871582,
"learning_rate": 2.841237270101839e-05,
"loss": 0.4828,
"step": 7150
},
{
"epoch": 4.907660738714091,
"grad_norm": 25.362621307373047,
"learning_rate": 2.8317373461012315e-05,
"loss": 0.4774,
"step": 7175
},
{
"epoch": 4.924760601915184,
"grad_norm": 7.927663803100586,
"learning_rate": 2.8222374221006232e-05,
"loss": 0.4548,
"step": 7200
},
{
"epoch": 4.941860465116279,
"grad_norm": 7.368469715118408,
"learning_rate": 2.8127374981000152e-05,
"loss": 0.503,
"step": 7225
},
{
"epoch": 4.9589603283173735,
"grad_norm": 4.176021575927734,
"learning_rate": 2.8032375740994073e-05,
"loss": 0.4104,
"step": 7250
},
{
"epoch": 4.976060191518468,
"grad_norm": 9.954981803894043,
"learning_rate": 2.7937376500987993e-05,
"loss": 0.4093,
"step": 7275
},
{
"epoch": 4.993160054719562,
"grad_norm": 6.885503768920898,
"learning_rate": 2.784237726098191e-05,
"loss": 0.5022,
"step": 7300
},
{
"epoch": 5.0,
"eval_accuracy": 0.8955274837828611,
"eval_f1_macro": 0.7161875284577881,
"eval_f1_micro": 0.8955274837828611,
"eval_f1_weighted": 0.8926041027507408,
"eval_loss": 0.28958025574684143,
"eval_precision_macro": 0.862110704875171,
"eval_precision_micro": 0.8955274837828611,
"eval_precision_weighted": 0.9050944481184527,
"eval_recall_macro": 0.6655249799036212,
"eval_recall_micro": 0.8955274837828611,
"eval_recall_weighted": 0.8955274837828611,
"eval_runtime": 18.9608,
"eval_samples_per_second": 154.477,
"eval_steps_per_second": 9.704,
"step": 7310
},
{
"epoch": 5.010259917920656,
"grad_norm": 0.8062827587127686,
"learning_rate": 2.7747378020975834e-05,
"loss": 0.458,
"step": 7325
},
{
"epoch": 5.027359781121751,
"grad_norm": 7.012026786804199,
"learning_rate": 2.765237878096975e-05,
"loss": 0.4691,
"step": 7350
},
{
"epoch": 5.0444596443228455,
"grad_norm": 3.819838762283325,
"learning_rate": 2.7557379540963674e-05,
"loss": 0.5331,
"step": 7375
},
{
"epoch": 5.06155950752394,
"grad_norm": 11.148397445678711,
"learning_rate": 2.7462380300957598e-05,
"loss": 0.4309,
"step": 7400
},
{
"epoch": 5.078659370725034,
"grad_norm": 4.97418737411499,
"learning_rate": 2.7367381060951515e-05,
"loss": 0.4308,
"step": 7425
},
{
"epoch": 5.095759233926128,
"grad_norm": 9.843364715576172,
"learning_rate": 2.7272381820945435e-05,
"loss": 0.5226,
"step": 7450
},
{
"epoch": 5.112859097127223,
"grad_norm": 11.50365924835205,
"learning_rate": 2.7177382580939352e-05,
"loss": 0.4092,
"step": 7475
},
{
"epoch": 5.1299589603283176,
"grad_norm": 6.617554187774658,
"learning_rate": 2.7082383340933276e-05,
"loss": 0.3954,
"step": 7500
},
{
"epoch": 5.147058823529412,
"grad_norm": 0.518602728843689,
"learning_rate": 2.6987384100927192e-05,
"loss": 0.422,
"step": 7525
},
{
"epoch": 5.164158686730506,
"grad_norm": 16.087276458740234,
"learning_rate": 2.6892384860921116e-05,
"loss": 0.3651,
"step": 7550
},
{
"epoch": 5.1812585499316,
"grad_norm": 0.1962614506483078,
"learning_rate": 2.6797385620915033e-05,
"loss": 0.5446,
"step": 7575
},
{
"epoch": 5.198358413132695,
"grad_norm": 8.01890754699707,
"learning_rate": 2.6702386380908957e-05,
"loss": 0.3318,
"step": 7600
},
{
"epoch": 5.2154582763337896,
"grad_norm": 36.442684173583984,
"learning_rate": 2.6607387140902874e-05,
"loss": 0.4495,
"step": 7625
},
{
"epoch": 5.232558139534884,
"grad_norm": 8.66895866394043,
"learning_rate": 2.6512387900896797e-05,
"loss": 0.4476,
"step": 7650
},
{
"epoch": 5.249658002735978,
"grad_norm": 14.132843971252441,
"learning_rate": 2.6417388660890714e-05,
"loss": 0.3548,
"step": 7675
},
{
"epoch": 5.266757865937072,
"grad_norm": 11.379664421081543,
"learning_rate": 2.6322389420884634e-05,
"loss": 0.4658,
"step": 7700
},
{
"epoch": 5.283857729138167,
"grad_norm": 12.820823669433594,
"learning_rate": 2.622739018087855e-05,
"loss": 0.2941,
"step": 7725
},
{
"epoch": 5.300957592339262,
"grad_norm": 26.1966609954834,
"learning_rate": 2.6132390940872475e-05,
"loss": 0.4083,
"step": 7750
},
{
"epoch": 5.318057455540355,
"grad_norm": 12.518375396728516,
"learning_rate": 2.6041191670466635e-05,
"loss": 0.3166,
"step": 7775
},
{
"epoch": 5.33515731874145,
"grad_norm": 4.027897834777832,
"learning_rate": 2.594619243046056e-05,
"loss": 0.4087,
"step": 7800
},
{
"epoch": 5.352257181942544,
"grad_norm": 13.574274063110352,
"learning_rate": 2.5851193190454476e-05,
"loss": 0.5478,
"step": 7825
},
{
"epoch": 5.369357045143639,
"grad_norm": 12.73529052734375,
"learning_rate": 2.57561939504484e-05,
"loss": 0.4394,
"step": 7850
},
{
"epoch": 5.386456908344734,
"grad_norm": 8.502470016479492,
"learning_rate": 2.5661194710442316e-05,
"loss": 0.397,
"step": 7875
},
{
"epoch": 5.403556771545827,
"grad_norm": 7.308871746063232,
"learning_rate": 2.556619547043624e-05,
"loss": 0.4541,
"step": 7900
},
{
"epoch": 5.420656634746922,
"grad_norm": 14.608325004577637,
"learning_rate": 2.5471196230430157e-05,
"loss": 0.4646,
"step": 7925
},
{
"epoch": 5.437756497948016,
"grad_norm": 6.4289655685424805,
"learning_rate": 2.5376196990424077e-05,
"loss": 0.399,
"step": 7950
},
{
"epoch": 5.454856361149111,
"grad_norm": 3.8061683177948,
"learning_rate": 2.5281197750417994e-05,
"loss": 0.4327,
"step": 7975
},
{
"epoch": 5.471956224350206,
"grad_norm": 6.391703128814697,
"learning_rate": 2.5186198510411917e-05,
"loss": 0.4641,
"step": 8000
},
{
"epoch": 5.489056087551299,
"grad_norm": 2.9124350547790527,
"learning_rate": 2.509119927040584e-05,
"loss": 0.3654,
"step": 8025
},
{
"epoch": 5.506155950752394,
"grad_norm": 3.834289789199829,
"learning_rate": 2.4996200030399758e-05,
"loss": 0.5162,
"step": 8050
},
{
"epoch": 5.523255813953488,
"grad_norm": 16.672739028930664,
"learning_rate": 2.4901200790393678e-05,
"loss": 0.5626,
"step": 8075
},
{
"epoch": 5.540355677154583,
"grad_norm": 26.094615936279297,
"learning_rate": 2.48062015503876e-05,
"loss": 0.3838,
"step": 8100
},
{
"epoch": 5.557455540355678,
"grad_norm": 1.9188295602798462,
"learning_rate": 2.471120231038152e-05,
"loss": 0.3746,
"step": 8125
},
{
"epoch": 5.574555403556771,
"grad_norm": 3.0162570476531982,
"learning_rate": 2.461620307037544e-05,
"loss": 0.4517,
"step": 8150
},
{
"epoch": 5.591655266757866,
"grad_norm": 14.349656105041504,
"learning_rate": 2.4521203830369356e-05,
"loss": 0.4876,
"step": 8175
},
{
"epoch": 5.60875512995896,
"grad_norm": 9.013519287109375,
"learning_rate": 2.4426204590363276e-05,
"loss": 0.4394,
"step": 8200
},
{
"epoch": 5.625854993160055,
"grad_norm": 3.7371058464050293,
"learning_rate": 2.4331205350357197e-05,
"loss": 0.4345,
"step": 8225
},
{
"epoch": 5.642954856361149,
"grad_norm": 13.115042686462402,
"learning_rate": 2.4236206110351117e-05,
"loss": 0.4146,
"step": 8250
},
{
"epoch": 5.660054719562243,
"grad_norm": 12.576549530029297,
"learning_rate": 2.4141206870345037e-05,
"loss": 0.4096,
"step": 8275
},
{
"epoch": 5.677154582763338,
"grad_norm": 7.4951605796813965,
"learning_rate": 2.4046207630338957e-05,
"loss": 0.3997,
"step": 8300
},
{
"epoch": 5.694254445964432,
"grad_norm": 8.070563316345215,
"learning_rate": 2.3951208390332878e-05,
"loss": 0.3297,
"step": 8325
},
{
"epoch": 5.711354309165527,
"grad_norm": 14.807238578796387,
"learning_rate": 2.38562091503268e-05,
"loss": 0.3864,
"step": 8350
},
{
"epoch": 5.728454172366621,
"grad_norm": 6.503055572509766,
"learning_rate": 2.3761209910320718e-05,
"loss": 0.5571,
"step": 8375
},
{
"epoch": 5.745554035567715,
"grad_norm": 3.811549186706543,
"learning_rate": 2.366621067031464e-05,
"loss": 0.3065,
"step": 8400
},
{
"epoch": 5.76265389876881,
"grad_norm": 4.377668857574463,
"learning_rate": 2.357121143030856e-05,
"loss": 0.3606,
"step": 8425
},
{
"epoch": 5.779753761969904,
"grad_norm": 6.7863874435424805,
"learning_rate": 2.347621219030248e-05,
"loss": 0.3654,
"step": 8450
},
{
"epoch": 5.796853625170999,
"grad_norm": 8.570117950439453,
"learning_rate": 2.33812129502964e-05,
"loss": 0.3821,
"step": 8475
},
{
"epoch": 5.813953488372093,
"grad_norm": 3.4964771270751953,
"learning_rate": 2.328621371029032e-05,
"loss": 0.3593,
"step": 8500
},
{
"epoch": 5.831053351573187,
"grad_norm": 5.006895065307617,
"learning_rate": 2.319121447028424e-05,
"loss": 0.3856,
"step": 8525
},
{
"epoch": 5.848153214774282,
"grad_norm": 7.012197971343994,
"learning_rate": 2.309621523027816e-05,
"loss": 0.5216,
"step": 8550
},
{
"epoch": 5.865253077975376,
"grad_norm": 11.0383882522583,
"learning_rate": 2.300121599027208e-05,
"loss": 0.5002,
"step": 8575
},
{
"epoch": 5.882352941176471,
"grad_norm": 6.153685092926025,
"learning_rate": 2.2906216750266e-05,
"loss": 0.4749,
"step": 8600
},
{
"epoch": 5.899452804377565,
"grad_norm": 21.01350975036621,
"learning_rate": 2.2811217510259918e-05,
"loss": 0.3583,
"step": 8625
},
{
"epoch": 5.916552667578659,
"grad_norm": 6.175297737121582,
"learning_rate": 2.2716218270253838e-05,
"loss": 0.4317,
"step": 8650
},
{
"epoch": 5.933652530779754,
"grad_norm": 2.6204943656921387,
"learning_rate": 2.2621219030247758e-05,
"loss": 0.4452,
"step": 8675
},
{
"epoch": 5.950752393980848,
"grad_norm": 2.762593984603882,
"learning_rate": 2.252621979024168e-05,
"loss": 0.3466,
"step": 8700
},
{
"epoch": 5.967852257181942,
"grad_norm": 11.155779838562012,
"learning_rate": 2.24312205502356e-05,
"loss": 0.4283,
"step": 8725
},
{
"epoch": 5.984952120383037,
"grad_norm": 61.69544219970703,
"learning_rate": 2.233622131022952e-05,
"loss": 0.3336,
"step": 8750
},
{
"epoch": 6.0,
"eval_accuracy": 0.9095254353021509,
"eval_f1_macro": 0.748356749541202,
"eval_f1_micro": 0.9095254353021509,
"eval_f1_weighted": 0.9037758276025493,
"eval_loss": 0.297870934009552,
"eval_precision_macro": 0.83851223488873,
"eval_precision_micro": 0.9095254353021509,
"eval_precision_weighted": 0.9132744969964606,
"eval_recall_macro": 0.7323996923201544,
"eval_recall_micro": 0.9095254353021509,
"eval_recall_weighted": 0.9095254353021509,
"eval_runtime": 18.9912,
"eval_samples_per_second": 154.229,
"eval_steps_per_second": 9.689,
"step": 8772
},
{
"epoch": 6.002051983584131,
"grad_norm": 9.449117660522461,
"learning_rate": 2.224122207022344e-05,
"loss": 0.3617,
"step": 8775
},
{
"epoch": 6.019151846785226,
"grad_norm": 9.420016288757324,
"learning_rate": 2.214622283021736e-05,
"loss": 0.5047,
"step": 8800
},
{
"epoch": 6.03625170998632,
"grad_norm": 8.470691680908203,
"learning_rate": 2.2055023559811523e-05,
"loss": 0.4069,
"step": 8825
},
{
"epoch": 6.053351573187414,
"grad_norm": 16.81625747680664,
"learning_rate": 2.1960024319805443e-05,
"loss": 0.5216,
"step": 8850
},
{
"epoch": 6.070451436388509,
"grad_norm": 14.323150634765625,
"learning_rate": 2.186502507979936e-05,
"loss": 0.3137,
"step": 8875
},
{
"epoch": 6.087551299589603,
"grad_norm": 5.009669780731201,
"learning_rate": 2.177002583979328e-05,
"loss": 0.4713,
"step": 8900
},
{
"epoch": 6.104651162790698,
"grad_norm": 14.51624870300293,
"learning_rate": 2.16750265997872e-05,
"loss": 0.373,
"step": 8925
},
{
"epoch": 6.121751025991792,
"grad_norm": 4.42010498046875,
"learning_rate": 2.158002735978112e-05,
"loss": 0.3218,
"step": 8950
},
{
"epoch": 6.138850889192886,
"grad_norm": 18.838573455810547,
"learning_rate": 2.1485028119775045e-05,
"loss": 0.3754,
"step": 8975
},
{
"epoch": 6.155950752393981,
"grad_norm": 2.5859174728393555,
"learning_rate": 2.1390028879768965e-05,
"loss": 0.4086,
"step": 9000
},
{
"epoch": 6.173050615595075,
"grad_norm": 4.829029560089111,
"learning_rate": 2.1295029639762885e-05,
"loss": 0.3722,
"step": 9025
},
{
"epoch": 6.19015047879617,
"grad_norm": 11.934502601623535,
"learning_rate": 2.1200030399756805e-05,
"loss": 0.3506,
"step": 9050
},
{
"epoch": 6.207250341997264,
"grad_norm": 3.9261722564697266,
"learning_rate": 2.1105031159750722e-05,
"loss": 0.2612,
"step": 9075
},
{
"epoch": 6.224350205198358,
"grad_norm": 0.23096883296966553,
"learning_rate": 2.1010031919744643e-05,
"loss": 0.5446,
"step": 9100
},
{
"epoch": 6.241450068399453,
"grad_norm": 13.32019329071045,
"learning_rate": 2.0915032679738563e-05,
"loss": 0.4225,
"step": 9125
},
{
"epoch": 6.258549931600547,
"grad_norm": 12.433130264282227,
"learning_rate": 2.0820033439732483e-05,
"loss": 0.3323,
"step": 9150
},
{
"epoch": 6.275649794801642,
"grad_norm": 22.49323844909668,
"learning_rate": 2.0725034199726403e-05,
"loss": 0.4702,
"step": 9175
},
{
"epoch": 6.292749658002736,
"grad_norm": 7.992762088775635,
"learning_rate": 2.0630034959720324e-05,
"loss": 0.4188,
"step": 9200
},
{
"epoch": 6.30984952120383,
"grad_norm": 2.31046986579895,
"learning_rate": 2.0535035719714244e-05,
"loss": 0.2581,
"step": 9225
},
{
"epoch": 6.326949384404925,
"grad_norm": 13.177254676818848,
"learning_rate": 2.0440036479708164e-05,
"loss": 0.5264,
"step": 9250
},
{
"epoch": 6.344049247606019,
"grad_norm": 16.654388427734375,
"learning_rate": 2.0345037239702085e-05,
"loss": 0.5404,
"step": 9275
},
{
"epoch": 6.361149110807114,
"grad_norm": 7.191986083984375,
"learning_rate": 2.0250037999696005e-05,
"loss": 0.3926,
"step": 9300
},
{
"epoch": 6.378248974008208,
"grad_norm": 2.7967660427093506,
"learning_rate": 2.0155038759689922e-05,
"loss": 0.3759,
"step": 9325
},
{
"epoch": 6.395348837209302,
"grad_norm": 11.951244354248047,
"learning_rate": 2.0060039519683842e-05,
"loss": 0.3715,
"step": 9350
},
{
"epoch": 6.412448700410397,
"grad_norm": 20.298959732055664,
"learning_rate": 1.9965040279677762e-05,
"loss": 0.3348,
"step": 9375
},
{
"epoch": 6.429548563611491,
"grad_norm": 4.485177516937256,
"learning_rate": 1.9870041039671683e-05,
"loss": 0.3164,
"step": 9400
},
{
"epoch": 6.446648426812586,
"grad_norm": 8.650040626525879,
"learning_rate": 1.9775041799665603e-05,
"loss": 0.4892,
"step": 9425
},
{
"epoch": 6.46374829001368,
"grad_norm": 8.256196975708008,
"learning_rate": 1.9680042559659523e-05,
"loss": 0.4008,
"step": 9450
},
{
"epoch": 6.480848153214774,
"grad_norm": 13.1589994430542,
"learning_rate": 1.9585043319653443e-05,
"loss": 0.3471,
"step": 9475
},
{
"epoch": 6.497948016415869,
"grad_norm": 5.785964488983154,
"learning_rate": 1.9490044079647364e-05,
"loss": 0.4492,
"step": 9500
},
{
"epoch": 6.515047879616963,
"grad_norm": 5.720312118530273,
"learning_rate": 1.9395044839641284e-05,
"loss": 0.3955,
"step": 9525
},
{
"epoch": 6.532147742818058,
"grad_norm": 4.752621650695801,
"learning_rate": 1.9300045599635204e-05,
"loss": 0.5226,
"step": 9550
},
{
"epoch": 6.549247606019152,
"grad_norm": 6.577572822570801,
"learning_rate": 1.9205046359629124e-05,
"loss": 0.4383,
"step": 9575
},
{
"epoch": 6.566347469220246,
"grad_norm": 2.3268673419952393,
"learning_rate": 1.9110047119623045e-05,
"loss": 0.4475,
"step": 9600
},
{
"epoch": 6.583447332421341,
"grad_norm": 7.915472030639648,
"learning_rate": 1.9015047879616965e-05,
"loss": 0.4374,
"step": 9625
},
{
"epoch": 6.600547195622435,
"grad_norm": 14.391087532043457,
"learning_rate": 1.8920048639610885e-05,
"loss": 0.3706,
"step": 9650
},
{
"epoch": 6.617647058823529,
"grad_norm": 5.97300386428833,
"learning_rate": 1.8825049399604806e-05,
"loss": 0.425,
"step": 9675
},
{
"epoch": 6.634746922024624,
"grad_norm": 9.130365371704102,
"learning_rate": 1.8730050159598726e-05,
"loss": 0.3341,
"step": 9700
},
{
"epoch": 6.651846785225718,
"grad_norm": 5.5994038581848145,
"learning_rate": 1.8635050919592646e-05,
"loss": 0.4933,
"step": 9725
},
{
"epoch": 6.668946648426813,
"grad_norm": 9.19884967803955,
"learning_rate": 1.8540051679586566e-05,
"loss": 0.4012,
"step": 9750
},
{
"epoch": 6.686046511627907,
"grad_norm": 3.408245325088501,
"learning_rate": 1.8445052439580483e-05,
"loss": 0.3444,
"step": 9775
},
{
"epoch": 6.703146374829001,
"grad_norm": 11.616069793701172,
"learning_rate": 1.8350053199574404e-05,
"loss": 0.3627,
"step": 9800
},
{
"epoch": 6.720246238030096,
"grad_norm": 12.855060577392578,
"learning_rate": 1.8255053959568324e-05,
"loss": 0.4833,
"step": 9825
},
{
"epoch": 6.73734610123119,
"grad_norm": 4.252665042877197,
"learning_rate": 1.8160054719562244e-05,
"loss": 0.3607,
"step": 9850
},
{
"epoch": 6.754445964432285,
"grad_norm": 8.759148597717285,
"learning_rate": 1.8065055479556164e-05,
"loss": 0.403,
"step": 9875
},
{
"epoch": 6.771545827633379,
"grad_norm": 11.92839527130127,
"learning_rate": 1.7970056239550085e-05,
"loss": 0.3562,
"step": 9900
},
{
"epoch": 6.788645690834473,
"grad_norm": 1.0502179861068726,
"learning_rate": 1.7875056999544005e-05,
"loss": 0.4002,
"step": 9925
},
{
"epoch": 6.805745554035568,
"grad_norm": 8.642801284790039,
"learning_rate": 1.7780057759537925e-05,
"loss": 0.4298,
"step": 9950
},
{
"epoch": 6.822845417236662,
"grad_norm": 3.608553886413574,
"learning_rate": 1.7685058519531845e-05,
"loss": 0.3687,
"step": 9975
},
{
"epoch": 6.839945280437757,
"grad_norm": 17.244091033935547,
"learning_rate": 1.7590059279525762e-05,
"loss": 0.4086,
"step": 10000
},
{
"epoch": 6.857045143638851,
"grad_norm": 9.269475936889648,
"learning_rate": 1.7495060039519683e-05,
"loss": 0.4166,
"step": 10025
},
{
"epoch": 6.874145006839945,
"grad_norm": 6.287049293518066,
"learning_rate": 1.7400060799513603e-05,
"loss": 0.5342,
"step": 10050
},
{
"epoch": 6.89124487004104,
"grad_norm": 2.380673408508301,
"learning_rate": 1.7305061559507523e-05,
"loss": 0.3687,
"step": 10075
},
{
"epoch": 6.908344733242134,
"grad_norm": 23.413028717041016,
"learning_rate": 1.7210062319501443e-05,
"loss": 0.3996,
"step": 10100
},
{
"epoch": 6.925444596443229,
"grad_norm": 16.1468563079834,
"learning_rate": 1.7115063079495364e-05,
"loss": 0.3844,
"step": 10125
},
{
"epoch": 6.942544459644322,
"grad_norm": 1.6500098705291748,
"learning_rate": 1.7020063839489284e-05,
"loss": 0.523,
"step": 10150
},
{
"epoch": 6.959644322845417,
"grad_norm": 9.402831077575684,
"learning_rate": 1.6925064599483208e-05,
"loss": 0.3376,
"step": 10175
},
{
"epoch": 6.976744186046512,
"grad_norm": 2.928579807281494,
"learning_rate": 1.6830065359477125e-05,
"loss": 0.3303,
"step": 10200
},
{
"epoch": 6.993844049247606,
"grad_norm": 2.99859881401062,
"learning_rate": 1.6735066119471045e-05,
"loss": 0.4049,
"step": 10225
},
{
"epoch": 7.0,
"eval_accuracy": 0.9156708774325708,
"eval_f1_macro": 0.8149754054596434,
"eval_f1_micro": 0.9156708774325708,
"eval_f1_weighted": 0.9140872488341879,
"eval_loss": 0.250088632106781,
"eval_precision_macro": 0.9251793446372559,
"eval_precision_micro": 0.9156708774325708,
"eval_precision_weighted": 0.9218398298083498,
"eval_recall_macro": 0.7805045376076867,
"eval_recall_micro": 0.9156708774325708,
"eval_recall_weighted": 0.9156708774325708,
"eval_runtime": 19.166,
"eval_samples_per_second": 152.822,
"eval_steps_per_second": 9.6,
"step": 10234
},
{
"epoch": 7.010943912448701,
"grad_norm": 5.946883678436279,
"learning_rate": 1.6640066879464965e-05,
"loss": 0.4273,
"step": 10250
},
{
"epoch": 7.028043775649794,
"grad_norm": 12.816991806030273,
"learning_rate": 1.6545067639458885e-05,
"loss": 0.3663,
"step": 10275
},
{
"epoch": 7.045143638850889,
"grad_norm": 10.432554244995117,
"learning_rate": 1.6450068399452806e-05,
"loss": 0.3136,
"step": 10300
},
{
"epoch": 7.062243502051984,
"grad_norm": 13.881523132324219,
"learning_rate": 1.6355069159446726e-05,
"loss": 0.3795,
"step": 10325
},
{
"epoch": 7.079343365253078,
"grad_norm": 8.671323776245117,
"learning_rate": 1.6260069919440646e-05,
"loss": 0.4158,
"step": 10350
},
{
"epoch": 7.096443228454173,
"grad_norm": 7.5603132247924805,
"learning_rate": 1.6165070679434567e-05,
"loss": 0.3809,
"step": 10375
},
{
"epoch": 7.113543091655266,
"grad_norm": 13.723405838012695,
"learning_rate": 1.6070071439428487e-05,
"loss": 0.391,
"step": 10400
},
{
"epoch": 7.130642954856361,
"grad_norm": 9.176318168640137,
"learning_rate": 1.5975072199422407e-05,
"loss": 0.3475,
"step": 10425
},
{
"epoch": 7.147742818057456,
"grad_norm": 5.787652015686035,
"learning_rate": 1.5880072959416324e-05,
"loss": 0.3745,
"step": 10450
},
{
"epoch": 7.16484268125855,
"grad_norm": 3.6111419200897217,
"learning_rate": 1.5785073719410244e-05,
"loss": 0.3897,
"step": 10475
},
{
"epoch": 7.181942544459645,
"grad_norm": 9.432286262512207,
"learning_rate": 1.5690074479404165e-05,
"loss": 0.5103,
"step": 10500
},
{
"epoch": 7.199042407660738,
"grad_norm": 6.067584037780762,
"learning_rate": 1.5595075239398085e-05,
"loss": 0.4322,
"step": 10525
},
{
"epoch": 7.216142270861833,
"grad_norm": 0.6759016513824463,
"learning_rate": 1.5500075999392005e-05,
"loss": 0.4045,
"step": 10550
},
{
"epoch": 7.233242134062928,
"grad_norm": 6.492595672607422,
"learning_rate": 1.5405076759385925e-05,
"loss": 0.3742,
"step": 10575
},
{
"epoch": 7.250341997264022,
"grad_norm": 10.5081148147583,
"learning_rate": 1.5310077519379846e-05,
"loss": 0.3432,
"step": 10600
},
{
"epoch": 7.267441860465116,
"grad_norm": 6.45819616317749,
"learning_rate": 1.5215078279373766e-05,
"loss": 0.3557,
"step": 10625
},
{
"epoch": 7.2845417236662104,
"grad_norm": 1.3473492860794067,
"learning_rate": 1.5120079039367684e-05,
"loss": 0.3995,
"step": 10650
},
{
"epoch": 7.301641586867305,
"grad_norm": 15.663151741027832,
"learning_rate": 1.5025079799361605e-05,
"loss": 0.4619,
"step": 10675
},
{
"epoch": 7.3187414500684,
"grad_norm": 2.441596746444702,
"learning_rate": 1.4930080559355525e-05,
"loss": 0.3351,
"step": 10700
},
{
"epoch": 7.335841313269494,
"grad_norm": 18.481773376464844,
"learning_rate": 1.4835081319349445e-05,
"loss": 0.4416,
"step": 10725
},
{
"epoch": 7.352941176470588,
"grad_norm": 3.074429750442505,
"learning_rate": 1.4740082079343364e-05,
"loss": 0.314,
"step": 10750
},
{
"epoch": 7.3700410396716824,
"grad_norm": 8.20934772491455,
"learning_rate": 1.4645082839337284e-05,
"loss": 0.287,
"step": 10775
},
{
"epoch": 7.387140902872777,
"grad_norm": 9.531194686889648,
"learning_rate": 1.4550083599331208e-05,
"loss": 0.4132,
"step": 10800
},
{
"epoch": 7.404240766073872,
"grad_norm": 9.128312110900879,
"learning_rate": 1.4455084359325128e-05,
"loss": 0.5293,
"step": 10825
},
{
"epoch": 7.421340629274966,
"grad_norm": 12.818424224853516,
"learning_rate": 1.4360085119319047e-05,
"loss": 0.3633,
"step": 10850
},
{
"epoch": 7.43844049247606,
"grad_norm": 2.5819342136383057,
"learning_rate": 1.4265085879312967e-05,
"loss": 0.2941,
"step": 10875
},
{
"epoch": 7.4555403556771545,
"grad_norm": 0.3548867702484131,
"learning_rate": 1.4170086639306887e-05,
"loss": 0.3477,
"step": 10900
},
{
"epoch": 7.472640218878249,
"grad_norm": 9.35716438293457,
"learning_rate": 1.4075087399300808e-05,
"loss": 0.3415,
"step": 10925
},
{
"epoch": 7.489740082079344,
"grad_norm": 0.888134241104126,
"learning_rate": 1.3980088159294726e-05,
"loss": 0.4376,
"step": 10950
},
{
"epoch": 7.506839945280438,
"grad_norm": 3.009415626525879,
"learning_rate": 1.3885088919288646e-05,
"loss": 0.3566,
"step": 10975
},
{
"epoch": 7.523939808481532,
"grad_norm": 0.4245036542415619,
"learning_rate": 1.3790089679282567e-05,
"loss": 0.3407,
"step": 11000
},
{
"epoch": 7.5410396716826265,
"grad_norm": 9.772459983825684,
"learning_rate": 1.3695090439276487e-05,
"loss": 0.5112,
"step": 11025
},
{
"epoch": 7.558139534883721,
"grad_norm": 8.6549654006958,
"learning_rate": 1.3600091199270407e-05,
"loss": 0.3654,
"step": 11050
},
{
"epoch": 7.575239398084816,
"grad_norm": 12.258879661560059,
"learning_rate": 1.3505091959264326e-05,
"loss": 0.394,
"step": 11075
},
{
"epoch": 7.592339261285909,
"grad_norm": 8.852180480957031,
"learning_rate": 1.3410092719258246e-05,
"loss": 0.3667,
"step": 11100
},
{
"epoch": 7.609439124487004,
"grad_norm": 19.00887680053711,
"learning_rate": 1.3315093479252166e-05,
"loss": 0.3465,
"step": 11125
},
{
"epoch": 7.6265389876880985,
"grad_norm": 24.143585205078125,
"learning_rate": 1.3220094239246087e-05,
"loss": 0.3878,
"step": 11150
},
{
"epoch": 7.643638850889193,
"grad_norm": 4.1856889724731445,
"learning_rate": 1.3125094999240007e-05,
"loss": 0.3615,
"step": 11175
},
{
"epoch": 7.660738714090288,
"grad_norm": 11.348432540893555,
"learning_rate": 1.3030095759233925e-05,
"loss": 0.3192,
"step": 11200
},
{
"epoch": 7.677838577291381,
"grad_norm": 4.999576091766357,
"learning_rate": 1.2935096519227846e-05,
"loss": 0.3134,
"step": 11225
},
{
"epoch": 7.694938440492476,
"grad_norm": 11.35132122039795,
"learning_rate": 1.2840097279221766e-05,
"loss": 0.4056,
"step": 11250
},
{
"epoch": 7.7120383036935705,
"grad_norm": 15.860554695129395,
"learning_rate": 1.2745098039215686e-05,
"loss": 0.2678,
"step": 11275
},
{
"epoch": 7.729138166894665,
"grad_norm": 3.4646947383880615,
"learning_rate": 1.2650098799209607e-05,
"loss": 0.396,
"step": 11300
},
{
"epoch": 7.74623803009576,
"grad_norm": 3.1925065517425537,
"learning_rate": 1.2555099559203525e-05,
"loss": 0.324,
"step": 11325
},
{
"epoch": 7.763337893296853,
"grad_norm": 3.6302490234375,
"learning_rate": 1.2460100319197447e-05,
"loss": 0.3766,
"step": 11350
},
{
"epoch": 7.780437756497948,
"grad_norm": 20.079179763793945,
"learning_rate": 1.2365101079191367e-05,
"loss": 0.3841,
"step": 11375
},
{
"epoch": 7.7975376196990425,
"grad_norm": 11.020298957824707,
"learning_rate": 1.2270101839185288e-05,
"loss": 0.4496,
"step": 11400
},
{
"epoch": 7.814637482900137,
"grad_norm": 4.884584426879883,
"learning_rate": 1.2175102599179206e-05,
"loss": 0.3219,
"step": 11425
},
{
"epoch": 7.831737346101232,
"grad_norm": 18.95062828063965,
"learning_rate": 1.2080103359173127e-05,
"loss": 0.2958,
"step": 11450
},
{
"epoch": 7.848837209302325,
"grad_norm": 7.927674770355225,
"learning_rate": 1.1985104119167047e-05,
"loss": 0.5062,
"step": 11475
},
{
"epoch": 7.86593707250342,
"grad_norm": 18.551855087280273,
"learning_rate": 1.189390484876121e-05,
"loss": 0.4039,
"step": 11500
},
{
"epoch": 7.8830369357045145,
"grad_norm": 5.578052520751953,
"learning_rate": 1.179890560875513e-05,
"loss": 0.332,
"step": 11525
},
{
"epoch": 7.900136798905609,
"grad_norm": 0.06869751960039139,
"learning_rate": 1.1703906368749049e-05,
"loss": 0.4136,
"step": 11550
},
{
"epoch": 7.917236662106703,
"grad_norm": 7.070012092590332,
"learning_rate": 1.1608907128742971e-05,
"loss": 0.3402,
"step": 11575
},
{
"epoch": 7.934336525307797,
"grad_norm": 2.309910774230957,
"learning_rate": 1.1513907888736891e-05,
"loss": 0.3272,
"step": 11600
},
{
"epoch": 7.951436388508892,
"grad_norm": 20.965015411376953,
"learning_rate": 1.1418908648730812e-05,
"loss": 0.2962,
"step": 11625
},
{
"epoch": 7.9685362517099865,
"grad_norm": 5.13886022567749,
"learning_rate": 1.132390940872473e-05,
"loss": 0.455,
"step": 11650
},
{
"epoch": 7.985636114911081,
"grad_norm": 3.935183525085449,
"learning_rate": 1.122891016871865e-05,
"loss": 0.3484,
"step": 11675
},
{
"epoch": 8.0,
"eval_accuracy": 0.9211334926596108,
"eval_f1_macro": 0.8568820222911021,
"eval_f1_micro": 0.9211334926596108,
"eval_f1_weighted": 0.9210426818413497,
"eval_loss": 0.2283647209405899,
"eval_precision_macro": 0.9255394303052991,
"eval_precision_micro": 0.9211334926596108,
"eval_precision_weighted": 0.9272957997209303,
"eval_recall_macro": 0.8317609357993986,
"eval_recall_micro": 0.9211334926596108,
"eval_recall_weighted": 0.9211334926596108,
"eval_runtime": 19.3065,
"eval_samples_per_second": 151.711,
"eval_steps_per_second": 9.53,
"step": 11696
},
{
"epoch": 8.002735978112176,
"grad_norm": 14.681279182434082,
"learning_rate": 1.113391092871257e-05,
"loss": 0.2686,
"step": 11700
},
{
"epoch": 8.01983584131327,
"grad_norm": 26.67691993713379,
"learning_rate": 1.1042711658306734e-05,
"loss": 0.3767,
"step": 11725
},
{
"epoch": 8.036935704514363,
"grad_norm": 4.675159931182861,
"learning_rate": 1.0947712418300655e-05,
"loss": 0.304,
"step": 11750
},
{
"epoch": 8.054035567715458,
"grad_norm": 8.4456787109375,
"learning_rate": 1.0852713178294575e-05,
"loss": 0.2553,
"step": 11775
},
{
"epoch": 8.071135430916552,
"grad_norm": 15.122594833374023,
"learning_rate": 1.0757713938288493e-05,
"loss": 0.3248,
"step": 11800
},
{
"epoch": 8.088235294117647,
"grad_norm": 10.912254333496094,
"learning_rate": 1.0662714698282414e-05,
"loss": 0.3009,
"step": 11825
},
{
"epoch": 8.105335157318741,
"grad_norm": 13.658234596252441,
"learning_rate": 1.0567715458276334e-05,
"loss": 0.4445,
"step": 11850
},
{
"epoch": 8.122435020519836,
"grad_norm": 0.18706431984901428,
"learning_rate": 1.0472716218270254e-05,
"loss": 0.267,
"step": 11875
},
{
"epoch": 8.13953488372093,
"grad_norm": 24.79719352722168,
"learning_rate": 1.0377716978264174e-05,
"loss": 0.3466,
"step": 11900
},
{
"epoch": 8.156634746922025,
"grad_norm": 18.876535415649414,
"learning_rate": 1.0282717738258095e-05,
"loss": 0.4939,
"step": 11925
},
{
"epoch": 8.17373461012312,
"grad_norm": 7.15775728225708,
"learning_rate": 1.0187718498252015e-05,
"loss": 0.3728,
"step": 11950
},
{
"epoch": 8.190834473324214,
"grad_norm": 4.604434967041016,
"learning_rate": 1.0092719258245935e-05,
"loss": 0.3492,
"step": 11975
},
{
"epoch": 8.207934336525307,
"grad_norm": 6.463050365447998,
"learning_rate": 9.997720018239856e-06,
"loss": 0.3298,
"step": 12000
},
{
"epoch": 8.225034199726402,
"grad_norm": 16.29618263244629,
"learning_rate": 9.902720778233774e-06,
"loss": 0.3415,
"step": 12025
},
{
"epoch": 8.242134062927496,
"grad_norm": 5.63080358505249,
"learning_rate": 9.807721538227694e-06,
"loss": 0.2608,
"step": 12050
},
{
"epoch": 8.25923392612859,
"grad_norm": 0.7199766039848328,
"learning_rate": 9.712722298221615e-06,
"loss": 0.3974,
"step": 12075
},
{
"epoch": 8.276333789329685,
"grad_norm": 15.456204414367676,
"learning_rate": 9.617723058215535e-06,
"loss": 0.3329,
"step": 12100
},
{
"epoch": 8.29343365253078,
"grad_norm": 18.643985748291016,
"learning_rate": 9.522723818209454e-06,
"loss": 0.4801,
"step": 12125
},
{
"epoch": 8.310533515731874,
"grad_norm": 4.800582408905029,
"learning_rate": 9.427724578203374e-06,
"loss": 0.557,
"step": 12150
},
{
"epoch": 8.327633378932969,
"grad_norm": 22.22751808166504,
"learning_rate": 9.332725338197294e-06,
"loss": 0.3648,
"step": 12175
},
{
"epoch": 8.344733242134064,
"grad_norm": 5.446302890777588,
"learning_rate": 9.237726098191216e-06,
"loss": 0.2558,
"step": 12200
},
{
"epoch": 8.361833105335158,
"grad_norm": 0.26866602897644043,
"learning_rate": 9.142726858185136e-06,
"loss": 0.3962,
"step": 12225
},
{
"epoch": 8.378932968536251,
"grad_norm": 3.1288976669311523,
"learning_rate": 9.047727618179055e-06,
"loss": 0.439,
"step": 12250
},
{
"epoch": 8.396032831737346,
"grad_norm": 2.740288496017456,
"learning_rate": 8.952728378172975e-06,
"loss": 0.3076,
"step": 12275
},
{
"epoch": 8.41313269493844,
"grad_norm": 4.094404697418213,
"learning_rate": 8.857729138166896e-06,
"loss": 0.3551,
"step": 12300
},
{
"epoch": 8.430232558139535,
"grad_norm": 9.859013557434082,
"learning_rate": 8.762729898160816e-06,
"loss": 0.3046,
"step": 12325
},
{
"epoch": 8.44733242134063,
"grad_norm": 7.303380966186523,
"learning_rate": 8.667730658154734e-06,
"loss": 0.2405,
"step": 12350
},
{
"epoch": 8.464432284541724,
"grad_norm": 11.945883750915527,
"learning_rate": 8.572731418148655e-06,
"loss": 0.367,
"step": 12375
},
{
"epoch": 8.481532147742818,
"grad_norm": 8.770705223083496,
"learning_rate": 8.477732178142575e-06,
"loss": 0.3977,
"step": 12400
},
{
"epoch": 8.498632010943913,
"grad_norm": 5.229104042053223,
"learning_rate": 8.382732938136495e-06,
"loss": 0.3075,
"step": 12425
},
{
"epoch": 8.515731874145008,
"grad_norm": 44.49745178222656,
"learning_rate": 8.287733698130415e-06,
"loss": 0.373,
"step": 12450
},
{
"epoch": 8.5328317373461,
"grad_norm": 11.067756652832031,
"learning_rate": 8.192734458124334e-06,
"loss": 0.5501,
"step": 12475
},
{
"epoch": 8.549931600547195,
"grad_norm": 3.7558584213256836,
"learning_rate": 8.097735218118254e-06,
"loss": 0.3831,
"step": 12500
},
{
"epoch": 8.56703146374829,
"grad_norm": 6.008462429046631,
"learning_rate": 8.002735978112176e-06,
"loss": 0.2394,
"step": 12525
},
{
"epoch": 8.584131326949384,
"grad_norm": 10.782341003417969,
"learning_rate": 7.907736738106097e-06,
"loss": 0.2815,
"step": 12550
},
{
"epoch": 8.601231190150479,
"grad_norm": 3.08451247215271,
"learning_rate": 7.812737498100015e-06,
"loss": 0.4385,
"step": 12575
},
{
"epoch": 8.618331053351573,
"grad_norm": 2.4561235904693604,
"learning_rate": 7.717738258093935e-06,
"loss": 0.3698,
"step": 12600
},
{
"epoch": 8.635430916552668,
"grad_norm": 6.739116668701172,
"learning_rate": 7.622739018087856e-06,
"loss": 0.3201,
"step": 12625
},
{
"epoch": 8.652530779753763,
"grad_norm": 11.243478775024414,
"learning_rate": 7.527739778081776e-06,
"loss": 0.4415,
"step": 12650
},
{
"epoch": 8.669630642954857,
"grad_norm": 3.1412322521209717,
"learning_rate": 7.432740538075695e-06,
"loss": 0.2533,
"step": 12675
},
{
"epoch": 8.68673050615595,
"grad_norm": 14.60197639465332,
"learning_rate": 7.337741298069616e-06,
"loss": 0.4057,
"step": 12700
},
{
"epoch": 8.703830369357044,
"grad_norm": 9.934842109680176,
"learning_rate": 7.242742058063535e-06,
"loss": 0.3252,
"step": 12725
},
{
"epoch": 8.720930232558139,
"grad_norm": 1.3907521963119507,
"learning_rate": 7.147742818057455e-06,
"loss": 0.4068,
"step": 12750
},
{
"epoch": 8.738030095759234,
"grad_norm": 5.904654502868652,
"learning_rate": 7.052743578051376e-06,
"loss": 0.3572,
"step": 12775
},
{
"epoch": 8.755129958960328,
"grad_norm": 12.644196510314941,
"learning_rate": 6.957744338045295e-06,
"loss": 0.3342,
"step": 12800
},
{
"epoch": 8.772229822161423,
"grad_norm": 13.406341552734375,
"learning_rate": 6.862745098039216e-06,
"loss": 0.3859,
"step": 12825
},
{
"epoch": 8.789329685362517,
"grad_norm": 7.523469924926758,
"learning_rate": 6.7677458580331365e-06,
"loss": 0.2771,
"step": 12850
},
{
"epoch": 8.806429548563612,
"grad_norm": 2.058061122894287,
"learning_rate": 6.672746618027057e-06,
"loss": 0.3956,
"step": 12875
},
{
"epoch": 8.823529411764707,
"grad_norm": 13.852447509765625,
"learning_rate": 6.577747378020976e-06,
"loss": 0.288,
"step": 12900
},
{
"epoch": 8.840629274965801,
"grad_norm": 3.28694748878479,
"learning_rate": 6.4827481380148965e-06,
"loss": 0.3175,
"step": 12925
},
{
"epoch": 8.857729138166894,
"grad_norm": 4.923558235168457,
"learning_rate": 6.387748898008816e-06,
"loss": 0.4003,
"step": 12950
},
{
"epoch": 8.874829001367988,
"grad_norm": 13.867571830749512,
"learning_rate": 6.292749658002736e-06,
"loss": 0.3514,
"step": 12975
},
{
"epoch": 8.891928864569083,
"grad_norm": 3.354799747467041,
"learning_rate": 6.1977504179966565e-06,
"loss": 0.3073,
"step": 13000
},
{
"epoch": 8.909028727770178,
"grad_norm": 20.982271194458008,
"learning_rate": 6.102751177990576e-06,
"loss": 0.4001,
"step": 13025
},
{
"epoch": 8.926128590971272,
"grad_norm": 1.5266101360321045,
"learning_rate": 6.007751937984497e-06,
"loss": 0.2836,
"step": 13050
},
{
"epoch": 8.943228454172367,
"grad_norm": 4.203621864318848,
"learning_rate": 5.9127526979784164e-06,
"loss": 0.3879,
"step": 13075
},
{
"epoch": 8.960328317373461,
"grad_norm": 13.059199333190918,
"learning_rate": 5.817753457972337e-06,
"loss": 0.2895,
"step": 13100
},
{
"epoch": 8.977428180574556,
"grad_norm": 11.570258140563965,
"learning_rate": 5.722754217966256e-06,
"loss": 0.3616,
"step": 13125
},
{
"epoch": 8.99452804377565,
"grad_norm": 31.507492065429688,
"learning_rate": 5.627754977960176e-06,
"loss": 0.3524,
"step": 13150
},
{
"epoch": 9.0,
"eval_accuracy": 0.9231819733697507,
"eval_f1_macro": 0.8652261940397432,
"eval_f1_micro": 0.9231819733697507,
"eval_f1_weighted": 0.9229570596156854,
"eval_loss": 0.22409066557884216,
"eval_precision_macro": 0.939434014505588,
"eval_precision_micro": 0.9231819733697507,
"eval_precision_weighted": 0.928643976460822,
"eval_recall_macro": 0.824494199524642,
"eval_recall_micro": 0.9231819733697507,
"eval_recall_weighted": 0.9231819733697507,
"eval_runtime": 19.264,
"eval_samples_per_second": 152.046,
"eval_steps_per_second": 9.552,
"step": 13158
},
{
"epoch": 9.011627906976743,
"grad_norm": 3.1529383659362793,
"learning_rate": 5.532755737954097e-06,
"loss": 0.2935,
"step": 13175
},
{
"epoch": 9.028727770177838,
"grad_norm": 1.6082165241241455,
"learning_rate": 5.437756497948017e-06,
"loss": 0.2694,
"step": 13200
},
{
"epoch": 9.045827633378932,
"grad_norm": 6.932997703552246,
"learning_rate": 5.342757257941937e-06,
"loss": 0.4234,
"step": 13225
},
{
"epoch": 9.062927496580027,
"grad_norm": 2.4087891578674316,
"learning_rate": 5.247758017935857e-06,
"loss": 0.297,
"step": 13250
},
{
"epoch": 9.080027359781122,
"grad_norm": 8.607876777648926,
"learning_rate": 5.152758777929777e-06,
"loss": 0.3279,
"step": 13275
},
{
"epoch": 9.097127222982216,
"grad_norm": 4.843038082122803,
"learning_rate": 5.057759537923696e-06,
"loss": 0.2534,
"step": 13300
},
{
"epoch": 9.11422708618331,
"grad_norm": 9.388402938842773,
"learning_rate": 4.962760297917617e-06,
"loss": 0.2849,
"step": 13325
},
{
"epoch": 9.131326949384405,
"grad_norm": 2.4661998748779297,
"learning_rate": 4.867761057911537e-06,
"loss": 0.3157,
"step": 13350
},
{
"epoch": 9.1484268125855,
"grad_norm": 13.333016395568848,
"learning_rate": 4.772761817905457e-06,
"loss": 0.2772,
"step": 13375
},
{
"epoch": 9.165526675786595,
"grad_norm": 6.937953948974609,
"learning_rate": 4.6777625778993775e-06,
"loss": 0.3582,
"step": 13400
},
{
"epoch": 9.182626538987687,
"grad_norm": 5.6831159591674805,
"learning_rate": 4.582763337893297e-06,
"loss": 0.3183,
"step": 13425
},
{
"epoch": 9.199726402188782,
"grad_norm": 7.25540018081665,
"learning_rate": 4.487764097887217e-06,
"loss": 0.4322,
"step": 13450
},
{
"epoch": 9.216826265389876,
"grad_norm": 4.3177103996276855,
"learning_rate": 4.392764857881137e-06,
"loss": 0.3983,
"step": 13475
},
{
"epoch": 9.233926128590971,
"grad_norm": 15.372535705566406,
"learning_rate": 4.297765617875058e-06,
"loss": 0.3684,
"step": 13500
},
{
"epoch": 9.251025991792066,
"grad_norm": 8.219186782836914,
"learning_rate": 4.202766377868977e-06,
"loss": 0.2893,
"step": 13525
},
{
"epoch": 9.26812585499316,
"grad_norm": 14.162530899047852,
"learning_rate": 4.1077671378628974e-06,
"loss": 0.3841,
"step": 13550
},
{
"epoch": 9.285225718194255,
"grad_norm": 2.816765308380127,
"learning_rate": 4.012767897856817e-06,
"loss": 0.4276,
"step": 13575
},
{
"epoch": 9.30232558139535,
"grad_norm": 1.3700157403945923,
"learning_rate": 3.917768657850737e-06,
"loss": 0.4496,
"step": 13600
},
{
"epoch": 9.319425444596444,
"grad_norm": 8.893135070800781,
"learning_rate": 3.822769417844657e-06,
"loss": 0.2779,
"step": 13625
},
{
"epoch": 9.336525307797537,
"grad_norm": 6.580329895019531,
"learning_rate": 3.7277701778385777e-06,
"loss": 0.341,
"step": 13650
},
{
"epoch": 9.353625170998631,
"grad_norm": 6.170793533325195,
"learning_rate": 3.6327709378324975e-06,
"loss": 0.3211,
"step": 13675
},
{
"epoch": 9.370725034199726,
"grad_norm": 6.2319254875183105,
"learning_rate": 3.537771697826418e-06,
"loss": 0.3529,
"step": 13700
},
{
"epoch": 9.38782489740082,
"grad_norm": 3.14901065826416,
"learning_rate": 3.4427724578203377e-06,
"loss": 0.2847,
"step": 13725
},
{
"epoch": 9.404924760601915,
"grad_norm": 12.451719284057617,
"learning_rate": 3.3477732178142575e-06,
"loss": 0.3679,
"step": 13750
},
{
"epoch": 9.42202462380301,
"grad_norm": 2.5386195182800293,
"learning_rate": 3.2527739778081774e-06,
"loss": 0.2476,
"step": 13775
},
{
"epoch": 9.439124487004104,
"grad_norm": 11.419671058654785,
"learning_rate": 3.157774737802098e-06,
"loss": 0.3914,
"step": 13800
},
{
"epoch": 9.456224350205199,
"grad_norm": 23.787368774414062,
"learning_rate": 3.0627754977960175e-06,
"loss": 0.3023,
"step": 13825
},
{
"epoch": 9.473324213406293,
"grad_norm": 13.726613998413086,
"learning_rate": 2.9677762577899378e-06,
"loss": 0.3243,
"step": 13850
},
{
"epoch": 9.490424076607388,
"grad_norm": 3.7777926921844482,
"learning_rate": 2.8727770177838576e-06,
"loss": 0.3515,
"step": 13875
},
{
"epoch": 9.50752393980848,
"grad_norm": 3.651082992553711,
"learning_rate": 2.777777777777778e-06,
"loss": 0.3076,
"step": 13900
},
{
"epoch": 9.524623803009575,
"grad_norm": 2.7207062244415283,
"learning_rate": 2.682778537771698e-06,
"loss": 0.3613,
"step": 13925
},
{
"epoch": 9.54172366621067,
"grad_norm": 6.451671600341797,
"learning_rate": 2.587779297765618e-06,
"loss": 0.2136,
"step": 13950
},
{
"epoch": 9.558823529411764,
"grad_norm": 10.220746040344238,
"learning_rate": 2.492780057759538e-06,
"loss": 0.3348,
"step": 13975
},
{
"epoch": 9.575923392612859,
"grad_norm": 14.093595504760742,
"learning_rate": 2.397780817753458e-06,
"loss": 0.2827,
"step": 14000
},
{
"epoch": 9.593023255813954,
"grad_norm": 2.391063928604126,
"learning_rate": 2.302781577747378e-06,
"loss": 0.3201,
"step": 14025
},
{
"epoch": 9.610123119015048,
"grad_norm": 15.106823921203613,
"learning_rate": 2.207782337741298e-06,
"loss": 0.3192,
"step": 14050
},
{
"epoch": 9.627222982216143,
"grad_norm": 4.812911510467529,
"learning_rate": 2.112783097735218e-06,
"loss": 0.3065,
"step": 14075
},
{
"epoch": 9.644322845417237,
"grad_norm": 4.565815448760986,
"learning_rate": 2.0177838577291384e-06,
"loss": 0.3443,
"step": 14100
},
{
"epoch": 9.661422708618332,
"grad_norm": 0.13094140589237213,
"learning_rate": 1.9227846177230583e-06,
"loss": 0.2713,
"step": 14125
},
{
"epoch": 9.678522571819425,
"grad_norm": 22.36683464050293,
"learning_rate": 1.8277853777169783e-06,
"loss": 0.314,
"step": 14150
},
{
"epoch": 9.69562243502052,
"grad_norm": 8.649237632751465,
"learning_rate": 1.7327861377108984e-06,
"loss": 0.3816,
"step": 14175
},
{
"epoch": 9.712722298221614,
"grad_norm": 2.255821466445923,
"learning_rate": 1.6377868977048183e-06,
"loss": 0.3827,
"step": 14200
},
{
"epoch": 9.729822161422709,
"grad_norm": 5.888030052185059,
"learning_rate": 1.5427876576987383e-06,
"loss": 0.3207,
"step": 14225
},
{
"epoch": 9.746922024623803,
"grad_norm": 1.6394869089126587,
"learning_rate": 1.4477884176926586e-06,
"loss": 0.2782,
"step": 14250
},
{
"epoch": 9.764021887824898,
"grad_norm": 0.9336591362953186,
"learning_rate": 1.3527891776865787e-06,
"loss": 0.3653,
"step": 14275
},
{
"epoch": 9.781121751025992,
"grad_norm": 8.919906616210938,
"learning_rate": 1.2577899376804985e-06,
"loss": 0.2396,
"step": 14300
},
{
"epoch": 9.798221614227087,
"grad_norm": 6.571496963500977,
"learning_rate": 1.1627906976744186e-06,
"loss": 0.303,
"step": 14325
},
{
"epoch": 9.815321477428181,
"grad_norm": 13.167415618896484,
"learning_rate": 1.0677914576683389e-06,
"loss": 0.2993,
"step": 14350
},
{
"epoch": 9.832421340629274,
"grad_norm": 1.0842267274856567,
"learning_rate": 9.727922176622587e-07,
"loss": 0.3435,
"step": 14375
},
{
"epoch": 9.849521203830369,
"grad_norm": 4.068078517913818,
"learning_rate": 8.777929776561788e-07,
"loss": 0.2995,
"step": 14400
},
{
"epoch": 9.866621067031463,
"grad_norm": 11.969517707824707,
"learning_rate": 7.827937376500988e-07,
"loss": 0.3289,
"step": 14425
},
{
"epoch": 9.883720930232558,
"grad_norm": 9.880623817443848,
"learning_rate": 6.877944976440189e-07,
"loss": 0.292,
"step": 14450
},
{
"epoch": 9.900820793433653,
"grad_norm": 11.973766326904297,
"learning_rate": 5.92795257637939e-07,
"loss": 0.3398,
"step": 14475
},
{
"epoch": 9.917920656634747,
"grad_norm": 2.8612163066864014,
"learning_rate": 4.977960176318589e-07,
"loss": 0.414,
"step": 14500
},
{
"epoch": 9.935020519835842,
"grad_norm": 31.290515899658203,
"learning_rate": 4.0279677762577904e-07,
"loss": 0.3436,
"step": 14525
},
{
"epoch": 9.952120383036936,
"grad_norm": 6.3889241218566895,
"learning_rate": 3.0779753761969905e-07,
"loss": 0.3069,
"step": 14550
},
{
"epoch": 9.96922024623803,
"grad_norm": 9.988734245300293,
"learning_rate": 2.127982976136191e-07,
"loss": 0.4289,
"step": 14575
},
{
"epoch": 9.986320109439124,
"grad_norm": 13.143084526062012,
"learning_rate": 1.1779905760753915e-07,
"loss": 0.2766,
"step": 14600
},
{
"epoch": 10.0,
"eval_accuracy": 0.9269375213383407,
"eval_f1_macro": 0.881587062204185,
"eval_f1_micro": 0.9269375213383407,
"eval_f1_weighted": 0.9267500134300362,
"eval_loss": 0.22053596377372742,
"eval_precision_macro": 0.9520135455160805,
"eval_precision_micro": 0.9269375213383407,
"eval_precision_weighted": 0.932072731880276,
"eval_recall_macro": 0.8425714533291321,
"eval_recall_micro": 0.9269375213383407,
"eval_recall_weighted": 0.9269375213383407,
"eval_runtime": 19.2006,
"eval_samples_per_second": 152.547,
"eval_steps_per_second": 9.583,
"step": 14620
}
],
"logging_steps": 25,
"max_steps": 14620,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 9.058483691559752e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}