|
{ |
|
"best_metric": 0.22053596377372742, |
|
"best_model_checkpoint": "autotrain-beit-base-patch16-224/checkpoint-14620", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 14620, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01709986320109439, |
|
"grad_norm": 46.709171295166016, |
|
"learning_rate": 7.523939808481532e-07, |
|
"loss": 2.7484, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03419972640218878, |
|
"grad_norm": 38.754066467285156, |
|
"learning_rate": 1.6073871409028727e-06, |
|
"loss": 2.4985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05129958960328317, |
|
"grad_norm": 43.13520431518555, |
|
"learning_rate": 2.4623803009575924e-06, |
|
"loss": 2.1311, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06839945280437756, |
|
"grad_norm": 51.77237319946289, |
|
"learning_rate": 3.3173734610123124e-06, |
|
"loss": 1.916, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08549931600547196, |
|
"grad_norm": 52.562774658203125, |
|
"learning_rate": 4.138166894664843e-06, |
|
"loss": 1.5007, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10259917920656635, |
|
"grad_norm": 48.85651779174805, |
|
"learning_rate": 4.993160054719562e-06, |
|
"loss": 1.5403, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11969904240766074, |
|
"grad_norm": 34.78243637084961, |
|
"learning_rate": 5.848153214774282e-06, |
|
"loss": 1.3732, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13679890560875513, |
|
"grad_norm": 36.370094299316406, |
|
"learning_rate": 6.7031463748290014e-06, |
|
"loss": 1.32, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1538987688098495, |
|
"grad_norm": 36.674888610839844, |
|
"learning_rate": 7.558139534883721e-06, |
|
"loss": 1.2517, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.17099863201094392, |
|
"grad_norm": 39.88427734375, |
|
"learning_rate": 8.41313269493844e-06, |
|
"loss": 1.1245, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1880984952120383, |
|
"grad_norm": 37.10840606689453, |
|
"learning_rate": 9.26812585499316e-06, |
|
"loss": 1.0716, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2051983584131327, |
|
"grad_norm": 40.91892623901367, |
|
"learning_rate": 1.0123119015047879e-05, |
|
"loss": 1.2636, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22229822161422708, |
|
"grad_norm": 25.208866119384766, |
|
"learning_rate": 1.09781121751026e-05, |
|
"loss": 1.0415, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2393980848153215, |
|
"grad_norm": 32.90418243408203, |
|
"learning_rate": 1.183310533515732e-05, |
|
"loss": 0.9204, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25649794801641584, |
|
"grad_norm": 22.914512634277344, |
|
"learning_rate": 1.2688098495212038e-05, |
|
"loss": 0.8906, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.27359781121751026, |
|
"grad_norm": 26.361547470092773, |
|
"learning_rate": 1.354309165526676e-05, |
|
"loss": 0.7451, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29069767441860467, |
|
"grad_norm": 6.595183849334717, |
|
"learning_rate": 1.4398084815321477e-05, |
|
"loss": 0.8366, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.307797537619699, |
|
"grad_norm": 21.60556983947754, |
|
"learning_rate": 1.5253077975376198e-05, |
|
"loss": 0.8602, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32489740082079344, |
|
"grad_norm": 19.94957160949707, |
|
"learning_rate": 1.6108071135430915e-05, |
|
"loss": 0.8112, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.34199726402188785, |
|
"grad_norm": 18.41588020324707, |
|
"learning_rate": 1.6963064295485636e-05, |
|
"loss": 0.7563, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3590971272229822, |
|
"grad_norm": 26.010887145996094, |
|
"learning_rate": 1.7818057455540357e-05, |
|
"loss": 0.745, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3761969904240766, |
|
"grad_norm": 29.746938705444336, |
|
"learning_rate": 1.8673050615595075e-05, |
|
"loss": 0.9174, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.393296853625171, |
|
"grad_norm": 9.71439266204834, |
|
"learning_rate": 1.9528043775649796e-05, |
|
"loss": 0.7998, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4103967168262654, |
|
"grad_norm": 25.851831436157227, |
|
"learning_rate": 2.0383036935704516e-05, |
|
"loss": 0.8316, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4274965800273598, |
|
"grad_norm": 6.615222454071045, |
|
"learning_rate": 2.1238030095759234e-05, |
|
"loss": 0.7572, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.44459644322845415, |
|
"grad_norm": 21.029979705810547, |
|
"learning_rate": 2.2093023255813955e-05, |
|
"loss": 0.8479, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46169630642954856, |
|
"grad_norm": 34.08558654785156, |
|
"learning_rate": 2.2948016415868672e-05, |
|
"loss": 0.9019, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.478796169630643, |
|
"grad_norm": 21.173648834228516, |
|
"learning_rate": 2.3803009575923393e-05, |
|
"loss": 0.6358, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.49589603283173733, |
|
"grad_norm": 28.03355598449707, |
|
"learning_rate": 2.4658002735978114e-05, |
|
"loss": 0.6318, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5129958960328317, |
|
"grad_norm": 32.51506423950195, |
|
"learning_rate": 2.5512995896032832e-05, |
|
"loss": 0.7156, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5300957592339262, |
|
"grad_norm": 15.213716506958008, |
|
"learning_rate": 2.6367989056087556e-05, |
|
"loss": 0.7459, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5471956224350205, |
|
"grad_norm": 25.61234474182129, |
|
"learning_rate": 2.7222982216142274e-05, |
|
"loss": 0.6615, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5642954856361149, |
|
"grad_norm": 6.342990875244141, |
|
"learning_rate": 2.807797537619699e-05, |
|
"loss": 0.6494, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 8.253657341003418, |
|
"learning_rate": 2.893296853625171e-05, |
|
"loss": 0.7401, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5984952120383037, |
|
"grad_norm": 20.726346969604492, |
|
"learning_rate": 2.9787961696306433e-05, |
|
"loss": 0.8272, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.615595075239398, |
|
"grad_norm": 14.493860244750977, |
|
"learning_rate": 3.064295485636115e-05, |
|
"loss": 0.7291, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6326949384404925, |
|
"grad_norm": 26.049036026000977, |
|
"learning_rate": 3.149794801641587e-05, |
|
"loss": 0.7125, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6497948016415869, |
|
"grad_norm": 24.505985260009766, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.7522, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6668946648426812, |
|
"grad_norm": 16.45219612121582, |
|
"learning_rate": 3.3207934336525306e-05, |
|
"loss": 0.9847, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6839945280437757, |
|
"grad_norm": 16.54450798034668, |
|
"learning_rate": 3.406292749658003e-05, |
|
"loss": 0.6838, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.70109439124487, |
|
"grad_norm": 20.877397537231445, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.8538, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7181942544459644, |
|
"grad_norm": 16.170602798461914, |
|
"learning_rate": 3.573871409028728e-05, |
|
"loss": 0.5981, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 4.53754186630249, |
|
"learning_rate": 3.6593707250342e-05, |
|
"loss": 0.7112, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7523939808481532, |
|
"grad_norm": 104.9443359375, |
|
"learning_rate": 3.741450068399453e-05, |
|
"loss": 0.8659, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7694938440492476, |
|
"grad_norm": 9.153970718383789, |
|
"learning_rate": 3.826949384404925e-05, |
|
"loss": 0.7048, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.786593707250342, |
|
"grad_norm": 8.457886695861816, |
|
"learning_rate": 3.912448700410397e-05, |
|
"loss": 0.6735, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8036935704514364, |
|
"grad_norm": 22.64398765563965, |
|
"learning_rate": 3.997948016415869e-05, |
|
"loss": 0.6603, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8207934336525308, |
|
"grad_norm": 8.883172988891602, |
|
"learning_rate": 4.083447332421341e-05, |
|
"loss": 0.6347, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8378932968536251, |
|
"grad_norm": 15.982242584228516, |
|
"learning_rate": 4.168946648426813e-05, |
|
"loss": 0.8553, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8549931600547196, |
|
"grad_norm": 17.371896743774414, |
|
"learning_rate": 4.2544459644322845e-05, |
|
"loss": 0.8338, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"grad_norm": 9.03439998626709, |
|
"learning_rate": 4.3399452804377566e-05, |
|
"loss": 0.6968, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8891928864569083, |
|
"grad_norm": 12.585426330566406, |
|
"learning_rate": 4.425444596443229e-05, |
|
"loss": 0.7398, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9062927496580028, |
|
"grad_norm": 24.8383731842041, |
|
"learning_rate": 4.510943912448701e-05, |
|
"loss": 0.74, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9233926128590971, |
|
"grad_norm": 23.374618530273438, |
|
"learning_rate": 4.596443228454172e-05, |
|
"loss": 0.8325, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9404924760601915, |
|
"grad_norm": 18.9708251953125, |
|
"learning_rate": 4.681942544459644e-05, |
|
"loss": 0.705, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.957592339261286, |
|
"grad_norm": 17.654476165771484, |
|
"learning_rate": 4.7674418604651164e-05, |
|
"loss": 0.6591, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9746922024623803, |
|
"grad_norm": 15.2923583984375, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 0.6672, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9917920656634747, |
|
"grad_norm": 20.557374954223633, |
|
"learning_rate": 4.93844049247606e-05, |
|
"loss": 0.7082, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7678388528508023, |
|
"eval_f1_macro": 0.36714416708471204, |
|
"eval_f1_micro": 0.7678388528508023, |
|
"eval_f1_weighted": 0.7507762998391535, |
|
"eval_loss": 0.7348673939704895, |
|
"eval_precision_macro": 0.4581539504891226, |
|
"eval_precision_micro": 0.7678388528508023, |
|
"eval_precision_weighted": 0.7986681621261933, |
|
"eval_recall_macro": 0.3826291513511869, |
|
"eval_recall_micro": 0.7678388528508023, |
|
"eval_recall_weighted": 0.7678388528508023, |
|
"eval_runtime": 19.2382, |
|
"eval_samples_per_second": 152.249, |
|
"eval_steps_per_second": 9.564, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.008891928864569, |
|
"grad_norm": 8.590130805969238, |
|
"learning_rate": 4.99734002127983e-05, |
|
"loss": 0.6613, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.0259917920656634, |
|
"grad_norm": 7.963166236877441, |
|
"learning_rate": 4.987840097279222e-05, |
|
"loss": 0.6899, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.043091655266758, |
|
"grad_norm": 21.036991119384766, |
|
"learning_rate": 4.978340173278614e-05, |
|
"loss": 0.7883, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0601915184678523, |
|
"grad_norm": 18.586833953857422, |
|
"learning_rate": 4.968840249278006e-05, |
|
"loss": 0.5927, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0772913816689467, |
|
"grad_norm": 10.346550941467285, |
|
"learning_rate": 4.959340325277398e-05, |
|
"loss": 0.7403, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.094391244870041, |
|
"grad_norm": 15.1462984085083, |
|
"learning_rate": 4.94984040127679e-05, |
|
"loss": 0.67, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1114911080711354, |
|
"grad_norm": 29.532644271850586, |
|
"learning_rate": 4.940340477276182e-05, |
|
"loss": 0.724, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.1285909712722297, |
|
"grad_norm": 20.413328170776367, |
|
"learning_rate": 4.930840553275574e-05, |
|
"loss": 0.7664, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1456908344733243, |
|
"grad_norm": 6.083666801452637, |
|
"learning_rate": 4.921340629274966e-05, |
|
"loss": 0.6986, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 10.168739318847656, |
|
"learning_rate": 4.911840705274358e-05, |
|
"loss": 0.6873, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.179890560875513, |
|
"grad_norm": 16.213897705078125, |
|
"learning_rate": 4.90234078127375e-05, |
|
"loss": 0.6432, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.1969904240766074, |
|
"grad_norm": 14.341080665588379, |
|
"learning_rate": 4.892840857273142e-05, |
|
"loss": 0.6205, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2140902872777017, |
|
"grad_norm": 8.960565567016602, |
|
"learning_rate": 4.883340933272534e-05, |
|
"loss": 0.7297, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.231190150478796, |
|
"grad_norm": 3.6972837448120117, |
|
"learning_rate": 4.8738410092719264e-05, |
|
"loss": 0.5647, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2482900136798905, |
|
"grad_norm": 17.91363525390625, |
|
"learning_rate": 4.864341085271318e-05, |
|
"loss": 0.5821, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.265389876880985, |
|
"grad_norm": 16.03091049194336, |
|
"learning_rate": 4.8548411612707104e-05, |
|
"loss": 0.8215, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2824897400820794, |
|
"grad_norm": 20.258285522460938, |
|
"learning_rate": 4.845341237270102e-05, |
|
"loss": 0.6713, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.2995896032831737, |
|
"grad_norm": 21.53881072998047, |
|
"learning_rate": 4.835841313269494e-05, |
|
"loss": 0.6425, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.316689466484268, |
|
"grad_norm": 17.40162467956543, |
|
"learning_rate": 4.826341389268886e-05, |
|
"loss": 0.7661, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.3337893296853625, |
|
"grad_norm": 11.091559410095215, |
|
"learning_rate": 4.816841465268278e-05, |
|
"loss": 0.6502, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.350889192886457, |
|
"grad_norm": 16.19685173034668, |
|
"learning_rate": 4.80734154126767e-05, |
|
"loss": 0.6064, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.3679890560875512, |
|
"grad_norm": 18.266754150390625, |
|
"learning_rate": 4.797841617267062e-05, |
|
"loss": 0.7654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3850889192886457, |
|
"grad_norm": 2.7492332458496094, |
|
"learning_rate": 4.788341693266454e-05, |
|
"loss": 0.6592, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.40218878248974, |
|
"grad_norm": 6.613536357879639, |
|
"learning_rate": 4.778841769265846e-05, |
|
"loss": 0.8603, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4192886456908345, |
|
"grad_norm": 12.348869323730469, |
|
"learning_rate": 4.769341845265238e-05, |
|
"loss": 0.7457, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.4363885088919288, |
|
"grad_norm": 10.183746337890625, |
|
"learning_rate": 4.75984192126463e-05, |
|
"loss": 0.5621, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.4534883720930232, |
|
"grad_norm": 6.9152045249938965, |
|
"learning_rate": 4.7503419972640224e-05, |
|
"loss": 0.6217, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 9.911653518676758, |
|
"learning_rate": 4.740842073263414e-05, |
|
"loss": 0.8387, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.487688098495212, |
|
"grad_norm": 19.982093811035156, |
|
"learning_rate": 4.7313421492628064e-05, |
|
"loss": 0.5981, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.5047879616963065, |
|
"grad_norm": 11.546788215637207, |
|
"learning_rate": 4.721842225262198e-05, |
|
"loss": 0.6257, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5218878248974008, |
|
"grad_norm": 11.54325008392334, |
|
"learning_rate": 4.7123423012615905e-05, |
|
"loss": 0.6088, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.5389876880984952, |
|
"grad_norm": 9.77811050415039, |
|
"learning_rate": 4.703222374221006e-05, |
|
"loss": 0.4845, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.5560875512995898, |
|
"grad_norm": 9.156554222106934, |
|
"learning_rate": 4.6937224502203985e-05, |
|
"loss": 0.701, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.573187414500684, |
|
"grad_norm": 9.874088287353516, |
|
"learning_rate": 4.68422252621979e-05, |
|
"loss": 0.7453, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5902872777017785, |
|
"grad_norm": 9.201342582702637, |
|
"learning_rate": 4.6747226022191826e-05, |
|
"loss": 0.5633, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.6073871409028728, |
|
"grad_norm": 29.79956817626953, |
|
"learning_rate": 4.665222678218574e-05, |
|
"loss": 0.6438, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.6244870041039672, |
|
"grad_norm": 8.855256080627441, |
|
"learning_rate": 4.6557227542179666e-05, |
|
"loss": 0.5698, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.6415868673050615, |
|
"grad_norm": 8.34626579284668, |
|
"learning_rate": 4.646222830217358e-05, |
|
"loss": 0.7143, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.658686730506156, |
|
"grad_norm": 18.65626335144043, |
|
"learning_rate": 4.636722906216751e-05, |
|
"loss": 0.6445, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.6757865937072505, |
|
"grad_norm": 8.134085655212402, |
|
"learning_rate": 4.6272229822161424e-05, |
|
"loss": 0.4898, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.6928864569083446, |
|
"grad_norm": 15.127388000488281, |
|
"learning_rate": 4.617723058215535e-05, |
|
"loss": 0.6497, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.7099863201094392, |
|
"grad_norm": 17.599140167236328, |
|
"learning_rate": 4.6082231342149264e-05, |
|
"loss": 0.6605, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.7270861833105335, |
|
"grad_norm": 22.064022064208984, |
|
"learning_rate": 4.598723210214319e-05, |
|
"loss": 0.5562, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"grad_norm": 8.446691513061523, |
|
"learning_rate": 4.5892232862137105e-05, |
|
"loss": 0.591, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.7612859097127223, |
|
"grad_norm": 15.194252967834473, |
|
"learning_rate": 4.579723362213102e-05, |
|
"loss": 0.635, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.7783857729138166, |
|
"grad_norm": 14.25900936126709, |
|
"learning_rate": 4.5702234382124946e-05, |
|
"loss": 0.6253, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7954856361149112, |
|
"grad_norm": 9.975160598754883, |
|
"learning_rate": 4.560723514211886e-05, |
|
"loss": 0.4944, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.8125854993160053, |
|
"grad_norm": 12.048364639282227, |
|
"learning_rate": 4.5512235902112786e-05, |
|
"loss": 0.5643, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.8296853625171, |
|
"grad_norm": 16.70762825012207, |
|
"learning_rate": 4.54172366621067e-05, |
|
"loss": 0.6445, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.8467852257181943, |
|
"grad_norm": 15.203225135803223, |
|
"learning_rate": 4.532223742210063e-05, |
|
"loss": 0.4731, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.8638850889192886, |
|
"grad_norm": 6.673511028289795, |
|
"learning_rate": 4.5227238182094544e-05, |
|
"loss": 0.6115, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.8809849521203832, |
|
"grad_norm": 3.219144105911255, |
|
"learning_rate": 4.513223894208847e-05, |
|
"loss": 0.6447, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.8980848153214773, |
|
"grad_norm": 10.122079849243164, |
|
"learning_rate": 4.5037239702082384e-05, |
|
"loss": 0.5048, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.915184678522572, |
|
"grad_norm": 8.148963928222656, |
|
"learning_rate": 4.494224046207631e-05, |
|
"loss": 0.522, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.9322845417236663, |
|
"grad_norm": 17.671016693115234, |
|
"learning_rate": 4.4847241222070225e-05, |
|
"loss": 0.4838, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.9493844049247606, |
|
"grad_norm": 11.81804370880127, |
|
"learning_rate": 4.475224198206415e-05, |
|
"loss": 0.6314, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.966484268125855, |
|
"grad_norm": 9.446462631225586, |
|
"learning_rate": 4.4657242742058065e-05, |
|
"loss": 0.6951, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.9835841313269493, |
|
"grad_norm": 0.3831145167350769, |
|
"learning_rate": 4.456224350205199e-05, |
|
"loss": 0.5709, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.890406282007511, |
|
"eval_f1_macro": 0.6277004140591871, |
|
"eval_f1_micro": 0.890406282007511, |
|
"eval_f1_weighted": 0.8835937168276483, |
|
"eval_loss": 0.3201202154159546, |
|
"eval_precision_macro": 0.7489533576066802, |
|
"eval_precision_micro": 0.890406282007511, |
|
"eval_precision_weighted": 0.8996932956273017, |
|
"eval_recall_macro": 0.6160822936733158, |
|
"eval_recall_micro": 0.890406282007511, |
|
"eval_recall_weighted": 0.890406282007511, |
|
"eval_runtime": 19.0268, |
|
"eval_samples_per_second": 153.941, |
|
"eval_steps_per_second": 9.671, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 2.000683994528044, |
|
"grad_norm": 8.932915687561035, |
|
"learning_rate": 4.4467244262045906e-05, |
|
"loss": 0.6213, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.017783857729138, |
|
"grad_norm": 53.977447509765625, |
|
"learning_rate": 4.437224502203983e-05, |
|
"loss": 0.4965, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.0348837209302326, |
|
"grad_norm": 5.680665969848633, |
|
"learning_rate": 4.4277245782033746e-05, |
|
"loss": 0.6174, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.0519835841313268, |
|
"grad_norm": 14.235191345214844, |
|
"learning_rate": 4.418224654202767e-05, |
|
"loss": 0.5522, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0690834473324213, |
|
"grad_norm": 3.3336021900177, |
|
"learning_rate": 4.408724730202159e-05, |
|
"loss": 0.6311, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.086183310533516, |
|
"grad_norm": 17.21300506591797, |
|
"learning_rate": 4.3992248062015504e-05, |
|
"loss": 0.6788, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.10328317373461, |
|
"grad_norm": 8.625337600708008, |
|
"learning_rate": 4.389724882200942e-05, |
|
"loss": 0.4847, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.1203830369357046, |
|
"grad_norm": 13.06096076965332, |
|
"learning_rate": 4.3802249582003344e-05, |
|
"loss": 0.5771, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.1374829001367988, |
|
"grad_norm": 14.993678092956543, |
|
"learning_rate": 4.370725034199726e-05, |
|
"loss": 0.495, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.1545827633378933, |
|
"grad_norm": 18.640869140625, |
|
"learning_rate": 4.3612251101991185e-05, |
|
"loss": 0.5442, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.1716826265389875, |
|
"grad_norm": 13.761073112487793, |
|
"learning_rate": 4.351725186198511e-05, |
|
"loss": 0.563, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.188782489740082, |
|
"grad_norm": 11.877754211425781, |
|
"learning_rate": 4.3422252621979025e-05, |
|
"loss": 0.8003, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 8.639129638671875, |
|
"learning_rate": 4.332725338197295e-05, |
|
"loss": 0.6284, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.2229822161422708, |
|
"grad_norm": 6.337215900421143, |
|
"learning_rate": 4.3232254141966866e-05, |
|
"loss": 0.5617, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.2400820793433653, |
|
"grad_norm": 1.9488357305526733, |
|
"learning_rate": 4.313725490196079e-05, |
|
"loss": 0.3964, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.2571819425444595, |
|
"grad_norm": 4.854595184326172, |
|
"learning_rate": 4.3042255661954706e-05, |
|
"loss": 0.6221, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.274281805745554, |
|
"grad_norm": 10.604134559631348, |
|
"learning_rate": 4.294725642194863e-05, |
|
"loss": 0.4416, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.2913816689466486, |
|
"grad_norm": 9.8331937789917, |
|
"learning_rate": 4.285225718194255e-05, |
|
"loss": 0.6644, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.3084815321477428, |
|
"grad_norm": 7.878199100494385, |
|
"learning_rate": 4.275725794193647e-05, |
|
"loss": 0.4562, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 5.875706195831299, |
|
"learning_rate": 4.266225870193039e-05, |
|
"loss": 0.6038, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.3426812585499315, |
|
"grad_norm": 1.360823631286621, |
|
"learning_rate": 4.256725946192431e-05, |
|
"loss": 0.3918, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.359781121751026, |
|
"grad_norm": 5.344891548156738, |
|
"learning_rate": 4.247226022191823e-05, |
|
"loss": 0.5924, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.37688098495212, |
|
"grad_norm": 19.596725463867188, |
|
"learning_rate": 4.2377260981912145e-05, |
|
"loss": 0.6252, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.3939808481532148, |
|
"grad_norm": 9.855287551879883, |
|
"learning_rate": 4.228226174190606e-05, |
|
"loss": 0.6215, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.4110807113543093, |
|
"grad_norm": 10.44688606262207, |
|
"learning_rate": 4.2187262501899986e-05, |
|
"loss": 0.4234, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.4281805745554035, |
|
"grad_norm": 8.25647258758545, |
|
"learning_rate": 4.20922632618939e-05, |
|
"loss": 0.4522, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.445280437756498, |
|
"grad_norm": 18.42440414428711, |
|
"learning_rate": 4.1997264021887826e-05, |
|
"loss": 0.5475, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.462380300957592, |
|
"grad_norm": 3.88397216796875, |
|
"learning_rate": 4.190226478188174e-05, |
|
"loss": 0.4464, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.4794801641586868, |
|
"grad_norm": 15.069050788879395, |
|
"learning_rate": 4.180726554187567e-05, |
|
"loss": 0.6738, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.496580027359781, |
|
"grad_norm": 5.434013366699219, |
|
"learning_rate": 4.1712266301869584e-05, |
|
"loss": 0.5139, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.5136798905608755, |
|
"grad_norm": 6.18742036819458, |
|
"learning_rate": 4.161726706186351e-05, |
|
"loss": 0.6905, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.53077975376197, |
|
"grad_norm": 4.691986560821533, |
|
"learning_rate": 4.1522267821857424e-05, |
|
"loss": 0.5514, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.547879616963064, |
|
"grad_norm": 11.21522331237793, |
|
"learning_rate": 4.142726858185135e-05, |
|
"loss": 0.5283, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.5649794801641588, |
|
"grad_norm": 18.263111114501953, |
|
"learning_rate": 4.1332269341845265e-05, |
|
"loss": 0.5471, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.582079343365253, |
|
"grad_norm": 2.245192766189575, |
|
"learning_rate": 4.123727010183919e-05, |
|
"loss": 0.4889, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.5991792065663475, |
|
"grad_norm": 8.650074005126953, |
|
"learning_rate": 4.114227086183311e-05, |
|
"loss": 0.5821, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.616279069767442, |
|
"grad_norm": 8.487887382507324, |
|
"learning_rate": 4.104727162182703e-05, |
|
"loss": 0.4633, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.633378932968536, |
|
"grad_norm": 3.491182327270508, |
|
"learning_rate": 4.095227238182095e-05, |
|
"loss": 0.4839, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.650478796169631, |
|
"grad_norm": 15.229668617248535, |
|
"learning_rate": 4.085727314181487e-05, |
|
"loss": 0.4741, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.667578659370725, |
|
"grad_norm": 5.991665363311768, |
|
"learning_rate": 4.0762273901808786e-05, |
|
"loss": 0.6269, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.6846785225718195, |
|
"grad_norm": 3.6225790977478027, |
|
"learning_rate": 4.066727466180271e-05, |
|
"loss": 0.5778, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.701778385772914, |
|
"grad_norm": 7.361936092376709, |
|
"learning_rate": 4.057227542179663e-05, |
|
"loss": 0.5857, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.718878248974008, |
|
"grad_norm": 18.498151779174805, |
|
"learning_rate": 4.0477276181790544e-05, |
|
"loss": 0.599, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.7359781121751023, |
|
"grad_norm": 11.898250579833984, |
|
"learning_rate": 4.038227694178447e-05, |
|
"loss": 0.5114, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.753077975376197, |
|
"grad_norm": 5.535077095031738, |
|
"learning_rate": 4.0287277701778384e-05, |
|
"loss": 0.5272, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.7701778385772915, |
|
"grad_norm": 2.3556160926818848, |
|
"learning_rate": 4.019227846177231e-05, |
|
"loss": 0.5648, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.7872777017783856, |
|
"grad_norm": 11.369132041931152, |
|
"learning_rate": 4.0097279221766225e-05, |
|
"loss": 0.5935, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.80437756497948, |
|
"grad_norm": 5.496129989624023, |
|
"learning_rate": 4.000227998176015e-05, |
|
"loss": 0.699, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.8214774281805743, |
|
"grad_norm": 12.352839469909668, |
|
"learning_rate": 3.9907280741754065e-05, |
|
"loss": 0.5325, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.838577291381669, |
|
"grad_norm": 2.7082407474517822, |
|
"learning_rate": 3.981228150174799e-05, |
|
"loss": 0.5331, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.8556771545827635, |
|
"grad_norm": 12.403038024902344, |
|
"learning_rate": 3.9717282261741906e-05, |
|
"loss": 0.6043, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.8727770177838576, |
|
"grad_norm": 12.153759002685547, |
|
"learning_rate": 3.962228302173583e-05, |
|
"loss": 0.4958, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.889876880984952, |
|
"grad_norm": 6.992998123168945, |
|
"learning_rate": 3.9527283781729746e-05, |
|
"loss": 0.3868, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.9069767441860463, |
|
"grad_norm": 3.785193681716919, |
|
"learning_rate": 3.943228454172367e-05, |
|
"loss": 0.5372, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.924076607387141, |
|
"grad_norm": 22.4363956451416, |
|
"learning_rate": 3.933728530171759e-05, |
|
"loss": 0.5244, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 3.622431516647339, |
|
"learning_rate": 3.924228606171151e-05, |
|
"loss": 0.4722, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.9582763337893296, |
|
"grad_norm": 1.2941017150878906, |
|
"learning_rate": 3.914728682170543e-05, |
|
"loss": 0.4208, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.975376196990424, |
|
"grad_norm": 10.482751846313477, |
|
"learning_rate": 3.905228758169935e-05, |
|
"loss": 0.6347, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.9924760601915183, |
|
"grad_norm": 4.376351356506348, |
|
"learning_rate": 3.895728834169327e-05, |
|
"loss": 0.6077, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.896551724137931, |
|
"eval_f1_macro": 0.6998434390712878, |
|
"eval_f1_micro": 0.896551724137931, |
|
"eval_f1_weighted": 0.8937364843753902, |
|
"eval_loss": 0.3129188120365143, |
|
"eval_precision_macro": 0.8102800926777759, |
|
"eval_precision_micro": 0.896551724137931, |
|
"eval_precision_weighted": 0.9029461578223588, |
|
"eval_recall_macro": 0.6655916075939068, |
|
"eval_recall_micro": 0.896551724137931, |
|
"eval_recall_weighted": 0.896551724137931, |
|
"eval_runtime": 18.8573, |
|
"eval_samples_per_second": 155.324, |
|
"eval_steps_per_second": 9.757, |
|
"step": 4386 |
|
}, |
|
{ |
|
"epoch": 3.009575923392613, |
|
"grad_norm": 8.401654243469238, |
|
"learning_rate": 3.8862289101687185e-05, |
|
"loss": 0.5739, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.026675786593707, |
|
"grad_norm": 10.48408031463623, |
|
"learning_rate": 3.876728986168111e-05, |
|
"loss": 0.6117, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 3.0437756497948016, |
|
"grad_norm": 19.265623092651367, |
|
"learning_rate": 3.8672290621675026e-05, |
|
"loss": 0.4945, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.060875512995896, |
|
"grad_norm": 25.774412155151367, |
|
"learning_rate": 3.857729138166895e-05, |
|
"loss": 0.5458, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 3.0779753761969904, |
|
"grad_norm": 4.172712326049805, |
|
"learning_rate": 3.8482292141662866e-05, |
|
"loss": 0.4408, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.095075239398085, |
|
"grad_norm": 5.7756876945495605, |
|
"learning_rate": 3.838729290165679e-05, |
|
"loss": 0.3037, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 3.112175102599179, |
|
"grad_norm": 12.178646087646484, |
|
"learning_rate": 3.829229366165071e-05, |
|
"loss": 0.6773, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.1292749658002736, |
|
"grad_norm": 4.9638800621032715, |
|
"learning_rate": 3.819729442164463e-05, |
|
"loss": 0.4036, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 3.146374829001368, |
|
"grad_norm": 6.199288845062256, |
|
"learning_rate": 3.810229518163855e-05, |
|
"loss": 0.5313, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.1634746922024624, |
|
"grad_norm": 19.781579971313477, |
|
"learning_rate": 3.800729594163247e-05, |
|
"loss": 0.5946, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 3.180574555403557, |
|
"grad_norm": 0.15058183670043945, |
|
"learning_rate": 3.791229670162639e-05, |
|
"loss": 0.516, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.197674418604651, |
|
"grad_norm": 13.215787887573242, |
|
"learning_rate": 3.781729746162031e-05, |
|
"loss": 0.4023, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 3.2147742818057456, |
|
"grad_norm": 5.896836757659912, |
|
"learning_rate": 3.772229822161423e-05, |
|
"loss": 0.4748, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.23187414500684, |
|
"grad_norm": 0.36866021156311035, |
|
"learning_rate": 3.762729898160815e-05, |
|
"loss": 0.5704, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 3.2489740082079344, |
|
"grad_norm": 10.511465072631836, |
|
"learning_rate": 3.753229974160207e-05, |
|
"loss": 0.5316, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.266073871409029, |
|
"grad_norm": 3.424712896347046, |
|
"learning_rate": 3.743730050159599e-05, |
|
"loss": 0.4717, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 3.283173734610123, |
|
"grad_norm": 14.572440147399902, |
|
"learning_rate": 3.734230126158991e-05, |
|
"loss": 0.6337, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.3002735978112177, |
|
"grad_norm": 10.70576286315918, |
|
"learning_rate": 3.724730202158383e-05, |
|
"loss": 0.6731, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 3.317373461012312, |
|
"grad_norm": 11.98401165008545, |
|
"learning_rate": 3.715230278157775e-05, |
|
"loss": 0.4016, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.3344733242134064, |
|
"grad_norm": 11.411341667175293, |
|
"learning_rate": 3.705730354157167e-05, |
|
"loss": 0.4779, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 3.3515731874145005, |
|
"grad_norm": 15.914603233337402, |
|
"learning_rate": 3.6962304301565584e-05, |
|
"loss": 0.5832, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.368673050615595, |
|
"grad_norm": 3.610494613647461, |
|
"learning_rate": 3.686730506155951e-05, |
|
"loss": 0.5463, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 3.3857729138166897, |
|
"grad_norm": 14.400090217590332, |
|
"learning_rate": 3.6772305821553424e-05, |
|
"loss": 0.5733, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.402872777017784, |
|
"grad_norm": 6.468245506286621, |
|
"learning_rate": 3.667730658154735e-05, |
|
"loss": 0.5193, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 3.4199726402188784, |
|
"grad_norm": 8.739253044128418, |
|
"learning_rate": 3.658230734154127e-05, |
|
"loss": 0.4821, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.4370725034199725, |
|
"grad_norm": 0.5965850949287415, |
|
"learning_rate": 3.648730810153519e-05, |
|
"loss": 0.3247, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 3.454172366621067, |
|
"grad_norm": 2.4634127616882324, |
|
"learning_rate": 3.639230886152911e-05, |
|
"loss": 0.5018, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.471272229822161, |
|
"grad_norm": 12.17545223236084, |
|
"learning_rate": 3.629730962152303e-05, |
|
"loss": 0.4185, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 3.488372093023256, |
|
"grad_norm": 10.63932991027832, |
|
"learning_rate": 3.620231038151695e-05, |
|
"loss": 0.7251, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.5054719562243504, |
|
"grad_norm": 3.384568214416504, |
|
"learning_rate": 3.610731114151087e-05, |
|
"loss": 0.4883, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 3.5225718194254445, |
|
"grad_norm": 7.895840167999268, |
|
"learning_rate": 3.601231190150479e-05, |
|
"loss": 0.5038, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.539671682626539, |
|
"grad_norm": 7.191064834594727, |
|
"learning_rate": 3.591731266149871e-05, |
|
"loss": 0.467, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 3.556771545827633, |
|
"grad_norm": 8.865562438964844, |
|
"learning_rate": 3.5822313421492634e-05, |
|
"loss": 0.504, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.573871409028728, |
|
"grad_norm": 5.6215434074401855, |
|
"learning_rate": 3.572731418148655e-05, |
|
"loss": 0.6207, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 3.5909712722298224, |
|
"grad_norm": 3.1758780479431152, |
|
"learning_rate": 3.5632314941480474e-05, |
|
"loss": 0.5304, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.6080711354309165, |
|
"grad_norm": 6.1815056800842285, |
|
"learning_rate": 3.553731570147439e-05, |
|
"loss": 0.4599, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 3.625170998632011, |
|
"grad_norm": 29.166934967041016, |
|
"learning_rate": 3.544231646146831e-05, |
|
"loss": 0.5521, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.6422708618331052, |
|
"grad_norm": 10.150755882263184, |
|
"learning_rate": 3.5347317221462225e-05, |
|
"loss": 0.4214, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 3.6593707250342, |
|
"grad_norm": 12.637552261352539, |
|
"learning_rate": 3.525231798145615e-05, |
|
"loss": 0.3804, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.6764705882352944, |
|
"grad_norm": 5.059484481811523, |
|
"learning_rate": 3.5157318741450066e-05, |
|
"loss": 0.5716, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 3.6935704514363885, |
|
"grad_norm": 0.10250476002693176, |
|
"learning_rate": 3.506231950144399e-05, |
|
"loss": 0.4074, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.7106703146374826, |
|
"grad_norm": 8.807113647460938, |
|
"learning_rate": 3.4967320261437906e-05, |
|
"loss": 0.4943, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 3.7277701778385772, |
|
"grad_norm": 11.27835750579834, |
|
"learning_rate": 3.487232102143183e-05, |
|
"loss": 0.52, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.744870041039672, |
|
"grad_norm": 9.193815231323242, |
|
"learning_rate": 3.477732178142575e-05, |
|
"loss": 0.4272, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 3.761969904240766, |
|
"grad_norm": 5.949501991271973, |
|
"learning_rate": 3.468232254141967e-05, |
|
"loss": 0.5627, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.7790697674418605, |
|
"grad_norm": 8.378783226013184, |
|
"learning_rate": 3.458732330141359e-05, |
|
"loss": 0.5856, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 3.7961696306429547, |
|
"grad_norm": 10.514230728149414, |
|
"learning_rate": 3.449232406140751e-05, |
|
"loss": 0.4319, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.8132694938440492, |
|
"grad_norm": 9.19676399230957, |
|
"learning_rate": 3.4397324821401435e-05, |
|
"loss": 0.5279, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 3.830369357045144, |
|
"grad_norm": 33.51396560668945, |
|
"learning_rate": 3.430232558139535e-05, |
|
"loss": 0.4681, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.847469220246238, |
|
"grad_norm": 9.01288890838623, |
|
"learning_rate": 3.4207326341389275e-05, |
|
"loss": 0.476, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 3.8645690834473325, |
|
"grad_norm": 8.594268798828125, |
|
"learning_rate": 3.411232710138319e-05, |
|
"loss": 0.3972, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.8816689466484267, |
|
"grad_norm": 16.336450576782227, |
|
"learning_rate": 3.4017327861377116e-05, |
|
"loss": 0.556, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 3.8987688098495212, |
|
"grad_norm": 9.880993843078613, |
|
"learning_rate": 3.392232862137103e-05, |
|
"loss": 0.4506, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.915868673050616, |
|
"grad_norm": 17.31952476501465, |
|
"learning_rate": 3.382732938136495e-05, |
|
"loss": 0.5402, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 3.93296853625171, |
|
"grad_norm": 27.180463790893555, |
|
"learning_rate": 3.373233014135887e-05, |
|
"loss": 0.5221, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.9500683994528045, |
|
"grad_norm": 6.002215385437012, |
|
"learning_rate": 3.363733090135279e-05, |
|
"loss": 0.4936, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 3.9671682626538987, |
|
"grad_norm": 18.105520248413086, |
|
"learning_rate": 3.354233166134671e-05, |
|
"loss": 0.4508, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.9842681258549932, |
|
"grad_norm": 10.82498550415039, |
|
"learning_rate": 3.344733242134063e-05, |
|
"loss": 0.45, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8955274837828611, |
|
"eval_f1_macro": 0.7057064371974652, |
|
"eval_f1_micro": 0.8955274837828611, |
|
"eval_f1_weighted": 0.8886748900635787, |
|
"eval_loss": 0.31441542506217957, |
|
"eval_precision_macro": 0.8491702927441283, |
|
"eval_precision_micro": 0.8955274837828611, |
|
"eval_precision_weighted": 0.904039527542345, |
|
"eval_recall_macro": 0.6801244258050726, |
|
"eval_recall_micro": 0.8955274837828611, |
|
"eval_recall_weighted": 0.8955274837828611, |
|
"eval_runtime": 19.2708, |
|
"eval_samples_per_second": 151.992, |
|
"eval_steps_per_second": 9.548, |
|
"step": 5848 |
|
}, |
|
{ |
|
"epoch": 4.001367989056088, |
|
"grad_norm": 11.459450721740723, |
|
"learning_rate": 3.335233318133455e-05, |
|
"loss": 0.3849, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.0184678522571815, |
|
"grad_norm": 5.290565013885498, |
|
"learning_rate": 3.325733394132847e-05, |
|
"loss": 0.4288, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 4.035567715458276, |
|
"grad_norm": 5.566415309906006, |
|
"learning_rate": 3.316233470132239e-05, |
|
"loss": 0.5999, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.052667578659371, |
|
"grad_norm": 14.68671703338623, |
|
"learning_rate": 3.306733546131631e-05, |
|
"loss": 0.3921, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 4.069767441860465, |
|
"grad_norm": 4.023522853851318, |
|
"learning_rate": 3.297233622131023e-05, |
|
"loss": 0.5771, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 4.08686730506156, |
|
"grad_norm": 21.95399284362793, |
|
"learning_rate": 3.287733698130415e-05, |
|
"loss": 0.4062, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 4.1039671682626535, |
|
"grad_norm": 0.2723749279975891, |
|
"learning_rate": 3.278233774129807e-05, |
|
"loss": 0.474, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.121067031463748, |
|
"grad_norm": 2.193208694458008, |
|
"learning_rate": 3.268733850129199e-05, |
|
"loss": 0.3756, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 4.138166894664843, |
|
"grad_norm": 7.093472480773926, |
|
"learning_rate": 3.259233926128591e-05, |
|
"loss": 0.4341, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.155266757865937, |
|
"grad_norm": 15.10814380645752, |
|
"learning_rate": 3.249734002127983e-05, |
|
"loss": 0.6389, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 4.172366621067032, |
|
"grad_norm": 1.5080924034118652, |
|
"learning_rate": 3.240234078127375e-05, |
|
"loss": 0.3716, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.1894664842681255, |
|
"grad_norm": 6.386539936065674, |
|
"learning_rate": 3.2307341541267674e-05, |
|
"loss": 0.4362, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 4.20656634746922, |
|
"grad_norm": 5.12455415725708, |
|
"learning_rate": 3.221234230126159e-05, |
|
"loss": 0.3797, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.223666210670315, |
|
"grad_norm": 17.729442596435547, |
|
"learning_rate": 3.2117343061255514e-05, |
|
"loss": 0.5386, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 4.240766073871409, |
|
"grad_norm": 11.959110260009766, |
|
"learning_rate": 3.202234382124943e-05, |
|
"loss": 0.5592, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.257865937072504, |
|
"grad_norm": 8.719466209411621, |
|
"learning_rate": 3.192734458124335e-05, |
|
"loss": 0.5439, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 4.2749658002735975, |
|
"grad_norm": 16.87335205078125, |
|
"learning_rate": 3.183234534123727e-05, |
|
"loss": 0.4024, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.292065663474692, |
|
"grad_norm": 18.301565170288086, |
|
"learning_rate": 3.173734610123119e-05, |
|
"loss": 0.5395, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 4.309165526675787, |
|
"grad_norm": 3.5666756629943848, |
|
"learning_rate": 3.164234686122511e-05, |
|
"loss": 0.4384, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.326265389876881, |
|
"grad_norm": 6.758172035217285, |
|
"learning_rate": 3.154734762121903e-05, |
|
"loss": 0.4922, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 4.343365253077975, |
|
"grad_norm": 10.049732208251953, |
|
"learning_rate": 3.145234838121295e-05, |
|
"loss": 0.5458, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.3604651162790695, |
|
"grad_norm": 8.759356498718262, |
|
"learning_rate": 3.135734914120687e-05, |
|
"loss": 0.3634, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 4.377564979480164, |
|
"grad_norm": 26.165199279785156, |
|
"learning_rate": 3.1262349901200794e-05, |
|
"loss": 0.5239, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.394664842681259, |
|
"grad_norm": 9.9360990524292, |
|
"learning_rate": 3.116735066119471e-05, |
|
"loss": 0.3593, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 4.411764705882353, |
|
"grad_norm": 6.546799182891846, |
|
"learning_rate": 3.1072351421188634e-05, |
|
"loss": 0.5414, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.428864569083447, |
|
"grad_norm": 10.599846839904785, |
|
"learning_rate": 3.097735218118255e-05, |
|
"loss": 0.493, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 4.4459644322845415, |
|
"grad_norm": 13.960310935974121, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.3437, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.463064295485636, |
|
"grad_norm": 5.401963710784912, |
|
"learning_rate": 3.078735370117039e-05, |
|
"loss": 0.4259, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 4.480164158686731, |
|
"grad_norm": 4.808218955993652, |
|
"learning_rate": 3.0692354461164315e-05, |
|
"loss": 0.4627, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.497264021887825, |
|
"grad_norm": 22.903667449951172, |
|
"learning_rate": 3.059735522115823e-05, |
|
"loss": 0.5063, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 4.514363885088919, |
|
"grad_norm": 4.878890037536621, |
|
"learning_rate": 3.0502355981152152e-05, |
|
"loss": 0.3412, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.5314637482900135, |
|
"grad_norm": 6.41884708404541, |
|
"learning_rate": 3.040735674114607e-05, |
|
"loss": 0.4038, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 4.548563611491108, |
|
"grad_norm": 7.6325154304504395, |
|
"learning_rate": 3.0312357501139993e-05, |
|
"loss": 0.5503, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.565663474692203, |
|
"grad_norm": 10.409296035766602, |
|
"learning_rate": 3.021735826113391e-05, |
|
"loss": 0.4306, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 4.582763337893297, |
|
"grad_norm": 9.539959907531738, |
|
"learning_rate": 3.0122359021127833e-05, |
|
"loss": 0.5506, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.599863201094391, |
|
"grad_norm": 15.213808059692383, |
|
"learning_rate": 3.002735978112175e-05, |
|
"loss": 0.4715, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 4.6169630642954855, |
|
"grad_norm": 15.897672653198242, |
|
"learning_rate": 2.9932360541115674e-05, |
|
"loss": 0.4674, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 4.63406292749658, |
|
"grad_norm": 2.023172378540039, |
|
"learning_rate": 2.983736130110959e-05, |
|
"loss": 0.5035, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 4.651162790697675, |
|
"grad_norm": 9.661181449890137, |
|
"learning_rate": 2.974236206110351e-05, |
|
"loss": 0.4261, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.668262653898768, |
|
"grad_norm": 6.808616638183594, |
|
"learning_rate": 2.9647362821097435e-05, |
|
"loss": 0.3963, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 4.685362517099863, |
|
"grad_norm": 2.418628215789795, |
|
"learning_rate": 2.9552363581091352e-05, |
|
"loss": 0.5116, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 4.7024623803009575, |
|
"grad_norm": 5.1149749755859375, |
|
"learning_rate": 2.9457364341085275e-05, |
|
"loss": 0.3861, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 4.719562243502052, |
|
"grad_norm": 10.152314186096191, |
|
"learning_rate": 2.9362365101079192e-05, |
|
"loss": 0.4005, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.736662106703147, |
|
"grad_norm": 11.572530746459961, |
|
"learning_rate": 2.9267365861073116e-05, |
|
"loss": 0.5105, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 4.75376196990424, |
|
"grad_norm": 11.438729286193848, |
|
"learning_rate": 2.9172366621067033e-05, |
|
"loss": 0.3964, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.770861833105335, |
|
"grad_norm": 2.2795422077178955, |
|
"learning_rate": 2.9077367381060953e-05, |
|
"loss": 0.4141, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 4.7879616963064295, |
|
"grad_norm": 17.774606704711914, |
|
"learning_rate": 2.8982368141054873e-05, |
|
"loss": 0.4031, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.805061559507524, |
|
"grad_norm": 5.122858047485352, |
|
"learning_rate": 2.8887368901048794e-05, |
|
"loss": 0.5089, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 4.822161422708619, |
|
"grad_norm": 9.22169303894043, |
|
"learning_rate": 2.879236966104271e-05, |
|
"loss": 0.4628, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 4.839261285909712, |
|
"grad_norm": 7.689781665802002, |
|
"learning_rate": 2.8697370421036634e-05, |
|
"loss": 0.435, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 4.856361149110807, |
|
"grad_norm": 14.785922050476074, |
|
"learning_rate": 2.860237118103055e-05, |
|
"loss": 0.5333, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.8734610123119015, |
|
"grad_norm": 9.352224349975586, |
|
"learning_rate": 2.8507371941024475e-05, |
|
"loss": 0.5289, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 4.890560875512996, |
|
"grad_norm": 13.73246955871582, |
|
"learning_rate": 2.841237270101839e-05, |
|
"loss": 0.4828, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 4.907660738714091, |
|
"grad_norm": 25.362621307373047, |
|
"learning_rate": 2.8317373461012315e-05, |
|
"loss": 0.4774, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 4.924760601915184, |
|
"grad_norm": 7.927663803100586, |
|
"learning_rate": 2.8222374221006232e-05, |
|
"loss": 0.4548, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.941860465116279, |
|
"grad_norm": 7.368469715118408, |
|
"learning_rate": 2.8127374981000152e-05, |
|
"loss": 0.503, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 4.9589603283173735, |
|
"grad_norm": 4.176021575927734, |
|
"learning_rate": 2.8032375740994073e-05, |
|
"loss": 0.4104, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 4.976060191518468, |
|
"grad_norm": 9.954981803894043, |
|
"learning_rate": 2.7937376500987993e-05, |
|
"loss": 0.4093, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 4.993160054719562, |
|
"grad_norm": 6.885503768920898, |
|
"learning_rate": 2.784237726098191e-05, |
|
"loss": 0.5022, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8955274837828611, |
|
"eval_f1_macro": 0.7161875284577881, |
|
"eval_f1_micro": 0.8955274837828611, |
|
"eval_f1_weighted": 0.8926041027507408, |
|
"eval_loss": 0.28958025574684143, |
|
"eval_precision_macro": 0.862110704875171, |
|
"eval_precision_micro": 0.8955274837828611, |
|
"eval_precision_weighted": 0.9050944481184527, |
|
"eval_recall_macro": 0.6655249799036212, |
|
"eval_recall_micro": 0.8955274837828611, |
|
"eval_recall_weighted": 0.8955274837828611, |
|
"eval_runtime": 18.9608, |
|
"eval_samples_per_second": 154.477, |
|
"eval_steps_per_second": 9.704, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 5.010259917920656, |
|
"grad_norm": 0.8062827587127686, |
|
"learning_rate": 2.7747378020975834e-05, |
|
"loss": 0.458, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 5.027359781121751, |
|
"grad_norm": 7.012026786804199, |
|
"learning_rate": 2.765237878096975e-05, |
|
"loss": 0.4691, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 5.0444596443228455, |
|
"grad_norm": 3.819838762283325, |
|
"learning_rate": 2.7557379540963674e-05, |
|
"loss": 0.5331, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 5.06155950752394, |
|
"grad_norm": 11.148397445678711, |
|
"learning_rate": 2.7462380300957598e-05, |
|
"loss": 0.4309, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.078659370725034, |
|
"grad_norm": 4.97418737411499, |
|
"learning_rate": 2.7367381060951515e-05, |
|
"loss": 0.4308, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 5.095759233926128, |
|
"grad_norm": 9.843364715576172, |
|
"learning_rate": 2.7272381820945435e-05, |
|
"loss": 0.5226, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 5.112859097127223, |
|
"grad_norm": 11.50365924835205, |
|
"learning_rate": 2.7177382580939352e-05, |
|
"loss": 0.4092, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 5.1299589603283176, |
|
"grad_norm": 6.617554187774658, |
|
"learning_rate": 2.7082383340933276e-05, |
|
"loss": 0.3954, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.147058823529412, |
|
"grad_norm": 0.518602728843689, |
|
"learning_rate": 2.6987384100927192e-05, |
|
"loss": 0.422, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 5.164158686730506, |
|
"grad_norm": 16.087276458740234, |
|
"learning_rate": 2.6892384860921116e-05, |
|
"loss": 0.3651, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 5.1812585499316, |
|
"grad_norm": 0.1962614506483078, |
|
"learning_rate": 2.6797385620915033e-05, |
|
"loss": 0.5446, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 5.198358413132695, |
|
"grad_norm": 8.01890754699707, |
|
"learning_rate": 2.6702386380908957e-05, |
|
"loss": 0.3318, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.2154582763337896, |
|
"grad_norm": 36.442684173583984, |
|
"learning_rate": 2.6607387140902874e-05, |
|
"loss": 0.4495, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 5.232558139534884, |
|
"grad_norm": 8.66895866394043, |
|
"learning_rate": 2.6512387900896797e-05, |
|
"loss": 0.4476, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.249658002735978, |
|
"grad_norm": 14.132843971252441, |
|
"learning_rate": 2.6417388660890714e-05, |
|
"loss": 0.3548, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 5.266757865937072, |
|
"grad_norm": 11.379664421081543, |
|
"learning_rate": 2.6322389420884634e-05, |
|
"loss": 0.4658, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.283857729138167, |
|
"grad_norm": 12.820823669433594, |
|
"learning_rate": 2.622739018087855e-05, |
|
"loss": 0.2941, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 5.300957592339262, |
|
"grad_norm": 26.1966609954834, |
|
"learning_rate": 2.6132390940872475e-05, |
|
"loss": 0.4083, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.318057455540355, |
|
"grad_norm": 12.518375396728516, |
|
"learning_rate": 2.6041191670466635e-05, |
|
"loss": 0.3166, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 5.33515731874145, |
|
"grad_norm": 4.027897834777832, |
|
"learning_rate": 2.594619243046056e-05, |
|
"loss": 0.4087, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.352257181942544, |
|
"grad_norm": 13.574274063110352, |
|
"learning_rate": 2.5851193190454476e-05, |
|
"loss": 0.5478, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 5.369357045143639, |
|
"grad_norm": 12.73529052734375, |
|
"learning_rate": 2.57561939504484e-05, |
|
"loss": 0.4394, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.386456908344734, |
|
"grad_norm": 8.502470016479492, |
|
"learning_rate": 2.5661194710442316e-05, |
|
"loss": 0.397, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 5.403556771545827, |
|
"grad_norm": 7.308871746063232, |
|
"learning_rate": 2.556619547043624e-05, |
|
"loss": 0.4541, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.420656634746922, |
|
"grad_norm": 14.608325004577637, |
|
"learning_rate": 2.5471196230430157e-05, |
|
"loss": 0.4646, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 5.437756497948016, |
|
"grad_norm": 6.4289655685424805, |
|
"learning_rate": 2.5376196990424077e-05, |
|
"loss": 0.399, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 5.454856361149111, |
|
"grad_norm": 3.8061683177948, |
|
"learning_rate": 2.5281197750417994e-05, |
|
"loss": 0.4327, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 5.471956224350206, |
|
"grad_norm": 6.391703128814697, |
|
"learning_rate": 2.5186198510411917e-05, |
|
"loss": 0.4641, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.489056087551299, |
|
"grad_norm": 2.9124350547790527, |
|
"learning_rate": 2.509119927040584e-05, |
|
"loss": 0.3654, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 5.506155950752394, |
|
"grad_norm": 3.834289789199829, |
|
"learning_rate": 2.4996200030399758e-05, |
|
"loss": 0.5162, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 5.523255813953488, |
|
"grad_norm": 16.672739028930664, |
|
"learning_rate": 2.4901200790393678e-05, |
|
"loss": 0.5626, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 5.540355677154583, |
|
"grad_norm": 26.094615936279297, |
|
"learning_rate": 2.48062015503876e-05, |
|
"loss": 0.3838, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.557455540355678, |
|
"grad_norm": 1.9188295602798462, |
|
"learning_rate": 2.471120231038152e-05, |
|
"loss": 0.3746, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 5.574555403556771, |
|
"grad_norm": 3.0162570476531982, |
|
"learning_rate": 2.461620307037544e-05, |
|
"loss": 0.4517, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 5.591655266757866, |
|
"grad_norm": 14.349656105041504, |
|
"learning_rate": 2.4521203830369356e-05, |
|
"loss": 0.4876, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 5.60875512995896, |
|
"grad_norm": 9.013519287109375, |
|
"learning_rate": 2.4426204590363276e-05, |
|
"loss": 0.4394, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.625854993160055, |
|
"grad_norm": 3.7371058464050293, |
|
"learning_rate": 2.4331205350357197e-05, |
|
"loss": 0.4345, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 5.642954856361149, |
|
"grad_norm": 13.115042686462402, |
|
"learning_rate": 2.4236206110351117e-05, |
|
"loss": 0.4146, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 5.660054719562243, |
|
"grad_norm": 12.576549530029297, |
|
"learning_rate": 2.4141206870345037e-05, |
|
"loss": 0.4096, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 5.677154582763338, |
|
"grad_norm": 7.4951605796813965, |
|
"learning_rate": 2.4046207630338957e-05, |
|
"loss": 0.3997, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.694254445964432, |
|
"grad_norm": 8.070563316345215, |
|
"learning_rate": 2.3951208390332878e-05, |
|
"loss": 0.3297, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 5.711354309165527, |
|
"grad_norm": 14.807238578796387, |
|
"learning_rate": 2.38562091503268e-05, |
|
"loss": 0.3864, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 5.728454172366621, |
|
"grad_norm": 6.503055572509766, |
|
"learning_rate": 2.3761209910320718e-05, |
|
"loss": 0.5571, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 5.745554035567715, |
|
"grad_norm": 3.811549186706543, |
|
"learning_rate": 2.366621067031464e-05, |
|
"loss": 0.3065, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.76265389876881, |
|
"grad_norm": 4.377668857574463, |
|
"learning_rate": 2.357121143030856e-05, |
|
"loss": 0.3606, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 5.779753761969904, |
|
"grad_norm": 6.7863874435424805, |
|
"learning_rate": 2.347621219030248e-05, |
|
"loss": 0.3654, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 5.796853625170999, |
|
"grad_norm": 8.570117950439453, |
|
"learning_rate": 2.33812129502964e-05, |
|
"loss": 0.3821, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 5.813953488372093, |
|
"grad_norm": 3.4964771270751953, |
|
"learning_rate": 2.328621371029032e-05, |
|
"loss": 0.3593, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.831053351573187, |
|
"grad_norm": 5.006895065307617, |
|
"learning_rate": 2.319121447028424e-05, |
|
"loss": 0.3856, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 5.848153214774282, |
|
"grad_norm": 7.012197971343994, |
|
"learning_rate": 2.309621523027816e-05, |
|
"loss": 0.5216, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 5.865253077975376, |
|
"grad_norm": 11.0383882522583, |
|
"learning_rate": 2.300121599027208e-05, |
|
"loss": 0.5002, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 6.153685092926025, |
|
"learning_rate": 2.2906216750266e-05, |
|
"loss": 0.4749, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.899452804377565, |
|
"grad_norm": 21.01350975036621, |
|
"learning_rate": 2.2811217510259918e-05, |
|
"loss": 0.3583, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 5.916552667578659, |
|
"grad_norm": 6.175297737121582, |
|
"learning_rate": 2.2716218270253838e-05, |
|
"loss": 0.4317, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 5.933652530779754, |
|
"grad_norm": 2.6204943656921387, |
|
"learning_rate": 2.2621219030247758e-05, |
|
"loss": 0.4452, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 5.950752393980848, |
|
"grad_norm": 2.762593984603882, |
|
"learning_rate": 2.252621979024168e-05, |
|
"loss": 0.3466, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.967852257181942, |
|
"grad_norm": 11.155779838562012, |
|
"learning_rate": 2.24312205502356e-05, |
|
"loss": 0.4283, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 5.984952120383037, |
|
"grad_norm": 61.69544219970703, |
|
"learning_rate": 2.233622131022952e-05, |
|
"loss": 0.3336, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9095254353021509, |
|
"eval_f1_macro": 0.748356749541202, |
|
"eval_f1_micro": 0.9095254353021509, |
|
"eval_f1_weighted": 0.9037758276025493, |
|
"eval_loss": 0.297870934009552, |
|
"eval_precision_macro": 0.83851223488873, |
|
"eval_precision_micro": 0.9095254353021509, |
|
"eval_precision_weighted": 0.9132744969964606, |
|
"eval_recall_macro": 0.7323996923201544, |
|
"eval_recall_micro": 0.9095254353021509, |
|
"eval_recall_weighted": 0.9095254353021509, |
|
"eval_runtime": 18.9912, |
|
"eval_samples_per_second": 154.229, |
|
"eval_steps_per_second": 9.689, |
|
"step": 8772 |
|
}, |
|
{ |
|
"epoch": 6.002051983584131, |
|
"grad_norm": 9.449117660522461, |
|
"learning_rate": 2.224122207022344e-05, |
|
"loss": 0.3617, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 6.019151846785226, |
|
"grad_norm": 9.420016288757324, |
|
"learning_rate": 2.214622283021736e-05, |
|
"loss": 0.5047, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 6.03625170998632, |
|
"grad_norm": 8.470691680908203, |
|
"learning_rate": 2.2055023559811523e-05, |
|
"loss": 0.4069, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 6.053351573187414, |
|
"grad_norm": 16.81625747680664, |
|
"learning_rate": 2.1960024319805443e-05, |
|
"loss": 0.5216, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 6.070451436388509, |
|
"grad_norm": 14.323150634765625, |
|
"learning_rate": 2.186502507979936e-05, |
|
"loss": 0.3137, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 6.087551299589603, |
|
"grad_norm": 5.009669780731201, |
|
"learning_rate": 2.177002583979328e-05, |
|
"loss": 0.4713, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 6.104651162790698, |
|
"grad_norm": 14.51624870300293, |
|
"learning_rate": 2.16750265997872e-05, |
|
"loss": 0.373, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 6.121751025991792, |
|
"grad_norm": 4.42010498046875, |
|
"learning_rate": 2.158002735978112e-05, |
|
"loss": 0.3218, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 6.138850889192886, |
|
"grad_norm": 18.838573455810547, |
|
"learning_rate": 2.1485028119775045e-05, |
|
"loss": 0.3754, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 6.155950752393981, |
|
"grad_norm": 2.5859174728393555, |
|
"learning_rate": 2.1390028879768965e-05, |
|
"loss": 0.4086, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.173050615595075, |
|
"grad_norm": 4.829029560089111, |
|
"learning_rate": 2.1295029639762885e-05, |
|
"loss": 0.3722, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 6.19015047879617, |
|
"grad_norm": 11.934502601623535, |
|
"learning_rate": 2.1200030399756805e-05, |
|
"loss": 0.3506, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 6.207250341997264, |
|
"grad_norm": 3.9261722564697266, |
|
"learning_rate": 2.1105031159750722e-05, |
|
"loss": 0.2612, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 6.224350205198358, |
|
"grad_norm": 0.23096883296966553, |
|
"learning_rate": 2.1010031919744643e-05, |
|
"loss": 0.5446, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.241450068399453, |
|
"grad_norm": 13.32019329071045, |
|
"learning_rate": 2.0915032679738563e-05, |
|
"loss": 0.4225, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 6.258549931600547, |
|
"grad_norm": 12.433130264282227, |
|
"learning_rate": 2.0820033439732483e-05, |
|
"loss": 0.3323, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.275649794801642, |
|
"grad_norm": 22.49323844909668, |
|
"learning_rate": 2.0725034199726403e-05, |
|
"loss": 0.4702, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 6.292749658002736, |
|
"grad_norm": 7.992762088775635, |
|
"learning_rate": 2.0630034959720324e-05, |
|
"loss": 0.4188, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.30984952120383, |
|
"grad_norm": 2.31046986579895, |
|
"learning_rate": 2.0535035719714244e-05, |
|
"loss": 0.2581, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 6.326949384404925, |
|
"grad_norm": 13.177254676818848, |
|
"learning_rate": 2.0440036479708164e-05, |
|
"loss": 0.5264, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 6.344049247606019, |
|
"grad_norm": 16.654388427734375, |
|
"learning_rate": 2.0345037239702085e-05, |
|
"loss": 0.5404, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 6.361149110807114, |
|
"grad_norm": 7.191986083984375, |
|
"learning_rate": 2.0250037999696005e-05, |
|
"loss": 0.3926, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 6.378248974008208, |
|
"grad_norm": 2.7967660427093506, |
|
"learning_rate": 2.0155038759689922e-05, |
|
"loss": 0.3759, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 6.395348837209302, |
|
"grad_norm": 11.951244354248047, |
|
"learning_rate": 2.0060039519683842e-05, |
|
"loss": 0.3715, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 6.412448700410397, |
|
"grad_norm": 20.298959732055664, |
|
"learning_rate": 1.9965040279677762e-05, |
|
"loss": 0.3348, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 6.429548563611491, |
|
"grad_norm": 4.485177516937256, |
|
"learning_rate": 1.9870041039671683e-05, |
|
"loss": 0.3164, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 6.446648426812586, |
|
"grad_norm": 8.650040626525879, |
|
"learning_rate": 1.9775041799665603e-05, |
|
"loss": 0.4892, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 6.46374829001368, |
|
"grad_norm": 8.256196975708008, |
|
"learning_rate": 1.9680042559659523e-05, |
|
"loss": 0.4008, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 6.480848153214774, |
|
"grad_norm": 13.1589994430542, |
|
"learning_rate": 1.9585043319653443e-05, |
|
"loss": 0.3471, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 6.497948016415869, |
|
"grad_norm": 5.785964488983154, |
|
"learning_rate": 1.9490044079647364e-05, |
|
"loss": 0.4492, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.515047879616963, |
|
"grad_norm": 5.720312118530273, |
|
"learning_rate": 1.9395044839641284e-05, |
|
"loss": 0.3955, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 6.532147742818058, |
|
"grad_norm": 4.752621650695801, |
|
"learning_rate": 1.9300045599635204e-05, |
|
"loss": 0.5226, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 6.549247606019152, |
|
"grad_norm": 6.577572822570801, |
|
"learning_rate": 1.9205046359629124e-05, |
|
"loss": 0.4383, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 6.566347469220246, |
|
"grad_norm": 2.3268673419952393, |
|
"learning_rate": 1.9110047119623045e-05, |
|
"loss": 0.4475, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.583447332421341, |
|
"grad_norm": 7.915472030639648, |
|
"learning_rate": 1.9015047879616965e-05, |
|
"loss": 0.4374, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 6.600547195622435, |
|
"grad_norm": 14.391087532043457, |
|
"learning_rate": 1.8920048639610885e-05, |
|
"loss": 0.3706, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 6.617647058823529, |
|
"grad_norm": 5.97300386428833, |
|
"learning_rate": 1.8825049399604806e-05, |
|
"loss": 0.425, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 6.634746922024624, |
|
"grad_norm": 9.130365371704102, |
|
"learning_rate": 1.8730050159598726e-05, |
|
"loss": 0.3341, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.651846785225718, |
|
"grad_norm": 5.5994038581848145, |
|
"learning_rate": 1.8635050919592646e-05, |
|
"loss": 0.4933, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 6.668946648426813, |
|
"grad_norm": 9.19884967803955, |
|
"learning_rate": 1.8540051679586566e-05, |
|
"loss": 0.4012, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 6.686046511627907, |
|
"grad_norm": 3.408245325088501, |
|
"learning_rate": 1.8445052439580483e-05, |
|
"loss": 0.3444, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 6.703146374829001, |
|
"grad_norm": 11.616069793701172, |
|
"learning_rate": 1.8350053199574404e-05, |
|
"loss": 0.3627, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.720246238030096, |
|
"grad_norm": 12.855060577392578, |
|
"learning_rate": 1.8255053959568324e-05, |
|
"loss": 0.4833, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 6.73734610123119, |
|
"grad_norm": 4.252665042877197, |
|
"learning_rate": 1.8160054719562244e-05, |
|
"loss": 0.3607, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 6.754445964432285, |
|
"grad_norm": 8.759148597717285, |
|
"learning_rate": 1.8065055479556164e-05, |
|
"loss": 0.403, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 6.771545827633379, |
|
"grad_norm": 11.92839527130127, |
|
"learning_rate": 1.7970056239550085e-05, |
|
"loss": 0.3562, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 6.788645690834473, |
|
"grad_norm": 1.0502179861068726, |
|
"learning_rate": 1.7875056999544005e-05, |
|
"loss": 0.4002, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 6.805745554035568, |
|
"grad_norm": 8.642801284790039, |
|
"learning_rate": 1.7780057759537925e-05, |
|
"loss": 0.4298, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 6.822845417236662, |
|
"grad_norm": 3.608553886413574, |
|
"learning_rate": 1.7685058519531845e-05, |
|
"loss": 0.3687, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 6.839945280437757, |
|
"grad_norm": 17.244091033935547, |
|
"learning_rate": 1.7590059279525762e-05, |
|
"loss": 0.4086, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.857045143638851, |
|
"grad_norm": 9.269475936889648, |
|
"learning_rate": 1.7495060039519683e-05, |
|
"loss": 0.4166, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 6.874145006839945, |
|
"grad_norm": 6.287049293518066, |
|
"learning_rate": 1.7400060799513603e-05, |
|
"loss": 0.5342, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 6.89124487004104, |
|
"grad_norm": 2.380673408508301, |
|
"learning_rate": 1.7305061559507523e-05, |
|
"loss": 0.3687, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 6.908344733242134, |
|
"grad_norm": 23.413028717041016, |
|
"learning_rate": 1.7210062319501443e-05, |
|
"loss": 0.3996, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 6.925444596443229, |
|
"grad_norm": 16.1468563079834, |
|
"learning_rate": 1.7115063079495364e-05, |
|
"loss": 0.3844, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 6.942544459644322, |
|
"grad_norm": 1.6500098705291748, |
|
"learning_rate": 1.7020063839489284e-05, |
|
"loss": 0.523, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 6.959644322845417, |
|
"grad_norm": 9.402831077575684, |
|
"learning_rate": 1.6925064599483208e-05, |
|
"loss": 0.3376, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 6.976744186046512, |
|
"grad_norm": 2.928579807281494, |
|
"learning_rate": 1.6830065359477125e-05, |
|
"loss": 0.3303, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.993844049247606, |
|
"grad_norm": 2.99859881401062, |
|
"learning_rate": 1.6735066119471045e-05, |
|
"loss": 0.4049, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9156708774325708, |
|
"eval_f1_macro": 0.8149754054596434, |
|
"eval_f1_micro": 0.9156708774325708, |
|
"eval_f1_weighted": 0.9140872488341879, |
|
"eval_loss": 0.250088632106781, |
|
"eval_precision_macro": 0.9251793446372559, |
|
"eval_precision_micro": 0.9156708774325708, |
|
"eval_precision_weighted": 0.9218398298083498, |
|
"eval_recall_macro": 0.7805045376076867, |
|
"eval_recall_micro": 0.9156708774325708, |
|
"eval_recall_weighted": 0.9156708774325708, |
|
"eval_runtime": 19.166, |
|
"eval_samples_per_second": 152.822, |
|
"eval_steps_per_second": 9.6, |
|
"step": 10234 |
|
}, |
|
{ |
|
"epoch": 7.010943912448701, |
|
"grad_norm": 5.946883678436279, |
|
"learning_rate": 1.6640066879464965e-05, |
|
"loss": 0.4273, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 7.028043775649794, |
|
"grad_norm": 12.816991806030273, |
|
"learning_rate": 1.6545067639458885e-05, |
|
"loss": 0.3663, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 7.045143638850889, |
|
"grad_norm": 10.432554244995117, |
|
"learning_rate": 1.6450068399452806e-05, |
|
"loss": 0.3136, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 7.062243502051984, |
|
"grad_norm": 13.881523132324219, |
|
"learning_rate": 1.6355069159446726e-05, |
|
"loss": 0.3795, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 7.079343365253078, |
|
"grad_norm": 8.671323776245117, |
|
"learning_rate": 1.6260069919440646e-05, |
|
"loss": 0.4158, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 7.096443228454173, |
|
"grad_norm": 7.5603132247924805, |
|
"learning_rate": 1.6165070679434567e-05, |
|
"loss": 0.3809, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 7.113543091655266, |
|
"grad_norm": 13.723405838012695, |
|
"learning_rate": 1.6070071439428487e-05, |
|
"loss": 0.391, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.130642954856361, |
|
"grad_norm": 9.176318168640137, |
|
"learning_rate": 1.5975072199422407e-05, |
|
"loss": 0.3475, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 7.147742818057456, |
|
"grad_norm": 5.787652015686035, |
|
"learning_rate": 1.5880072959416324e-05, |
|
"loss": 0.3745, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 7.16484268125855, |
|
"grad_norm": 3.6111419200897217, |
|
"learning_rate": 1.5785073719410244e-05, |
|
"loss": 0.3897, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 7.181942544459645, |
|
"grad_norm": 9.432286262512207, |
|
"learning_rate": 1.5690074479404165e-05, |
|
"loss": 0.5103, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.199042407660738, |
|
"grad_norm": 6.067584037780762, |
|
"learning_rate": 1.5595075239398085e-05, |
|
"loss": 0.4322, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 7.216142270861833, |
|
"grad_norm": 0.6759016513824463, |
|
"learning_rate": 1.5500075999392005e-05, |
|
"loss": 0.4045, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 7.233242134062928, |
|
"grad_norm": 6.492595672607422, |
|
"learning_rate": 1.5405076759385925e-05, |
|
"loss": 0.3742, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 7.250341997264022, |
|
"grad_norm": 10.5081148147583, |
|
"learning_rate": 1.5310077519379846e-05, |
|
"loss": 0.3432, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 7.267441860465116, |
|
"grad_norm": 6.45819616317749, |
|
"learning_rate": 1.5215078279373766e-05, |
|
"loss": 0.3557, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 7.2845417236662104, |
|
"grad_norm": 1.3473492860794067, |
|
"learning_rate": 1.5120079039367684e-05, |
|
"loss": 0.3995, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 7.301641586867305, |
|
"grad_norm": 15.663151741027832, |
|
"learning_rate": 1.5025079799361605e-05, |
|
"loss": 0.4619, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 7.3187414500684, |
|
"grad_norm": 2.441596746444702, |
|
"learning_rate": 1.4930080559355525e-05, |
|
"loss": 0.3351, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 7.335841313269494, |
|
"grad_norm": 18.481773376464844, |
|
"learning_rate": 1.4835081319349445e-05, |
|
"loss": 0.4416, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 7.352941176470588, |
|
"grad_norm": 3.074429750442505, |
|
"learning_rate": 1.4740082079343364e-05, |
|
"loss": 0.314, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 7.3700410396716824, |
|
"grad_norm": 8.20934772491455, |
|
"learning_rate": 1.4645082839337284e-05, |
|
"loss": 0.287, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 7.387140902872777, |
|
"grad_norm": 9.531194686889648, |
|
"learning_rate": 1.4550083599331208e-05, |
|
"loss": 0.4132, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 7.404240766073872, |
|
"grad_norm": 9.128312110900879, |
|
"learning_rate": 1.4455084359325128e-05, |
|
"loss": 0.5293, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 7.421340629274966, |
|
"grad_norm": 12.818424224853516, |
|
"learning_rate": 1.4360085119319047e-05, |
|
"loss": 0.3633, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 7.43844049247606, |
|
"grad_norm": 2.5819342136383057, |
|
"learning_rate": 1.4265085879312967e-05, |
|
"loss": 0.2941, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 7.4555403556771545, |
|
"grad_norm": 0.3548867702484131, |
|
"learning_rate": 1.4170086639306887e-05, |
|
"loss": 0.3477, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 7.472640218878249, |
|
"grad_norm": 9.35716438293457, |
|
"learning_rate": 1.4075087399300808e-05, |
|
"loss": 0.3415, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 7.489740082079344, |
|
"grad_norm": 0.888134241104126, |
|
"learning_rate": 1.3980088159294726e-05, |
|
"loss": 0.4376, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 7.506839945280438, |
|
"grad_norm": 3.009415626525879, |
|
"learning_rate": 1.3885088919288646e-05, |
|
"loss": 0.3566, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 7.523939808481532, |
|
"grad_norm": 0.4245036542415619, |
|
"learning_rate": 1.3790089679282567e-05, |
|
"loss": 0.3407, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.5410396716826265, |
|
"grad_norm": 9.772459983825684, |
|
"learning_rate": 1.3695090439276487e-05, |
|
"loss": 0.5112, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 7.558139534883721, |
|
"grad_norm": 8.6549654006958, |
|
"learning_rate": 1.3600091199270407e-05, |
|
"loss": 0.3654, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 7.575239398084816, |
|
"grad_norm": 12.258879661560059, |
|
"learning_rate": 1.3505091959264326e-05, |
|
"loss": 0.394, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 7.592339261285909, |
|
"grad_norm": 8.852180480957031, |
|
"learning_rate": 1.3410092719258246e-05, |
|
"loss": 0.3667, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 7.609439124487004, |
|
"grad_norm": 19.00887680053711, |
|
"learning_rate": 1.3315093479252166e-05, |
|
"loss": 0.3465, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 7.6265389876880985, |
|
"grad_norm": 24.143585205078125, |
|
"learning_rate": 1.3220094239246087e-05, |
|
"loss": 0.3878, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 7.643638850889193, |
|
"grad_norm": 4.1856889724731445, |
|
"learning_rate": 1.3125094999240007e-05, |
|
"loss": 0.3615, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 7.660738714090288, |
|
"grad_norm": 11.348432540893555, |
|
"learning_rate": 1.3030095759233925e-05, |
|
"loss": 0.3192, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 7.677838577291381, |
|
"grad_norm": 4.999576091766357, |
|
"learning_rate": 1.2935096519227846e-05, |
|
"loss": 0.3134, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 7.694938440492476, |
|
"grad_norm": 11.35132122039795, |
|
"learning_rate": 1.2840097279221766e-05, |
|
"loss": 0.4056, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 7.7120383036935705, |
|
"grad_norm": 15.860554695129395, |
|
"learning_rate": 1.2745098039215686e-05, |
|
"loss": 0.2678, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 7.729138166894665, |
|
"grad_norm": 3.4646947383880615, |
|
"learning_rate": 1.2650098799209607e-05, |
|
"loss": 0.396, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 7.74623803009576, |
|
"grad_norm": 3.1925065517425537, |
|
"learning_rate": 1.2555099559203525e-05, |
|
"loss": 0.324, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 7.763337893296853, |
|
"grad_norm": 3.6302490234375, |
|
"learning_rate": 1.2460100319197447e-05, |
|
"loss": 0.3766, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 7.780437756497948, |
|
"grad_norm": 20.079179763793945, |
|
"learning_rate": 1.2365101079191367e-05, |
|
"loss": 0.3841, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 7.7975376196990425, |
|
"grad_norm": 11.020298957824707, |
|
"learning_rate": 1.2270101839185288e-05, |
|
"loss": 0.4496, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 7.814637482900137, |
|
"grad_norm": 4.884584426879883, |
|
"learning_rate": 1.2175102599179206e-05, |
|
"loss": 0.3219, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 7.831737346101232, |
|
"grad_norm": 18.95062828063965, |
|
"learning_rate": 1.2080103359173127e-05, |
|
"loss": 0.2958, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 7.848837209302325, |
|
"grad_norm": 7.927674770355225, |
|
"learning_rate": 1.1985104119167047e-05, |
|
"loss": 0.5062, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 7.86593707250342, |
|
"grad_norm": 18.551855087280273, |
|
"learning_rate": 1.189390484876121e-05, |
|
"loss": 0.4039, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.8830369357045145, |
|
"grad_norm": 5.578052520751953, |
|
"learning_rate": 1.179890560875513e-05, |
|
"loss": 0.332, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 7.900136798905609, |
|
"grad_norm": 0.06869751960039139, |
|
"learning_rate": 1.1703906368749049e-05, |
|
"loss": 0.4136, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 7.917236662106703, |
|
"grad_norm": 7.070012092590332, |
|
"learning_rate": 1.1608907128742971e-05, |
|
"loss": 0.3402, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 7.934336525307797, |
|
"grad_norm": 2.309910774230957, |
|
"learning_rate": 1.1513907888736891e-05, |
|
"loss": 0.3272, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 7.951436388508892, |
|
"grad_norm": 20.965015411376953, |
|
"learning_rate": 1.1418908648730812e-05, |
|
"loss": 0.2962, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 7.9685362517099865, |
|
"grad_norm": 5.13886022567749, |
|
"learning_rate": 1.132390940872473e-05, |
|
"loss": 0.455, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 7.985636114911081, |
|
"grad_norm": 3.935183525085449, |
|
"learning_rate": 1.122891016871865e-05, |
|
"loss": 0.3484, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9211334926596108, |
|
"eval_f1_macro": 0.8568820222911021, |
|
"eval_f1_micro": 0.9211334926596108, |
|
"eval_f1_weighted": 0.9210426818413497, |
|
"eval_loss": 0.2283647209405899, |
|
"eval_precision_macro": 0.9255394303052991, |
|
"eval_precision_micro": 0.9211334926596108, |
|
"eval_precision_weighted": 0.9272957997209303, |
|
"eval_recall_macro": 0.8317609357993986, |
|
"eval_recall_micro": 0.9211334926596108, |
|
"eval_recall_weighted": 0.9211334926596108, |
|
"eval_runtime": 19.3065, |
|
"eval_samples_per_second": 151.711, |
|
"eval_steps_per_second": 9.53, |
|
"step": 11696 |
|
}, |
|
{ |
|
"epoch": 8.002735978112176, |
|
"grad_norm": 14.681279182434082, |
|
"learning_rate": 1.113391092871257e-05, |
|
"loss": 0.2686, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 8.01983584131327, |
|
"grad_norm": 26.67691993713379, |
|
"learning_rate": 1.1042711658306734e-05, |
|
"loss": 0.3767, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 8.036935704514363, |
|
"grad_norm": 4.675159931182861, |
|
"learning_rate": 1.0947712418300655e-05, |
|
"loss": 0.304, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 8.054035567715458, |
|
"grad_norm": 8.4456787109375, |
|
"learning_rate": 1.0852713178294575e-05, |
|
"loss": 0.2553, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 8.071135430916552, |
|
"grad_norm": 15.122594833374023, |
|
"learning_rate": 1.0757713938288493e-05, |
|
"loss": 0.3248, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 8.088235294117647, |
|
"grad_norm": 10.912254333496094, |
|
"learning_rate": 1.0662714698282414e-05, |
|
"loss": 0.3009, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 8.105335157318741, |
|
"grad_norm": 13.658234596252441, |
|
"learning_rate": 1.0567715458276334e-05, |
|
"loss": 0.4445, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 8.122435020519836, |
|
"grad_norm": 0.18706431984901428, |
|
"learning_rate": 1.0472716218270254e-05, |
|
"loss": 0.267, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 8.13953488372093, |
|
"grad_norm": 24.79719352722168, |
|
"learning_rate": 1.0377716978264174e-05, |
|
"loss": 0.3466, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 8.156634746922025, |
|
"grad_norm": 18.876535415649414, |
|
"learning_rate": 1.0282717738258095e-05, |
|
"loss": 0.4939, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 8.17373461012312, |
|
"grad_norm": 7.15775728225708, |
|
"learning_rate": 1.0187718498252015e-05, |
|
"loss": 0.3728, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 8.190834473324214, |
|
"grad_norm": 4.604434967041016, |
|
"learning_rate": 1.0092719258245935e-05, |
|
"loss": 0.3492, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 8.207934336525307, |
|
"grad_norm": 6.463050365447998, |
|
"learning_rate": 9.997720018239856e-06, |
|
"loss": 0.3298, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.225034199726402, |
|
"grad_norm": 16.29618263244629, |
|
"learning_rate": 9.902720778233774e-06, |
|
"loss": 0.3415, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 8.242134062927496, |
|
"grad_norm": 5.63080358505249, |
|
"learning_rate": 9.807721538227694e-06, |
|
"loss": 0.2608, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 8.25923392612859, |
|
"grad_norm": 0.7199766039848328, |
|
"learning_rate": 9.712722298221615e-06, |
|
"loss": 0.3974, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 8.276333789329685, |
|
"grad_norm": 15.456204414367676, |
|
"learning_rate": 9.617723058215535e-06, |
|
"loss": 0.3329, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 8.29343365253078, |
|
"grad_norm": 18.643985748291016, |
|
"learning_rate": 9.522723818209454e-06, |
|
"loss": 0.4801, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 8.310533515731874, |
|
"grad_norm": 4.800582408905029, |
|
"learning_rate": 9.427724578203374e-06, |
|
"loss": 0.557, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 8.327633378932969, |
|
"grad_norm": 22.22751808166504, |
|
"learning_rate": 9.332725338197294e-06, |
|
"loss": 0.3648, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 8.344733242134064, |
|
"grad_norm": 5.446302890777588, |
|
"learning_rate": 9.237726098191216e-06, |
|
"loss": 0.2558, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 8.361833105335158, |
|
"grad_norm": 0.26866602897644043, |
|
"learning_rate": 9.142726858185136e-06, |
|
"loss": 0.3962, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 8.378932968536251, |
|
"grad_norm": 3.1288976669311523, |
|
"learning_rate": 9.047727618179055e-06, |
|
"loss": 0.439, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 8.396032831737346, |
|
"grad_norm": 2.740288496017456, |
|
"learning_rate": 8.952728378172975e-06, |
|
"loss": 0.3076, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 8.41313269493844, |
|
"grad_norm": 4.094404697418213, |
|
"learning_rate": 8.857729138166896e-06, |
|
"loss": 0.3551, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 8.430232558139535, |
|
"grad_norm": 9.859013557434082, |
|
"learning_rate": 8.762729898160816e-06, |
|
"loss": 0.3046, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 8.44733242134063, |
|
"grad_norm": 7.303380966186523, |
|
"learning_rate": 8.667730658154734e-06, |
|
"loss": 0.2405, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 8.464432284541724, |
|
"grad_norm": 11.945883750915527, |
|
"learning_rate": 8.572731418148655e-06, |
|
"loss": 0.367, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 8.481532147742818, |
|
"grad_norm": 8.770705223083496, |
|
"learning_rate": 8.477732178142575e-06, |
|
"loss": 0.3977, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 8.498632010943913, |
|
"grad_norm": 5.229104042053223, |
|
"learning_rate": 8.382732938136495e-06, |
|
"loss": 0.3075, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 8.515731874145008, |
|
"grad_norm": 44.49745178222656, |
|
"learning_rate": 8.287733698130415e-06, |
|
"loss": 0.373, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 8.5328317373461, |
|
"grad_norm": 11.067756652832031, |
|
"learning_rate": 8.192734458124334e-06, |
|
"loss": 0.5501, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 8.549931600547195, |
|
"grad_norm": 3.7558584213256836, |
|
"learning_rate": 8.097735218118254e-06, |
|
"loss": 0.3831, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 8.56703146374829, |
|
"grad_norm": 6.008462429046631, |
|
"learning_rate": 8.002735978112176e-06, |
|
"loss": 0.2394, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 8.584131326949384, |
|
"grad_norm": 10.782341003417969, |
|
"learning_rate": 7.907736738106097e-06, |
|
"loss": 0.2815, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 8.601231190150479, |
|
"grad_norm": 3.08451247215271, |
|
"learning_rate": 7.812737498100015e-06, |
|
"loss": 0.4385, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 8.618331053351573, |
|
"grad_norm": 2.4561235904693604, |
|
"learning_rate": 7.717738258093935e-06, |
|
"loss": 0.3698, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 8.635430916552668, |
|
"grad_norm": 6.739116668701172, |
|
"learning_rate": 7.622739018087856e-06, |
|
"loss": 0.3201, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 8.652530779753763, |
|
"grad_norm": 11.243478775024414, |
|
"learning_rate": 7.527739778081776e-06, |
|
"loss": 0.4415, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 8.669630642954857, |
|
"grad_norm": 3.1412322521209717, |
|
"learning_rate": 7.432740538075695e-06, |
|
"loss": 0.2533, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 8.68673050615595, |
|
"grad_norm": 14.60197639465332, |
|
"learning_rate": 7.337741298069616e-06, |
|
"loss": 0.4057, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 8.703830369357044, |
|
"grad_norm": 9.934842109680176, |
|
"learning_rate": 7.242742058063535e-06, |
|
"loss": 0.3252, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 8.720930232558139, |
|
"grad_norm": 1.3907521963119507, |
|
"learning_rate": 7.147742818057455e-06, |
|
"loss": 0.4068, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 8.738030095759234, |
|
"grad_norm": 5.904654502868652, |
|
"learning_rate": 7.052743578051376e-06, |
|
"loss": 0.3572, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 8.755129958960328, |
|
"grad_norm": 12.644196510314941, |
|
"learning_rate": 6.957744338045295e-06, |
|
"loss": 0.3342, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 8.772229822161423, |
|
"grad_norm": 13.406341552734375, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 0.3859, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 8.789329685362517, |
|
"grad_norm": 7.523469924926758, |
|
"learning_rate": 6.7677458580331365e-06, |
|
"loss": 0.2771, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 8.806429548563612, |
|
"grad_norm": 2.058061122894287, |
|
"learning_rate": 6.672746618027057e-06, |
|
"loss": 0.3956, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 8.823529411764707, |
|
"grad_norm": 13.852447509765625, |
|
"learning_rate": 6.577747378020976e-06, |
|
"loss": 0.288, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 8.840629274965801, |
|
"grad_norm": 3.28694748878479, |
|
"learning_rate": 6.4827481380148965e-06, |
|
"loss": 0.3175, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 8.857729138166894, |
|
"grad_norm": 4.923558235168457, |
|
"learning_rate": 6.387748898008816e-06, |
|
"loss": 0.4003, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 8.874829001367988, |
|
"grad_norm": 13.867571830749512, |
|
"learning_rate": 6.292749658002736e-06, |
|
"loss": 0.3514, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 8.891928864569083, |
|
"grad_norm": 3.354799747467041, |
|
"learning_rate": 6.1977504179966565e-06, |
|
"loss": 0.3073, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.909028727770178, |
|
"grad_norm": 20.982271194458008, |
|
"learning_rate": 6.102751177990576e-06, |
|
"loss": 0.4001, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 8.926128590971272, |
|
"grad_norm": 1.5266101360321045, |
|
"learning_rate": 6.007751937984497e-06, |
|
"loss": 0.2836, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 8.943228454172367, |
|
"grad_norm": 4.203621864318848, |
|
"learning_rate": 5.9127526979784164e-06, |
|
"loss": 0.3879, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 8.960328317373461, |
|
"grad_norm": 13.059199333190918, |
|
"learning_rate": 5.817753457972337e-06, |
|
"loss": 0.2895, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 8.977428180574556, |
|
"grad_norm": 11.570258140563965, |
|
"learning_rate": 5.722754217966256e-06, |
|
"loss": 0.3616, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 8.99452804377565, |
|
"grad_norm": 31.507492065429688, |
|
"learning_rate": 5.627754977960176e-06, |
|
"loss": 0.3524, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9231819733697507, |
|
"eval_f1_macro": 0.8652261940397432, |
|
"eval_f1_micro": 0.9231819733697507, |
|
"eval_f1_weighted": 0.9229570596156854, |
|
"eval_loss": 0.22409066557884216, |
|
"eval_precision_macro": 0.939434014505588, |
|
"eval_precision_micro": 0.9231819733697507, |
|
"eval_precision_weighted": 0.928643976460822, |
|
"eval_recall_macro": 0.824494199524642, |
|
"eval_recall_micro": 0.9231819733697507, |
|
"eval_recall_weighted": 0.9231819733697507, |
|
"eval_runtime": 19.264, |
|
"eval_samples_per_second": 152.046, |
|
"eval_steps_per_second": 9.552, |
|
"step": 13158 |
|
}, |
|
{ |
|
"epoch": 9.011627906976743, |
|
"grad_norm": 3.1529383659362793, |
|
"learning_rate": 5.532755737954097e-06, |
|
"loss": 0.2935, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 9.028727770177838, |
|
"grad_norm": 1.6082165241241455, |
|
"learning_rate": 5.437756497948017e-06, |
|
"loss": 0.2694, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 9.045827633378932, |
|
"grad_norm": 6.932997703552246, |
|
"learning_rate": 5.342757257941937e-06, |
|
"loss": 0.4234, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 9.062927496580027, |
|
"grad_norm": 2.4087891578674316, |
|
"learning_rate": 5.247758017935857e-06, |
|
"loss": 0.297, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 9.080027359781122, |
|
"grad_norm": 8.607876777648926, |
|
"learning_rate": 5.152758777929777e-06, |
|
"loss": 0.3279, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 9.097127222982216, |
|
"grad_norm": 4.843038082122803, |
|
"learning_rate": 5.057759537923696e-06, |
|
"loss": 0.2534, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 9.11422708618331, |
|
"grad_norm": 9.388402938842773, |
|
"learning_rate": 4.962760297917617e-06, |
|
"loss": 0.2849, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 9.131326949384405, |
|
"grad_norm": 2.4661998748779297, |
|
"learning_rate": 4.867761057911537e-06, |
|
"loss": 0.3157, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 9.1484268125855, |
|
"grad_norm": 13.333016395568848, |
|
"learning_rate": 4.772761817905457e-06, |
|
"loss": 0.2772, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 9.165526675786595, |
|
"grad_norm": 6.937953948974609, |
|
"learning_rate": 4.6777625778993775e-06, |
|
"loss": 0.3582, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 9.182626538987687, |
|
"grad_norm": 5.6831159591674805, |
|
"learning_rate": 4.582763337893297e-06, |
|
"loss": 0.3183, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 9.199726402188782, |
|
"grad_norm": 7.25540018081665, |
|
"learning_rate": 4.487764097887217e-06, |
|
"loss": 0.4322, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 9.216826265389876, |
|
"grad_norm": 4.3177103996276855, |
|
"learning_rate": 4.392764857881137e-06, |
|
"loss": 0.3983, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 9.233926128590971, |
|
"grad_norm": 15.372535705566406, |
|
"learning_rate": 4.297765617875058e-06, |
|
"loss": 0.3684, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 9.251025991792066, |
|
"grad_norm": 8.219186782836914, |
|
"learning_rate": 4.202766377868977e-06, |
|
"loss": 0.2893, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 9.26812585499316, |
|
"grad_norm": 14.162530899047852, |
|
"learning_rate": 4.1077671378628974e-06, |
|
"loss": 0.3841, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 9.285225718194255, |
|
"grad_norm": 2.816765308380127, |
|
"learning_rate": 4.012767897856817e-06, |
|
"loss": 0.4276, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 9.30232558139535, |
|
"grad_norm": 1.3700157403945923, |
|
"learning_rate": 3.917768657850737e-06, |
|
"loss": 0.4496, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 9.319425444596444, |
|
"grad_norm": 8.893135070800781, |
|
"learning_rate": 3.822769417844657e-06, |
|
"loss": 0.2779, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 9.336525307797537, |
|
"grad_norm": 6.580329895019531, |
|
"learning_rate": 3.7277701778385777e-06, |
|
"loss": 0.341, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 9.353625170998631, |
|
"grad_norm": 6.170793533325195, |
|
"learning_rate": 3.6327709378324975e-06, |
|
"loss": 0.3211, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 9.370725034199726, |
|
"grad_norm": 6.2319254875183105, |
|
"learning_rate": 3.537771697826418e-06, |
|
"loss": 0.3529, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 9.38782489740082, |
|
"grad_norm": 3.14901065826416, |
|
"learning_rate": 3.4427724578203377e-06, |
|
"loss": 0.2847, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 9.404924760601915, |
|
"grad_norm": 12.451719284057617, |
|
"learning_rate": 3.3477732178142575e-06, |
|
"loss": 0.3679, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 9.42202462380301, |
|
"grad_norm": 2.5386195182800293, |
|
"learning_rate": 3.2527739778081774e-06, |
|
"loss": 0.2476, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 9.439124487004104, |
|
"grad_norm": 11.419671058654785, |
|
"learning_rate": 3.157774737802098e-06, |
|
"loss": 0.3914, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 9.456224350205199, |
|
"grad_norm": 23.787368774414062, |
|
"learning_rate": 3.0627754977960175e-06, |
|
"loss": 0.3023, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 9.473324213406293, |
|
"grad_norm": 13.726613998413086, |
|
"learning_rate": 2.9677762577899378e-06, |
|
"loss": 0.3243, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 9.490424076607388, |
|
"grad_norm": 3.7777926921844482, |
|
"learning_rate": 2.8727770177838576e-06, |
|
"loss": 0.3515, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 9.50752393980848, |
|
"grad_norm": 3.651082992553711, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.3076, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 9.524623803009575, |
|
"grad_norm": 2.7207062244415283, |
|
"learning_rate": 2.682778537771698e-06, |
|
"loss": 0.3613, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 9.54172366621067, |
|
"grad_norm": 6.451671600341797, |
|
"learning_rate": 2.587779297765618e-06, |
|
"loss": 0.2136, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 9.558823529411764, |
|
"grad_norm": 10.220746040344238, |
|
"learning_rate": 2.492780057759538e-06, |
|
"loss": 0.3348, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 9.575923392612859, |
|
"grad_norm": 14.093595504760742, |
|
"learning_rate": 2.397780817753458e-06, |
|
"loss": 0.2827, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.593023255813954, |
|
"grad_norm": 2.391063928604126, |
|
"learning_rate": 2.302781577747378e-06, |
|
"loss": 0.3201, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 9.610123119015048, |
|
"grad_norm": 15.106823921203613, |
|
"learning_rate": 2.207782337741298e-06, |
|
"loss": 0.3192, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 9.627222982216143, |
|
"grad_norm": 4.812911510467529, |
|
"learning_rate": 2.112783097735218e-06, |
|
"loss": 0.3065, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 9.644322845417237, |
|
"grad_norm": 4.565815448760986, |
|
"learning_rate": 2.0177838577291384e-06, |
|
"loss": 0.3443, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 9.661422708618332, |
|
"grad_norm": 0.13094140589237213, |
|
"learning_rate": 1.9227846177230583e-06, |
|
"loss": 0.2713, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 9.678522571819425, |
|
"grad_norm": 22.36683464050293, |
|
"learning_rate": 1.8277853777169783e-06, |
|
"loss": 0.314, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 9.69562243502052, |
|
"grad_norm": 8.649237632751465, |
|
"learning_rate": 1.7327861377108984e-06, |
|
"loss": 0.3816, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 9.712722298221614, |
|
"grad_norm": 2.255821466445923, |
|
"learning_rate": 1.6377868977048183e-06, |
|
"loss": 0.3827, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 9.729822161422709, |
|
"grad_norm": 5.888030052185059, |
|
"learning_rate": 1.5427876576987383e-06, |
|
"loss": 0.3207, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 9.746922024623803, |
|
"grad_norm": 1.6394869089126587, |
|
"learning_rate": 1.4477884176926586e-06, |
|
"loss": 0.2782, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 9.764021887824898, |
|
"grad_norm": 0.9336591362953186, |
|
"learning_rate": 1.3527891776865787e-06, |
|
"loss": 0.3653, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 9.781121751025992, |
|
"grad_norm": 8.919906616210938, |
|
"learning_rate": 1.2577899376804985e-06, |
|
"loss": 0.2396, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 9.798221614227087, |
|
"grad_norm": 6.571496963500977, |
|
"learning_rate": 1.1627906976744186e-06, |
|
"loss": 0.303, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 9.815321477428181, |
|
"grad_norm": 13.167415618896484, |
|
"learning_rate": 1.0677914576683389e-06, |
|
"loss": 0.2993, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 9.832421340629274, |
|
"grad_norm": 1.0842267274856567, |
|
"learning_rate": 9.727922176622587e-07, |
|
"loss": 0.3435, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 9.849521203830369, |
|
"grad_norm": 4.068078517913818, |
|
"learning_rate": 8.777929776561788e-07, |
|
"loss": 0.2995, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 9.866621067031463, |
|
"grad_norm": 11.969517707824707, |
|
"learning_rate": 7.827937376500988e-07, |
|
"loss": 0.3289, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 9.883720930232558, |
|
"grad_norm": 9.880623817443848, |
|
"learning_rate": 6.877944976440189e-07, |
|
"loss": 0.292, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 9.900820793433653, |
|
"grad_norm": 11.973766326904297, |
|
"learning_rate": 5.92795257637939e-07, |
|
"loss": 0.3398, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 9.917920656634747, |
|
"grad_norm": 2.8612163066864014, |
|
"learning_rate": 4.977960176318589e-07, |
|
"loss": 0.414, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 9.935020519835842, |
|
"grad_norm": 31.290515899658203, |
|
"learning_rate": 4.0279677762577904e-07, |
|
"loss": 0.3436, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 9.952120383036936, |
|
"grad_norm": 6.3889241218566895, |
|
"learning_rate": 3.0779753761969905e-07, |
|
"loss": 0.3069, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 9.96922024623803, |
|
"grad_norm": 9.988734245300293, |
|
"learning_rate": 2.127982976136191e-07, |
|
"loss": 0.4289, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 9.986320109439124, |
|
"grad_norm": 13.143084526062012, |
|
"learning_rate": 1.1779905760753915e-07, |
|
"loss": 0.2766, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9269375213383407, |
|
"eval_f1_macro": 0.881587062204185, |
|
"eval_f1_micro": 0.9269375213383407, |
|
"eval_f1_weighted": 0.9267500134300362, |
|
"eval_loss": 0.22053596377372742, |
|
"eval_precision_macro": 0.9520135455160805, |
|
"eval_precision_micro": 0.9269375213383407, |
|
"eval_precision_weighted": 0.932072731880276, |
|
"eval_recall_macro": 0.8425714533291321, |
|
"eval_recall_micro": 0.9269375213383407, |
|
"eval_recall_weighted": 0.9269375213383407, |
|
"eval_runtime": 19.2006, |
|
"eval_samples_per_second": 152.547, |
|
"eval_steps_per_second": 9.583, |
|
"step": 14620 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 14620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 9.058483691559752e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|