{
"best_metric": 0.2612117528915405,
"best_model_checkpoint": "xblock-social-screenshots-5/checkpoint-6738",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6738,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 14.132967948913574,
"learning_rate": 1.8545994065281898e-06,
"loss": 0.6958,
"step": 25
},
{
"epoch": 0.02,
"grad_norm": 8.136152267456055,
"learning_rate": 3.6350148367952525e-06,
"loss": 0.6975,
"step": 50
},
{
"epoch": 0.03,
"grad_norm": 0.9680777788162231,
"learning_rate": 5.489614243323442e-06,
"loss": 0.5213,
"step": 75
},
{
"epoch": 0.04,
"grad_norm": 4.900412559509277,
"learning_rate": 7.270029673590505e-06,
"loss": 0.4804,
"step": 100
},
{
"epoch": 0.06,
"grad_norm": 3.952802896499634,
"learning_rate": 9.124629080118695e-06,
"loss": 0.4743,
"step": 125
},
{
"epoch": 0.07,
"grad_norm": 14.785197257995605,
"learning_rate": 1.0979228486646884e-05,
"loss": 0.5276,
"step": 150
},
{
"epoch": 0.08,
"grad_norm": 13.082988739013672,
"learning_rate": 1.2833827893175073e-05,
"loss": 0.5019,
"step": 175
},
{
"epoch": 0.09,
"grad_norm": 8.51102352142334,
"learning_rate": 1.4688427299703264e-05,
"loss": 0.6297,
"step": 200
},
{
"epoch": 0.1,
"grad_norm": 5.031483173370361,
"learning_rate": 1.6543026706231455e-05,
"loss": 0.4062,
"step": 225
},
{
"epoch": 0.11,
"grad_norm": 7.973475456237793,
"learning_rate": 1.8397626112759644e-05,
"loss": 0.5997,
"step": 250
},
{
"epoch": 0.12,
"grad_norm": 11.180716514587402,
"learning_rate": 2.0252225519287833e-05,
"loss": 0.478,
"step": 275
},
{
"epoch": 0.13,
"grad_norm": 4.953334331512451,
"learning_rate": 2.2106824925816026e-05,
"loss": 0.3911,
"step": 300
},
{
"epoch": 0.14,
"grad_norm": 7.498977184295654,
"learning_rate": 2.3961424332344215e-05,
"loss": 0.477,
"step": 325
},
{
"epoch": 0.16,
"grad_norm": 0.19468317925930023,
"learning_rate": 2.58160237388724e-05,
"loss": 0.338,
"step": 350
},
{
"epoch": 0.17,
"grad_norm": 0.9361104965209961,
"learning_rate": 2.7670623145400593e-05,
"loss": 0.5228,
"step": 375
},
{
"epoch": 0.18,
"grad_norm": 1.6056278944015503,
"learning_rate": 2.9525222551928783e-05,
"loss": 0.526,
"step": 400
},
{
"epoch": 0.19,
"grad_norm": 6.435601234436035,
"learning_rate": 3.137982195845697e-05,
"loss": 0.3893,
"step": 425
},
{
"epoch": 0.2,
"grad_norm": 0.40519052743911743,
"learning_rate": 3.323442136498516e-05,
"loss": 0.3945,
"step": 450
},
{
"epoch": 0.21,
"grad_norm": 0.11996802687644958,
"learning_rate": 3.508902077151335e-05,
"loss": 0.5464,
"step": 475
},
{
"epoch": 0.22,
"grad_norm": 5.791567325592041,
"learning_rate": 3.6943620178041546e-05,
"loss": 0.6363,
"step": 500
},
{
"epoch": 0.23,
"grad_norm": 0.3942394256591797,
"learning_rate": 3.8798219584569735e-05,
"loss": 0.368,
"step": 525
},
{
"epoch": 0.24,
"grad_norm": 11.67949390411377,
"learning_rate": 4.0652818991097924e-05,
"loss": 0.4232,
"step": 550
},
{
"epoch": 0.26,
"grad_norm": 6.389534950256348,
"learning_rate": 4.2507418397626114e-05,
"loss": 0.4988,
"step": 575
},
{
"epoch": 0.27,
"grad_norm": 22.40272331237793,
"learning_rate": 4.43620178041543e-05,
"loss": 0.707,
"step": 600
},
{
"epoch": 0.28,
"grad_norm": 3.8825833797454834,
"learning_rate": 4.621661721068249e-05,
"loss": 0.576,
"step": 625
},
{
"epoch": 0.29,
"grad_norm": 3.9929873943328857,
"learning_rate": 4.807121661721069e-05,
"loss": 0.4785,
"step": 650
},
{
"epoch": 0.3,
"grad_norm": 8.819046974182129,
"learning_rate": 4.992581602373888e-05,
"loss": 0.5884,
"step": 675
},
{
"epoch": 0.31,
"grad_norm": 10.694670677185059,
"learning_rate": 4.9802110817941956e-05,
"loss": 0.5243,
"step": 700
},
{
"epoch": 0.32,
"grad_norm": 3.784719944000244,
"learning_rate": 4.9595976253298154e-05,
"loss": 0.6428,
"step": 725
},
{
"epoch": 0.33,
"grad_norm": 7.733468055725098,
"learning_rate": 4.938984168865435e-05,
"loss": 0.612,
"step": 750
},
{
"epoch": 0.35,
"grad_norm": 7.867452144622803,
"learning_rate": 4.918370712401056e-05,
"loss": 0.582,
"step": 775
},
{
"epoch": 0.36,
"grad_norm": 12.518653869628906,
"learning_rate": 4.8977572559366755e-05,
"loss": 0.569,
"step": 800
},
{
"epoch": 0.37,
"grad_norm": 7.27546501159668,
"learning_rate": 4.877143799472296e-05,
"loss": 0.6449,
"step": 825
},
{
"epoch": 0.38,
"grad_norm": 16.084596633911133,
"learning_rate": 4.856530343007916e-05,
"loss": 0.6471,
"step": 850
},
{
"epoch": 0.39,
"grad_norm": 4.305793285369873,
"learning_rate": 4.8359168865435357e-05,
"loss": 0.5507,
"step": 875
},
{
"epoch": 0.4,
"grad_norm": 17.53583335876465,
"learning_rate": 4.815303430079156e-05,
"loss": 0.4701,
"step": 900
},
{
"epoch": 0.41,
"grad_norm": 6.12671422958374,
"learning_rate": 4.794689973614776e-05,
"loss": 0.551,
"step": 925
},
{
"epoch": 0.42,
"grad_norm": 2.3855419158935547,
"learning_rate": 4.774076517150396e-05,
"loss": 0.6347,
"step": 950
},
{
"epoch": 0.43,
"grad_norm": 7.226621627807617,
"learning_rate": 4.7534630606860156e-05,
"loss": 0.3186,
"step": 975
},
{
"epoch": 0.45,
"grad_norm": 7.834379196166992,
"learning_rate": 4.732849604221636e-05,
"loss": 0.5085,
"step": 1000
},
{
"epoch": 0.46,
"grad_norm": 0.51530522108078,
"learning_rate": 4.7122361477572566e-05,
"loss": 0.3593,
"step": 1025
},
{
"epoch": 0.47,
"grad_norm": 1.3713924884796143,
"learning_rate": 4.6916226912928764e-05,
"loss": 0.5763,
"step": 1050
},
{
"epoch": 0.48,
"grad_norm": 1.3441976308822632,
"learning_rate": 4.671009234828496e-05,
"loss": 0.6034,
"step": 1075
},
{
"epoch": 0.49,
"grad_norm": 14.422416687011719,
"learning_rate": 4.650395778364116e-05,
"loss": 0.4459,
"step": 1100
},
{
"epoch": 0.5,
"grad_norm": 4.849581241607666,
"learning_rate": 4.6297823218997365e-05,
"loss": 0.4681,
"step": 1125
},
{
"epoch": 0.51,
"grad_norm": 11.974592208862305,
"learning_rate": 4.6091688654353563e-05,
"loss": 0.5693,
"step": 1150
},
{
"epoch": 0.52,
"grad_norm": 8.925352096557617,
"learning_rate": 4.588555408970976e-05,
"loss": 0.4427,
"step": 1175
},
{
"epoch": 0.53,
"grad_norm": 7.595994472503662,
"learning_rate": 4.5679419525065967e-05,
"loss": 0.3736,
"step": 1200
},
{
"epoch": 0.55,
"grad_norm": 8.393537521362305,
"learning_rate": 4.5473284960422165e-05,
"loss": 0.6534,
"step": 1225
},
{
"epoch": 0.56,
"grad_norm": 6.042502403259277,
"learning_rate": 4.526715039577837e-05,
"loss": 0.4326,
"step": 1250
},
{
"epoch": 0.57,
"grad_norm": 13.696694374084473,
"learning_rate": 4.506101583113457e-05,
"loss": 0.5176,
"step": 1275
},
{
"epoch": 0.58,
"grad_norm": 4.518089771270752,
"learning_rate": 4.4854881266490766e-05,
"loss": 0.4211,
"step": 1300
},
{
"epoch": 0.59,
"grad_norm": 11.04053783416748,
"learning_rate": 4.4648746701846964e-05,
"loss": 0.5664,
"step": 1325
},
{
"epoch": 0.6,
"grad_norm": 16.009056091308594,
"learning_rate": 4.444261213720316e-05,
"loss": 0.5545,
"step": 1350
},
{
"epoch": 0.61,
"grad_norm": 4.595952033996582,
"learning_rate": 4.423647757255937e-05,
"loss": 0.4318,
"step": 1375
},
{
"epoch": 0.62,
"grad_norm": 5.188466548919678,
"learning_rate": 4.403034300791557e-05,
"loss": 0.4286,
"step": 1400
},
{
"epoch": 0.63,
"grad_norm": 6.234115123748779,
"learning_rate": 4.382420844327177e-05,
"loss": 0.5876,
"step": 1425
},
{
"epoch": 0.65,
"grad_norm": 8.778355598449707,
"learning_rate": 4.361807387862797e-05,
"loss": 0.6304,
"step": 1450
},
{
"epoch": 0.66,
"grad_norm": 9.595151901245117,
"learning_rate": 4.3411939313984173e-05,
"loss": 0.5844,
"step": 1475
},
{
"epoch": 0.67,
"grad_norm": 9.795525550842285,
"learning_rate": 4.320580474934037e-05,
"loss": 0.4714,
"step": 1500
},
{
"epoch": 0.68,
"grad_norm": 14.277398109436035,
"learning_rate": 4.299967018469657e-05,
"loss": 0.5627,
"step": 1525
},
{
"epoch": 0.69,
"grad_norm": 5.245518684387207,
"learning_rate": 4.2793535620052775e-05,
"loss": 0.5474,
"step": 1550
},
{
"epoch": 0.7,
"grad_norm": 6.895930290222168,
"learning_rate": 4.258740105540897e-05,
"loss": 0.4686,
"step": 1575
},
{
"epoch": 0.71,
"grad_norm": 7.74411153793335,
"learning_rate": 4.238126649076518e-05,
"loss": 0.5021,
"step": 1600
},
{
"epoch": 0.72,
"grad_norm": 2.97990083694458,
"learning_rate": 4.2175131926121376e-05,
"loss": 0.4173,
"step": 1625
},
{
"epoch": 0.73,
"grad_norm": 8.026514053344727,
"learning_rate": 4.1968997361477574e-05,
"loss": 0.3728,
"step": 1650
},
{
"epoch": 0.75,
"grad_norm": 0.3547731339931488,
"learning_rate": 4.176286279683377e-05,
"loss": 0.4983,
"step": 1675
},
{
"epoch": 0.76,
"grad_norm": 4.483277797698975,
"learning_rate": 4.155672823218997e-05,
"loss": 0.5235,
"step": 1700
},
{
"epoch": 0.77,
"grad_norm": 7.000768184661865,
"learning_rate": 4.1350593667546175e-05,
"loss": 0.4955,
"step": 1725
},
{
"epoch": 0.78,
"grad_norm": 4.905660152435303,
"learning_rate": 4.114445910290238e-05,
"loss": 0.4803,
"step": 1750
},
{
"epoch": 0.79,
"grad_norm": 4.236353397369385,
"learning_rate": 4.093832453825858e-05,
"loss": 0.5105,
"step": 1775
},
{
"epoch": 0.8,
"grad_norm": 8.685340881347656,
"learning_rate": 4.073218997361478e-05,
"loss": 0.6345,
"step": 1800
},
{
"epoch": 0.81,
"grad_norm": 2.6251416206359863,
"learning_rate": 4.052605540897098e-05,
"loss": 0.6859,
"step": 1825
},
{
"epoch": 0.82,
"grad_norm": 13.661340713500977,
"learning_rate": 4.031992084432718e-05,
"loss": 0.3122,
"step": 1850
},
{
"epoch": 0.83,
"grad_norm": 1.7971667051315308,
"learning_rate": 4.011378627968338e-05,
"loss": 0.5103,
"step": 1875
},
{
"epoch": 0.85,
"grad_norm": 4.921170234680176,
"learning_rate": 3.9907651715039576e-05,
"loss": 0.4259,
"step": 1900
},
{
"epoch": 0.86,
"grad_norm": 6.340487480163574,
"learning_rate": 3.970151715039578e-05,
"loss": 0.5088,
"step": 1925
},
{
"epoch": 0.87,
"grad_norm": 8.182121276855469,
"learning_rate": 3.9495382585751986e-05,
"loss": 0.5638,
"step": 1950
},
{
"epoch": 0.88,
"grad_norm": 3.3480095863342285,
"learning_rate": 3.9289248021108184e-05,
"loss": 0.5788,
"step": 1975
},
{
"epoch": 0.89,
"grad_norm": 1.820002555847168,
"learning_rate": 3.908311345646438e-05,
"loss": 0.5043,
"step": 2000
},
{
"epoch": 0.9,
"grad_norm": 5.409160614013672,
"learning_rate": 3.887697889182058e-05,
"loss": 0.3171,
"step": 2025
},
{
"epoch": 0.91,
"grad_norm": 6.675960063934326,
"learning_rate": 3.867084432717678e-05,
"loss": 0.4536,
"step": 2050
},
{
"epoch": 0.92,
"grad_norm": 2.4080889225006104,
"learning_rate": 3.8464709762532984e-05,
"loss": 0.5149,
"step": 2075
},
{
"epoch": 0.93,
"grad_norm": 1.0316126346588135,
"learning_rate": 3.825857519788918e-05,
"loss": 0.6131,
"step": 2100
},
{
"epoch": 0.95,
"grad_norm": 6.637559413909912,
"learning_rate": 3.805244063324539e-05,
"loss": 0.4527,
"step": 2125
},
{
"epoch": 0.96,
"grad_norm": 5.249868869781494,
"learning_rate": 3.7846306068601585e-05,
"loss": 0.3764,
"step": 2150
},
{
"epoch": 0.97,
"grad_norm": 8.488882064819336,
"learning_rate": 3.764017150395778e-05,
"loss": 0.4237,
"step": 2175
},
{
"epoch": 0.98,
"grad_norm": 1.0089231729507446,
"learning_rate": 3.743403693931399e-05,
"loss": 0.5939,
"step": 2200
},
{
"epoch": 0.99,
"grad_norm": 0.9522852301597595,
"learning_rate": 3.7227902374670186e-05,
"loss": 0.4297,
"step": 2225
},
{
"epoch": 1.0,
"eval_accuracy": 0.9071889606053861,
"eval_f1_macro": 0.4640938055477737,
"eval_f1_micro": 0.9071889606053861,
"eval_f1_weighted": 0.8894145383985912,
"eval_loss": 0.35839545726776123,
"eval_precision_macro": 0.6916894534747811,
"eval_precision_micro": 0.9071889606053861,
"eval_precision_weighted": 0.8955058437354184,
"eval_recall_macro": 0.4208919396551037,
"eval_recall_micro": 0.9071889606053861,
"eval_recall_weighted": 0.9071889606053861,
"eval_runtime": 391.8611,
"eval_samples_per_second": 11.466,
"eval_steps_per_second": 0.717,
"step": 2246
},
{
"epoch": 1.0,
"grad_norm": 4.51116943359375,
"learning_rate": 3.7021767810026384e-05,
"loss": 0.5182,
"step": 2250
},
{
"epoch": 1.01,
"grad_norm": 4.062203884124756,
"learning_rate": 3.681563324538258e-05,
"loss": 0.4014,
"step": 2275
},
{
"epoch": 1.02,
"grad_norm": 4.1213884353637695,
"learning_rate": 3.660949868073879e-05,
"loss": 0.3564,
"step": 2300
},
{
"epoch": 1.04,
"grad_norm": 4.061648845672607,
"learning_rate": 3.640336411609499e-05,
"loss": 0.5185,
"step": 2325
},
{
"epoch": 1.05,
"grad_norm": 5.865363597869873,
"learning_rate": 3.619722955145119e-05,
"loss": 0.5669,
"step": 2350
},
{
"epoch": 1.06,
"grad_norm": 1.3118332624435425,
"learning_rate": 3.599109498680739e-05,
"loss": 0.3586,
"step": 2375
},
{
"epoch": 1.07,
"grad_norm": 8.255626678466797,
"learning_rate": 3.578496042216359e-05,
"loss": 0.5938,
"step": 2400
},
{
"epoch": 1.08,
"grad_norm": 11.17790412902832,
"learning_rate": 3.557882585751979e-05,
"loss": 0.3843,
"step": 2425
},
{
"epoch": 1.09,
"grad_norm": 9.643479347229004,
"learning_rate": 3.5380936675461745e-05,
"loss": 0.5595,
"step": 2450
},
{
"epoch": 1.1,
"grad_norm": 0.35103797912597656,
"learning_rate": 3.5174802110817943e-05,
"loss": 0.413,
"step": 2475
},
{
"epoch": 1.11,
"grad_norm": 0.7915975451469421,
"learning_rate": 3.496866754617414e-05,
"loss": 0.3686,
"step": 2500
},
{
"epoch": 1.12,
"grad_norm": 5.222273826599121,
"learning_rate": 3.4762532981530347e-05,
"loss": 0.3873,
"step": 2525
},
{
"epoch": 1.14,
"grad_norm": 6.224874019622803,
"learning_rate": 3.4556398416886545e-05,
"loss": 0.5069,
"step": 2550
},
{
"epoch": 1.15,
"grad_norm": 12.55453109741211,
"learning_rate": 3.435026385224275e-05,
"loss": 0.255,
"step": 2575
},
{
"epoch": 1.16,
"grad_norm": 11.516471862792969,
"learning_rate": 3.414412928759895e-05,
"loss": 0.4712,
"step": 2600
},
{
"epoch": 1.17,
"grad_norm": 1.0335161685943604,
"learning_rate": 3.3937994722955146e-05,
"loss": 0.4652,
"step": 2625
},
{
"epoch": 1.18,
"grad_norm": 5.176596641540527,
"learning_rate": 3.3731860158311344e-05,
"loss": 0.3918,
"step": 2650
},
{
"epoch": 1.19,
"grad_norm": 3.3881676197052,
"learning_rate": 3.352572559366754e-05,
"loss": 0.3869,
"step": 2675
},
{
"epoch": 1.2,
"grad_norm": 4.0972514152526855,
"learning_rate": 3.331959102902375e-05,
"loss": 0.44,
"step": 2700
},
{
"epoch": 1.21,
"grad_norm": 0.4464218020439148,
"learning_rate": 3.311345646437995e-05,
"loss": 0.3373,
"step": 2725
},
{
"epoch": 1.22,
"grad_norm": 6.553329944610596,
"learning_rate": 3.290732189973615e-05,
"loss": 0.3585,
"step": 2750
},
{
"epoch": 1.24,
"grad_norm": 6.4266157150268555,
"learning_rate": 3.270118733509235e-05,
"loss": 0.4454,
"step": 2775
},
{
"epoch": 1.25,
"grad_norm": 0.8840596079826355,
"learning_rate": 3.2495052770448553e-05,
"loss": 0.396,
"step": 2800
},
{
"epoch": 1.26,
"grad_norm": 6.651895523071289,
"learning_rate": 3.228891820580475e-05,
"loss": 0.4029,
"step": 2825
},
{
"epoch": 1.27,
"grad_norm": 8.035750389099121,
"learning_rate": 3.208278364116095e-05,
"loss": 0.5253,
"step": 2850
},
{
"epoch": 1.28,
"grad_norm": 0.7661600112915039,
"learning_rate": 3.187664907651715e-05,
"loss": 0.498,
"step": 2875
},
{
"epoch": 1.29,
"grad_norm": 3.3388662338256836,
"learning_rate": 3.167051451187335e-05,
"loss": 0.4511,
"step": 2900
},
{
"epoch": 1.3,
"grad_norm": 4.266098499298096,
"learning_rate": 3.146437994722956e-05,
"loss": 0.5038,
"step": 2925
},
{
"epoch": 1.31,
"grad_norm": 9.547815322875977,
"learning_rate": 3.1258245382585756e-05,
"loss": 0.3055,
"step": 2950
},
{
"epoch": 1.32,
"grad_norm": 5.78660774230957,
"learning_rate": 3.1052110817941954e-05,
"loss": 0.3645,
"step": 2975
},
{
"epoch": 1.34,
"grad_norm": 0.6312762498855591,
"learning_rate": 3.084597625329815e-05,
"loss": 0.5396,
"step": 3000
},
{
"epoch": 1.35,
"grad_norm": 9.60580825805664,
"learning_rate": 3.063984168865435e-05,
"loss": 0.3903,
"step": 3025
},
{
"epoch": 1.36,
"grad_norm": 8.78200626373291,
"learning_rate": 3.043370712401056e-05,
"loss": 0.2628,
"step": 3050
},
{
"epoch": 1.37,
"grad_norm": 0.5894768834114075,
"learning_rate": 3.0227572559366757e-05,
"loss": 0.3829,
"step": 3075
},
{
"epoch": 1.38,
"grad_norm": 4.919884204864502,
"learning_rate": 3.0021437994722955e-05,
"loss": 0.4657,
"step": 3100
},
{
"epoch": 1.39,
"grad_norm": 4.6643290519714355,
"learning_rate": 2.9815303430079157e-05,
"loss": 0.4419,
"step": 3125
},
{
"epoch": 1.4,
"grad_norm": 4.99531888961792,
"learning_rate": 2.9609168865435355e-05,
"loss": 0.6053,
"step": 3150
},
{
"epoch": 1.41,
"grad_norm": 2.8033461570739746,
"learning_rate": 2.940303430079156e-05,
"loss": 0.3048,
"step": 3175
},
{
"epoch": 1.42,
"grad_norm": 10.75854206085205,
"learning_rate": 2.9196899736147758e-05,
"loss": 0.3911,
"step": 3200
},
{
"epoch": 1.44,
"grad_norm": 1.392392635345459,
"learning_rate": 2.899076517150396e-05,
"loss": 0.43,
"step": 3225
},
{
"epoch": 1.45,
"grad_norm": 4.793901443481445,
"learning_rate": 2.8784630606860158e-05,
"loss": 0.4389,
"step": 3250
},
{
"epoch": 1.46,
"grad_norm": 6.22283935546875,
"learning_rate": 2.8578496042216363e-05,
"loss": 0.3379,
"step": 3275
},
{
"epoch": 1.47,
"grad_norm": 2.443415403366089,
"learning_rate": 2.837236147757256e-05,
"loss": 0.4967,
"step": 3300
},
{
"epoch": 1.48,
"grad_norm": 4.5518059730529785,
"learning_rate": 2.8166226912928762e-05,
"loss": 0.5465,
"step": 3325
},
{
"epoch": 1.49,
"grad_norm": 6.078768253326416,
"learning_rate": 2.796009234828496e-05,
"loss": 0.4807,
"step": 3350
},
{
"epoch": 1.5,
"grad_norm": 0.6977243423461914,
"learning_rate": 2.775395778364116e-05,
"loss": 0.5693,
"step": 3375
},
{
"epoch": 1.51,
"grad_norm": 13.189055442810059,
"learning_rate": 2.7547823218997364e-05,
"loss": 0.2355,
"step": 3400
},
{
"epoch": 1.52,
"grad_norm": 11.238237380981445,
"learning_rate": 2.7341688654353565e-05,
"loss": 0.5586,
"step": 3425
},
{
"epoch": 1.54,
"grad_norm": 8.910079956054688,
"learning_rate": 2.7135554089709763e-05,
"loss": 0.4066,
"step": 3450
},
{
"epoch": 1.55,
"grad_norm": 1.7681870460510254,
"learning_rate": 2.692941952506596e-05,
"loss": 0.5567,
"step": 3475
},
{
"epoch": 1.56,
"grad_norm": 0.8575474619865417,
"learning_rate": 2.6723284960422163e-05,
"loss": 0.2942,
"step": 3500
},
{
"epoch": 1.57,
"grad_norm": 6.890367031097412,
"learning_rate": 2.6517150395778368e-05,
"loss": 0.2915,
"step": 3525
},
{
"epoch": 1.58,
"grad_norm": 0.6504009962081909,
"learning_rate": 2.6311015831134566e-05,
"loss": 0.4993,
"step": 3550
},
{
"epoch": 1.59,
"grad_norm": 5.363488674163818,
"learning_rate": 2.6104881266490768e-05,
"loss": 0.4317,
"step": 3575
},
{
"epoch": 1.6,
"grad_norm": 0.34819716215133667,
"learning_rate": 2.5898746701846966e-05,
"loss": 0.3354,
"step": 3600
},
{
"epoch": 1.61,
"grad_norm": 0.5899884104728699,
"learning_rate": 2.5692612137203164e-05,
"loss": 0.3711,
"step": 3625
},
{
"epoch": 1.63,
"grad_norm": 3.2884409427642822,
"learning_rate": 2.548647757255937e-05,
"loss": 0.3231,
"step": 3650
},
{
"epoch": 1.64,
"grad_norm": 10.364724159240723,
"learning_rate": 2.528034300791557e-05,
"loss": 0.5173,
"step": 3675
},
{
"epoch": 1.65,
"grad_norm": 5.121739864349365,
"learning_rate": 2.507420844327177e-05,
"loss": 0.4939,
"step": 3700
},
{
"epoch": 1.66,
"grad_norm": 6.0736589431762695,
"learning_rate": 2.486807387862797e-05,
"loss": 0.4922,
"step": 3725
},
{
"epoch": 1.67,
"grad_norm": 11.522198677062988,
"learning_rate": 2.466193931398417e-05,
"loss": 0.4233,
"step": 3750
},
{
"epoch": 1.68,
"grad_norm": 2.02380633354187,
"learning_rate": 2.4455804749340373e-05,
"loss": 0.3657,
"step": 3775
},
{
"epoch": 1.69,
"grad_norm": 7.379997730255127,
"learning_rate": 2.424967018469657e-05,
"loss": 0.4719,
"step": 3800
},
{
"epoch": 1.7,
"grad_norm": 9.087469100952148,
"learning_rate": 2.404353562005277e-05,
"loss": 0.4912,
"step": 3825
},
{
"epoch": 1.71,
"grad_norm": 6.239768028259277,
"learning_rate": 2.383740105540897e-05,
"loss": 0.4496,
"step": 3850
},
{
"epoch": 1.73,
"grad_norm": 0.5235075354576111,
"learning_rate": 2.3631266490765173e-05,
"loss": 0.3338,
"step": 3875
},
{
"epoch": 1.74,
"grad_norm": 4.942290782928467,
"learning_rate": 2.3425131926121374e-05,
"loss": 0.5681,
"step": 3900
},
{
"epoch": 1.75,
"grad_norm": 0.7366746068000793,
"learning_rate": 2.3218997361477572e-05,
"loss": 0.4026,
"step": 3925
},
{
"epoch": 1.76,
"grad_norm": 6.261937141418457,
"learning_rate": 2.3012862796833774e-05,
"loss": 0.3486,
"step": 3950
},
{
"epoch": 1.77,
"grad_norm": 4.27028751373291,
"learning_rate": 2.2806728232189976e-05,
"loss": 0.2636,
"step": 3975
},
{
"epoch": 1.78,
"grad_norm": 1.5691050291061401,
"learning_rate": 2.260883905013193e-05,
"loss": 0.4031,
"step": 4000
},
{
"epoch": 1.79,
"grad_norm": 8.969446182250977,
"learning_rate": 2.2402704485488127e-05,
"loss": 0.3943,
"step": 4025
},
{
"epoch": 1.8,
"grad_norm": 0.4185885190963745,
"learning_rate": 2.219656992084433e-05,
"loss": 0.4099,
"step": 4050
},
{
"epoch": 1.81,
"grad_norm": 3.5010409355163574,
"learning_rate": 2.1990435356200527e-05,
"loss": 0.5192,
"step": 4075
},
{
"epoch": 1.83,
"grad_norm": 0.3482280969619751,
"learning_rate": 2.1784300791556732e-05,
"loss": 0.4143,
"step": 4100
},
{
"epoch": 1.84,
"grad_norm": 0.623389720916748,
"learning_rate": 2.157816622691293e-05,
"loss": 0.3566,
"step": 4125
},
{
"epoch": 1.85,
"grad_norm": 11.738636016845703,
"learning_rate": 2.1372031662269128e-05,
"loss": 0.4869,
"step": 4150
},
{
"epoch": 1.86,
"grad_norm": 18.07844352722168,
"learning_rate": 2.116589709762533e-05,
"loss": 0.2863,
"step": 4175
},
{
"epoch": 1.87,
"grad_norm": 7.016993522644043,
"learning_rate": 2.095976253298153e-05,
"loss": 0.3461,
"step": 4200
},
{
"epoch": 1.88,
"grad_norm": 0.18062768876552582,
"learning_rate": 2.0753627968337733e-05,
"loss": 0.2242,
"step": 4225
},
{
"epoch": 1.89,
"grad_norm": 8.72131061553955,
"learning_rate": 2.054749340369393e-05,
"loss": 0.4769,
"step": 4250
},
{
"epoch": 1.9,
"grad_norm": 0.8265817165374756,
"learning_rate": 2.0341358839050133e-05,
"loss": 0.4334,
"step": 4275
},
{
"epoch": 1.91,
"grad_norm": 2.490007162094116,
"learning_rate": 2.0135224274406334e-05,
"loss": 0.457,
"step": 4300
},
{
"epoch": 1.93,
"grad_norm": 0.18633964657783508,
"learning_rate": 1.9929089709762532e-05,
"loss": 0.4192,
"step": 4325
},
{
"epoch": 1.94,
"grad_norm": 5.816620826721191,
"learning_rate": 1.9722955145118734e-05,
"loss": 0.3111,
"step": 4350
},
{
"epoch": 1.95,
"grad_norm": 5.391794204711914,
"learning_rate": 1.9516820580474935e-05,
"loss": 0.276,
"step": 4375
},
{
"epoch": 1.96,
"grad_norm": 0.8628760576248169,
"learning_rate": 1.9310686015831137e-05,
"loss": 0.3737,
"step": 4400
},
{
"epoch": 1.97,
"grad_norm": 4.97860050201416,
"learning_rate": 1.9104551451187335e-05,
"loss": 0.376,
"step": 4425
},
{
"epoch": 1.98,
"grad_norm": 4.89571475982666,
"learning_rate": 1.8898416886543537e-05,
"loss": 0.3772,
"step": 4450
},
{
"epoch": 1.99,
"grad_norm": 0.35662633180618286,
"learning_rate": 1.8692282321899738e-05,
"loss": 0.4631,
"step": 4475
},
{
"epoch": 2.0,
"eval_accuracy": 0.9203204985533051,
"eval_f1_macro": 0.49143994642521266,
"eval_f1_micro": 0.9203204985533051,
"eval_f1_weighted": 0.9061028561941445,
"eval_loss": 0.2875824272632599,
"eval_precision_macro": 0.6640335095209342,
"eval_precision_micro": 0.9203204985533051,
"eval_precision_weighted": 0.9054269421170664,
"eval_recall_macro": 0.445379050503802,
"eval_recall_micro": 0.9203204985533051,
"eval_recall_weighted": 0.9203204985533051,
"eval_runtime": 404.3838,
"eval_samples_per_second": 11.111,
"eval_steps_per_second": 0.695,
"step": 4492
},
{
"epoch": 2.0,
"grad_norm": 10.790550231933594,
"learning_rate": 1.8486147757255936e-05,
"loss": 0.536,
"step": 4500
},
{
"epoch": 2.01,
"grad_norm": 8.45910930633545,
"learning_rate": 1.8280013192612138e-05,
"loss": 0.4415,
"step": 4525
},
{
"epoch": 2.03,
"grad_norm": 5.119017124176025,
"learning_rate": 1.807387862796834e-05,
"loss": 0.3585,
"step": 4550
},
{
"epoch": 2.04,
"grad_norm": 1.2371793985366821,
"learning_rate": 1.786774406332454e-05,
"loss": 0.3951,
"step": 4575
},
{
"epoch": 2.05,
"grad_norm": 4.3467607498168945,
"learning_rate": 1.766160949868074e-05,
"loss": 0.4048,
"step": 4600
},
{
"epoch": 2.06,
"grad_norm": 0.7943634986877441,
"learning_rate": 1.7455474934036937e-05,
"loss": 0.3604,
"step": 4625
},
{
"epoch": 2.07,
"grad_norm": 1.8456642627716064,
"learning_rate": 1.7249340369393142e-05,
"loss": 0.4308,
"step": 4650
},
{
"epoch": 2.08,
"grad_norm": 15.450132369995117,
"learning_rate": 1.704320580474934e-05,
"loss": 0.2849,
"step": 4675
},
{
"epoch": 2.09,
"grad_norm": 0.7097306251525879,
"learning_rate": 1.6837071240105542e-05,
"loss": 0.3189,
"step": 4700
},
{
"epoch": 2.1,
"grad_norm": 0.0466163270175457,
"learning_rate": 1.663093667546174e-05,
"loss": 0.3747,
"step": 4725
},
{
"epoch": 2.11,
"grad_norm": 17.914644241333008,
"learning_rate": 1.6424802110817945e-05,
"loss": 0.4635,
"step": 4750
},
{
"epoch": 2.13,
"grad_norm": 5.257259845733643,
"learning_rate": 1.6218667546174143e-05,
"loss": 0.3882,
"step": 4775
},
{
"epoch": 2.14,
"grad_norm": 0.1565193384885788,
"learning_rate": 1.601253298153034e-05,
"loss": 0.4073,
"step": 4800
},
{
"epoch": 2.15,
"grad_norm": 13.001235008239746,
"learning_rate": 1.5806398416886546e-05,
"loss": 0.3174,
"step": 4825
},
{
"epoch": 2.16,
"grad_norm": 11.252735137939453,
"learning_rate": 1.5600263852242745e-05,
"loss": 0.4794,
"step": 4850
},
{
"epoch": 2.17,
"grad_norm": 8.534846305847168,
"learning_rate": 1.5394129287598946e-05,
"loss": 0.2951,
"step": 4875
},
{
"epoch": 2.18,
"grad_norm": 4.504175662994385,
"learning_rate": 1.5187994722955146e-05,
"loss": 0.5374,
"step": 4900
},
{
"epoch": 2.19,
"grad_norm": 4.395377159118652,
"learning_rate": 1.4981860158311347e-05,
"loss": 0.3697,
"step": 4925
},
{
"epoch": 2.2,
"grad_norm": 11.302129745483398,
"learning_rate": 1.4775725593667547e-05,
"loss": 0.2116,
"step": 4950
},
{
"epoch": 2.22,
"grad_norm": 0.7363251447677612,
"learning_rate": 1.4569591029023747e-05,
"loss": 0.327,
"step": 4975
},
{
"epoch": 2.23,
"grad_norm": 4.721381664276123,
"learning_rate": 1.4363456464379949e-05,
"loss": 0.3867,
"step": 5000
},
{
"epoch": 2.24,
"grad_norm": 2.9688565731048584,
"learning_rate": 1.4157321899736149e-05,
"loss": 0.4413,
"step": 5025
},
{
"epoch": 2.25,
"grad_norm": 0.40781280398368835,
"learning_rate": 1.395118733509235e-05,
"loss": 0.3637,
"step": 5050
},
{
"epoch": 2.26,
"grad_norm": 1.3190653324127197,
"learning_rate": 1.374505277044855e-05,
"loss": 0.3532,
"step": 5075
},
{
"epoch": 2.27,
"grad_norm": 0.6920987963676453,
"learning_rate": 1.3538918205804748e-05,
"loss": 0.3523,
"step": 5100
},
{
"epoch": 2.28,
"grad_norm": 10.344520568847656,
"learning_rate": 1.3332783641160951e-05,
"loss": 0.4375,
"step": 5125
},
{
"epoch": 2.29,
"grad_norm": 12.584922790527344,
"learning_rate": 1.312664907651715e-05,
"loss": 0.4301,
"step": 5150
},
{
"epoch": 2.3,
"grad_norm": 0.4461086094379425,
"learning_rate": 1.2920514511873353e-05,
"loss": 0.3158,
"step": 5175
},
{
"epoch": 2.32,
"grad_norm": 9.555744171142578,
"learning_rate": 1.2714379947229551e-05,
"loss": 0.3468,
"step": 5200
},
{
"epoch": 2.33,
"grad_norm": 12.140357971191406,
"learning_rate": 1.2508245382585754e-05,
"loss": 0.4215,
"step": 5225
},
{
"epoch": 2.34,
"grad_norm": 4.640113830566406,
"learning_rate": 1.2302110817941952e-05,
"loss": 0.3538,
"step": 5250
},
{
"epoch": 2.35,
"grad_norm": 8.983073234558105,
"learning_rate": 1.2095976253298154e-05,
"loss": 0.2868,
"step": 5275
},
{
"epoch": 2.36,
"grad_norm": 6.697389125823975,
"learning_rate": 1.1889841688654354e-05,
"loss": 0.4148,
"step": 5300
},
{
"epoch": 2.37,
"grad_norm": 9.571817398071289,
"learning_rate": 1.1683707124010555e-05,
"loss": 0.4891,
"step": 5325
},
{
"epoch": 2.38,
"grad_norm": 6.644136905670166,
"learning_rate": 1.1477572559366755e-05,
"loss": 0.2858,
"step": 5350
},
{
"epoch": 2.39,
"grad_norm": 19.55898666381836,
"learning_rate": 1.1271437994722955e-05,
"loss": 0.4059,
"step": 5375
},
{
"epoch": 2.4,
"grad_norm": 6.888569355010986,
"learning_rate": 1.1065303430079157e-05,
"loss": 0.3415,
"step": 5400
},
{
"epoch": 2.42,
"grad_norm": 7.856134414672852,
"learning_rate": 1.0859168865435356e-05,
"loss": 0.3327,
"step": 5425
},
{
"epoch": 2.43,
"grad_norm": 15.82084846496582,
"learning_rate": 1.0653034300791558e-05,
"loss": 0.4321,
"step": 5450
},
{
"epoch": 2.44,
"grad_norm": 14.98440170288086,
"learning_rate": 1.0446899736147758e-05,
"loss": 0.4672,
"step": 5475
},
{
"epoch": 2.45,
"grad_norm": 0.872367799282074,
"learning_rate": 1.0240765171503958e-05,
"loss": 0.2711,
"step": 5500
},
{
"epoch": 2.46,
"grad_norm": 5.635341167449951,
"learning_rate": 1.0034630606860158e-05,
"loss": 0.3636,
"step": 5525
},
{
"epoch": 2.47,
"grad_norm": 12.989480972290039,
"learning_rate": 9.82849604221636e-06,
"loss": 0.3524,
"step": 5550
},
{
"epoch": 2.48,
"grad_norm": 16.43426513671875,
"learning_rate": 9.622361477572559e-06,
"loss": 0.2381,
"step": 5575
},
{
"epoch": 2.49,
"grad_norm": 1.0547945499420166,
"learning_rate": 9.41622691292876e-06,
"loss": 0.3293,
"step": 5600
},
{
"epoch": 2.5,
"grad_norm": 13.630729675292969,
"learning_rate": 9.210092348284962e-06,
"loss": 0.3658,
"step": 5625
},
{
"epoch": 2.52,
"grad_norm": 12.972505569458008,
"learning_rate": 9.003957783641162e-06,
"loss": 0.2829,
"step": 5650
},
{
"epoch": 2.53,
"grad_norm": 10.048601150512695,
"learning_rate": 8.797823218997362e-06,
"loss": 0.3867,
"step": 5675
},
{
"epoch": 2.54,
"grad_norm": 1.120229721069336,
"learning_rate": 8.591688654353562e-06,
"loss": 0.4166,
"step": 5700
},
{
"epoch": 2.55,
"grad_norm": 0.9482748508453369,
"learning_rate": 8.385554089709763e-06,
"loss": 0.4281,
"step": 5725
},
{
"epoch": 2.56,
"grad_norm": 0.29686295986175537,
"learning_rate": 8.179419525065963e-06,
"loss": 0.3187,
"step": 5750
},
{
"epoch": 2.57,
"grad_norm": 9.385336875915527,
"learning_rate": 7.973284960422165e-06,
"loss": 0.3553,
"step": 5775
},
{
"epoch": 2.58,
"grad_norm": 5.910414695739746,
"learning_rate": 7.767150395778365e-06,
"loss": 0.2821,
"step": 5800
},
{
"epoch": 2.59,
"grad_norm": 5.932247161865234,
"learning_rate": 7.561015831134564e-06,
"loss": 0.2706,
"step": 5825
},
{
"epoch": 2.6,
"grad_norm": 1.826149821281433,
"learning_rate": 7.354881266490765e-06,
"loss": 0.3742,
"step": 5850
},
{
"epoch": 2.62,
"grad_norm": 9.548162460327148,
"learning_rate": 7.148746701846966e-06,
"loss": 0.3124,
"step": 5875
},
{
"epoch": 2.63,
"grad_norm": 10.59200668334961,
"learning_rate": 6.9426121372031665e-06,
"loss": 0.2541,
"step": 5900
},
{
"epoch": 2.64,
"grad_norm": 6.801640033721924,
"learning_rate": 6.736477572559367e-06,
"loss": 0.336,
"step": 5925
},
{
"epoch": 2.65,
"grad_norm": 6.312964916229248,
"learning_rate": 6.530343007915568e-06,
"loss": 0.5251,
"step": 5950
},
{
"epoch": 2.66,
"grad_norm": 10.121294975280762,
"learning_rate": 6.324208443271768e-06,
"loss": 0.3999,
"step": 5975
},
{
"epoch": 2.67,
"grad_norm": 11.066811561584473,
"learning_rate": 6.1180738786279684e-06,
"loss": 0.3101,
"step": 6000
},
{
"epoch": 2.68,
"grad_norm": 0.14530642330646515,
"learning_rate": 5.911939313984169e-06,
"loss": 0.2483,
"step": 6025
},
{
"epoch": 2.69,
"grad_norm": 8.127425193786621,
"learning_rate": 5.70580474934037e-06,
"loss": 0.2684,
"step": 6050
},
{
"epoch": 2.7,
"grad_norm": 4.671697616577148,
"learning_rate": 5.4996701846965706e-06,
"loss": 0.3339,
"step": 6075
},
{
"epoch": 2.72,
"grad_norm": 7.663967609405518,
"learning_rate": 5.29353562005277e-06,
"loss": 0.2823,
"step": 6100
},
{
"epoch": 2.73,
"grad_norm": 4.24953556060791,
"learning_rate": 5.087401055408971e-06,
"loss": 0.2481,
"step": 6125
},
{
"epoch": 2.74,
"grad_norm": 6.942299842834473,
"learning_rate": 4.881266490765172e-06,
"loss": 0.3467,
"step": 6150
},
{
"epoch": 2.75,
"grad_norm": 2.23897123336792,
"learning_rate": 4.6751319261213725e-06,
"loss": 0.4663,
"step": 6175
},
{
"epoch": 2.76,
"grad_norm": 21.0181827545166,
"learning_rate": 4.468997361477572e-06,
"loss": 0.2012,
"step": 6200
},
{
"epoch": 2.77,
"grad_norm": 6.582679748535156,
"learning_rate": 4.262862796833773e-06,
"loss": 0.3391,
"step": 6225
},
{
"epoch": 2.78,
"grad_norm": 0.8921090960502625,
"learning_rate": 4.056728232189974e-06,
"loss": 0.2925,
"step": 6250
},
{
"epoch": 2.79,
"grad_norm": 0.729013204574585,
"learning_rate": 3.8505936675461745e-06,
"loss": 0.2918,
"step": 6275
},
{
"epoch": 2.8,
"grad_norm": 0.47634056210517883,
"learning_rate": 3.6527044854881267e-06,
"loss": 0.3309,
"step": 6300
},
{
"epoch": 2.82,
"grad_norm": 0.7455437183380127,
"learning_rate": 3.4465699208443274e-06,
"loss": 0.2839,
"step": 6325
},
{
"epoch": 2.83,
"grad_norm": 17.21619415283203,
"learning_rate": 3.240435356200528e-06,
"loss": 0.3023,
"step": 6350
},
{
"epoch": 2.84,
"grad_norm": 0.5437944531440735,
"learning_rate": 3.0343007915567284e-06,
"loss": 0.2888,
"step": 6375
},
{
"epoch": 2.85,
"grad_norm": 4.063761234283447,
"learning_rate": 2.8281662269129287e-06,
"loss": 0.4373,
"step": 6400
},
{
"epoch": 2.86,
"grad_norm": 6.74634313583374,
"learning_rate": 2.6220316622691294e-06,
"loss": 0.4396,
"step": 6425
},
{
"epoch": 2.87,
"grad_norm": 3.216498613357544,
"learning_rate": 2.41589709762533e-06,
"loss": 0.3116,
"step": 6450
},
{
"epoch": 2.88,
"grad_norm": 1.6705697774887085,
"learning_rate": 2.2097625329815304e-06,
"loss": 0.3643,
"step": 6475
},
{
"epoch": 2.89,
"grad_norm": 4.8092851638793945,
"learning_rate": 2.003627968337731e-06,
"loss": 0.2643,
"step": 6500
},
{
"epoch": 2.91,
"grad_norm": 16.015827178955078,
"learning_rate": 1.7974934036939316e-06,
"loss": 0.4048,
"step": 6525
},
{
"epoch": 2.92,
"grad_norm": 2.7783210277557373,
"learning_rate": 1.5913588390501319e-06,
"loss": 0.3777,
"step": 6550
},
{
"epoch": 2.93,
"grad_norm": 0.9853120446205139,
"learning_rate": 1.3852242744063324e-06,
"loss": 0.2335,
"step": 6575
},
{
"epoch": 2.94,
"grad_norm": 0.24746793508529663,
"learning_rate": 1.179089709762533e-06,
"loss": 0.1812,
"step": 6600
},
{
"epoch": 2.95,
"grad_norm": 2.739319324493408,
"learning_rate": 9.729551451187335e-07,
"loss": 0.2132,
"step": 6625
},
{
"epoch": 2.96,
"grad_norm": 0.38551005721092224,
"learning_rate": 7.66820580474934e-07,
"loss": 0.2686,
"step": 6650
},
{
"epoch": 2.97,
"grad_norm": 6.976538181304932,
"learning_rate": 5.606860158311346e-07,
"loss": 0.507,
"step": 6675
},
{
"epoch": 2.98,
"grad_norm": 6.835049152374268,
"learning_rate": 3.5455145118733513e-07,
"loss": 0.4169,
"step": 6700
},
{
"epoch": 2.99,
"grad_norm": 0.45143523812294006,
"learning_rate": 1.4841688654353562e-07,
"loss": 0.3045,
"step": 6725
},
{
"epoch": 3.0,
"eval_accuracy": 0.9278878255063432,
"eval_f1_macro": 0.5509939923795275,
"eval_f1_micro": 0.9278878255063432,
"eval_f1_weighted": 0.915533252030031,
"eval_loss": 0.2612117528915405,
"eval_precision_macro": 0.7507175360173887,
"eval_precision_micro": 0.9278878255063432,
"eval_precision_weighted": 0.9184859185112592,
"eval_recall_macro": 0.4872137731200702,
"eval_recall_micro": 0.9278878255063432,
"eval_recall_weighted": 0.9278878255063432,
"eval_runtime": 408.2523,
"eval_samples_per_second": 11.005,
"eval_steps_per_second": 0.688,
"step": 6738
}
],
"logging_steps": 25,
"max_steps": 6738,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 4.1760701843670835e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}