{
"best_metric": 0.3305796980857849,
"best_model_checkpoint": "autotrain-m1b56-8ger6/checkpoint-3462",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 3462,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014442518775274409,
"grad_norm": 52.66768264770508,
"learning_rate": 2.0192307692307692e-06,
"loss": 8.6614,
"step": 25
},
{
"epoch": 0.028885037550548817,
"grad_norm": 70.3343276977539,
"learning_rate": 4.423076923076924e-06,
"loss": 7.3266,
"step": 50
},
{
"epoch": 0.043327556325823226,
"grad_norm": 64.09215545654297,
"learning_rate": 6.730769230769231e-06,
"loss": 1.6514,
"step": 75
},
{
"epoch": 0.057770075101097634,
"grad_norm": 29.524045944213867,
"learning_rate": 9.134615384615384e-06,
"loss": 1.0548,
"step": 100
},
{
"epoch": 0.07221259387637204,
"grad_norm": 14.635738372802734,
"learning_rate": 1.153846153846154e-05,
"loss": 0.9082,
"step": 125
},
{
"epoch": 0.08665511265164645,
"grad_norm": 19.21646499633789,
"learning_rate": 1.3942307692307693e-05,
"loss": 0.5165,
"step": 150
},
{
"epoch": 0.10109763142692085,
"grad_norm": 34.39830017089844,
"learning_rate": 1.6346153846153847e-05,
"loss": 0.5166,
"step": 175
},
{
"epoch": 0.11554015020219527,
"grad_norm": 12.46200180053711,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.6898,
"step": 200
},
{
"epoch": 0.12998266897746968,
"grad_norm": 44.481101989746094,
"learning_rate": 2.1153846153846154e-05,
"loss": 0.5929,
"step": 225
},
{
"epoch": 0.14442518775274407,
"grad_norm": 14.731172561645508,
"learning_rate": 2.355769230769231e-05,
"loss": 0.5474,
"step": 250
},
{
"epoch": 0.1588677065280185,
"grad_norm": 27.139829635620117,
"learning_rate": 2.586538461538462e-05,
"loss": 0.7523,
"step": 275
},
{
"epoch": 0.1733102253032929,
"grad_norm": 13.601739883422852,
"learning_rate": 2.826923076923077e-05,
"loss": 0.6244,
"step": 300
},
{
"epoch": 0.1877527440785673,
"grad_norm": 18.18332862854004,
"learning_rate": 3.0673076923076926e-05,
"loss": 0.618,
"step": 325
},
{
"epoch": 0.2021952628538417,
"grad_norm": 6.123499870300293,
"learning_rate": 3.307692307692308e-05,
"loss": 0.7482,
"step": 350
},
{
"epoch": 0.21663778162911612,
"grad_norm": 10.999884605407715,
"learning_rate": 3.548076923076924e-05,
"loss": 0.5383,
"step": 375
},
{
"epoch": 0.23108030040439054,
"grad_norm": 22.577754974365234,
"learning_rate": 3.788461538461538e-05,
"loss": 0.5854,
"step": 400
},
{
"epoch": 0.24552281917966493,
"grad_norm": 31.469331741333008,
"learning_rate": 4.028846153846154e-05,
"loss": 0.5403,
"step": 425
},
{
"epoch": 0.25996533795493937,
"grad_norm": 8.64493465423584,
"learning_rate": 4.269230769230769e-05,
"loss": 0.6795,
"step": 450
},
{
"epoch": 0.27440785673021373,
"grad_norm": 7.3773393630981445,
"learning_rate": 4.509615384615385e-05,
"loss": 0.5801,
"step": 475
},
{
"epoch": 0.28885037550548814,
"grad_norm": 11.987143516540527,
"learning_rate": 4.75e-05,
"loss": 0.508,
"step": 500
},
{
"epoch": 0.30329289428076256,
"grad_norm": 7.039488792419434,
"learning_rate": 4.9903846153846154e-05,
"loss": 0.5415,
"step": 525
},
{
"epoch": 0.317735413056037,
"grad_norm": 13.371904373168945,
"learning_rate": 4.9743205649475715e-05,
"loss": 0.5646,
"step": 550
},
{
"epoch": 0.3321779318313114,
"grad_norm": 32.50100326538086,
"learning_rate": 4.947571153434625e-05,
"loss": 0.7043,
"step": 575
},
{
"epoch": 0.3466204506065858,
"grad_norm": 25.765533447265625,
"learning_rate": 4.920821741921678e-05,
"loss": 0.5854,
"step": 600
},
{
"epoch": 0.3610629693818602,
"grad_norm": 33.29277420043945,
"learning_rate": 4.894072330408731e-05,
"loss": 0.7418,
"step": 625
},
{
"epoch": 0.3755054881571346,
"grad_norm": 23.995582580566406,
"learning_rate": 4.8673229188957844e-05,
"loss": 0.7084,
"step": 650
},
{
"epoch": 0.389948006932409,
"grad_norm": 24.641578674316406,
"learning_rate": 4.840573507382838e-05,
"loss": 0.6657,
"step": 675
},
{
"epoch": 0.4043905257076834,
"grad_norm": 16.46844482421875,
"learning_rate": 4.813824095869891e-05,
"loss": 0.4334,
"step": 700
},
{
"epoch": 0.41883304448295783,
"grad_norm": 15.955053329467773,
"learning_rate": 4.7870746843569445e-05,
"loss": 0.4989,
"step": 725
},
{
"epoch": 0.43327556325823224,
"grad_norm": 20.922630310058594,
"learning_rate": 4.760325272843998e-05,
"loss": 0.5395,
"step": 750
},
{
"epoch": 0.44771808203350666,
"grad_norm": 33.461490631103516,
"learning_rate": 4.733575861331051e-05,
"loss": 0.7912,
"step": 775
},
{
"epoch": 0.4621606008087811,
"grad_norm": 28.123592376708984,
"learning_rate": 4.7068264498181045e-05,
"loss": 0.7634,
"step": 800
},
{
"epoch": 0.47660311958405543,
"grad_norm": 6.648211479187012,
"learning_rate": 4.6800770383051574e-05,
"loss": 0.5912,
"step": 825
},
{
"epoch": 0.49104563835932985,
"grad_norm": 6.500886917114258,
"learning_rate": 4.653327626792211e-05,
"loss": 0.595,
"step": 850
},
{
"epoch": 0.5054881571346043,
"grad_norm": 17.182079315185547,
"learning_rate": 4.626578215279264e-05,
"loss": 0.6416,
"step": 875
},
{
"epoch": 0.5199306759098787,
"grad_norm": 20.7849063873291,
"learning_rate": 4.5998288037663175e-05,
"loss": 0.4613,
"step": 900
},
{
"epoch": 0.5343731946851531,
"grad_norm": 6.231338977813721,
"learning_rate": 4.5730793922533704e-05,
"loss": 0.4601,
"step": 925
},
{
"epoch": 0.5488157134604275,
"grad_norm": 37.750099182128906,
"learning_rate": 4.546329980740424e-05,
"loss": 0.6588,
"step": 950
},
{
"epoch": 0.5632582322357019,
"grad_norm": 13.106170654296875,
"learning_rate": 4.5195805692274775e-05,
"loss": 0.6352,
"step": 975
},
{
"epoch": 0.5777007510109763,
"grad_norm": 12.079797744750977,
"learning_rate": 4.4928311577145304e-05,
"loss": 0.6447,
"step": 1000
},
{
"epoch": 0.5921432697862508,
"grad_norm": 14.380491256713867,
"learning_rate": 4.466081746201583e-05,
"loss": 0.4655,
"step": 1025
},
{
"epoch": 0.6065857885615251,
"grad_norm": 18.562135696411133,
"learning_rate": 4.4393323346886376e-05,
"loss": 0.4547,
"step": 1050
},
{
"epoch": 0.6210283073367996,
"grad_norm": 22.869455337524414,
"learning_rate": 4.4125829231756905e-05,
"loss": 0.5574,
"step": 1075
},
{
"epoch": 0.635470826112074,
"grad_norm": 15.650333404541016,
"learning_rate": 4.3858335116627433e-05,
"loss": 0.5149,
"step": 1100
},
{
"epoch": 0.6499133448873483,
"grad_norm": 19.41951560974121,
"learning_rate": 4.359084100149797e-05,
"loss": 0.5395,
"step": 1125
},
{
"epoch": 0.6643558636626228,
"grad_norm": 14.825141906738281,
"learning_rate": 4.3323346886368505e-05,
"loss": 0.3763,
"step": 1150
},
{
"epoch": 0.6787983824378971,
"grad_norm": 10.986475944519043,
"learning_rate": 4.3055852771239034e-05,
"loss": 0.383,
"step": 1175
},
{
"epoch": 0.6932409012131716,
"grad_norm": 6.805363655090332,
"learning_rate": 4.278835865610957e-05,
"loss": 0.4967,
"step": 1200
},
{
"epoch": 0.707683419988446,
"grad_norm": 9.07087230682373,
"learning_rate": 4.25208645409801e-05,
"loss": 0.4451,
"step": 1225
},
{
"epoch": 0.7221259387637204,
"grad_norm": 15.876053810119629,
"learning_rate": 4.2253370425850634e-05,
"loss": 0.4178,
"step": 1250
},
{
"epoch": 0.7365684575389948,
"grad_norm": 10.015765190124512,
"learning_rate": 4.198587631072117e-05,
"loss": 0.5196,
"step": 1275
},
{
"epoch": 0.7510109763142692,
"grad_norm": 14.343164443969727,
"learning_rate": 4.17183821955917e-05,
"loss": 0.3987,
"step": 1300
},
{
"epoch": 0.7654534950895436,
"grad_norm": 7.555254936218262,
"learning_rate": 4.145088808046223e-05,
"loss": 0.4268,
"step": 1325
},
{
"epoch": 0.779896013864818,
"grad_norm": 19.02402114868164,
"learning_rate": 4.1183393965332764e-05,
"loss": 0.5197,
"step": 1350
},
{
"epoch": 0.7943385326400925,
"grad_norm": 17.702478408813477,
"learning_rate": 4.09158998502033e-05,
"loss": 0.3667,
"step": 1375
},
{
"epoch": 0.8087810514153668,
"grad_norm": 14.175180435180664,
"learning_rate": 4.064840573507383e-05,
"loss": 0.4353,
"step": 1400
},
{
"epoch": 0.8232235701906413,
"grad_norm": 11.903855323791504,
"learning_rate": 4.0380911619944364e-05,
"loss": 0.5338,
"step": 1425
},
{
"epoch": 0.8376660889659157,
"grad_norm": 13.651313781738281,
"learning_rate": 4.01134175048149e-05,
"loss": 0.4009,
"step": 1450
},
{
"epoch": 0.85210860774119,
"grad_norm": 5.367980480194092,
"learning_rate": 3.984592338968543e-05,
"loss": 0.5129,
"step": 1475
},
{
"epoch": 0.8665511265164645,
"grad_norm": 18.119848251342773,
"learning_rate": 3.957842927455596e-05,
"loss": 0.4251,
"step": 1500
},
{
"epoch": 0.8809936452917388,
"grad_norm": 8.821969032287598,
"learning_rate": 3.9310935159426494e-05,
"loss": 0.3965,
"step": 1525
},
{
"epoch": 0.8954361640670133,
"grad_norm": 5.4203782081604,
"learning_rate": 3.904344104429703e-05,
"loss": 0.427,
"step": 1550
},
{
"epoch": 0.9098786828422877,
"grad_norm": 10.101963996887207,
"learning_rate": 3.877594692916756e-05,
"loss": 0.3891,
"step": 1575
},
{
"epoch": 0.9243212016175621,
"grad_norm": 5.893563747406006,
"learning_rate": 3.8508452814038094e-05,
"loss": 0.4165,
"step": 1600
},
{
"epoch": 0.9387637203928365,
"grad_norm": 6.127294540405273,
"learning_rate": 3.8240958698908623e-05,
"loss": 0.5194,
"step": 1625
},
{
"epoch": 0.9532062391681109,
"grad_norm": 13.313063621520996,
"learning_rate": 3.797346458377916e-05,
"loss": 0.4108,
"step": 1650
},
{
"epoch": 0.9676487579433853,
"grad_norm": 14.13697624206543,
"learning_rate": 3.7705970468649695e-05,
"loss": 0.4612,
"step": 1675
},
{
"epoch": 0.9820912767186597,
"grad_norm": 8.540029525756836,
"learning_rate": 3.7438476353520224e-05,
"loss": 0.4081,
"step": 1700
},
{
"epoch": 0.9965337954939342,
"grad_norm": 15.01349925994873,
"learning_rate": 3.717098223839075e-05,
"loss": 0.4559,
"step": 1725
},
{
"epoch": 1.0,
"eval_explained_variance": 0.6565504670143127,
"eval_loss": 0.5362390279769897,
"eval_mae": 0.5734534859657288,
"eval_mse": 0.5362390279769897,
"eval_r2": 0.5139721769346014,
"eval_rmse": 0.7322834134101868,
"eval_runtime": 21.5346,
"eval_samples_per_second": 160.765,
"eval_steps_per_second": 10.077,
"step": 1731
},
{
"epoch": 1.0109763142692085,
"grad_norm": 21.412073135375977,
"learning_rate": 3.6903488123261295e-05,
"loss": 0.4508,
"step": 1750
},
{
"epoch": 1.025418833044483,
"grad_norm": 8.56926155090332,
"learning_rate": 3.6635994008131824e-05,
"loss": 0.4829,
"step": 1775
},
{
"epoch": 1.0398613518197575,
"grad_norm": 9.545137405395508,
"learning_rate": 3.6368499893002353e-05,
"loss": 0.4499,
"step": 1800
},
{
"epoch": 1.0543038705950318,
"grad_norm": 27.51245880126953,
"learning_rate": 3.610100577787289e-05,
"loss": 0.4109,
"step": 1825
},
{
"epoch": 1.0687463893703062,
"grad_norm": 12.80823802947998,
"learning_rate": 3.583351166274342e-05,
"loss": 0.3662,
"step": 1850
},
{
"epoch": 1.0831889081455806,
"grad_norm": 7.883637428283691,
"learning_rate": 3.5566017547613954e-05,
"loss": 0.4184,
"step": 1875
},
{
"epoch": 1.097631426920855,
"grad_norm": 7.406681537628174,
"learning_rate": 3.529852343248449e-05,
"loss": 0.3439,
"step": 1900
},
{
"epoch": 1.1120739456961295,
"grad_norm": 19.036779403686523,
"learning_rate": 3.503102931735502e-05,
"loss": 0.4009,
"step": 1925
},
{
"epoch": 1.1265164644714039,
"grad_norm": 8.52979850769043,
"learning_rate": 3.476353520222555e-05,
"loss": 0.4397,
"step": 1950
},
{
"epoch": 1.1409589832466782,
"grad_norm": 16.250051498413086,
"learning_rate": 3.449604108709609e-05,
"loss": 0.4651,
"step": 1975
},
{
"epoch": 1.1554015020219526,
"grad_norm": 5.9189300537109375,
"learning_rate": 3.422854697196662e-05,
"loss": 0.4419,
"step": 2000
},
{
"epoch": 1.169844020797227,
"grad_norm": 11.332290649414062,
"learning_rate": 3.396105285683715e-05,
"loss": 0.3755,
"step": 2025
},
{
"epoch": 1.1842865395725015,
"grad_norm": 9.792673110961914,
"learning_rate": 3.3693558741707684e-05,
"loss": 0.358,
"step": 2050
},
{
"epoch": 1.1987290583477759,
"grad_norm": 14.335423469543457,
"learning_rate": 3.342606462657822e-05,
"loss": 0.3512,
"step": 2075
},
{
"epoch": 1.2131715771230502,
"grad_norm": 9.749696731567383,
"learning_rate": 3.315857051144875e-05,
"loss": 0.4072,
"step": 2100
},
{
"epoch": 1.2276140958983246,
"grad_norm": 9.317971229553223,
"learning_rate": 3.2891076396319284e-05,
"loss": 0.4359,
"step": 2125
},
{
"epoch": 1.242056614673599,
"grad_norm": 14.866842269897461,
"learning_rate": 3.262358228118981e-05,
"loss": 0.4314,
"step": 2150
},
{
"epoch": 1.2564991334488735,
"grad_norm": 6.312429428100586,
"learning_rate": 3.235608816606035e-05,
"loss": 0.3819,
"step": 2175
},
{
"epoch": 1.270941652224148,
"grad_norm": 5.175512313842773,
"learning_rate": 3.208859405093088e-05,
"loss": 0.4469,
"step": 2200
},
{
"epoch": 1.2853841709994223,
"grad_norm": 16.6768856048584,
"learning_rate": 3.1821099935801414e-05,
"loss": 0.4156,
"step": 2225
},
{
"epoch": 1.2998266897746968,
"grad_norm": 5.419372081756592,
"learning_rate": 3.155360582067194e-05,
"loss": 0.3966,
"step": 2250
},
{
"epoch": 1.314269208549971,
"grad_norm": 9.6641263961792,
"learning_rate": 3.128611170554248e-05,
"loss": 0.3622,
"step": 2275
},
{
"epoch": 1.3287117273252456,
"grad_norm": 11.433446884155273,
"learning_rate": 3.1018617590413014e-05,
"loss": 0.5056,
"step": 2300
},
{
"epoch": 1.34315424610052,
"grad_norm": 8.54787540435791,
"learning_rate": 3.075112347528354e-05,
"loss": 0.3228,
"step": 2325
},
{
"epoch": 1.3575967648757943,
"grad_norm": 24.707653045654297,
"learning_rate": 3.0483629360154076e-05,
"loss": 0.3755,
"step": 2350
},
{
"epoch": 1.3720392836510689,
"grad_norm": 13.02287483215332,
"learning_rate": 3.021613524502461e-05,
"loss": 0.313,
"step": 2375
},
{
"epoch": 1.3864818024263432,
"grad_norm": 7.0347771644592285,
"learning_rate": 2.9948641129895144e-05,
"loss": 0.3618,
"step": 2400
},
{
"epoch": 1.4009243212016176,
"grad_norm": 3.799116611480713,
"learning_rate": 2.9681147014765676e-05,
"loss": 0.3338,
"step": 2425
},
{
"epoch": 1.415366839976892,
"grad_norm": 10.154156684875488,
"learning_rate": 2.941365289963621e-05,
"loss": 0.3184,
"step": 2450
},
{
"epoch": 1.4298093587521663,
"grad_norm": 22.4088191986084,
"learning_rate": 2.9146158784506744e-05,
"loss": 0.3218,
"step": 2475
},
{
"epoch": 1.4442518775274409,
"grad_norm": 5.571261405944824,
"learning_rate": 2.8878664669377277e-05,
"loss": 0.4167,
"step": 2500
},
{
"epoch": 1.4586943963027152,
"grad_norm": 10.851147651672363,
"learning_rate": 2.8611170554247806e-05,
"loss": 0.3881,
"step": 2525
},
{
"epoch": 1.4731369150779896,
"grad_norm": 30.00836181640625,
"learning_rate": 2.8343676439118338e-05,
"loss": 0.3706,
"step": 2550
},
{
"epoch": 1.487579433853264,
"grad_norm": 12.909472465515137,
"learning_rate": 2.8076182323988874e-05,
"loss": 0.4325,
"step": 2575
},
{
"epoch": 1.5020219526285383,
"grad_norm": 10.231127738952637,
"learning_rate": 2.7808688208859406e-05,
"loss": 0.3357,
"step": 2600
},
{
"epoch": 1.516464471403813,
"grad_norm": 7.157652378082275,
"learning_rate": 2.754119409372994e-05,
"loss": 0.3927,
"step": 2625
},
{
"epoch": 1.5309069901790873,
"grad_norm": 13.10181999206543,
"learning_rate": 2.727369997860047e-05,
"loss": 0.3189,
"step": 2650
},
{
"epoch": 1.5453495089543616,
"grad_norm": 12.194095611572266,
"learning_rate": 2.7006205863471007e-05,
"loss": 0.4226,
"step": 2675
},
{
"epoch": 1.5597920277296362,
"grad_norm": 18.289899826049805,
"learning_rate": 2.673871174834154e-05,
"loss": 0.2865,
"step": 2700
},
{
"epoch": 1.5742345465049103,
"grad_norm": 4.3070068359375,
"learning_rate": 2.647121763321207e-05,
"loss": 0.3107,
"step": 2725
},
{
"epoch": 1.588677065280185,
"grad_norm": 26.21879768371582,
"learning_rate": 2.62037235180826e-05,
"loss": 0.3243,
"step": 2750
},
{
"epoch": 1.6031195840554593,
"grad_norm": 8.495038986206055,
"learning_rate": 2.593622940295314e-05,
"loss": 0.3285,
"step": 2775
},
{
"epoch": 1.6175621028307337,
"grad_norm": 17.74176788330078,
"learning_rate": 2.566873528782367e-05,
"loss": 0.3819,
"step": 2800
},
{
"epoch": 1.6320046216060082,
"grad_norm": 8.67226505279541,
"learning_rate": 2.54012411726942e-05,
"loss": 0.4388,
"step": 2825
},
{
"epoch": 1.6464471403812824,
"grad_norm": 9.305310249328613,
"learning_rate": 2.5133747057564733e-05,
"loss": 0.3801,
"step": 2850
},
{
"epoch": 1.660889659156557,
"grad_norm": 16.156944274902344,
"learning_rate": 2.4866252942435266e-05,
"loss": 0.337,
"step": 2875
},
{
"epoch": 1.6753321779318313,
"grad_norm": 18.950183868408203,
"learning_rate": 2.45987588273058e-05,
"loss": 0.3913,
"step": 2900
},
{
"epoch": 1.6897746967071057,
"grad_norm": 4.8534321784973145,
"learning_rate": 2.4331264712176334e-05,
"loss": 0.3677,
"step": 2925
},
{
"epoch": 1.7042172154823803,
"grad_norm": 7.860241413116455,
"learning_rate": 2.4063770597046866e-05,
"loss": 0.3495,
"step": 2950
},
{
"epoch": 1.7186597342576544,
"grad_norm": 10.027009010314941,
"learning_rate": 2.37962764819174e-05,
"loss": 0.3787,
"step": 2975
},
{
"epoch": 1.733102253032929,
"grad_norm": 13.39957046508789,
"learning_rate": 2.352878236678793e-05,
"loss": 0.3064,
"step": 3000
},
{
"epoch": 1.7475447718082033,
"grad_norm": 8.104743957519531,
"learning_rate": 2.3261288251658463e-05,
"loss": 0.3703,
"step": 3025
},
{
"epoch": 1.7619872905834777,
"grad_norm": 7.085102558135986,
"learning_rate": 2.2993794136529e-05,
"loss": 0.3488,
"step": 3050
},
{
"epoch": 1.7764298093587523,
"grad_norm": 8.273953437805176,
"learning_rate": 2.272630002139953e-05,
"loss": 0.3574,
"step": 3075
},
{
"epoch": 1.7908723281340264,
"grad_norm": 5.399058818817139,
"learning_rate": 2.2458805906270064e-05,
"loss": 0.3699,
"step": 3100
},
{
"epoch": 1.805314846909301,
"grad_norm": 25.818262100219727,
"learning_rate": 2.2191311791140596e-05,
"loss": 0.2714,
"step": 3125
},
{
"epoch": 1.8197573656845754,
"grad_norm": 8.441669464111328,
"learning_rate": 2.192381767601113e-05,
"loss": 0.3225,
"step": 3150
},
{
"epoch": 1.8341998844598497,
"grad_norm": 3.318145751953125,
"learning_rate": 2.165632356088166e-05,
"loss": 0.3359,
"step": 3175
},
{
"epoch": 1.8486424032351243,
"grad_norm": 3.700218439102173,
"learning_rate": 2.1388829445752197e-05,
"loss": 0.3138,
"step": 3200
},
{
"epoch": 1.8630849220103987,
"grad_norm": 4.9609246253967285,
"learning_rate": 2.1121335330622726e-05,
"loss": 0.3429,
"step": 3225
},
{
"epoch": 1.877527440785673,
"grad_norm": 11.287262916564941,
"learning_rate": 2.085384121549326e-05,
"loss": 0.3926,
"step": 3250
},
{
"epoch": 1.8919699595609474,
"grad_norm": 13.642833709716797,
"learning_rate": 2.0586347100363794e-05,
"loss": 0.3484,
"step": 3275
},
{
"epoch": 1.9064124783362217,
"grad_norm": 5.669510364532471,
"learning_rate": 2.0318852985234326e-05,
"loss": 0.3307,
"step": 3300
},
{
"epoch": 1.9208549971114963,
"grad_norm": 11.987211227416992,
"learning_rate": 2.005135887010486e-05,
"loss": 0.2979,
"step": 3325
},
{
"epoch": 1.9352975158867707,
"grad_norm": 8.090258598327637,
"learning_rate": 1.978386475497539e-05,
"loss": 0.2786,
"step": 3350
},
{
"epoch": 1.949740034662045,
"grad_norm": 5.356060028076172,
"learning_rate": 1.9516370639845923e-05,
"loss": 0.281,
"step": 3375
},
{
"epoch": 1.9641825534373196,
"grad_norm": 9.354238510131836,
"learning_rate": 1.924887652471646e-05,
"loss": 0.2761,
"step": 3400
},
{
"epoch": 1.9786250722125938,
"grad_norm": 12.076613426208496,
"learning_rate": 1.8981382409586988e-05,
"loss": 0.3394,
"step": 3425
},
{
"epoch": 1.9930675909878683,
"grad_norm": 6.9947428703308105,
"learning_rate": 1.8713888294457524e-05,
"loss": 0.3652,
"step": 3450
},
{
"epoch": 2.0,
"eval_explained_variance": 0.700911283493042,
"eval_loss": 0.3305796980857849,
"eval_mae": 0.44259902834892273,
"eval_mse": 0.3305796980857849,
"eval_r2": 0.7003744220771353,
"eval_rmse": 0.5749605894088745,
"eval_runtime": 21.4982,
"eval_samples_per_second": 161.037,
"eval_steps_per_second": 10.094,
"step": 3462
}
],
"logging_steps": 25,
"max_steps": 5193,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 7285479708948480.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}