{
"best_metric": 0.282262921333313,
"best_model_checkpoint": "autotrain-m96nh-snymb/checkpoint-5193",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 5193,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014442518775274409,
"grad_norm": 212.5823516845703,
"learning_rate": 7.692307692307692e-08,
"loss": 10.235,
"step": 25
},
{
"epoch": 0.028885037550548817,
"grad_norm": 81.24781799316406,
"learning_rate": 1.7307692307692305e-07,
"loss": 10.1291,
"step": 50
},
{
"epoch": 0.043327556325823226,
"grad_norm": 302.68389892578125,
"learning_rate": 2.692307692307692e-07,
"loss": 9.6898,
"step": 75
},
{
"epoch": 0.057770075101097634,
"grad_norm": 327.724853515625,
"learning_rate": 3.615384615384615e-07,
"loss": 10.4787,
"step": 100
},
{
"epoch": 0.07221259387637204,
"grad_norm": 135.6590576171875,
"learning_rate": 4.538461538461538e-07,
"loss": 9.5916,
"step": 125
},
{
"epoch": 0.08665511265164645,
"grad_norm": 108.32626342773438,
"learning_rate": 5.5e-07,
"loss": 8.8043,
"step": 150
},
{
"epoch": 0.10109763142692085,
"grad_norm": 73.20294189453125,
"learning_rate": 6.461538461538462e-07,
"loss": 7.1787,
"step": 175
},
{
"epoch": 0.11554015020219527,
"grad_norm": 73.47607421875,
"learning_rate": 7.423076923076923e-07,
"loss": 5.024,
"step": 200
},
{
"epoch": 0.12998266897746968,
"grad_norm": 73.16763305664062,
"learning_rate": 8.384615384615384e-07,
"loss": 2.586,
"step": 225
},
{
"epoch": 0.14442518775274407,
"grad_norm": 42.77627182006836,
"learning_rate": 9.346153846153846e-07,
"loss": 0.9176,
"step": 250
},
{
"epoch": 0.1588677065280185,
"grad_norm": 73.87055969238281,
"learning_rate": 1.0307692307692306e-06,
"loss": 1.0465,
"step": 275
},
{
"epoch": 0.1733102253032929,
"grad_norm": 29.49479103088379,
"learning_rate": 1.1269230769230768e-06,
"loss": 0.6761,
"step": 300
},
{
"epoch": 0.1877527440785673,
"grad_norm": 78.30584716796875,
"learning_rate": 1.2230769230769231e-06,
"loss": 0.5363,
"step": 325
},
{
"epoch": 0.2021952628538417,
"grad_norm": 23.788848876953125,
"learning_rate": 1.3192307692307692e-06,
"loss": 0.437,
"step": 350
},
{
"epoch": 0.21663778162911612,
"grad_norm": 5.925709247589111,
"learning_rate": 1.4153846153846155e-06,
"loss": 0.5172,
"step": 375
},
{
"epoch": 0.23108030040439054,
"grad_norm": 32.11520767211914,
"learning_rate": 1.5115384615384615e-06,
"loss": 0.509,
"step": 400
},
{
"epoch": 0.24552281917966493,
"grad_norm": 14.887007713317871,
"learning_rate": 1.6076923076923076e-06,
"loss": 0.6094,
"step": 425
},
{
"epoch": 0.25996533795493937,
"grad_norm": 31.848352432250977,
"learning_rate": 1.7038461538461536e-06,
"loss": 0.5056,
"step": 450
},
{
"epoch": 0.27440785673021373,
"grad_norm": 72.35002136230469,
"learning_rate": 1.8e-06,
"loss": 0.4752,
"step": 475
},
{
"epoch": 0.28885037550548814,
"grad_norm": 12.196418762207031,
"learning_rate": 1.896153846153846e-06,
"loss": 0.375,
"step": 500
},
{
"epoch": 0.30329289428076256,
"grad_norm": 25.701213836669922,
"learning_rate": 1.9923076923076923e-06,
"loss": 0.4961,
"step": 525
},
{
"epoch": 0.317735413056037,
"grad_norm": 41.281166076660156,
"learning_rate": 1.9901562165632354e-06,
"loss": 0.4455,
"step": 550
},
{
"epoch": 0.3321779318313114,
"grad_norm": 19.428579330444336,
"learning_rate": 1.979456451958057e-06,
"loss": 0.6306,
"step": 575
},
{
"epoch": 0.3466204506065858,
"grad_norm": 18.564756393432617,
"learning_rate": 1.9687566873528784e-06,
"loss": 0.399,
"step": 600
},
{
"epoch": 0.3610629693818602,
"grad_norm": 41.66643524169922,
"learning_rate": 1.9580569227476994e-06,
"loss": 0.4581,
"step": 625
},
{
"epoch": 0.3755054881571346,
"grad_norm": 41.7965202331543,
"learning_rate": 1.947357158142521e-06,
"loss": 0.4799,
"step": 650
},
{
"epoch": 0.389948006932409,
"grad_norm": 9.624334335327148,
"learning_rate": 1.936657393537342e-06,
"loss": 0.4632,
"step": 675
},
{
"epoch": 0.4043905257076834,
"grad_norm": 11.227400779724121,
"learning_rate": 1.9259576289321635e-06,
"loss": 0.3521,
"step": 700
},
{
"epoch": 0.41883304448295783,
"grad_norm": 22.89958953857422,
"learning_rate": 1.915257864326985e-06,
"loss": 0.4414,
"step": 725
},
{
"epoch": 0.43327556325823224,
"grad_norm": 26.13202476501465,
"learning_rate": 1.904558099721806e-06,
"loss": 0.3836,
"step": 750
},
{
"epoch": 0.44771808203350666,
"grad_norm": 55.14106750488281,
"learning_rate": 1.8938583351166273e-06,
"loss": 0.5554,
"step": 775
},
{
"epoch": 0.4621606008087811,
"grad_norm": 32.7313346862793,
"learning_rate": 1.8831585705114485e-06,
"loss": 0.4613,
"step": 800
},
{
"epoch": 0.47660311958405543,
"grad_norm": 14.63770866394043,
"learning_rate": 1.87245880590627e-06,
"loss": 0.3672,
"step": 825
},
{
"epoch": 0.49104563835932985,
"grad_norm": 39.93565368652344,
"learning_rate": 1.8617590413010913e-06,
"loss": 0.495,
"step": 850
},
{
"epoch": 0.5054881571346043,
"grad_norm": 13.94717025756836,
"learning_rate": 1.8510592766959126e-06,
"loss": 0.3883,
"step": 875
},
{
"epoch": 0.5199306759098787,
"grad_norm": 19.33357810974121,
"learning_rate": 1.8403595120907338e-06,
"loss": 0.3607,
"step": 900
},
{
"epoch": 0.5343731946851531,
"grad_norm": 9.50666618347168,
"learning_rate": 1.8296597474855553e-06,
"loss": 0.4309,
"step": 925
},
{
"epoch": 0.5488157134604275,
"grad_norm": 99.26518249511719,
"learning_rate": 1.8189599828803766e-06,
"loss": 0.4734,
"step": 950
},
{
"epoch": 0.5632582322357019,
"grad_norm": 25.336233139038086,
"learning_rate": 1.8082602182751978e-06,
"loss": 0.5486,
"step": 975
},
{
"epoch": 0.5777007510109763,
"grad_norm": 41.74341583251953,
"learning_rate": 1.7975604536700191e-06,
"loss": 0.378,
"step": 1000
},
{
"epoch": 0.5921432697862508,
"grad_norm": 37.5980224609375,
"learning_rate": 1.7868606890648406e-06,
"loss": 0.3274,
"step": 1025
},
{
"epoch": 0.6065857885615251,
"grad_norm": 46.92686080932617,
"learning_rate": 1.7761609244596619e-06,
"loss": 0.3263,
"step": 1050
},
{
"epoch": 0.6210283073367996,
"grad_norm": 20.776403427124023,
"learning_rate": 1.7654611598544831e-06,
"loss": 0.3563,
"step": 1075
},
{
"epoch": 0.635470826112074,
"grad_norm": 26.412818908691406,
"learning_rate": 1.7547613952493044e-06,
"loss": 0.354,
"step": 1100
},
{
"epoch": 0.6499133448873483,
"grad_norm": 35.234580993652344,
"learning_rate": 1.7440616306441259e-06,
"loss": 0.4225,
"step": 1125
},
{
"epoch": 0.6643558636626228,
"grad_norm": 38.81793212890625,
"learning_rate": 1.7333618660389472e-06,
"loss": 0.3189,
"step": 1150
},
{
"epoch": 0.6787983824378971,
"grad_norm": 26.322595596313477,
"learning_rate": 1.7226621014337684e-06,
"loss": 0.3566,
"step": 1175
},
{
"epoch": 0.6932409012131716,
"grad_norm": 19.16035270690918,
"learning_rate": 1.7119623368285897e-06,
"loss": 0.3212,
"step": 1200
},
{
"epoch": 0.707683419988446,
"grad_norm": 26.820486068725586,
"learning_rate": 1.7012625722234112e-06,
"loss": 0.3318,
"step": 1225
},
{
"epoch": 0.7221259387637204,
"grad_norm": 9.583172798156738,
"learning_rate": 1.6905628076182322e-06,
"loss": 0.3306,
"step": 1250
},
{
"epoch": 0.7365684575389948,
"grad_norm": 67.81623840332031,
"learning_rate": 1.6798630430130535e-06,
"loss": 0.3503,
"step": 1275
},
{
"epoch": 0.7510109763142692,
"grad_norm": 26.901622772216797,
"learning_rate": 1.6691632784078748e-06,
"loss": 0.3112,
"step": 1300
},
{
"epoch": 0.7654534950895436,
"grad_norm": 19.61265754699707,
"learning_rate": 1.6584635138026963e-06,
"loss": 0.3354,
"step": 1325
},
{
"epoch": 0.779896013864818,
"grad_norm": 26.634790420532227,
"learning_rate": 1.6477637491975175e-06,
"loss": 0.4247,
"step": 1350
},
{
"epoch": 0.7943385326400925,
"grad_norm": 12.382301330566406,
"learning_rate": 1.6370639845923388e-06,
"loss": 0.3341,
"step": 1375
},
{
"epoch": 0.8087810514153668,
"grad_norm": 17.31427574157715,
"learning_rate": 1.62636421998716e-06,
"loss": 0.3274,
"step": 1400
},
{
"epoch": 0.8232235701906413,
"grad_norm": 11.828612327575684,
"learning_rate": 1.6156644553819815e-06,
"loss": 0.3575,
"step": 1425
},
{
"epoch": 0.8376660889659157,
"grad_norm": 12.682332038879395,
"learning_rate": 1.6049646907768028e-06,
"loss": 0.2944,
"step": 1450
},
{
"epoch": 0.85210860774119,
"grad_norm": 14.496199607849121,
"learning_rate": 1.594264926171624e-06,
"loss": 0.3447,
"step": 1475
},
{
"epoch": 0.8665511265164645,
"grad_norm": 20.546905517578125,
"learning_rate": 1.5835651615664453e-06,
"loss": 0.3162,
"step": 1500
},
{
"epoch": 0.8809936452917388,
"grad_norm": 12.139562606811523,
"learning_rate": 1.5728653969612668e-06,
"loss": 0.3553,
"step": 1525
},
{
"epoch": 0.8954361640670133,
"grad_norm": 38.39575958251953,
"learning_rate": 1.562165632356088e-06,
"loss": 0.3929,
"step": 1550
},
{
"epoch": 0.9098786828422877,
"grad_norm": 9.470243453979492,
"learning_rate": 1.5514658677509094e-06,
"loss": 0.2997,
"step": 1575
},
{
"epoch": 0.9243212016175621,
"grad_norm": 14.675395965576172,
"learning_rate": 1.5407661031457306e-06,
"loss": 0.3275,
"step": 1600
},
{
"epoch": 0.9387637203928365,
"grad_norm": 3.3243517875671387,
"learning_rate": 1.5300663385405521e-06,
"loss": 0.426,
"step": 1625
},
{
"epoch": 0.9532062391681109,
"grad_norm": 12.924434661865234,
"learning_rate": 1.5193665739353734e-06,
"loss": 0.2976,
"step": 1650
},
{
"epoch": 0.9676487579433853,
"grad_norm": 46.80762481689453,
"learning_rate": 1.5086668093301947e-06,
"loss": 0.3499,
"step": 1675
},
{
"epoch": 0.9820912767186597,
"grad_norm": 15.062408447265625,
"learning_rate": 1.497967044725016e-06,
"loss": 0.3225,
"step": 1700
},
{
"epoch": 0.9965337954939342,
"grad_norm": 22.281009674072266,
"learning_rate": 1.4872672801198374e-06,
"loss": 0.4113,
"step": 1725
},
{
"epoch": 1.0,
"eval_explained_variance": 0.7363581657409668,
"eval_loss": 0.3119768798351288,
"eval_mae": 0.4405648112297058,
"eval_mse": 0.31180134415626526,
"eval_r2": 0.7173944470629221,
"eval_rmse": 0.5583917498588562,
"eval_runtime": 80.5336,
"eval_samples_per_second": 42.988,
"eval_steps_per_second": 2.695,
"step": 1731
},
{
"epoch": 1.0109763142692085,
"grad_norm": 27.233137130737305,
"learning_rate": 1.4765675155146587e-06,
"loss": 0.3579,
"step": 1750
},
{
"epoch": 1.025418833044483,
"grad_norm": 23.662677764892578,
"learning_rate": 1.46586775090948e-06,
"loss": 0.3746,
"step": 1775
},
{
"epoch": 1.0398613518197575,
"grad_norm": 27.650678634643555,
"learning_rate": 1.4551679863043012e-06,
"loss": 0.3723,
"step": 1800
},
{
"epoch": 1.0543038705950318,
"grad_norm": 38.274314880371094,
"learning_rate": 1.4444682216991227e-06,
"loss": 0.3705,
"step": 1825
},
{
"epoch": 1.0687463893703062,
"grad_norm": 43.43634796142578,
"learning_rate": 1.433768457093944e-06,
"loss": 0.2841,
"step": 1850
},
{
"epoch": 1.0831889081455806,
"grad_norm": 19.07291030883789,
"learning_rate": 1.4230686924887652e-06,
"loss": 0.4436,
"step": 1875
},
{
"epoch": 1.097631426920855,
"grad_norm": 36.660614013671875,
"learning_rate": 1.4123689278835865e-06,
"loss": 0.3031,
"step": 1900
},
{
"epoch": 1.1120739456961295,
"grad_norm": 10.8666353225708,
"learning_rate": 1.401669163278408e-06,
"loss": 0.2895,
"step": 1925
},
{
"epoch": 1.1265164644714039,
"grad_norm": 34.650394439697266,
"learning_rate": 1.3909693986732293e-06,
"loss": 0.3506,
"step": 1950
},
{
"epoch": 1.1409589832466782,
"grad_norm": 43.247623443603516,
"learning_rate": 1.3802696340680503e-06,
"loss": 0.3555,
"step": 1975
},
{
"epoch": 1.1554015020219526,
"grad_norm": 18.716602325439453,
"learning_rate": 1.3695698694628716e-06,
"loss": 0.3931,
"step": 2000
},
{
"epoch": 1.169844020797227,
"grad_norm": 31.356761932373047,
"learning_rate": 1.358870104857693e-06,
"loss": 0.2976,
"step": 2025
},
{
"epoch": 1.1842865395725015,
"grad_norm": 18.609111785888672,
"learning_rate": 1.3481703402525143e-06,
"loss": 0.3163,
"step": 2050
},
{
"epoch": 1.1987290583477759,
"grad_norm": 31.023008346557617,
"learning_rate": 1.3374705756473356e-06,
"loss": 0.3454,
"step": 2075
},
{
"epoch": 1.2131715771230502,
"grad_norm": 27.93479347229004,
"learning_rate": 1.3267708110421569e-06,
"loss": 0.3452,
"step": 2100
},
{
"epoch": 1.2276140958983246,
"grad_norm": 23.254547119140625,
"learning_rate": 1.3160710464369784e-06,
"loss": 0.3486,
"step": 2125
},
{
"epoch": 1.242056614673599,
"grad_norm": 45.776458740234375,
"learning_rate": 1.3053712818317996e-06,
"loss": 0.3586,
"step": 2150
},
{
"epoch": 1.2564991334488735,
"grad_norm": 14.92525863647461,
"learning_rate": 1.294671517226621e-06,
"loss": 0.3338,
"step": 2175
},
{
"epoch": 1.270941652224148,
"grad_norm": 20.12270736694336,
"learning_rate": 1.2839717526214422e-06,
"loss": 0.3437,
"step": 2200
},
{
"epoch": 1.2853841709994223,
"grad_norm": 41.65699005126953,
"learning_rate": 1.2732719880162636e-06,
"loss": 0.3264,
"step": 2225
},
{
"epoch": 1.2998266897746968,
"grad_norm": 32.03495788574219,
"learning_rate": 1.262572223411085e-06,
"loss": 0.3404,
"step": 2250
},
{
"epoch": 1.314269208549971,
"grad_norm": 4.864631175994873,
"learning_rate": 1.2518724588059062e-06,
"loss": 0.296,
"step": 2275
},
{
"epoch": 1.3287117273252456,
"grad_norm": 10.562322616577148,
"learning_rate": 1.2411726942007275e-06,
"loss": 0.3442,
"step": 2300
},
{
"epoch": 1.34315424610052,
"grad_norm": 33.48724365234375,
"learning_rate": 1.230472929595549e-06,
"loss": 0.257,
"step": 2325
},
{
"epoch": 1.3575967648757943,
"grad_norm": 19.912137985229492,
"learning_rate": 1.2197731649903702e-06,
"loss": 0.2968,
"step": 2350
},
{
"epoch": 1.3720392836510689,
"grad_norm": 22.246639251708984,
"learning_rate": 1.2090734003851915e-06,
"loss": 0.2793,
"step": 2375
},
{
"epoch": 1.3864818024263432,
"grad_norm": 18.22015380859375,
"learning_rate": 1.1983736357800127e-06,
"loss": 0.3079,
"step": 2400
},
{
"epoch": 1.4009243212016176,
"grad_norm": 15.965062141418457,
"learning_rate": 1.1876738711748342e-06,
"loss": 0.335,
"step": 2425
},
{
"epoch": 1.415366839976892,
"grad_norm": 20.45452117919922,
"learning_rate": 1.1769741065696555e-06,
"loss": 0.3061,
"step": 2450
},
{
"epoch": 1.4298093587521663,
"grad_norm": 13.89696216583252,
"learning_rate": 1.1662743419644768e-06,
"loss": 0.275,
"step": 2475
},
{
"epoch": 1.4442518775274409,
"grad_norm": 35.64567947387695,
"learning_rate": 1.155574577359298e-06,
"loss": 0.3471,
"step": 2500
},
{
"epoch": 1.4586943963027152,
"grad_norm": 14.65186882019043,
"learning_rate": 1.1448748127541195e-06,
"loss": 0.314,
"step": 2525
},
{
"epoch": 1.4731369150779896,
"grad_norm": 14.541102409362793,
"learning_rate": 1.1341750481489408e-06,
"loss": 0.2916,
"step": 2550
},
{
"epoch": 1.487579433853264,
"grad_norm": 37.96781539916992,
"learning_rate": 1.123475283543762e-06,
"loss": 0.3849,
"step": 2575
},
{
"epoch": 1.5020219526285383,
"grad_norm": 16.675336837768555,
"learning_rate": 1.1127755189385833e-06,
"loss": 0.2672,
"step": 2600
},
{
"epoch": 1.516464471403813,
"grad_norm": 28.15886116027832,
"learning_rate": 1.1020757543334048e-06,
"loss": 0.3031,
"step": 2625
},
{
"epoch": 1.5309069901790873,
"grad_norm": 28.914554595947266,
"learning_rate": 1.091375989728226e-06,
"loss": 0.2585,
"step": 2650
},
{
"epoch": 1.5453495089543616,
"grad_norm": 31.952404022216797,
"learning_rate": 1.0806762251230471e-06,
"loss": 0.3405,
"step": 2675
},
{
"epoch": 1.5597920277296362,
"grad_norm": 62.01006317138672,
"learning_rate": 1.0699764605178684e-06,
"loss": 0.2892,
"step": 2700
},
{
"epoch": 1.5742345465049103,
"grad_norm": 15.054553985595703,
"learning_rate": 1.0592766959126899e-06,
"loss": 0.2902,
"step": 2725
},
{
"epoch": 1.588677065280185,
"grad_norm": 39.178443908691406,
"learning_rate": 1.0485769313075112e-06,
"loss": 0.2743,
"step": 2750
},
{
"epoch": 1.6031195840554593,
"grad_norm": 43.06193923950195,
"learning_rate": 1.0378771667023324e-06,
"loss": 0.2982,
"step": 2775
},
{
"epoch": 1.6175621028307337,
"grad_norm": 43.87297821044922,
"learning_rate": 1.0271774020971537e-06,
"loss": 0.2902,
"step": 2800
},
{
"epoch": 1.6320046216060082,
"grad_norm": 21.78912925720215,
"learning_rate": 1.0164776374919752e-06,
"loss": 0.3465,
"step": 2825
},
{
"epoch": 1.6464471403812824,
"grad_norm": 15.053204536437988,
"learning_rate": 1.0057778728867964e-06,
"loss": 0.3213,
"step": 2850
},
{
"epoch": 1.660889659156557,
"grad_norm": 21.79863166809082,
"learning_rate": 9.950781082816177e-07,
"loss": 0.3278,
"step": 2875
},
{
"epoch": 1.6753321779318313,
"grad_norm": 58.025299072265625,
"learning_rate": 9.843783436764392e-07,
"loss": 0.2828,
"step": 2900
},
{
"epoch": 1.6897746967071057,
"grad_norm": 22.137096405029297,
"learning_rate": 9.736785790712605e-07,
"loss": 0.3023,
"step": 2925
},
{
"epoch": 1.7042172154823803,
"grad_norm": 19.531232833862305,
"learning_rate": 9.629788144660817e-07,
"loss": 0.3039,
"step": 2950
},
{
"epoch": 1.7186597342576544,
"grad_norm": 59.77436065673828,
"learning_rate": 9.52279049860903e-07,
"loss": 0.3376,
"step": 2975
},
{
"epoch": 1.733102253032929,
"grad_norm": 27.803564071655273,
"learning_rate": 9.415792852557243e-07,
"loss": 0.2839,
"step": 3000
},
{
"epoch": 1.7475447718082033,
"grad_norm": 21.773244857788086,
"learning_rate": 9.308795206505456e-07,
"loss": 0.3372,
"step": 3025
},
{
"epoch": 1.7619872905834777,
"grad_norm": 11.421875953674316,
"learning_rate": 9.201797560453669e-07,
"loss": 0.3754,
"step": 3050
},
{
"epoch": 1.7764298093587523,
"grad_norm": 14.211411476135254,
"learning_rate": 9.094799914401883e-07,
"loss": 0.3214,
"step": 3075
},
{
"epoch": 1.7908723281340264,
"grad_norm": 43.777278900146484,
"learning_rate": 8.987802268350096e-07,
"loss": 0.3508,
"step": 3100
},
{
"epoch": 1.805314846909301,
"grad_norm": 38.14100646972656,
"learning_rate": 8.880804622298309e-07,
"loss": 0.2535,
"step": 3125
},
{
"epoch": 1.8197573656845754,
"grad_norm": 15.347945213317871,
"learning_rate": 8.773806976246522e-07,
"loss": 0.3121,
"step": 3150
},
{
"epoch": 1.8341998844598497,
"grad_norm": 8.05485725402832,
"learning_rate": 8.666809330194736e-07,
"loss": 0.3227,
"step": 3175
},
{
"epoch": 1.8486424032351243,
"grad_norm": 11.664706230163574,
"learning_rate": 8.559811684142948e-07,
"loss": 0.3061,
"step": 3200
},
{
"epoch": 1.8630849220103987,
"grad_norm": 7.515502452850342,
"learning_rate": 8.452814038091161e-07,
"loss": 0.2753,
"step": 3225
},
{
"epoch": 1.877527440785673,
"grad_norm": 30.233638763427734,
"learning_rate": 8.345816392039374e-07,
"loss": 0.3518,
"step": 3250
},
{
"epoch": 1.8919699595609474,
"grad_norm": 16.609712600708008,
"learning_rate": 8.238818745987588e-07,
"loss": 0.3087,
"step": 3275
},
{
"epoch": 1.9064124783362217,
"grad_norm": 12.235444068908691,
"learning_rate": 8.1318210999358e-07,
"loss": 0.3224,
"step": 3300
},
{
"epoch": 1.9208549971114963,
"grad_norm": 36.453224182128906,
"learning_rate": 8.024823453884014e-07,
"loss": 0.3311,
"step": 3325
},
{
"epoch": 1.9352975158867707,
"grad_norm": 21.512168884277344,
"learning_rate": 7.917825807832227e-07,
"loss": 0.2857,
"step": 3350
},
{
"epoch": 1.949740034662045,
"grad_norm": 9.703317642211914,
"learning_rate": 7.81082816178044e-07,
"loss": 0.2662,
"step": 3375
},
{
"epoch": 1.9641825534373196,
"grad_norm": 17.714481353759766,
"learning_rate": 7.703830515728653e-07,
"loss": 0.291,
"step": 3400
},
{
"epoch": 1.9786250722125938,
"grad_norm": 22.379777908325195,
"learning_rate": 7.596832869676867e-07,
"loss": 0.3267,
"step": 3425
},
{
"epoch": 1.9930675909878683,
"grad_norm": 8.563464164733887,
"learning_rate": 7.48983522362508e-07,
"loss": 0.2939,
"step": 3450
},
{
"epoch": 2.0,
"eval_explained_variance": 0.7469815611839294,
"eval_loss": 0.31648534536361694,
"eval_mae": 0.4420657455921173,
"eval_mse": 0.3162277936935425,
"eval_r2": 0.7133824489512686,
"eval_rmse": 0.5623413324356079,
"eval_runtime": 80.5259,
"eval_samples_per_second": 42.992,
"eval_steps_per_second": 2.695,
"step": 3462
},
{
"epoch": 2.0075101097631425,
"grad_norm": 31.694887161254883,
"learning_rate": 7.382837577573293e-07,
"loss": 0.2527,
"step": 3475
},
{
"epoch": 2.021952628538417,
"grad_norm": 24.721397399902344,
"learning_rate": 7.275839931521506e-07,
"loss": 0.2815,
"step": 3500
},
{
"epoch": 2.0363951473136916,
"grad_norm": 33.44636917114258,
"learning_rate": 7.16884228546972e-07,
"loss": 0.3586,
"step": 3525
},
{
"epoch": 2.050837666088966,
"grad_norm": 44.72824478149414,
"learning_rate": 7.061844639417933e-07,
"loss": 0.3163,
"step": 3550
},
{
"epoch": 2.0652801848642404,
"grad_norm": 22.65967559814453,
"learning_rate": 6.954846993366146e-07,
"loss": 0.2523,
"step": 3575
},
{
"epoch": 2.079722703639515,
"grad_norm": 9.611360549926758,
"learning_rate": 6.847849347314358e-07,
"loss": 0.2609,
"step": 3600
},
{
"epoch": 2.094165222414789,
"grad_norm": 19.328899383544922,
"learning_rate": 6.740851701262572e-07,
"loss": 0.3327,
"step": 3625
},
{
"epoch": 2.1086077411900637,
"grad_norm": 17.564197540283203,
"learning_rate": 6.633854055210784e-07,
"loss": 0.2777,
"step": 3650
},
{
"epoch": 2.123050259965338,
"grad_norm": 35.05995178222656,
"learning_rate": 6.526856409158998e-07,
"loss": 0.2516,
"step": 3675
},
{
"epoch": 2.1374927787406124,
"grad_norm": 17.389116287231445,
"learning_rate": 6.419858763107211e-07,
"loss": 0.2387,
"step": 3700
},
{
"epoch": 2.151935297515887,
"grad_norm": 18.017724990844727,
"learning_rate": 6.312861117055425e-07,
"loss": 0.2764,
"step": 3725
},
{
"epoch": 2.166377816291161,
"grad_norm": 13.583812713623047,
"learning_rate": 6.205863471003637e-07,
"loss": 0.2717,
"step": 3750
},
{
"epoch": 2.1808203350664357,
"grad_norm": 18.499242782592773,
"learning_rate": 6.098865824951851e-07,
"loss": 0.2439,
"step": 3775
},
{
"epoch": 2.19526285384171,
"grad_norm": 17.863845825195312,
"learning_rate": 5.991868178900064e-07,
"loss": 0.2498,
"step": 3800
},
{
"epoch": 2.2097053726169844,
"grad_norm": 42.79360580444336,
"learning_rate": 5.884870532848277e-07,
"loss": 0.2986,
"step": 3825
},
{
"epoch": 2.224147891392259,
"grad_norm": 18.10019302368164,
"learning_rate": 5.77787288679649e-07,
"loss": 0.2692,
"step": 3850
},
{
"epoch": 2.238590410167533,
"grad_norm": 52.769935607910156,
"learning_rate": 5.670875240744704e-07,
"loss": 0.3265,
"step": 3875
},
{
"epoch": 2.2530329289428077,
"grad_norm": 42.038516998291016,
"learning_rate": 5.563877594692917e-07,
"loss": 0.3196,
"step": 3900
},
{
"epoch": 2.267475447718082,
"grad_norm": 14.1666898727417,
"learning_rate": 5.45687994864113e-07,
"loss": 0.2888,
"step": 3925
},
{
"epoch": 2.2819179664933564,
"grad_norm": 16.471778869628906,
"learning_rate": 5.349882302589342e-07,
"loss": 0.2782,
"step": 3950
},
{
"epoch": 2.296360485268631,
"grad_norm": 9.197157859802246,
"learning_rate": 5.242884656537556e-07,
"loss": 0.3127,
"step": 3975
},
{
"epoch": 2.310803004043905,
"grad_norm": 19.208568572998047,
"learning_rate": 5.135887010485768e-07,
"loss": 0.2572,
"step": 4000
},
{
"epoch": 2.3252455228191797,
"grad_norm": 5.966078758239746,
"learning_rate": 5.028889364433982e-07,
"loss": 0.2631,
"step": 4025
},
{
"epoch": 2.339688041594454,
"grad_norm": 27.037731170654297,
"learning_rate": 4.921891718382196e-07,
"loss": 0.2794,
"step": 4050
},
{
"epoch": 2.3541305603697285,
"grad_norm": 39.20252990722656,
"learning_rate": 4.814894072330409e-07,
"loss": 0.2656,
"step": 4075
},
{
"epoch": 2.368573079145003,
"grad_norm": 32.399147033691406,
"learning_rate": 4.7078964262786213e-07,
"loss": 0.2654,
"step": 4100
},
{
"epoch": 2.383015597920277,
"grad_norm": 23.706451416015625,
"learning_rate": 4.6008987802268346e-07,
"loss": 0.2887,
"step": 4125
},
{
"epoch": 2.3974581166955518,
"grad_norm": 15.86970043182373,
"learning_rate": 4.493901134175048e-07,
"loss": 0.2568,
"step": 4150
},
{
"epoch": 2.4119006354708263,
"grad_norm": 27.933916091918945,
"learning_rate": 4.386903488123261e-07,
"loss": 0.2536,
"step": 4175
},
{
"epoch": 2.4263431542461005,
"grad_norm": 16.812334060668945,
"learning_rate": 4.279905842071474e-07,
"loss": 0.2542,
"step": 4200
},
{
"epoch": 2.440785673021375,
"grad_norm": 38.82505416870117,
"learning_rate": 4.172908196019687e-07,
"loss": 0.2872,
"step": 4225
},
{
"epoch": 2.455228191796649,
"grad_norm": 23.149492263793945,
"learning_rate": 4.0659105499679e-07,
"loss": 0.2818,
"step": 4250
},
{
"epoch": 2.4696707105719238,
"grad_norm": 43.19930648803711,
"learning_rate": 3.9589129039161134e-07,
"loss": 0.213,
"step": 4275
},
{
"epoch": 2.484113229347198,
"grad_norm": 23.671152114868164,
"learning_rate": 3.8519152578643266e-07,
"loss": 0.3698,
"step": 4300
},
{
"epoch": 2.4985557481224725,
"grad_norm": 88.69607543945312,
"learning_rate": 3.74491761181254e-07,
"loss": 0.2968,
"step": 4325
},
{
"epoch": 2.512998266897747,
"grad_norm": 9.653864860534668,
"learning_rate": 3.637919965760753e-07,
"loss": 0.2777,
"step": 4350
},
{
"epoch": 2.527440785673021,
"grad_norm": 11.768026351928711,
"learning_rate": 3.5309223197089663e-07,
"loss": 0.3236,
"step": 4375
},
{
"epoch": 2.541883304448296,
"grad_norm": 15.171217918395996,
"learning_rate": 3.423924673657179e-07,
"loss": 0.3077,
"step": 4400
},
{
"epoch": 2.5563258232235704,
"grad_norm": 9.879386901855469,
"learning_rate": 3.316927027605392e-07,
"loss": 0.2684,
"step": 4425
},
{
"epoch": 2.5707683419988445,
"grad_norm": 9.355985641479492,
"learning_rate": 3.2099293815536054e-07,
"loss": 0.2526,
"step": 4450
},
{
"epoch": 2.585210860774119,
"grad_norm": 4.87063455581665,
"learning_rate": 3.1029317355018186e-07,
"loss": 0.2689,
"step": 4475
},
{
"epoch": 2.5996533795493937,
"grad_norm": 20.083267211914062,
"learning_rate": 2.995934089450032e-07,
"loss": 0.259,
"step": 4500
},
{
"epoch": 2.614095898324668,
"grad_norm": 12.317808151245117,
"learning_rate": 2.888936443398245e-07,
"loss": 0.2232,
"step": 4525
},
{
"epoch": 2.628538417099942,
"grad_norm": 28.255945205688477,
"learning_rate": 2.7819387973464583e-07,
"loss": 0.2466,
"step": 4550
},
{
"epoch": 2.6429809358752165,
"grad_norm": 72.9136734008789,
"learning_rate": 2.674941151294671e-07,
"loss": 0.2693,
"step": 4575
},
{
"epoch": 2.657423454650491,
"grad_norm": 44.20970153808594,
"learning_rate": 2.567943505242884e-07,
"loss": 0.2527,
"step": 4600
},
{
"epoch": 2.6718659734257653,
"grad_norm": 17.912519454956055,
"learning_rate": 2.460945859191098e-07,
"loss": 0.2706,
"step": 4625
},
{
"epoch": 2.68630849220104,
"grad_norm": 48.64137649536133,
"learning_rate": 2.3539482131393107e-07,
"loss": 0.2795,
"step": 4650
},
{
"epoch": 2.7007510109763144,
"grad_norm": 39.6313362121582,
"learning_rate": 2.246950567087524e-07,
"loss": 0.2669,
"step": 4675
},
{
"epoch": 2.7151935297515886,
"grad_norm": 12.336877822875977,
"learning_rate": 2.139952921035737e-07,
"loss": 0.2791,
"step": 4700
},
{
"epoch": 2.729636048526863,
"grad_norm": 11.376914024353027,
"learning_rate": 2.03295527498395e-07,
"loss": 0.2599,
"step": 4725
},
{
"epoch": 2.7440785673021377,
"grad_norm": 26.109207153320312,
"learning_rate": 1.9259576289321633e-07,
"loss": 0.3112,
"step": 4750
},
{
"epoch": 2.758521086077412,
"grad_norm": 13.475809097290039,
"learning_rate": 1.8189599828803765e-07,
"loss": 0.2797,
"step": 4775
},
{
"epoch": 2.7729636048526864,
"grad_norm": 23.833911895751953,
"learning_rate": 1.7119623368285895e-07,
"loss": 0.264,
"step": 4800
},
{
"epoch": 2.7874061236279606,
"grad_norm": 7.037588119506836,
"learning_rate": 1.6049646907768027e-07,
"loss": 0.3202,
"step": 4825
},
{
"epoch": 2.801848642403235,
"grad_norm": 36.29332733154297,
"learning_rate": 1.497967044725016e-07,
"loss": 0.286,
"step": 4850
},
{
"epoch": 2.8162911611785093,
"grad_norm": 10.196252822875977,
"learning_rate": 1.3909693986732292e-07,
"loss": 0.3008,
"step": 4875
},
{
"epoch": 2.830733679953784,
"grad_norm": 27.923114776611328,
"learning_rate": 1.283971752621442e-07,
"loss": 0.2778,
"step": 4900
},
{
"epoch": 2.8451761987290585,
"grad_norm": 5.924576282501221,
"learning_rate": 1.1769741065696553e-07,
"loss": 0.2237,
"step": 4925
},
{
"epoch": 2.8596187175043326,
"grad_norm": 16.432357788085938,
"learning_rate": 1.0699764605178686e-07,
"loss": 0.2532,
"step": 4950
},
{
"epoch": 2.874061236279607,
"grad_norm": 10.846713066101074,
"learning_rate": 9.629788144660816e-08,
"loss": 0.2277,
"step": 4975
},
{
"epoch": 2.8885037550548818,
"grad_norm": 21.979785919189453,
"learning_rate": 8.559811684142947e-08,
"loss": 0.3065,
"step": 5000
},
{
"epoch": 2.902946273830156,
"grad_norm": 17.25764274597168,
"learning_rate": 7.48983522362508e-08,
"loss": 0.2449,
"step": 5025
},
{
"epoch": 2.9173887926054305,
"grad_norm": 20.356718063354492,
"learning_rate": 6.41985876310721e-08,
"loss": 0.2357,
"step": 5050
},
{
"epoch": 2.9318313113807046,
"grad_norm": 18.828088760375977,
"learning_rate": 5.349882302589343e-08,
"loss": 0.2329,
"step": 5075
},
{
"epoch": 2.946273830155979,
"grad_norm": 10.398417472839355,
"learning_rate": 4.279905842071474e-08,
"loss": 0.2279,
"step": 5100
},
{
"epoch": 2.9607163489312533,
"grad_norm": 40.09988784790039,
"learning_rate": 3.209929381553605e-08,
"loss": 0.2337,
"step": 5125
},
{
"epoch": 2.975158867706528,
"grad_norm": 22.620283126831055,
"learning_rate": 2.139952921035737e-08,
"loss": 0.2783,
"step": 5150
},
{
"epoch": 2.9896013864818025,
"grad_norm": 33.585792541503906,
"learning_rate": 1.0699764605178684e-08,
"loss": 0.2726,
"step": 5175
},
{
"epoch": 3.0,
"eval_explained_variance": 0.7570163011550903,
"eval_loss": 0.282262921333313,
"eval_mae": 0.4189736545085907,
"eval_mse": 0.2820460796356201,
"eval_r2": 0.74436353679844,
"eval_rmse": 0.5310801267623901,
"eval_runtime": 80.5385,
"eval_samples_per_second": 42.986,
"eval_steps_per_second": 2.694,
"step": 5193
}
],
"logging_steps": 25,
"max_steps": 5193,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 7.743257397795226e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}