{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1047,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0028653295128939827,
"grad_norm": 257.7583052380938,
"learning_rate": 1.904761904761905e-07,
"loss": 9.25,
"step": 1
},
{
"epoch": 0.014326647564469915,
"grad_norm": 241.29073677978286,
"learning_rate": 9.523809523809525e-07,
"loss": 9.2969,
"step": 5
},
{
"epoch": 0.02865329512893983,
"grad_norm": 225.58659295977776,
"learning_rate": 1.904761904761905e-06,
"loss": 9.0344,
"step": 10
},
{
"epoch": 0.04297994269340974,
"grad_norm": 87.70853454265482,
"learning_rate": 2.8571428571428573e-06,
"loss": 7.9078,
"step": 15
},
{
"epoch": 0.05730659025787966,
"grad_norm": 43.9183222857209,
"learning_rate": 3.80952380952381e-06,
"loss": 6.8359,
"step": 20
},
{
"epoch": 0.07163323782234957,
"grad_norm": 31.094485209793813,
"learning_rate": 4.761904761904762e-06,
"loss": 5.4719,
"step": 25
},
{
"epoch": 0.08595988538681948,
"grad_norm": 13.666017374081363,
"learning_rate": 5.7142857142857145e-06,
"loss": 4.1055,
"step": 30
},
{
"epoch": 0.10028653295128939,
"grad_norm": 10.161245236008533,
"learning_rate": 6.666666666666667e-06,
"loss": 3.25,
"step": 35
},
{
"epoch": 0.11461318051575932,
"grad_norm": 3.2026690104839344,
"learning_rate": 7.61904761904762e-06,
"loss": 2.3469,
"step": 40
},
{
"epoch": 0.12893982808022922,
"grad_norm": 1.9300134153720614,
"learning_rate": 8.571428571428571e-06,
"loss": 1.9957,
"step": 45
},
{
"epoch": 0.14326647564469913,
"grad_norm": 0.868748661209738,
"learning_rate": 9.523809523809525e-06,
"loss": 1.7973,
"step": 50
},
{
"epoch": 0.15759312320916904,
"grad_norm": 0.8403053660761957,
"learning_rate": 1.0476190476190477e-05,
"loss": 1.777,
"step": 55
},
{
"epoch": 0.17191977077363896,
"grad_norm": 0.6204488839484352,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.6609,
"step": 60
},
{
"epoch": 0.18624641833810887,
"grad_norm": 0.46072894517704527,
"learning_rate": 1.2380952380952383e-05,
"loss": 1.5734,
"step": 65
},
{
"epoch": 0.20057306590257878,
"grad_norm": 0.40370134421433296,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.568,
"step": 70
},
{
"epoch": 0.2148997134670487,
"grad_norm": 0.4261850895407627,
"learning_rate": 1.4285714285714287e-05,
"loss": 1.5102,
"step": 75
},
{
"epoch": 0.22922636103151864,
"grad_norm": 0.385686551593444,
"learning_rate": 1.523809523809524e-05,
"loss": 1.5504,
"step": 80
},
{
"epoch": 0.24355300859598855,
"grad_norm": 0.3521586438938912,
"learning_rate": 1.6190476190476193e-05,
"loss": 1.4641,
"step": 85
},
{
"epoch": 0.25787965616045844,
"grad_norm": 0.37387478168247884,
"learning_rate": 1.7142857142857142e-05,
"loss": 1.5211,
"step": 90
},
{
"epoch": 0.2722063037249284,
"grad_norm": 0.3334520851844191,
"learning_rate": 1.8095238095238097e-05,
"loss": 1.4211,
"step": 95
},
{
"epoch": 0.28653295128939826,
"grad_norm": 0.3374208227712567,
"learning_rate": 1.904761904761905e-05,
"loss": 1.4105,
"step": 100
},
{
"epoch": 0.3008595988538682,
"grad_norm": 0.3494984015612909,
"learning_rate": 2e-05,
"loss": 1.3641,
"step": 105
},
{
"epoch": 0.3151862464183381,
"grad_norm": 0.3291034349461991,
"learning_rate": 1.999860973403976e-05,
"loss": 1.4148,
"step": 110
},
{
"epoch": 0.32951289398280803,
"grad_norm": 0.3435680154282283,
"learning_rate": 1.999443932272694e-05,
"loss": 1.4477,
"step": 115
},
{
"epoch": 0.3438395415472779,
"grad_norm": 0.3218335176321063,
"learning_rate": 1.99874899256577e-05,
"loss": 1.3348,
"step": 120
},
{
"epoch": 0.35816618911174786,
"grad_norm": 0.3169060110914379,
"learning_rate": 1.997776347513409e-05,
"loss": 1.3887,
"step": 125
},
{
"epoch": 0.37249283667621774,
"grad_norm": 0.31497640525358284,
"learning_rate": 1.9965262675626726e-05,
"loss": 1.348,
"step": 130
},
{
"epoch": 0.3868194842406877,
"grad_norm": 0.31512378020322396,
"learning_rate": 1.994999100302281e-05,
"loss": 1.3641,
"step": 135
},
{
"epoch": 0.40114613180515757,
"grad_norm": 0.33739494340791354,
"learning_rate": 1.9931952703659655e-05,
"loss": 1.3059,
"step": 140
},
{
"epoch": 0.4154727793696275,
"grad_norm": 0.31137992113387536,
"learning_rate": 1.991115279314398e-05,
"loss": 1.3754,
"step": 145
},
{
"epoch": 0.4297994269340974,
"grad_norm": 0.36579857260459275,
"learning_rate": 1.9887597054957304e-05,
"loss": 1.3375,
"step": 150
},
{
"epoch": 0.44412607449856734,
"grad_norm": 0.33522933646678277,
"learning_rate": 1.9861292038847818e-05,
"loss": 1.3645,
"step": 155
},
{
"epoch": 0.4584527220630373,
"grad_norm": 0.32743853323105904,
"learning_rate": 1.983224505900921e-05,
"loss": 1.3012,
"step": 160
},
{
"epoch": 0.47277936962750716,
"grad_norm": 0.3294434325423303,
"learning_rate": 1.9800464192046956e-05,
"loss": 1.368,
"step": 165
},
{
"epoch": 0.4871060171919771,
"grad_norm": 0.3258072749981886,
"learning_rate": 1.976595827473255e-05,
"loss": 1.3148,
"step": 170
},
{
"epoch": 0.501432664756447,
"grad_norm": 0.3864088044514439,
"learning_rate": 1.9728736901546454e-05,
"loss": 1.3098,
"step": 175
},
{
"epoch": 0.5157593123209169,
"grad_norm": 0.3364423513855298,
"learning_rate": 1.968881042201029e-05,
"loss": 1.3059,
"step": 180
},
{
"epoch": 0.5300859598853869,
"grad_norm": 0.34214151520580405,
"learning_rate": 1.9646189937809145e-05,
"loss": 1.3352,
"step": 185
},
{
"epoch": 0.5444126074498568,
"grad_norm": 0.3053274945471879,
"learning_rate": 1.9600887299704694e-05,
"loss": 1.3387,
"step": 190
},
{
"epoch": 0.5587392550143266,
"grad_norm": 0.3041769711953325,
"learning_rate": 1.9552915104240067e-05,
"loss": 1.3188,
"step": 195
},
{
"epoch": 0.5730659025787965,
"grad_norm": 0.31747827349059277,
"learning_rate": 1.950228669023735e-05,
"loss": 1.343,
"step": 200
},
{
"epoch": 0.5873925501432665,
"grad_norm": 0.30247749181312306,
"learning_rate": 1.9449016135088657e-05,
"loss": 1.3676,
"step": 205
},
{
"epoch": 0.6017191977077364,
"grad_norm": 0.31090923349754856,
"learning_rate": 1.9393118250841897e-05,
"loss": 1.3371,
"step": 210
},
{
"epoch": 0.6160458452722063,
"grad_norm": 0.3058957610153376,
"learning_rate": 1.9334608580082204e-05,
"loss": 1.3062,
"step": 215
},
{
"epoch": 0.6303724928366762,
"grad_norm": 0.3201938803930117,
"learning_rate": 1.9273503391610307e-05,
"loss": 1.309,
"step": 220
},
{
"epoch": 0.6446991404011462,
"grad_norm": 0.30643913797653993,
"learning_rate": 1.920981967591891e-05,
"loss": 1.3035,
"step": 225
},
{
"epoch": 0.6590257879656161,
"grad_norm": 0.3091049342115299,
"learning_rate": 1.914357514046844e-05,
"loss": 1.3672,
"step": 230
},
{
"epoch": 0.673352435530086,
"grad_norm": 0.3211019921093764,
"learning_rate": 1.9074788204763438e-05,
"loss": 1.3309,
"step": 235
},
{
"epoch": 0.6876790830945558,
"grad_norm": 0.3142136640616353,
"learning_rate": 1.9003477995230942e-05,
"loss": 1.3301,
"step": 240
},
{
"epoch": 0.7020057306590258,
"grad_norm": 0.2969129447372341,
"learning_rate": 1.8929664339902342e-05,
"loss": 1.2844,
"step": 245
},
{
"epoch": 0.7163323782234957,
"grad_norm": 0.32166405234870055,
"learning_rate": 1.8853367762900117e-05,
"loss": 1.2605,
"step": 250
},
{
"epoch": 0.7306590257879656,
"grad_norm": 0.3214367335695634,
"learning_rate": 1.8774609478731048e-05,
"loss": 1.2793,
"step": 255
},
{
"epoch": 0.7449856733524355,
"grad_norm": 0.31356330763968654,
"learning_rate": 1.8693411386387445e-05,
"loss": 1.3105,
"step": 260
},
{
"epoch": 0.7593123209169055,
"grad_norm": 0.28277004921839216,
"learning_rate": 1.8609796063258076e-05,
"loss": 1.3352,
"step": 265
},
{
"epoch": 0.7736389684813754,
"grad_norm": 0.29899645005685277,
"learning_rate": 1.8523786758850436e-05,
"loss": 1.2777,
"step": 270
},
{
"epoch": 0.7879656160458453,
"grad_norm": 0.2904988184310563,
"learning_rate": 1.8435407388326167e-05,
"loss": 1.2992,
"step": 275
},
{
"epoch": 0.8022922636103151,
"grad_norm": 0.3003438883173033,
"learning_rate": 1.834468252585135e-05,
"loss": 1.3004,
"step": 280
},
{
"epoch": 0.8166189111747851,
"grad_norm": 0.3125023034962173,
"learning_rate": 1.8251637397763597e-05,
"loss": 1.227,
"step": 285
},
{
"epoch": 0.830945558739255,
"grad_norm": 0.30438015798153173,
"learning_rate": 1.8156297875557777e-05,
"loss": 1.259,
"step": 290
},
{
"epoch": 0.8452722063037249,
"grad_norm": 0.30190146232606113,
"learning_rate": 1.8058690468692366e-05,
"loss": 1.2824,
"step": 295
},
{
"epoch": 0.8595988538681948,
"grad_norm": 0.3023797380550246,
"learning_rate": 1.7958842317218413e-05,
"loss": 1.277,
"step": 300
},
{
"epoch": 0.8739255014326648,
"grad_norm": 0.3102084836884206,
"learning_rate": 1.7856781184233152e-05,
"loss": 1.1988,
"step": 305
},
{
"epoch": 0.8882521489971347,
"grad_norm": 0.2970584447964946,
"learning_rate": 1.7752535448160395e-05,
"loss": 1.2727,
"step": 310
},
{
"epoch": 0.9025787965616046,
"grad_norm": 0.30487145100844953,
"learning_rate": 1.7646134094859816e-05,
"loss": 1.2566,
"step": 315
},
{
"epoch": 0.9169054441260746,
"grad_norm": 0.3048375178754921,
"learning_rate": 1.7537606709567336e-05,
"loss": 1.2457,
"step": 320
},
{
"epoch": 0.9312320916905444,
"grad_norm": 0.2948102666221556,
"learning_rate": 1.742698346866886e-05,
"loss": 1.2965,
"step": 325
},
{
"epoch": 0.9455587392550143,
"grad_norm": 0.31264839407509326,
"learning_rate": 1.731429513130964e-05,
"loss": 1.2801,
"step": 330
},
{
"epoch": 0.9598853868194842,
"grad_norm": 0.29735923550950955,
"learning_rate": 1.7199573030841577e-05,
"loss": 1.2605,
"step": 335
},
{
"epoch": 0.9742120343839542,
"grad_norm": 0.3039565840031905,
"learning_rate": 1.708284906611091e-05,
"loss": 1.234,
"step": 340
},
{
"epoch": 0.9885386819484241,
"grad_norm": 0.3073836805156935,
"learning_rate": 1.696415569258862e-05,
"loss": 1.259,
"step": 345
},
{
"epoch": 1.002865329512894,
"grad_norm": 0.31664041219314865,
"learning_rate": 1.6843525913346087e-05,
"loss": 1.2664,
"step": 350
},
{
"epoch": 1.0171919770773639,
"grad_norm": 0.29695347553484913,
"learning_rate": 1.6720993269878486e-05,
"loss": 1.217,
"step": 355
},
{
"epoch": 1.0315186246418337,
"grad_norm": 0.300646007452569,
"learning_rate": 1.659659183277847e-05,
"loss": 1.2168,
"step": 360
},
{
"epoch": 1.0458452722063036,
"grad_norm": 0.3055551516874735,
"learning_rate": 1.647035619226271e-05,
"loss": 1.1906,
"step": 365
},
{
"epoch": 1.0601719197707737,
"grad_norm": 0.30061799520952165,
"learning_rate": 1.634232144855401e-05,
"loss": 1.2289,
"step": 370
},
{
"epoch": 1.0744985673352436,
"grad_norm": 0.29060382670998225,
"learning_rate": 1.6212523202121547e-05,
"loss": 1.2109,
"step": 375
},
{
"epoch": 1.0888252148997135,
"grad_norm": 0.30881741047203054,
"learning_rate": 1.6080997543782063e-05,
"loss": 1.2297,
"step": 380
},
{
"epoch": 1.1031518624641834,
"grad_norm": 0.29077207858842863,
"learning_rate": 1.5947781044664696e-05,
"loss": 1.2512,
"step": 385
},
{
"epoch": 1.1174785100286533,
"grad_norm": 0.2984009106567584,
"learning_rate": 1.581291074604226e-05,
"loss": 1.1762,
"step": 390
},
{
"epoch": 1.1318051575931232,
"grad_norm": 0.28683159244265233,
"learning_rate": 1.5676424149031798e-05,
"loss": 1.1719,
"step": 395
},
{
"epoch": 1.146131805157593,
"grad_norm": 0.3016666749794573,
"learning_rate": 1.5538359204167285e-05,
"loss": 1.2754,
"step": 400
},
{
"epoch": 1.1604584527220632,
"grad_norm": 0.2861211530074967,
"learning_rate": 1.5398754300847346e-05,
"loss": 1.2566,
"step": 405
},
{
"epoch": 1.174785100286533,
"grad_norm": 0.3013734628684354,
"learning_rate": 1.525764825666097e-05,
"loss": 1.1691,
"step": 410
},
{
"epoch": 1.189111747851003,
"grad_norm": 0.29971634545543485,
"learning_rate": 1.5115080306594172e-05,
"loss": 1.1811,
"step": 415
},
{
"epoch": 1.2034383954154728,
"grad_norm": 0.31994782495783924,
"learning_rate": 1.4971090092120544e-05,
"loss": 1.2414,
"step": 420
},
{
"epoch": 1.2177650429799427,
"grad_norm": 0.29658597163323713,
"learning_rate": 1.4825717650178846e-05,
"loss": 1.1973,
"step": 425
},
{
"epoch": 1.2320916905444126,
"grad_norm": 0.30035988308067263,
"learning_rate": 1.4679003402040593e-05,
"loss": 1.2164,
"step": 430
},
{
"epoch": 1.2464183381088825,
"grad_norm": 0.30992527789489627,
"learning_rate": 1.4530988142070802e-05,
"loss": 1.1625,
"step": 435
},
{
"epoch": 1.2607449856733524,
"grad_norm": 0.30039271345012736,
"learning_rate": 1.438171302638498e-05,
"loss": 1.2523,
"step": 440
},
{
"epoch": 1.2750716332378222,
"grad_norm": 0.301258281954922,
"learning_rate": 1.4231219561405533e-05,
"loss": 1.2164,
"step": 445
},
{
"epoch": 1.2893982808022924,
"grad_norm": 0.30070972022384745,
"learning_rate": 1.4079549592320782e-05,
"loss": 1.2371,
"step": 450
},
{
"epoch": 1.3037249283667622,
"grad_norm": 0.2937945088213742,
"learning_rate": 1.3926745291449773e-05,
"loss": 1.2227,
"step": 455
},
{
"epoch": 1.3180515759312321,
"grad_norm": 0.2927965843522735,
"learning_rate": 1.3772849146516114e-05,
"loss": 1.2098,
"step": 460
},
{
"epoch": 1.332378223495702,
"grad_norm": 0.29991379304860427,
"learning_rate": 1.3617903948834155e-05,
"loss": 1.1414,
"step": 465
},
{
"epoch": 1.346704871060172,
"grad_norm": 0.2895474514716426,
"learning_rate": 1.34619527814107e-05,
"loss": 1.2188,
"step": 470
},
{
"epoch": 1.3610315186246418,
"grad_norm": 0.30135691213637283,
"learning_rate": 1.3305039006965657e-05,
"loss": 1.2746,
"step": 475
},
{
"epoch": 1.3753581661891117,
"grad_norm": 0.2808237365976051,
"learning_rate": 1.3147206255874886e-05,
"loss": 1.1936,
"step": 480
},
{
"epoch": 1.3896848137535818,
"grad_norm": 0.29073762199706943,
"learning_rate": 1.2988498414038635e-05,
"loss": 1.1734,
"step": 485
},
{
"epoch": 1.4040114613180517,
"grad_norm": 0.3166900967170064,
"learning_rate": 1.282895961067893e-05,
"loss": 1.1973,
"step": 490
},
{
"epoch": 1.4183381088825215,
"grad_norm": 0.3011306233980793,
"learning_rate": 1.2668634206069305e-05,
"loss": 1.2238,
"step": 495
},
{
"epoch": 1.4326647564469914,
"grad_norm": 0.2901803615383375,
"learning_rate": 1.2507566779200273e-05,
"loss": 1.2496,
"step": 500
},
{
"epoch": 1.4469914040114613,
"grad_norm": 0.29696637870766024,
"learning_rate": 1.2345802115384014e-05,
"loss": 1.1768,
"step": 505
},
{
"epoch": 1.4613180515759312,
"grad_norm": 0.2863270222332892,
"learning_rate": 1.2183385193801655e-05,
"loss": 1.2156,
"step": 510
},
{
"epoch": 1.475644699140401,
"grad_norm": 0.3025458017583608,
"learning_rate": 1.2020361174996694e-05,
"loss": 1.173,
"step": 515
},
{
"epoch": 1.4899713467048712,
"grad_norm": 0.287124352936146,
"learning_rate": 1.1856775388317936e-05,
"loss": 1.1773,
"step": 520
},
{
"epoch": 1.5042979942693409,
"grad_norm": 0.30845106692202556,
"learning_rate": 1.1692673319315541e-05,
"loss": 1.2316,
"step": 525
},
{
"epoch": 1.518624641833811,
"grad_norm": 0.2934898590653195,
"learning_rate": 1.1528100597093617e-05,
"loss": 1.1652,
"step": 530
},
{
"epoch": 1.5329512893982808,
"grad_norm": 0.5593622869918241,
"learning_rate": 1.13631029816229e-05,
"loss": 1.2328,
"step": 535
},
{
"epoch": 1.5472779369627507,
"grad_norm": 0.29465220623927774,
"learning_rate": 1.1197726351017052e-05,
"loss": 1.1785,
"step": 540
},
{
"epoch": 1.5616045845272206,
"grad_norm": 0.2914162673673657,
"learning_rate": 1.1032016688776106e-05,
"loss": 1.2613,
"step": 545
},
{
"epoch": 1.5759312320916905,
"grad_norm": 0.2990525095465911,
"learning_rate": 1.0866020071000597e-05,
"loss": 1.2006,
"step": 550
},
{
"epoch": 1.5902578796561606,
"grad_norm": 0.2855200603052628,
"learning_rate": 1.0699782653579973e-05,
"loss": 1.2094,
"step": 555
},
{
"epoch": 1.6045845272206303,
"grad_norm": 0.2909907948504594,
"learning_rate": 1.0533350659358779e-05,
"loss": 1.2035,
"step": 560
},
{
"epoch": 1.6189111747851004,
"grad_norm": 0.289103776445201,
"learning_rate": 1.0366770365284271e-05,
"loss": 1.1848,
"step": 565
},
{
"epoch": 1.63323782234957,
"grad_norm": 0.2825139343735471,
"learning_rate": 1.0200088089538944e-05,
"loss": 1.2031,
"step": 570
},
{
"epoch": 1.6475644699140402,
"grad_norm": 0.27750397580689223,
"learning_rate": 1.0033350178661633e-05,
"loss": 1.1998,
"step": 575
},
{
"epoch": 1.66189111747851,
"grad_norm": 0.2845108454176054,
"learning_rate": 9.866602994660688e-06,
"loss": 1.1523,
"step": 580
},
{
"epoch": 1.67621776504298,
"grad_norm": 0.2903678786080609,
"learning_rate": 9.699892902122887e-06,
"loss": 1.1922,
"step": 585
},
{
"epoch": 1.6905444126074498,
"grad_norm": 0.2935036283873705,
"learning_rate": 9.53326625532161e-06,
"loss": 1.2277,
"step": 590
},
{
"epoch": 1.7048710601719197,
"grad_norm": 0.29136738669186435,
"learning_rate": 9.366769385327875e-06,
"loss": 1.1641,
"step": 595
},
{
"epoch": 1.7191977077363898,
"grad_norm": 0.29992898897918896,
"learning_rate": 9.200448587127852e-06,
"loss": 1.1887,
"step": 600
},
{
"epoch": 1.7335243553008595,
"grad_norm": 0.2980387182263642,
"learning_rate": 9.034350106750383e-06,
"loss": 1.2117,
"step": 605
},
{
"epoch": 1.7478510028653296,
"grad_norm": 0.2973187539429791,
"learning_rate": 8.868520128408134e-06,
"loss": 1.2273,
"step": 610
},
{
"epoch": 1.7621776504297995,
"grad_norm": 0.2939664139251852,
"learning_rate": 8.703004761655918e-06,
"loss": 1.2121,
"step": 615
},
{
"epoch": 1.7765042979942693,
"grad_norm": 0.2948724111626734,
"learning_rate": 8.537850028569796e-06,
"loss": 1.1727,
"step": 620
},
{
"epoch": 1.7908309455587392,
"grad_norm": 0.2900310921230476,
"learning_rate": 8.37310185095048e-06,
"loss": 1.1705,
"step": 625
},
{
"epoch": 1.8051575931232091,
"grad_norm": 0.2878990205353967,
"learning_rate": 8.208806037554645e-06,
"loss": 1.1781,
"step": 630
},
{
"epoch": 1.8194842406876792,
"grad_norm": 0.29396661211641484,
"learning_rate": 8.045008271357644e-06,
"loss": 1.2625,
"step": 635
},
{
"epoch": 1.8338108882521489,
"grad_norm": 0.2853159419583723,
"learning_rate": 7.88175409685122e-06,
"loss": 1.1562,
"step": 640
},
{
"epoch": 1.848137535816619,
"grad_norm": 0.2924378192139063,
"learning_rate": 7.719088907379705e-06,
"loss": 1.2141,
"step": 645
},
{
"epoch": 1.8624641833810889,
"grad_norm": 0.2835974358624726,
"learning_rate": 7.557057932518274e-06,
"loss": 1.1344,
"step": 650
},
{
"epoch": 1.8767908309455588,
"grad_norm": 0.2852512548851884,
"learning_rate": 7.39570622549669e-06,
"loss": 1.2395,
"step": 655
},
{
"epoch": 1.8911174785100286,
"grad_norm": 0.29264683297829125,
"learning_rate": 7.235078650672141e-06,
"loss": 1.1797,
"step": 660
},
{
"epoch": 1.9054441260744985,
"grad_norm": 0.2898654877917546,
"learning_rate": 7.075219871054528e-06,
"loss": 1.2227,
"step": 665
},
{
"epoch": 1.9197707736389686,
"grad_norm": 0.29421755996555327,
"learning_rate": 6.91617433588781e-06,
"loss": 1.1711,
"step": 670
},
{
"epoch": 1.9340974212034383,
"grad_norm": 0.2800223774944341,
"learning_rate": 6.757986268290713e-06,
"loss": 1.2025,
"step": 675
},
{
"epoch": 1.9484240687679084,
"grad_norm": 0.28987949528203105,
"learning_rate": 6.600699652960383e-06,
"loss": 1.1891,
"step": 680
},
{
"epoch": 1.962750716332378,
"grad_norm": 0.2845787595193169,
"learning_rate": 6.4443582239422744e-06,
"loss": 1.1602,
"step": 685
},
{
"epoch": 1.9770773638968482,
"grad_norm": 0.27697906700119446,
"learning_rate": 6.289005452469778e-06,
"loss": 1.2195,
"step": 690
},
{
"epoch": 1.991404011461318,
"grad_norm": 0.2864230953591413,
"learning_rate": 6.134684534876892e-06,
"loss": 1.1859,
"step": 695
},
{
"epoch": 2.005730659025788,
"grad_norm": 0.3004607532045262,
"learning_rate": 5.981438380587355e-06,
"loss": 1.2074,
"step": 700
},
{
"epoch": 2.020057306590258,
"grad_norm": 0.29024736883757196,
"learning_rate": 5.829309600183536e-06,
"loss": 1.1371,
"step": 705
},
{
"epoch": 2.0343839541547277,
"grad_norm": 0.285433189631626,
"learning_rate": 5.678340493558427e-06,
"loss": 1.2063,
"step": 710
},
{
"epoch": 2.048710601719198,
"grad_norm": 0.2957416068435454,
"learning_rate": 5.528573038154028e-06,
"loss": 1.1945,
"step": 715
},
{
"epoch": 2.0630372492836675,
"grad_norm": 0.2948232121670883,
"learning_rate": 5.380048877289381e-06,
"loss": 1.1439,
"step": 720
},
{
"epoch": 2.0773638968481376,
"grad_norm": 0.2919439807242836,
"learning_rate": 5.232809308581504e-06,
"loss": 1.1496,
"step": 725
},
{
"epoch": 2.0916905444126073,
"grad_norm": 0.2867698512059567,
"learning_rate": 5.086895272462475e-06,
"loss": 1.1186,
"step": 730
},
{
"epoch": 2.1060171919770774,
"grad_norm": 0.2917668731887502,
"learning_rate": 4.942347340795803e-06,
"loss": 1.125,
"step": 735
},
{
"epoch": 2.1203438395415475,
"grad_norm": 0.2870682782334859,
"learning_rate": 4.799205705595294e-06,
"loss": 1.0992,
"step": 740
},
{
"epoch": 2.134670487106017,
"grad_norm": 0.2869641039546839,
"learning_rate": 4.657510167849525e-06,
"loss": 1.141,
"step": 745
},
{
"epoch": 2.1489971346704873,
"grad_norm": 0.28154741238148256,
"learning_rate": 4.5173001264550665e-06,
"loss": 1.0984,
"step": 750
},
{
"epoch": 2.163323782234957,
"grad_norm": 0.29022223670208136,
"learning_rate": 4.378614567261487e-06,
"loss": 1.1313,
"step": 755
},
{
"epoch": 2.177650429799427,
"grad_norm": 0.28112870621050123,
"learning_rate": 4.241492052231213e-06,
"loss": 1.1865,
"step": 760
},
{
"epoch": 2.1919770773638967,
"grad_norm": 0.27708366870966095,
"learning_rate": 4.105970708717244e-06,
"loss": 1.1467,
"step": 765
},
{
"epoch": 2.206303724928367,
"grad_norm": 0.28678919627949717,
"learning_rate": 3.972088218861738e-06,
"loss": 1.1592,
"step": 770
},
{
"epoch": 2.2206303724928365,
"grad_norm": 0.2813821171114588,
"learning_rate": 3.83988180911836e-06,
"loss": 1.1549,
"step": 775
},
{
"epoch": 2.2349570200573066,
"grad_norm": 0.29052831071379115,
"learning_rate": 3.7093882399013504e-06,
"loss": 1.1742,
"step": 780
},
{
"epoch": 2.2492836676217767,
"grad_norm": 0.28664925785362466,
"learning_rate": 3.580643795364166e-06,
"loss": 1.1883,
"step": 785
},
{
"epoch": 2.2636103151862463,
"grad_norm": 0.28360378572017647,
"learning_rate": 3.4536842733105702e-06,
"loss": 1.1783,
"step": 790
},
{
"epoch": 2.2779369627507164,
"grad_norm": 0.2784641405735339,
"learning_rate": 3.3285449752409315e-06,
"loss": 1.1119,
"step": 795
},
{
"epoch": 2.292263610315186,
"grad_norm": 0.28470450318470536,
"learning_rate": 3.205260696536534e-06,
"loss": 1.15,
"step": 800
},
{
"epoch": 2.306590257879656,
"grad_norm": 0.27908057259095853,
"learning_rate": 3.083865716784592e-06,
"loss": 1.1469,
"step": 805
},
{
"epoch": 2.3209169054441263,
"grad_norm": 0.28684050381253245,
"learning_rate": 2.964393790246728e-06,
"loss": 1.0791,
"step": 810
},
{
"epoch": 2.335243553008596,
"grad_norm": 0.2833942155479012,
"learning_rate": 2.846878136473472e-06,
"loss": 1.1854,
"step": 815
},
{
"epoch": 2.349570200573066,
"grad_norm": 0.2813005058217904,
"learning_rate": 2.7313514310674826e-06,
"loss": 1.1102,
"step": 820
},
{
"epoch": 2.3638968481375358,
"grad_norm": 0.27630773934160313,
"learning_rate": 2.6178457965979543e-06,
"loss": 1.1111,
"step": 825
},
{
"epoch": 2.378223495702006,
"grad_norm": 0.28322453198420466,
"learning_rate": 2.506392793668869e-06,
"loss": 1.1262,
"step": 830
},
{
"epoch": 2.3925501432664755,
"grad_norm": 0.27434180065283353,
"learning_rate": 2.3970234121434555e-06,
"loss": 1.158,
"step": 835
},
{
"epoch": 2.4068767908309456,
"grad_norm": 0.28158095452878135,
"learning_rate": 2.2897680625273623e-06,
"loss": 1.1152,
"step": 840
},
{
"epoch": 2.4212034383954153,
"grad_norm": 0.29278876714519303,
"learning_rate": 2.1846565675129074e-06,
"loss": 1.1395,
"step": 845
},
{
"epoch": 2.4355300859598854,
"grad_norm": 0.27557568657805176,
"learning_rate": 2.0817181536868035e-06,
"loss": 1.1469,
"step": 850
},
{
"epoch": 2.4498567335243555,
"grad_norm": 0.2754040319685035,
"learning_rate": 1.9809814434036e-06,
"loss": 1.1393,
"step": 855
},
{
"epoch": 2.464183381088825,
"grad_norm": 0.2810022564605419,
"learning_rate": 1.8824744468271506e-06,
"loss": 1.1043,
"step": 860
},
{
"epoch": 2.4785100286532953,
"grad_norm": 0.27969700189605484,
"learning_rate": 1.786224554142285e-06,
"loss": 1.116,
"step": 865
},
{
"epoch": 2.492836676217765,
"grad_norm": 0.27897638050493495,
"learning_rate": 1.6922585279389037e-06,
"loss": 1.1367,
"step": 870
},
{
"epoch": 2.507163323782235,
"grad_norm": 0.2815481626728245,
"learning_rate": 1.6006024957705357e-06,
"loss": 1.1365,
"step": 875
},
{
"epoch": 2.5214899713467047,
"grad_norm": 0.2769674435573708,
"learning_rate": 1.5112819428894976e-06,
"loss": 1.1832,
"step": 880
},
{
"epoch": 2.535816618911175,
"grad_norm": 0.28423912540069224,
"learning_rate": 1.4243217051606285e-06,
"loss": 1.2,
"step": 885
},
{
"epoch": 2.5501432664756445,
"grad_norm": 0.28433511602868367,
"learning_rate": 1.339745962155613e-06,
"loss": 1.1258,
"step": 890
},
{
"epoch": 2.5644699140401146,
"grad_norm": 0.27492757921736494,
"learning_rate": 1.2575782304297647e-06,
"loss": 1.1631,
"step": 895
},
{
"epoch": 2.5787965616045847,
"grad_norm": 0.28357440374503834,
"learning_rate": 1.1778413569831726e-06,
"loss": 1.1508,
"step": 900
},
{
"epoch": 2.5931232091690544,
"grad_norm": 0.28309913251371766,
"learning_rate": 1.1005575129080203e-06,
"loss": 1.1596,
"step": 905
},
{
"epoch": 2.6074498567335245,
"grad_norm": 0.2830020221949328,
"learning_rate": 1.0257481872238483e-06,
"loss": 1.1809,
"step": 910
},
{
"epoch": 2.621776504297994,
"grad_norm": 0.2738198006004891,
"learning_rate": 9.534341809024583e-07,
"loss": 1.0836,
"step": 915
},
{
"epoch": 2.6361031518624642,
"grad_norm": 0.27780926764956043,
"learning_rate": 8.836356010841385e-07,
"loss": 1.1633,
"step": 920
},
{
"epoch": 2.6504297994269344,
"grad_norm": 0.28167674862359654,
"learning_rate": 8.16371855486805e-07,
"loss": 1.152,
"step": 925
},
{
"epoch": 2.664756446991404,
"grad_norm": 0.28736011956752383,
"learning_rate": 7.516616470096317e-07,
"loss": 1.1127,
"step": 930
},
{
"epoch": 2.6790830945558737,
"grad_norm": 0.27167353567975117,
"learning_rate": 6.895229685326443e-07,
"loss": 1.1574,
"step": 935
},
{
"epoch": 2.693409742120344,
"grad_norm": 0.29082932902589526,
"learning_rate": 6.299730979137419e-07,
"loss": 1.1426,
"step": 940
},
{
"epoch": 2.707736389684814,
"grad_norm": 0.27278739042779193,
"learning_rate": 5.730285931845381e-07,
"loss": 1.1113,
"step": 945
},
{
"epoch": 2.7220630372492836,
"grad_norm": 0.2695715592534727,
"learning_rate": 5.187052879463394e-07,
"loss": 1.1182,
"step": 950
},
{
"epoch": 2.7363896848137537,
"grad_norm": 0.2767698424495714,
"learning_rate": 4.6701828696757213e-07,
"loss": 1.1264,
"step": 955
},
{
"epoch": 2.7507163323782233,
"grad_norm": 0.2862004769810508,
"learning_rate": 4.1798196198384545e-07,
"loss": 1.1766,
"step": 960
},
{
"epoch": 2.7650429799426934,
"grad_norm": 0.27956749462161845,
"learning_rate": 3.716099477018475e-07,
"loss": 1.1463,
"step": 965
},
{
"epoch": 2.7793696275071635,
"grad_norm": 0.28226367416458953,
"learning_rate": 3.279151380081691e-07,
"loss": 1.1898,
"step": 970
},
{
"epoch": 2.793696275071633,
"grad_norm": 0.2759717986692799,
"learning_rate": 2.8690968238412444e-07,
"loss": 1.1193,
"step": 975
},
{
"epoch": 2.8080229226361033,
"grad_norm": 0.2913913242847074,
"learning_rate": 2.4860498252753827e-07,
"loss": 1.2113,
"step": 980
},
{
"epoch": 2.822349570200573,
"grad_norm": 0.28827782734894336,
"learning_rate": 2.130116891824796e-07,
"loss": 1.1344,
"step": 985
},
{
"epoch": 2.836676217765043,
"grad_norm": 0.2726866974754793,
"learning_rate": 1.8013969917777484e-07,
"loss": 1.1385,
"step": 990
},
{
"epoch": 2.8510028653295127,
"grad_norm": 0.2818762723051636,
"learning_rate": 1.4999815267517593e-07,
"loss": 1.1732,
"step": 995
},
{
"epoch": 2.865329512893983,
"grad_norm": 0.2806296409646362,
"learning_rate": 1.225954306279009e-07,
"loss": 1.1609,
"step": 1000
},
{
"epoch": 2.8796561604584525,
"grad_norm": 0.27946799660404814,
"learning_rate": 9.793915245028595e-08,
"loss": 1.1875,
"step": 1005
},
{
"epoch": 2.8939828080229226,
"grad_norm": 0.2854513869083756,
"learning_rate": 7.603617389918106e-08,
"loss": 1.1342,
"step": 1010
},
{
"epoch": 2.9083094555873927,
"grad_norm": 0.2764976233678434,
"learning_rate": 5.689258516768825e-08,
"loss": 1.1789,
"step": 1015
},
{
"epoch": 2.9226361031518624,
"grad_norm": 0.2843787086373923,
"learning_rate": 4.05137091917629e-08,
"loss": 1.177,
"step": 1020
},
{
"epoch": 2.9369627507163325,
"grad_norm": 0.2793191895679787,
"learning_rate": 2.6904100170150883e-08,
"loss": 1.1459,
"step": 1025
},
{
"epoch": 2.951289398280802,
"grad_norm": 0.2820836844171885,
"learning_rate": 1.6067542298083826e-08,
"loss": 1.2051,
"step": 1030
},
{
"epoch": 2.9656160458452723,
"grad_norm": 0.280116788571808,
"learning_rate": 8.007048715068522e-09,
"loss": 1.1293,
"step": 1035
},
{
"epoch": 2.9799426934097424,
"grad_norm": 0.2760942837882742,
"learning_rate": 2.7248606670760012e-09,
"loss": 1.1348,
"step": 1040
},
{
"epoch": 2.994269340974212,
"grad_norm": 0.2785194035391107,
"learning_rate": 2.2244688335226749e-10,
"loss": 1.1268,
"step": 1045
},
{
"epoch": 3.0,
"step": 1047,
"total_flos": 99070802657280.0,
"train_loss": 1.4185899444842407,
"train_runtime": 3548.2371,
"train_samples_per_second": 18.883,
"train_steps_per_second": 0.295
}
],
"logging_steps": 5,
"max_steps": 1047,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 99070802657280.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}