BraylonDash's picture
Model save
4843c21 verified
raw
history blame contribute delete
No virus
23.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 468,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0638297872340426e-07,
"logits/chosen": 0.1359557956457138,
"logits/rejected": 0.030706744641065598,
"logps/chosen": -736.0869140625,
"logps/rejected": -613.6344604492188,
"loss": 2.0331,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340427e-06,
"logits/chosen": 0.11667777597904205,
"logits/rejected": 0.26604601740837097,
"logps/chosen": -546.5281982421875,
"logps/rejected": -597.5736083984375,
"loss": 2.1592,
"rewards/accuracies": 0.4583333432674408,
"rewards/chosen": 0.0007250224007293582,
"rewards/margins": 0.00040180076030083,
"rewards/rejected": 0.0003232216986361891,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.1276595744680853e-06,
"logits/chosen": 0.16373148560523987,
"logits/rejected": 0.2677033543586731,
"logps/chosen": -604.6590576171875,
"logps/rejected": -649.482177734375,
"loss": 2.0972,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.0005862273974344134,
"rewards/margins": -0.0003054165281355381,
"rewards/rejected": -0.0002808108984027058,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.191489361702128e-06,
"logits/chosen": 0.14978544414043427,
"logits/rejected": 0.1915779411792755,
"logps/chosen": -594.8548583984375,
"logps/rejected": -588.2429809570312,
"loss": 2.122,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.004188057966530323,
"rewards/margins": 0.0009490737575106323,
"rewards/rejected": -0.0051371315494179726,
"step": 30
},
{
"epoch": 0.09,
"learning_rate": 4.255319148936171e-06,
"logits/chosen": 0.16862796247005463,
"logits/rejected": 0.23586151003837585,
"logps/chosen": -574.7235107421875,
"logps/rejected": -631.8544921875,
"loss": 2.1863,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.01229151152074337,
"rewards/margins": 0.005582691170275211,
"rewards/rejected": -0.017874203622341156,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 4.999373573764188e-06,
"logits/chosen": 0.1411871314048767,
"logits/rejected": 0.2258455753326416,
"logps/chosen": -612.8582763671875,
"logps/rejected": -636.5026245117188,
"loss": 2.1508,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04941480979323387,
"rewards/margins": 0.019247086718678474,
"rewards/rejected": -0.0686618983745575,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 4.988245838331339e-06,
"logits/chosen": 0.17244111001491547,
"logits/rejected": 0.17342150211334229,
"logps/chosen": -634.6348266601562,
"logps/rejected": -667.5384521484375,
"loss": 2.0758,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.13025899231433868,
"rewards/margins": 0.05111612752079964,
"rewards/rejected": -0.18137511610984802,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 4.963268819535228e-06,
"logits/chosen": 0.12650486826896667,
"logits/rejected": 0.14093999564647675,
"logps/chosen": -608.5107421875,
"logps/rejected": -702.1578369140625,
"loss": 2.0556,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.21341009438037872,
"rewards/margins": 0.09893321990966797,
"rewards/rejected": -0.3123432993888855,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.9245815365216115e-06,
"logits/chosen": 0.19184628129005432,
"logits/rejected": 0.2408786565065384,
"logps/chosen": -679.4183349609375,
"logps/rejected": -609.7093505859375,
"loss": 2.1137,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.21463651955127716,
"rewards/margins": 0.05772104859352112,
"rewards/rejected": -0.2723575234413147,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.872399318152594e-06,
"logits/chosen": 0.1250939965248108,
"logits/rejected": 0.18045032024383545,
"logps/chosen": -622.2333374023438,
"logps/rejected": -655.4575805664062,
"loss": 2.0044,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.1839352548122406,
"rewards/margins": 0.10977420955896378,
"rewards/rejected": -0.2937094569206238,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.807012604511542e-06,
"logits/chosen": 0.18265239894390106,
"logits/rejected": 0.2614283859729767,
"logps/chosen": -649.8997802734375,
"logps/rejected": -658.8975830078125,
"loss": 1.9995,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.16064395010471344,
"rewards/margins": 0.08805385231971741,
"rewards/rejected": -0.24869783222675323,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.728785330347771e-06,
"logits/chosen": 0.2479465901851654,
"logits/rejected": 0.2932817339897156,
"logps/chosen": -674.0836181640625,
"logps/rejected": -645.6417236328125,
"loss": 1.895,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.12688389420509338,
"rewards/margins": 0.08782283961772919,
"rewards/rejected": -0.21470670402050018,
"step": 110
},
{
"epoch": 0.26,
"learning_rate": 4.63815289945858e-06,
"logits/chosen": 0.19643843173980713,
"logits/rejected": 0.2974274456501007,
"logps/chosen": -573.49658203125,
"logps/rejected": -666.606689453125,
"loss": 1.89,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.09824337065219879,
"rewards/margins": 0.13982543349266052,
"rewards/rejected": -0.2380688190460205,
"step": 120
},
{
"epoch": 0.28,
"learning_rate": 4.535619761282989e-06,
"logits/chosen": 0.23821644484996796,
"logits/rejected": 0.288485586643219,
"logps/chosen": -590.9158935546875,
"logps/rejected": -623.23974609375,
"loss": 1.9389,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.14589470624923706,
"rewards/margins": 0.12624357640743256,
"rewards/rejected": -0.2721382975578308,
"step": 130
},
{
"epoch": 0.3,
"learning_rate": 4.42175660319555e-06,
"logits/chosen": 0.2631734013557434,
"logits/rejected": 0.2810806632041931,
"logps/chosen": -645.8680419921875,
"logps/rejected": -654.8004760742188,
"loss": 1.8203,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.20492109656333923,
"rewards/margins": 0.20386295020580292,
"rewards/rejected": -0.40878406167030334,
"step": 140
},
{
"epoch": 0.32,
"learning_rate": 4.297197174127619e-06,
"logits/chosen": 0.2586398422718048,
"logits/rejected": 0.3086986839771271,
"logps/chosen": -619.4220581054688,
"logps/rejected": -697.2005615234375,
"loss": 1.7553,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.24971242249011993,
"rewards/margins": 0.2221045196056366,
"rewards/rejected": -0.4718169569969177,
"step": 150
},
{
"epoch": 0.34,
"learning_rate": 4.162634757195418e-06,
"logits/chosen": 0.2681664526462555,
"logits/rejected": 0.2807798683643341,
"logps/chosen": -630.39306640625,
"logps/rejected": -645.6117553710938,
"loss": 1.8404,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.19384464621543884,
"rewards/margins": 0.1983100175857544,
"rewards/rejected": -0.39215466380119324,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 4.018818310967843e-06,
"logits/chosen": 0.27496370673179626,
"logits/rejected": 0.30781346559524536,
"logps/chosen": -559.2887573242188,
"logps/rejected": -601.3917846679688,
"loss": 1.8382,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.0747746005654335,
"rewards/margins": 0.19791939854621887,
"rewards/rejected": -0.2726939916610718,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 3.866548300851254e-06,
"logits/chosen": 0.2482290267944336,
"logits/rejected": 0.2852781414985657,
"logps/chosen": -620.8068237304688,
"logps/rejected": -665.9005737304688,
"loss": 1.8229,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.10497160255908966,
"rewards/margins": 0.20543234050273895,
"rewards/rejected": -0.3104039430618286,
"step": 180
},
{
"epoch": 0.41,
"learning_rate": 3.706672243793271e-06,
"logits/chosen": 0.2958913743495941,
"logits/rejected": 0.3795389235019684,
"logps/chosen": -611.8587646484375,
"logps/rejected": -658.9635009765625,
"loss": 1.7752,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.0870656967163086,
"rewards/margins": 0.23995642364025116,
"rewards/rejected": -0.32702213525772095,
"step": 190
},
{
"epoch": 0.43,
"learning_rate": 3.5400799911032357e-06,
"logits/chosen": 0.2935205101966858,
"logits/rejected": 0.3416239321231842,
"logps/chosen": -660.2877197265625,
"logps/rejected": -730.04541015625,
"loss": 1.7351,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.1828436255455017,
"rewards/margins": 0.3010478913784027,
"rewards/rejected": -0.4838915765285492,
"step": 200
},
{
"epoch": 0.45,
"learning_rate": 3.3676987756445894e-06,
"logits/chosen": 0.24807122349739075,
"logits/rejected": 0.32862648367881775,
"logps/chosen": -605.8773193359375,
"logps/rejected": -641.6677856445312,
"loss": 1.8245,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.13868902623653412,
"rewards/margins": 0.2735101878643036,
"rewards/rejected": -0.4121991991996765,
"step": 210
},
{
"epoch": 0.47,
"learning_rate": 3.1904880509659397e-06,
"logits/chosen": 0.270724892616272,
"logits/rejected": 0.3151053786277771,
"logps/chosen": -650.7314453125,
"logps/rejected": -708.2312622070312,
"loss": 1.735,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.16527561843395233,
"rewards/margins": 0.2484448254108429,
"rewards/rejected": -0.4137204587459564,
"step": 220
},
{
"epoch": 0.49,
"learning_rate": 3.0094341510955697e-06,
"logits/chosen": 0.19233042001724243,
"logits/rejected": 0.29483872652053833,
"logps/chosen": -663.5474243164062,
"logps/rejected": -743.0173950195312,
"loss": 1.7378,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.14797742664813995,
"rewards/margins": 0.3706679344177246,
"rewards/rejected": -0.5186454057693481,
"step": 230
},
{
"epoch": 0.51,
"learning_rate": 2.825544800722376e-06,
"logits/chosen": 0.2124979943037033,
"logits/rejected": 0.3365432620048523,
"logps/chosen": -619.9740600585938,
"logps/rejected": -700.7166748046875,
"loss": 1.8168,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.1956629902124405,
"rewards/margins": 0.2987174093723297,
"rewards/rejected": -0.494380384683609,
"step": 240
},
{
"epoch": 0.53,
"learning_rate": 2.639843506318899e-06,
"logits/chosen": 0.2796134054660797,
"logits/rejected": 0.2740449607372284,
"logps/chosen": -582.3416748046875,
"logps/rejected": -674.327880859375,
"loss": 1.8901,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.19822832942008972,
"rewards/margins": 0.19228845834732056,
"rewards/rejected": -0.3905167877674103,
"step": 250
},
{
"epoch": 0.55,
"learning_rate": 2.4533638594248094e-06,
"logits/chosen": 0.25897207856178284,
"logits/rejected": 0.31485193967819214,
"logps/chosen": -604.8118896484375,
"logps/rejected": -667.9144897460938,
"loss": 1.8606,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.14205999672412872,
"rewards/margins": 0.28450149297714233,
"rewards/rejected": -0.42656150460243225,
"step": 260
},
{
"epoch": 0.58,
"learning_rate": 2.2671437837980943e-06,
"logits/chosen": 0.22259187698364258,
"logits/rejected": 0.22855930030345917,
"logps/chosen": -593.6612548828125,
"logps/rejected": -673.6566162109375,
"loss": 1.7486,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.14823240041732788,
"rewards/margins": 0.2802043557167053,
"rewards/rejected": -0.4284366965293884,
"step": 270
},
{
"epoch": 0.6,
"learning_rate": 2.082219758453629e-06,
"logits/chosen": 0.2169434130191803,
"logits/rejected": 0.2703471779823303,
"logps/chosen": -611.6048583984375,
"logps/rejected": -682.5806884765625,
"loss": 1.6556,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.11671599000692368,
"rewards/margins": 0.26952022314071655,
"rewards/rejected": -0.3862362205982208,
"step": 280
},
{
"epoch": 0.62,
"learning_rate": 1.899621048743019e-06,
"logits/chosen": 0.22146745026111603,
"logits/rejected": 0.34733515977859497,
"logps/chosen": -603.9933471679688,
"logps/rejected": -673.3649291992188,
"loss": 1.7238,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.20439250767230988,
"rewards/margins": 0.2682177424430847,
"rewards/rejected": -0.4726102352142334,
"step": 290
},
{
"epoch": 0.64,
"learning_rate": 1.7203639775848423e-06,
"logits/chosen": 0.19099445641040802,
"logits/rejected": 0.3011043667793274,
"logps/chosen": -606.6263427734375,
"logps/rejected": -639.6136474609375,
"loss": 1.8381,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.17796705663204193,
"rewards/margins": 0.23042461276054382,
"rewards/rejected": -0.40839165449142456,
"step": 300
},
{
"epoch": 0.66,
"learning_rate": 1.5454462687309445e-06,
"logits/chosen": 0.2036764919757843,
"logits/rejected": 0.26239025592803955,
"logps/chosen": -602.3845825195312,
"logps/rejected": -666.4627075195312,
"loss": 1.8042,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.1518932580947876,
"rewards/margins": 0.2536298632621765,
"rewards/rejected": -0.4055231511592865,
"step": 310
},
{
"epoch": 0.68,
"learning_rate": 1.3758414935535147e-06,
"logits/chosen": 0.21739721298217773,
"logits/rejected": 0.2840099334716797,
"logps/chosen": -636.0455322265625,
"logps/rejected": -709.1137084960938,
"loss": 1.65,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.16815349459648132,
"rewards/margins": 0.29733169078826904,
"rewards/rejected": -0.465485155582428,
"step": 320
},
{
"epoch": 0.7,
"learning_rate": 1.2124936522614622e-06,
"logits/chosen": 0.20938508212566376,
"logits/rejected": 0.22490420937538147,
"logps/chosen": -615.7994995117188,
"logps/rejected": -669.2200927734375,
"loss": 1.7098,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.18394342064857483,
"rewards/margins": 0.31033387780189514,
"rewards/rejected": -0.49427732825279236,
"step": 330
},
{
"epoch": 0.73,
"learning_rate": 1.0563119197063934e-06,
"logits/chosen": 0.23827771842479706,
"logits/rejected": 0.2663131356239319,
"logps/chosen": -612.7750244140625,
"logps/rejected": -685.60107421875,
"loss": 1.7109,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.19161880016326904,
"rewards/margins": 0.26392242312431335,
"rewards/rejected": -0.4555412232875824,
"step": 340
},
{
"epoch": 0.75,
"learning_rate": 9.081655850224449e-07,
"logits/chosen": 0.19827114045619965,
"logits/rejected": 0.2343660295009613,
"logps/chosen": -628.5892333984375,
"logps/rejected": -699.3311767578125,
"loss": 1.6981,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.23514249920845032,
"rewards/margins": 0.30311545729637146,
"rewards/rejected": -0.5382579565048218,
"step": 350
},
{
"epoch": 0.77,
"learning_rate": 7.688792132653111e-07,
"logits/chosen": 0.19120459258556366,
"logits/rejected": 0.2861759066581726,
"logps/chosen": -659.7528076171875,
"logps/rejected": -748.490234375,
"loss": 1.6967,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.19031907618045807,
"rewards/margins": 0.34352895617485046,
"rewards/rejected": -0.533847987651825,
"step": 360
},
{
"epoch": 0.79,
"learning_rate": 6.392280559802341e-07,
"logits/chosen": 0.2406836450099945,
"logits/rejected": 0.23908407986164093,
"logps/chosen": -658.35400390625,
"logps/rejected": -720.8883666992188,
"loss": 1.7368,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.23702308535575867,
"rewards/margins": 0.24957367777824402,
"rewards/rejected": -0.48659682273864746,
"step": 370
},
{
"epoch": 0.81,
"learning_rate": 5.199337362431792e-07,
"logits/chosen": 0.26719361543655396,
"logits/rejected": 0.1743316501379013,
"logps/chosen": -621.3897094726562,
"logps/rejected": -680.0,
"loss": 1.7425,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.22942551970481873,
"rewards/margins": 0.26667481660842896,
"rewards/rejected": -0.49610036611557007,
"step": 380
},
{
"epoch": 0.83,
"learning_rate": 4.1166023219176176e-07,
"logits/chosen": 0.21561181545257568,
"logits/rejected": 0.286629855632782,
"logps/chosen": -654.0867919921875,
"logps/rejected": -668.467529296875,
"loss": 1.6798,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.21663355827331543,
"rewards/margins": 0.2600599527359009,
"rewards/rejected": -0.4766935408115387,
"step": 390
},
{
"epoch": 0.85,
"learning_rate": 3.150101814011136e-07,
"logits/chosen": 0.16323356330394745,
"logits/rejected": 0.21500280499458313,
"logps/chosen": -600.4713134765625,
"logps/rejected": -730.5057983398438,
"loss": 1.7084,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.19050315022468567,
"rewards/margins": 0.28324562311172485,
"rewards/rejected": -0.47374874353408813,
"step": 400
},
{
"epoch": 0.87,
"learning_rate": 2.3052152667409289e-07,
"logits/chosen": 0.1962326616048813,
"logits/rejected": 0.22506949305534363,
"logps/chosen": -614.2760009765625,
"logps/rejected": -675.3383178710938,
"loss": 1.7679,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.1459427773952484,
"rewards/margins": 0.3252793252468109,
"rewards/rejected": -0.4712221026420593,
"step": 410
},
{
"epoch": 0.9,
"learning_rate": 1.5866452191498488e-07,
"logits/chosen": 0.20015636086463928,
"logits/rejected": 0.25162121653556824,
"logps/chosen": -651.9236450195312,
"logps/rejected": -707.2882080078125,
"loss": 1.7514,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.23218846321105957,
"rewards/margins": 0.2290785312652588,
"rewards/rejected": -0.46126699447631836,
"step": 420
},
{
"epoch": 0.92,
"learning_rate": 9.983911475163727e-08,
"logits/chosen": 0.16698592901229858,
"logits/rejected": 0.2591376304626465,
"logps/chosen": -590.045166015625,
"logps/rejected": -642.6705322265625,
"loss": 1.8093,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.17988340556621552,
"rewards/margins": 0.23005299270153046,
"rewards/rejected": -0.4099363684654236,
"step": 430
},
{
"epoch": 0.94,
"learning_rate": 5.437272047405712e-08,
"logits/chosen": 0.1858983337879181,
"logits/rejected": 0.3158418536186218,
"logps/chosen": -559.8682250976562,
"logps/rejected": -648.7040405273438,
"loss": 1.7686,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.19649046659469604,
"rewards/margins": 0.26454511284828186,
"rewards/rejected": -0.4610355794429779,
"step": 440
},
{
"epoch": 0.96,
"learning_rate": 2.251839967945535e-08,
"logits/chosen": 0.13786078989505768,
"logits/rejected": 0.2333669662475586,
"logps/chosen": -645.2703857421875,
"logps/rejected": -707.0418090820312,
"loss": 1.6172,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.17300908267498016,
"rewards/margins": 0.3292023241519928,
"rewards/rejected": -0.5022113919258118,
"step": 450
},
{
"epoch": 0.98,
"learning_rate": 4.453449766758933e-09,
"logits/chosen": 0.1742466688156128,
"logits/rejected": 0.2268284559249878,
"logps/chosen": -576.7985229492188,
"logps/rejected": -652.7803344726562,
"loss": 1.7297,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.24071533977985382,
"rewards/margins": 0.18981412053108215,
"rewards/rejected": -0.4305294454097748,
"step": 460
},
{
"epoch": 1.0,
"step": 468,
"total_flos": 0.0,
"train_loss": 1.8394590188295414,
"train_runtime": 15861.5475,
"train_samples_per_second": 1.891,
"train_steps_per_second": 0.03
}
],
"logging_steps": 10,
"max_steps": 468,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}