|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9959925193694897, |
|
"eval_steps": 400, |
|
"global_step": 233, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 0.4608515202999115, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.7747037410736084, |
|
"logits/rejected": -1.6486629247665405, |
|
"logps/chosen": -247.47836303710938, |
|
"logps/ref_chosen": -247.4757537841797, |
|
"logps/ref_rejected": -250.2177734375, |
|
"logps/rejected": -250.17874145507812, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -2.605724148452282e-05, |
|
"rewards/margins": -0.00041639525443315506, |
|
"rewards/rejected": 0.0003903379547409713, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 0.426495224237442, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.7335236072540283, |
|
"logits/rejected": -1.6989978551864624, |
|
"logps/chosen": -222.6909637451172, |
|
"logps/ref_chosen": -222.6491241455078, |
|
"logps/ref_rejected": -223.95663452148438, |
|
"logps/rejected": -223.9930877685547, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00041838129982352257, |
|
"rewards/margins": -5.400222653406672e-05, |
|
"rewards/rejected": -0.00036437893868424, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 0.4453659653663635, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.9023773670196533, |
|
"logits/rejected": -1.789849042892456, |
|
"logps/chosen": -218.5724334716797, |
|
"logps/ref_chosen": -218.7084503173828, |
|
"logps/ref_rejected": -224.755615234375, |
|
"logps/rejected": -224.6824493408203, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0013600520323961973, |
|
"rewards/margins": 0.0006284656701609492, |
|
"rewards/rejected": 0.000731586420442909, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 0.5101017951965332, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.7127611637115479, |
|
"logits/rejected": -1.6293315887451172, |
|
"logps/chosen": -226.1074676513672, |
|
"logps/ref_chosen": -226.7457275390625, |
|
"logps/ref_rejected": -235.77908325195312, |
|
"logps/rejected": -235.2657928466797, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.006382433231920004, |
|
"rewards/margins": 0.0012494683032855392, |
|
"rewards/rejected": 0.005132964812219143, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 0.4738335609436035, |
|
"learning_rate": 4.999717571181741e-07, |
|
"logits/chosen": -1.6099249124526978, |
|
"logits/rejected": -1.5539109706878662, |
|
"logps/chosen": -229.36843872070312, |
|
"logps/ref_chosen": -230.34494018554688, |
|
"logps/ref_rejected": -231.64236450195312, |
|
"logps/rejected": -230.74813842773438, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.009765096008777618, |
|
"rewards/margins": 0.000822968955617398, |
|
"rewards/rejected": 0.008942126296460629, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 0.4367460608482361, |
|
"learning_rate": 4.98983926127519e-07, |
|
"logits/chosen": -1.6448577642440796, |
|
"logits/rejected": -1.560329794883728, |
|
"logps/chosen": -239.9384002685547, |
|
"logps/ref_chosen": -241.2040557861328, |
|
"logps/ref_rejected": -253.18862915039062, |
|
"logps/rejected": -251.95547485351562, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.012656150385737419, |
|
"rewards/margins": 0.00032438611378893256, |
|
"rewards/rejected": 0.012331764213740826, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 0.5036317706108093, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": -1.65009343624115, |
|
"logits/rejected": -1.6685165166854858, |
|
"logps/chosen": -240.6787109375, |
|
"logps/ref_chosen": -242.33291625976562, |
|
"logps/ref_rejected": -237.6911163330078, |
|
"logps/rejected": -236.1189422607422, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.016541726887226105, |
|
"rewards/margins": 0.0008201012387871742, |
|
"rewards/rejected": 0.015721624717116356, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 0.5212914347648621, |
|
"learning_rate": 4.928044706128802e-07, |
|
"logits/chosen": -1.6572792530059814, |
|
"logits/rejected": -1.6342990398406982, |
|
"logps/chosen": -224.078857421875, |
|
"logps/ref_chosen": -226.43637084960938, |
|
"logps/ref_rejected": -224.00546264648438, |
|
"logps/rejected": -221.7003173828125, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.02357516996562481, |
|
"rewards/margins": 0.0005238516023382545, |
|
"rewards/rejected": 0.023051317781209946, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 0.5110143423080444, |
|
"learning_rate": 4.876477354446189e-07, |
|
"logits/chosen": -1.4905364513397217, |
|
"logits/rejected": -1.3957011699676514, |
|
"logps/chosen": -216.25308227539062, |
|
"logps/ref_chosen": -219.16494750976562, |
|
"logps/ref_rejected": -227.38040161132812, |
|
"logps/rejected": -224.87564086914062, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.02911846712231636, |
|
"rewards/margins": 0.004071122966706753, |
|
"rewards/rejected": 0.025047341361641884, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 0.48523762822151184, |
|
"learning_rate": 4.811492353977365e-07, |
|
"logits/chosen": -1.7010364532470703, |
|
"logits/rejected": -1.6736198663711548, |
|
"logps/chosen": -218.8837127685547, |
|
"logps/ref_chosen": -221.23171997070312, |
|
"logps/ref_rejected": -223.6177215576172, |
|
"logps/rejected": -221.6636199951172, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.023480093106627464, |
|
"rewards/margins": 0.0039388458244502544, |
|
"rewards/rejected": 0.019541248679161072, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 0.4816797971725464, |
|
"learning_rate": 4.7334566116112327e-07, |
|
"logits/chosen": -1.62349534034729, |
|
"logits/rejected": -1.5281016826629639, |
|
"logps/chosen": -237.206787109375, |
|
"logps/ref_chosen": -239.38412475585938, |
|
"logps/ref_rejected": -245.71304321289062, |
|
"logps/rejected": -244.2113800048828, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021773329004645348, |
|
"rewards/margins": 0.006756873335689306, |
|
"rewards/rejected": 0.015016456134617329, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 0.5273976922035217, |
|
"learning_rate": 4.6428107190419983e-07, |
|
"logits/chosen": -1.6468950510025024, |
|
"logits/rejected": -1.599461317062378, |
|
"logps/chosen": -228.3268585205078, |
|
"logps/ref_chosen": -231.1789093017578, |
|
"logps/ref_rejected": -231.9095001220703, |
|
"logps/rejected": -229.9440460205078, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.028520625084638596, |
|
"rewards/margins": 0.008865959011018276, |
|
"rewards/rejected": 0.019654670730233192, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 0.47698166966438293, |
|
"learning_rate": 4.540066465177783e-07, |
|
"logits/chosen": -1.7030376195907593, |
|
"logits/rejected": -1.7270011901855469, |
|
"logps/chosen": -218.37466430664062, |
|
"logps/ref_chosen": -222.1732635498047, |
|
"logps/ref_rejected": -221.90371704101562, |
|
"logps/rejected": -219.0262451171875, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03798612207174301, |
|
"rewards/margins": 0.009211419150233269, |
|
"rewards/rejected": 0.028774702921509743, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 0.4908115863800049, |
|
"learning_rate": 4.425803946568032e-07, |
|
"logits/chosen": -1.701042890548706, |
|
"logits/rejected": -1.642853021621704, |
|
"logps/chosen": -237.1160430908203, |
|
"logps/ref_chosen": -241.13235473632812, |
|
"logps/ref_rejected": -247.3893585205078, |
|
"logps/rejected": -243.56692504882812, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.040162790566682816, |
|
"rewards/margins": 0.0019384495681151748, |
|
"rewards/rejected": 0.038224343210458755, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 0.48811107873916626, |
|
"learning_rate": 4.300668292164329e-07, |
|
"logits/chosen": -1.6175544261932373, |
|
"logits/rejected": -1.6155774593353271, |
|
"logps/chosen": -223.8777618408203, |
|
"logps/ref_chosen": -228.91860961914062, |
|
"logps/ref_rejected": -227.78170776367188, |
|
"logps/rejected": -223.22732543945312, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.05040856450796127, |
|
"rewards/margins": 0.004864625167101622, |
|
"rewards/rejected": 0.04554395005106926, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 0.5498376488685608, |
|
"learning_rate": 4.165366020906683e-07, |
|
"logits/chosen": -1.721421480178833, |
|
"logits/rejected": -1.6703542470932007, |
|
"logps/chosen": -220.573486328125, |
|
"logps/ref_chosen": -226.90060424804688, |
|
"logps/ref_rejected": -232.0827178955078, |
|
"logps/rejected": -227.0341339111328, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06327112019062042, |
|
"rewards/margins": 0.012785114347934723, |
|
"rewards/rejected": 0.0504860058426857, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 0.5343174338340759, |
|
"learning_rate": 4.0206610527004607e-07, |
|
"logits/chosen": -1.630051612854004, |
|
"logits/rejected": -1.571542739868164, |
|
"logps/chosen": -231.68496704101562, |
|
"logps/ref_chosen": -237.4697723388672, |
|
"logps/ref_rejected": -240.751953125, |
|
"logps/rejected": -236.31600952148438, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05784807354211807, |
|
"rewards/margins": 0.013488592579960823, |
|
"rewards/rejected": 0.0443594828248024, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 0.5112692713737488, |
|
"learning_rate": 3.867370395306068e-07, |
|
"logits/chosen": -1.7595088481903076, |
|
"logits/rejected": -1.7580636739730835, |
|
"logps/chosen": -211.63906860351562, |
|
"logps/ref_chosen": -217.63436889648438, |
|
"logps/ref_rejected": -222.6137237548828, |
|
"logps/rejected": -217.2650909423828, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05995314195752144, |
|
"rewards/margins": 0.00646712351590395, |
|
"rewards/rejected": 0.053486019372940063, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 0.4654058516025543, |
|
"learning_rate": 3.7063595314933156e-07, |
|
"logits/chosen": -1.8619199991226196, |
|
"logits/rejected": -1.786892294883728, |
|
"logps/chosen": -208.5725555419922, |
|
"logps/ref_chosen": -213.7164306640625, |
|
"logps/ref_rejected": -228.556396484375, |
|
"logps/rejected": -224.4815216064453, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.051438819617033005, |
|
"rewards/margins": 0.010690188966691494, |
|
"rewards/rejected": 0.04074862599372864, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 0.5265087485313416, |
|
"learning_rate": 3.5385375325047163e-07, |
|
"logits/chosen": -1.6727230548858643, |
|
"logits/rejected": -1.677062749862671, |
|
"logps/chosen": -239.5093994140625, |
|
"logps/ref_chosen": -245.71194458007812, |
|
"logps/ref_rejected": -240.1134490966797, |
|
"logps/rejected": -235.6671142578125, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06202547624707222, |
|
"rewards/margins": 0.01756184920668602, |
|
"rewards/rejected": 0.0444636233150959, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 0.53775554895401, |
|
"learning_rate": 3.36485192541719e-07, |
|
"logits/chosen": -1.8463099002838135, |
|
"logits/rejected": -1.7264705896377563, |
|
"logps/chosen": -224.50320434570312, |
|
"logps/ref_chosen": -232.00527954101562, |
|
"logps/ref_rejected": -232.0154266357422, |
|
"logps/rejected": -225.75454711914062, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0750209242105484, |
|
"rewards/margins": 0.012411920353770256, |
|
"rewards/rejected": 0.062609001994133, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 0.5438077449798584, |
|
"learning_rate": 3.186283343381213e-07, |
|
"logits/chosen": -1.7997539043426514, |
|
"logits/rejected": -1.7138378620147705, |
|
"logps/chosen": -220.4825897216797, |
|
"logps/ref_chosen": -229.9724578857422, |
|
"logps/ref_rejected": -238.1800079345703, |
|
"logps/rejected": -230.29736328125, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.09489865601062775, |
|
"rewards/margins": 0.016072329133749008, |
|
"rewards/rejected": 0.07882632315158844, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 0.5453912019729614, |
|
"learning_rate": 3.003839988942255e-07, |
|
"logits/chosen": -1.8438644409179688, |
|
"logits/rejected": -1.7028881311416626, |
|
"logps/chosen": -203.79205322265625, |
|
"logps/ref_chosen": -214.1478729248047, |
|
"logps/ref_rejected": -226.24618530273438, |
|
"logps/rejected": -217.4800567626953, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1035580188035965, |
|
"rewards/margins": 0.015896398574113846, |
|
"rewards/rejected": 0.08766160905361176, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 0.5030398964881897, |
|
"learning_rate": 2.8185519417047623e-07, |
|
"logits/chosen": -1.8514922857284546, |
|
"logits/rejected": -1.7740070819854736, |
|
"logps/chosen": -214.818359375, |
|
"logps/ref_chosen": -227.9495086669922, |
|
"logps/ref_rejected": -230.5752410888672, |
|
"logps/rejected": -218.9449005126953, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.13131138682365417, |
|
"rewards/margins": 0.015008069574832916, |
|
"rewards/rejected": 0.11630330979824066, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 0.5339066982269287, |
|
"learning_rate": 2.631465342477719e-07, |
|
"logits/chosen": -1.9007892608642578, |
|
"logits/rejected": -1.8334102630615234, |
|
"logps/chosen": -218.14743041992188, |
|
"logps/ref_chosen": -232.6212158203125, |
|
"logps/ref_rejected": -234.5932159423828, |
|
"logps/rejected": -222.1468505859375, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.1447378695011139, |
|
"rewards/margins": 0.020274382084608078, |
|
"rewards/rejected": 0.12446349859237671, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 0.5313855409622192, |
|
"learning_rate": 2.44363648673827e-07, |
|
"logits/chosen": -1.7636210918426514, |
|
"logits/rejected": -1.7406389713287354, |
|
"logps/chosen": -211.9698944091797, |
|
"logps/ref_chosen": -226.790771484375, |
|
"logps/ref_rejected": -231.8648223876953, |
|
"logps/rejected": -219.543212890625, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.1482090801000595, |
|
"rewards/margins": 0.024992961436510086, |
|
"rewards/rejected": 0.12321610748767853, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 0.5537051558494568, |
|
"learning_rate": 2.2561258607618294e-07, |
|
"logits/chosen": -1.8008477687835693, |
|
"logits/rejected": -1.8080832958221436, |
|
"logps/chosen": -234.68893432617188, |
|
"logps/ref_chosen": -247.26119995117188, |
|
"logps/ref_rejected": -241.82345581054688, |
|
"logps/rejected": -231.585693359375, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.12572243809700012, |
|
"rewards/margins": 0.023345012217760086, |
|
"rewards/rejected": 0.10237739980220795, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 0.5528976321220398, |
|
"learning_rate": 2.069992154090854e-07, |
|
"logits/chosen": -1.775397539138794, |
|
"logits/rejected": -1.6931631565093994, |
|
"logps/chosen": -219.74072265625, |
|
"logps/ref_chosen": -230.71826171875, |
|
"logps/ref_rejected": -227.7001953125, |
|
"logps/rejected": -218.38241577148438, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.10977540910243988, |
|
"rewards/margins": 0.01659761555492878, |
|
"rewards/rejected": 0.09317778795957565, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 0.5473525524139404, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": -1.7711913585662842, |
|
"logits/rejected": -1.7026926279067993, |
|
"logps/chosen": -195.3854217529297, |
|
"logps/ref_chosen": -208.07254028320312, |
|
"logps/ref_rejected": -210.4279022216797, |
|
"logps/rejected": -199.79165649414062, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.12687113881111145, |
|
"rewards/margins": 0.020508771762251854, |
|
"rewards/rejected": 0.10636236518621445, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 0.5966719388961792, |
|
"learning_rate": 1.7060454527421686e-07, |
|
"logits/chosen": -1.8688771724700928, |
|
"logits/rejected": -1.810694932937622, |
|
"logps/chosen": -211.9062042236328, |
|
"logps/ref_chosen": -224.8968505859375, |
|
"logps/ref_rejected": -226.1548309326172, |
|
"logps/rejected": -215.7084503173828, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.12990659475326538, |
|
"rewards/margins": 0.02544253133237362, |
|
"rewards/rejected": 0.10446406900882721, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 0.5334843993186951, |
|
"learning_rate": 1.5302873099680374e-07, |
|
"logits/chosen": -1.786595344543457, |
|
"logits/rejected": -1.7971456050872803, |
|
"logps/chosen": -225.0083465576172, |
|
"logps/ref_chosen": -237.4626922607422, |
|
"logps/ref_rejected": -234.39547729492188, |
|
"logps/rejected": -223.2943572998047, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.12454362213611603, |
|
"rewards/margins": 0.013532285578548908, |
|
"rewards/rejected": 0.1110113263130188, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 0.5639063715934753, |
|
"learning_rate": 1.360004188562841e-07, |
|
"logits/chosen": -2.0527145862579346, |
|
"logits/rejected": -1.9811140298843384, |
|
"logps/chosen": -217.0570068359375, |
|
"logps/ref_chosen": -231.03369140625, |
|
"logps/ref_rejected": -232.6383819580078, |
|
"logps/rejected": -220.0625457763672, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.1397666186094284, |
|
"rewards/margins": 0.014008410274982452, |
|
"rewards/rejected": 0.12575821578502655, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 0.5417853593826294, |
|
"learning_rate": 1.1961575111603586e-07, |
|
"logits/chosen": -1.8371235132217407, |
|
"logits/rejected": -1.7954612970352173, |
|
"logps/chosen": -220.7694854736328, |
|
"logps/ref_chosen": -234.5041046142578, |
|
"logps/ref_rejected": -235.61181640625, |
|
"logps/rejected": -224.56640625, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.1373465657234192, |
|
"rewards/margins": 0.026892542839050293, |
|
"rewards/rejected": 0.1104540079832077, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 0.565830409526825, |
|
"learning_rate": 1.0396723600754143e-07, |
|
"logits/chosen": -1.8288425207138062, |
|
"logits/rejected": -1.83499276638031, |
|
"logps/chosen": -213.2861785888672, |
|
"logps/ref_chosen": -227.1809844970703, |
|
"logps/ref_rejected": -230.8953094482422, |
|
"logps/rejected": -218.4414520263672, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.13894793391227722, |
|
"rewards/margins": 0.014409348368644714, |
|
"rewards/rejected": 0.12453857809305191, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 0.5855058431625366, |
|
"learning_rate": 8.914322542666822e-08, |
|
"logits/chosen": -1.8145122528076172, |
|
"logits/rejected": -1.7646887302398682, |
|
"logps/chosen": -212.070068359375, |
|
"logps/ref_chosen": -224.17794799804688, |
|
"logps/ref_rejected": -225.526123046875, |
|
"logps/rejected": -214.7656707763672, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.12107895314693451, |
|
"rewards/margins": 0.013474419713020325, |
|
"rewards/rejected": 0.10760452598333359, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 0.6223751902580261, |
|
"learning_rate": 7.522741609672193e-08, |
|
"logits/chosen": -1.8675405979156494, |
|
"logits/rejected": -1.8476943969726562, |
|
"logps/chosen": -216.3776092529297, |
|
"logps/ref_chosen": -230.77182006835938, |
|
"logps/ref_rejected": -227.00619506835938, |
|
"logps/rejected": -214.32931518554688, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.1439422070980072, |
|
"rewards/margins": 0.017173700034618378, |
|
"rewards/rejected": 0.12676851451396942, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 0.5778200030326843, |
|
"learning_rate": 6.229837701471644e-08, |
|
"logits/chosen": -1.9124794006347656, |
|
"logits/rejected": -1.8135532140731812, |
|
"logps/chosen": -216.97702026367188, |
|
"logps/ref_chosen": -229.8362274169922, |
|
"logps/ref_rejected": -233.65390014648438, |
|
"logps/rejected": -222.93417358398438, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.1285921037197113, |
|
"rewards/margins": 0.021394768729805946, |
|
"rewards/rejected": 0.10719730705022812, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 0.5558175444602966, |
|
"learning_rate": 5.0429105848910996e-08, |
|
"logits/chosen": -1.9621855020523071, |
|
"logits/rejected": -1.9175077676773071, |
|
"logps/chosen": -215.39450073242188, |
|
"logps/ref_chosen": -229.72836303710938, |
|
"logps/ref_rejected": -233.65237426757812, |
|
"logps/rejected": -222.21798706054688, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.14333853125572205, |
|
"rewards/margins": 0.028994807973504066, |
|
"rewards/rejected": 0.11434372514486313, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 0.5308636426925659, |
|
"learning_rate": 3.968661679220467e-08, |
|
"logits/chosen": -1.971208930015564, |
|
"logits/rejected": -1.9112732410430908, |
|
"logps/chosen": -210.79598999023438, |
|
"logps/ref_chosen": -224.2023468017578, |
|
"logps/ref_rejected": -224.3248748779297, |
|
"logps/rejected": -212.8175811767578, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.1340634524822235, |
|
"rewards/margins": 0.018990488722920418, |
|
"rewards/rejected": 0.11507296562194824, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 0.615912675857544, |
|
"learning_rate": 3.013156219837776e-08, |
|
"logits/chosen": -1.7899879217147827, |
|
"logits/rejected": -1.6696176528930664, |
|
"logps/chosen": -215.92288208007812, |
|
"logps/ref_chosen": -228.88381958007812, |
|
"logps/ref_rejected": -231.0583953857422, |
|
"logps/rejected": -220.5959930419922, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.12960924208164215, |
|
"rewards/margins": 0.024985069409012794, |
|
"rewards/rejected": 0.1046241745352745, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 0.590220034122467, |
|
"learning_rate": 2.1817890137430932e-08, |
|
"logits/chosen": -1.81471848487854, |
|
"logits/rejected": -1.714023232460022, |
|
"logps/chosen": -205.69888305664062, |
|
"logps/ref_chosen": -221.30752563476562, |
|
"logps/ref_rejected": -224.98486328125, |
|
"logps/rejected": -211.78884887695312, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.15608620643615723, |
|
"rewards/margins": 0.024126073345541954, |
|
"rewards/rejected": 0.13196012377738953, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 0.5369106531143188, |
|
"learning_rate": 1.479253980347392e-08, |
|
"logits/chosen": -1.8037662506103516, |
|
"logits/rejected": -1.7787643671035767, |
|
"logps/chosen": -225.9608612060547, |
|
"logps/ref_chosen": -241.4657440185547, |
|
"logps/ref_rejected": -241.3707733154297, |
|
"logps/rejected": -228.4087371826172, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.15504886209964752, |
|
"rewards/margins": 0.025428583845496178, |
|
"rewards/rejected": 0.1296202689409256, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 0.5737273097038269, |
|
"learning_rate": 9.095176494896661e-09, |
|
"logits/chosen": -1.8023388385772705, |
|
"logits/rejected": -1.7160924673080444, |
|
"logps/chosen": -218.32034301757812, |
|
"logps/ref_chosen": -231.6717071533203, |
|
"logps/ref_rejected": -236.741943359375, |
|
"logps/rejected": -225.2128448486328, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.13351376354694366, |
|
"rewards/margins": 0.018222931772470474, |
|
"rewards/rejected": 0.11529083549976349, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 0.6087775826454163, |
|
"learning_rate": 4.757967663132689e-09, |
|
"logits/chosen": -1.833620309829712, |
|
"logits/rejected": -1.7870299816131592, |
|
"logps/chosen": -221.86032104492188, |
|
"logps/ref_chosen": -236.0878448486328, |
|
"logps/ref_rejected": -230.54141235351562, |
|
"logps/rejected": -218.8464813232422, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.14227530360221863, |
|
"rewards/margins": 0.025325754657387733, |
|
"rewards/rejected": 0.11694953590631485, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 0.6274195909500122, |
|
"learning_rate": 1.8054012944479224e-09, |
|
"logits/chosen": -1.7650978565216064, |
|
"logits/rejected": -1.7383601665496826, |
|
"logps/chosen": -231.64111328125, |
|
"logps/ref_chosen": -244.44155883789062, |
|
"logps/ref_rejected": -240.8953094482422, |
|
"logps/rejected": -230.3839874267578, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.12800416350364685, |
|
"rewards/margins": 0.022890925407409668, |
|
"rewards/rejected": 0.10511324554681778, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 0.5350868105888367, |
|
"learning_rate": 2.541476501764228e-10, |
|
"logits/chosen": -1.8503191471099854, |
|
"logits/rejected": -1.878313660621643, |
|
"logps/chosen": -206.16665649414062, |
|
"logps/ref_chosen": -219.6629638671875, |
|
"logps/ref_rejected": -212.42172241210938, |
|
"logps/rejected": -200.54551696777344, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.13496311008930206, |
|
"rewards/margins": 0.016201000660657883, |
|
"rewards/rejected": 0.11876209825277328, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9959925193694897, |
|
"step": 233, |
|
"total_flos": 0.0, |
|
"train_loss": 0.49642937480124283, |
|
"train_runtime": 16410.2083, |
|
"train_samples_per_second": 3.649, |
|
"train_steps_per_second": 0.014 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 233, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|