|
{ |
|
"best_metric": 0.8629826903343201, |
|
"best_model_checkpoint": "saves/Mistral-7B-Instruct-v0.2/lora/orpo-salt/checkpoint-1500", |
|
"epoch": 2.9969690846635686, |
|
"eval_steps": 500, |
|
"global_step": 1854, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01616488179430188, |
|
"grad_norm": 16.64879608154297, |
|
"learning_rate": 4.999648198770648e-06, |
|
"logits/chosen": -2.4989278316497803, |
|
"logits/rejected": -2.5208303928375244, |
|
"logps/chosen": -1.9139716625213623, |
|
"logps/rejected": -3.1082823276519775, |
|
"loss": 1.9977, |
|
"odds_ratio_loss": 0.8370735049247742, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.19139717519283295, |
|
"rewards/margins": 0.1194310411810875, |
|
"rewards/rejected": -0.31082823872566223, |
|
"sft_loss": 1.9139716625213623, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03232976358860376, |
|
"grad_norm": 13.894062042236328, |
|
"learning_rate": 4.998578646361359e-06, |
|
"logits/chosen": -2.5156219005584717, |
|
"logits/rejected": -2.51640248298645, |
|
"logps/chosen": -1.635488748550415, |
|
"logps/rejected": -2.132800817489624, |
|
"loss": 1.7095, |
|
"odds_ratio_loss": 0.7404953241348267, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.16354887187480927, |
|
"rewards/margins": 0.04973122477531433, |
|
"rewards/rejected": -0.2132801115512848, |
|
"sft_loss": 1.635488748550415, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04849464538290564, |
|
"grad_norm": 23.089773178100586, |
|
"learning_rate": 4.996791614004449e-06, |
|
"logits/chosen": -2.518998861312866, |
|
"logits/rejected": -2.544835090637207, |
|
"logps/chosen": -1.6531565189361572, |
|
"logps/rejected": -2.541318893432617, |
|
"loss": 1.7385, |
|
"odds_ratio_loss": 0.8539272546768188, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.165315642952919, |
|
"rewards/margins": 0.08881621062755585, |
|
"rewards/rejected": -0.25413185358047485, |
|
"sft_loss": 1.6531565189361572, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06465952717720752, |
|
"grad_norm": 13.833389282226562, |
|
"learning_rate": 4.994287614855618e-06, |
|
"logits/chosen": -2.518852472305298, |
|
"logits/rejected": -2.551032066345215, |
|
"logps/chosen": -1.7646430730819702, |
|
"logps/rejected": -2.508850574493408, |
|
"loss": 1.8742, |
|
"odds_ratio_loss": 1.0958486795425415, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.17646430432796478, |
|
"rewards/margins": 0.07442077249288559, |
|
"rewards/rejected": -0.2508850693702698, |
|
"sft_loss": 1.7646430730819702, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0808244089715094, |
|
"grad_norm": 28.34682846069336, |
|
"learning_rate": 4.991067367951343e-06, |
|
"logits/chosen": -2.5992355346679688, |
|
"logits/rejected": -2.5891082286834717, |
|
"logps/chosen": -1.345651388168335, |
|
"logps/rejected": -2.2306911945343018, |
|
"loss": 1.4115, |
|
"odds_ratio_loss": 0.6583842039108276, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13456514477729797, |
|
"rewards/margins": 0.08850395679473877, |
|
"rewards/rejected": -0.22306910157203674, |
|
"sft_loss": 1.345651388168335, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09698929076581128, |
|
"grad_norm": 3.4724316596984863, |
|
"learning_rate": 4.987131798002389e-06, |
|
"logits/chosen": -2.539771556854248, |
|
"logits/rejected": -2.5456976890563965, |
|
"logps/chosen": -1.3674490451812744, |
|
"logps/rejected": -2.1061840057373047, |
|
"loss": 1.4542, |
|
"odds_ratio_loss": 0.8671566247940063, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1367449164390564, |
|
"rewards/margins": 0.07387349754571915, |
|
"rewards/rejected": -0.21061840653419495, |
|
"sft_loss": 1.3674490451812744, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11315417256011315, |
|
"grad_norm": 46.33675003051758, |
|
"learning_rate": 4.982482035128285e-06, |
|
"logits/chosen": -2.5208637714385986, |
|
"logits/rejected": -2.528776168823242, |
|
"logps/chosen": -1.4248360395431519, |
|
"logps/rejected": -2.067411184310913, |
|
"loss": 1.5025, |
|
"odds_ratio_loss": 0.7764666676521301, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14248362183570862, |
|
"rewards/margins": 0.06425751000642776, |
|
"rewards/rejected": -0.2067411243915558, |
|
"sft_loss": 1.4248360395431519, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12931905435441504, |
|
"grad_norm": 25.993545532226562, |
|
"learning_rate": 4.9771194145328e-06, |
|
"logits/chosen": -2.5788090229034424, |
|
"logits/rejected": -2.572688341140747, |
|
"logps/chosen": -1.0824676752090454, |
|
"logps/rejected": -1.7445621490478516, |
|
"loss": 1.1449, |
|
"odds_ratio_loss": 0.6242043972015381, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.10824675858020782, |
|
"rewards/margins": 0.06620947271585464, |
|
"rewards/rejected": -0.17445623874664307, |
|
"sft_loss": 1.0824676752090454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1454839361487169, |
|
"grad_norm": 19.184228897094727, |
|
"learning_rate": 4.971045476120532e-06, |
|
"logits/chosen": -2.5863890647888184, |
|
"logits/rejected": -2.591404914855957, |
|
"logps/chosen": -1.080370306968689, |
|
"logps/rejected": -1.753382682800293, |
|
"loss": 1.1463, |
|
"odds_ratio_loss": 0.6591774821281433, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10803703963756561, |
|
"rewards/margins": 0.06730123609304428, |
|
"rewards/rejected": -0.1753382831811905, |
|
"sft_loss": 1.080370306968689, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1616488179430188, |
|
"grad_norm": 5.7092084884643555, |
|
"learning_rate": 4.964261964054713e-06, |
|
"logits/chosen": -2.5851123332977295, |
|
"logits/rejected": -2.5928287506103516, |
|
"logps/chosen": -1.20145583152771, |
|
"logps/rejected": -1.920117735862732, |
|
"loss": 1.2771, |
|
"odds_ratio_loss": 0.7563266754150391, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12014558166265488, |
|
"rewards/margins": 0.07186620682477951, |
|
"rewards/rejected": -0.1920117884874344, |
|
"sft_loss": 1.20145583152771, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17781369973732067, |
|
"grad_norm": 4.211212635040283, |
|
"learning_rate": 4.956770826256372e-06, |
|
"logits/chosen": -2.6192798614501953, |
|
"logits/rejected": -2.6177656650543213, |
|
"logps/chosen": -1.1085783243179321, |
|
"logps/rejected": -1.4738147258758545, |
|
"loss": 1.1766, |
|
"odds_ratio_loss": 0.6805119514465332, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11085782200098038, |
|
"rewards/margins": 0.03652365505695343, |
|
"rewards/rejected": -0.1473814696073532, |
|
"sft_loss": 1.1085783243179321, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19397858153162256, |
|
"grad_norm": 3.4872381687164307, |
|
"learning_rate": 4.94857421384497e-06, |
|
"logits/chosen": -2.602118968963623, |
|
"logits/rejected": -2.6089630126953125, |
|
"logps/chosen": -1.0341213941574097, |
|
"logps/rejected": -1.5845638513565063, |
|
"loss": 1.1041, |
|
"odds_ratio_loss": 0.6995517611503601, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10341213643550873, |
|
"rewards/margins": 0.05504424497485161, |
|
"rewards/rejected": -0.15845640003681183, |
|
"sft_loss": 1.0341213941574097, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21014346332592443, |
|
"grad_norm": 5.468324661254883, |
|
"learning_rate": 4.939674480520701e-06, |
|
"logits/chosen": -2.6128063201904297, |
|
"logits/rejected": -2.6255507469177246, |
|
"logps/chosen": -0.9619969129562378, |
|
"logps/rejected": -1.390077829360962, |
|
"loss": 1.0297, |
|
"odds_ratio_loss": 0.6766607165336609, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09619969129562378, |
|
"rewards/margins": 0.04280809685587883, |
|
"rewards/rejected": -0.1390077769756317, |
|
"sft_loss": 0.9619969129562378, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2263083451202263, |
|
"grad_norm": 5.18142032623291, |
|
"learning_rate": 4.930074181888613e-06, |
|
"logits/chosen": -2.6814427375793457, |
|
"logits/rejected": -2.7020936012268066, |
|
"logps/chosen": -0.9705274701118469, |
|
"logps/rejected": -1.315450668334961, |
|
"loss": 1.0341, |
|
"odds_ratio_loss": 0.636103630065918, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09705274552106857, |
|
"rewards/margins": 0.03449232131242752, |
|
"rewards/rejected": -0.1315450817346573, |
|
"sft_loss": 0.9705274701118469, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2424732269145282, |
|
"grad_norm": 1.4752620458602905, |
|
"learning_rate": 4.91977607472475e-06, |
|
"logits/chosen": -2.704951524734497, |
|
"logits/rejected": -2.7246315479278564, |
|
"logps/chosen": -1.0248619318008423, |
|
"logps/rejected": -1.4426223039627075, |
|
"loss": 1.0895, |
|
"odds_ratio_loss": 0.6460444331169128, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10248619318008423, |
|
"rewards/margins": 0.04177603870630264, |
|
"rewards/rejected": -0.14426222443580627, |
|
"sft_loss": 1.0248619318008423, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2586381087088301, |
|
"grad_norm": 2.9540135860443115, |
|
"learning_rate": 4.908783116184534e-06, |
|
"logits/chosen": -2.671297550201416, |
|
"logits/rejected": -2.676952838897705, |
|
"logps/chosen": -0.9303582906723022, |
|
"logps/rejected": -1.28878653049469, |
|
"loss": 0.991, |
|
"odds_ratio_loss": 0.6061214208602905, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09303583949804306, |
|
"rewards/margins": 0.03584280610084534, |
|
"rewards/rejected": -0.1288786381483078, |
|
"sft_loss": 0.9303582906723022, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27480299050313195, |
|
"grad_norm": 2.913118839263916, |
|
"learning_rate": 4.897098462953598e-06, |
|
"logits/chosen": -2.7513809204101562, |
|
"logits/rejected": -2.7600345611572266, |
|
"logps/chosen": -0.8939758539199829, |
|
"logps/rejected": -1.4527159929275513, |
|
"loss": 0.9601, |
|
"odds_ratio_loss": 0.661632239818573, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08939759433269501, |
|
"rewards/margins": 0.05587399750947952, |
|
"rewards/rejected": -0.14527159929275513, |
|
"sft_loss": 0.8939758539199829, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2909678722974338, |
|
"grad_norm": 1.985352635383606, |
|
"learning_rate": 4.884725470341331e-06, |
|
"logits/chosen": -2.7102103233337402, |
|
"logits/rejected": -2.739673137664795, |
|
"logps/chosen": -0.8302527666091919, |
|
"logps/rejected": -1.2092260122299194, |
|
"loss": 0.8851, |
|
"odds_ratio_loss": 0.5487207174301147, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08302526921033859, |
|
"rewards/margins": 0.03789733722805977, |
|
"rewards/rejected": -0.12092261016368866, |
|
"sft_loss": 0.8302527666091919, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3071327540917357, |
|
"grad_norm": 8.031681060791016, |
|
"learning_rate": 4.871667691317377e-06, |
|
"logits/chosen": -2.764559745788574, |
|
"logits/rejected": -2.767064332962036, |
|
"logps/chosen": -1.0171376466751099, |
|
"logps/rejected": -1.1592780351638794, |
|
"loss": 1.0939, |
|
"odds_ratio_loss": 0.7678386569023132, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10171377658843994, |
|
"rewards/margins": 0.01421402208507061, |
|
"rewards/rejected": -0.1159278005361557, |
|
"sft_loss": 1.0171376466751099, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3232976358860376, |
|
"grad_norm": 4.448939323425293, |
|
"learning_rate": 4.857928875491392e-06, |
|
"logits/chosen": -2.750746965408325, |
|
"logits/rejected": -2.7596051692962646, |
|
"logps/chosen": -0.8164304494857788, |
|
"logps/rejected": -1.0888216495513916, |
|
"loss": 0.8794, |
|
"odds_ratio_loss": 0.6294754147529602, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08164305239915848, |
|
"rewards/margins": 0.027239132672548294, |
|
"rewards/rejected": -0.10888218879699707, |
|
"sft_loss": 0.8164304494857788, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33946251768033947, |
|
"grad_norm": 2.216554641723633, |
|
"learning_rate": 4.843512968036314e-06, |
|
"logits/chosen": -2.7625343799591064, |
|
"logits/rejected": -2.7599010467529297, |
|
"logps/chosen": -0.833400547504425, |
|
"logps/rejected": -1.0677030086517334, |
|
"loss": 0.8944, |
|
"odds_ratio_loss": 0.6096410751342773, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08334006369113922, |
|
"rewards/margins": 0.0234302319586277, |
|
"rewards/rejected": -0.10677029192447662, |
|
"sft_loss": 0.833400547504425, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35562739947464134, |
|
"grad_norm": 1.4112659692764282, |
|
"learning_rate": 4.828424108555486e-06, |
|
"logits/chosen": -2.807507276535034, |
|
"logits/rejected": -2.803765296936035, |
|
"logps/chosen": -1.0460469722747803, |
|
"logps/rejected": -1.4173492193222046, |
|
"loss": 1.1091, |
|
"odds_ratio_loss": 0.6301766037940979, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10460470616817474, |
|
"rewards/margins": 0.037130214273929596, |
|
"rewards/rejected": -0.14173491299152374, |
|
"sft_loss": 1.0460469722747803, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3717922812689432, |
|
"grad_norm": 0.9852223992347717, |
|
"learning_rate": 4.812666629893957e-06, |
|
"logits/chosen": -2.795703649520874, |
|
"logits/rejected": -2.8211073875427246, |
|
"logps/chosen": -0.891126275062561, |
|
"logps/rejected": -1.0855722427368164, |
|
"loss": 0.9626, |
|
"odds_ratio_loss": 0.7152143716812134, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08911262452602386, |
|
"rewards/margins": 0.019444596022367477, |
|
"rewards/rejected": -0.10855722427368164, |
|
"sft_loss": 0.891126275062561, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3879571630632451, |
|
"grad_norm": 2.482409954071045, |
|
"learning_rate": 4.796245056894273e-06, |
|
"logits/chosen": -2.757913112640381, |
|
"logits/rejected": -2.794553518295288, |
|
"logps/chosen": -0.9089745283126831, |
|
"logps/rejected": -1.3391778469085693, |
|
"loss": 0.9804, |
|
"odds_ratio_loss": 0.7146768569946289, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09089745581150055, |
|
"rewards/margins": 0.0430203452706337, |
|
"rewards/rejected": -0.13391780853271484, |
|
"sft_loss": 0.9089745283126831, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.404122044857547, |
|
"grad_norm": 1.3123791217803955, |
|
"learning_rate": 4.779164105097148e-06, |
|
"logits/chosen": -2.796814441680908, |
|
"logits/rejected": -2.8013055324554443, |
|
"logps/chosen": -0.8589127659797668, |
|
"logps/rejected": -1.3229057788848877, |
|
"loss": 0.9186, |
|
"odds_ratio_loss": 0.5965861082077026, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08589127659797668, |
|
"rewards/margins": 0.046399302780628204, |
|
"rewards/rejected": -0.1322905719280243, |
|
"sft_loss": 0.8589127659797668, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42028692665184886, |
|
"grad_norm": 2.171173095703125, |
|
"learning_rate": 4.761428679387373e-06, |
|
"logits/chosen": -2.790588617324829, |
|
"logits/rejected": -2.7970798015594482, |
|
"logps/chosen": -0.8536098599433899, |
|
"logps/rejected": -1.0807464122772217, |
|
"loss": 0.9168, |
|
"odds_ratio_loss": 0.6316367387771606, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08536098152399063, |
|
"rewards/margins": 0.022713668644428253, |
|
"rewards/rejected": -0.10807464271783829, |
|
"sft_loss": 0.8536098599433899, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4364518084461507, |
|
"grad_norm": 3.753523111343384, |
|
"learning_rate": 4.7430438725853515e-06, |
|
"logits/chosen": -2.7550888061523438, |
|
"logits/rejected": -2.766615629196167, |
|
"logps/chosen": -0.913661003112793, |
|
"logps/rejected": -1.41799795627594, |
|
"loss": 0.9739, |
|
"odds_ratio_loss": 0.6024969816207886, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.09136610478162766, |
|
"rewards/margins": 0.0504336841404438, |
|
"rewards/rejected": -0.14179977774620056, |
|
"sft_loss": 0.913661003112793, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4526166902404526, |
|
"grad_norm": 1.5982986688613892, |
|
"learning_rate": 4.724014963984669e-06, |
|
"logits/chosen": -2.798797130584717, |
|
"logits/rejected": -2.8145482540130615, |
|
"logps/chosen": -0.8752357363700867, |
|
"logps/rejected": -1.1694762706756592, |
|
"loss": 0.9358, |
|
"odds_ratio_loss": 0.6060217618942261, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08752357959747314, |
|
"rewards/margins": 0.029424061998724937, |
|
"rewards/rejected": -0.11694763600826263, |
|
"sft_loss": 0.8752357363700867, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4687815720347545, |
|
"grad_norm": 3.8735010623931885, |
|
"learning_rate": 4.704347417836116e-06, |
|
"logits/chosen": -2.7753589153289795, |
|
"logits/rejected": -2.829224109649658, |
|
"logps/chosen": -0.7804813385009766, |
|
"logps/rejected": -1.1957075595855713, |
|
"loss": 0.8432, |
|
"odds_ratio_loss": 0.6271591186523438, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07804813235998154, |
|
"rewards/margins": 0.04152262955904007, |
|
"rewards/rejected": -0.1195707693696022, |
|
"sft_loss": 0.7804813385009766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4849464538290564, |
|
"grad_norm": 2.0640830993652344, |
|
"learning_rate": 4.684046881778603e-06, |
|
"logits/chosen": -2.8023476600646973, |
|
"logits/rejected": -2.8235526084899902, |
|
"logps/chosen": -0.8398802876472473, |
|
"logps/rejected": -0.9978183507919312, |
|
"loss": 0.9045, |
|
"odds_ratio_loss": 0.6464654803276062, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08398802578449249, |
|
"rewards/margins": 0.015793804079294205, |
|
"rewards/rejected": -0.099781833589077, |
|
"sft_loss": 0.8398802876472473, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5011113356233583, |
|
"grad_norm": 1.626105785369873, |
|
"learning_rate": 4.663119185217409e-06, |
|
"logits/chosen": -2.796461343765259, |
|
"logits/rejected": -2.8225197792053223, |
|
"logps/chosen": -0.8273599743843079, |
|
"logps/rejected": -1.096482515335083, |
|
"loss": 0.8875, |
|
"odds_ratio_loss": 0.6016198396682739, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08273600041866302, |
|
"rewards/margins": 0.026912260800600052, |
|
"rewards/rejected": -0.10964826494455338, |
|
"sft_loss": 0.8273599743843079, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5172762174176602, |
|
"grad_norm": 1.5098748207092285, |
|
"learning_rate": 4.641570337650232e-06, |
|
"logits/chosen": -2.847539186477661, |
|
"logits/rejected": -2.85341215133667, |
|
"logps/chosen": -0.7699432969093323, |
|
"logps/rejected": -1.0820213556289673, |
|
"loss": 0.8268, |
|
"odds_ratio_loss": 0.5688191652297974, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07699433714151382, |
|
"rewards/margins": 0.03120780549943447, |
|
"rewards/rejected": -0.10820214450359344, |
|
"sft_loss": 0.7699432969093323, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.533441099211962, |
|
"grad_norm": 1.3477349281311035, |
|
"learning_rate": 4.61940652694154e-06, |
|
"logits/chosen": -2.7625374794006348, |
|
"logits/rejected": -2.8054728507995605, |
|
"logps/chosen": -0.8576439023017883, |
|
"logps/rejected": -1.2374662160873413, |
|
"loss": 0.9224, |
|
"odds_ratio_loss": 0.6476989984512329, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08576439321041107, |
|
"rewards/margins": 0.03798223286867142, |
|
"rewards/rejected": -0.12374663352966309, |
|
"sft_loss": 0.8576439023017883, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5496059810062639, |
|
"grad_norm": 2.094233274459839, |
|
"learning_rate": 4.596634117545689e-06, |
|
"logits/chosen": -2.8440895080566406, |
|
"logits/rejected": -2.8477485179901123, |
|
"logps/chosen": -0.8450831174850464, |
|
"logps/rejected": -1.1874289512634277, |
|
"loss": 0.9084, |
|
"odds_ratio_loss": 0.6333492994308472, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08450832217931747, |
|
"rewards/margins": 0.03423457592725754, |
|
"rewards/rejected": -0.11874288320541382, |
|
"sft_loss": 0.8450831174850464, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"grad_norm": 1.2610398530960083, |
|
"learning_rate": 4.573259648679335e-06, |
|
"logits/chosen": -2.8393020629882812, |
|
"logits/rejected": -2.8172850608825684, |
|
"logps/chosen": -0.8293860554695129, |
|
"logps/rejected": -1.1484854221343994, |
|
"loss": 0.8924, |
|
"odds_ratio_loss": 0.6304416060447693, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08293859660625458, |
|
"rewards/margins": 0.03190993517637253, |
|
"rewards/rejected": -0.1148485392332077, |
|
"sft_loss": 0.8293860554695129, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5819357445948676, |
|
"grad_norm": 7.934630870819092, |
|
"learning_rate": 4.549289832443663e-06, |
|
"logits/chosen": -2.8159756660461426, |
|
"logits/rejected": -2.8409628868103027, |
|
"logps/chosen": -0.885659396648407, |
|
"logps/rejected": -1.2282092571258545, |
|
"loss": 0.9498, |
|
"odds_ratio_loss": 0.641811192035675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08856594562530518, |
|
"rewards/margins": 0.03425499051809311, |
|
"rewards/rejected": -0.12282093614339828, |
|
"sft_loss": 0.885659396648407, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5981006263891695, |
|
"grad_norm": 1.7960658073425293, |
|
"learning_rate": 4.524731551896978e-06, |
|
"logits/chosen": -2.8090755939483643, |
|
"logits/rejected": -2.825777292251587, |
|
"logps/chosen": -0.7784116864204407, |
|
"logps/rejected": -0.9700002670288086, |
|
"loss": 0.8424, |
|
"odds_ratio_loss": 0.6396910548210144, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07784116268157959, |
|
"rewards/margins": 0.01915885880589485, |
|
"rewards/rejected": -0.09700002521276474, |
|
"sft_loss": 0.7784116864204407, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6142655081834714, |
|
"grad_norm": 3.268920421600342, |
|
"learning_rate": 4.4995918590781925e-06, |
|
"logits/chosen": -2.853820562362671, |
|
"logits/rejected": -2.8512935638427734, |
|
"logps/chosen": -0.8428764343261719, |
|
"logps/rejected": -1.0172072649002075, |
|
"loss": 0.9104, |
|
"odds_ratio_loss": 0.6751636266708374, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08428764343261719, |
|
"rewards/margins": 0.01743307337164879, |
|
"rewards/rejected": -0.10172072798013687, |
|
"sft_loss": 0.8428764343261719, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6304303899777733, |
|
"grad_norm": 1.0598444938659668, |
|
"learning_rate": 4.473877972981797e-06, |
|
"logits/chosen": -2.7993013858795166, |
|
"logits/rejected": -2.789777994155884, |
|
"logps/chosen": -0.8297500610351562, |
|
"logps/rejected": -1.0850985050201416, |
|
"loss": 0.8895, |
|
"odds_ratio_loss": 0.5971348881721497, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08297501504421234, |
|
"rewards/margins": 0.025534838438034058, |
|
"rewards/rejected": -0.1085098534822464, |
|
"sft_loss": 0.8297500610351562, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6465952717720752, |
|
"grad_norm": 1.9357444047927856, |
|
"learning_rate": 4.447597277484894e-06, |
|
"logits/chosen": -2.7699055671691895, |
|
"logits/rejected": -2.798750400543213, |
|
"logps/chosen": -0.7733790874481201, |
|
"logps/rejected": -0.9783531427383423, |
|
"loss": 0.8347, |
|
"odds_ratio_loss": 0.6135808825492859, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07733791321516037, |
|
"rewards/margins": 0.020497407764196396, |
|
"rewards/rejected": -0.09783531725406647, |
|
"sft_loss": 0.7733790874481201, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6627601535663771, |
|
"grad_norm": 1.4025357961654663, |
|
"learning_rate": 4.42075731922687e-06, |
|
"logits/chosen": -2.8587729930877686, |
|
"logits/rejected": -2.87328839302063, |
|
"logps/chosen": -0.9505017995834351, |
|
"logps/rejected": -1.1930662393569946, |
|
"loss": 1.0132, |
|
"odds_ratio_loss": 0.6273903250694275, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09505018591880798, |
|
"rewards/margins": 0.024256447330117226, |
|
"rewards/rejected": -0.11930663883686066, |
|
"sft_loss": 0.9505017995834351, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6789250353606789, |
|
"grad_norm": 5.174298286437988, |
|
"learning_rate": 4.3933658054423465e-06, |
|
"logits/chosen": -2.8345279693603516, |
|
"logits/rejected": -2.83827543258667, |
|
"logps/chosen": -0.80866539478302, |
|
"logps/rejected": -1.174803614616394, |
|
"loss": 0.8664, |
|
"odds_ratio_loss": 0.5777753591537476, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08086653053760529, |
|
"rewards/margins": 0.036613818258047104, |
|
"rewards/rejected": -0.11748035252094269, |
|
"sft_loss": 0.80866539478302, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6950899171549808, |
|
"grad_norm": 2.207981586456299, |
|
"learning_rate": 4.365430601748003e-06, |
|
"logits/chosen": -2.8343446254730225, |
|
"logits/rejected": -2.857731342315674, |
|
"logps/chosen": -0.9037211537361145, |
|
"logps/rejected": -1.0559289455413818, |
|
"loss": 0.9705, |
|
"odds_ratio_loss": 0.6677287817001343, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09037211537361145, |
|
"rewards/margins": 0.01522077340632677, |
|
"rewards/rejected": -0.10559289157390594, |
|
"sft_loss": 0.9037211537361145, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7112547989492827, |
|
"grad_norm": 15.49936580657959, |
|
"learning_rate": 4.336959729883925e-06, |
|
"logits/chosen": -2.8130838871002197, |
|
"logits/rejected": -2.8357608318328857, |
|
"logps/chosen": -0.8217814564704895, |
|
"logps/rejected": -0.9188777804374695, |
|
"loss": 0.8923, |
|
"odds_ratio_loss": 0.7047211527824402, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08217814564704895, |
|
"rewards/margins": 0.00970962829887867, |
|
"rewards/rejected": -0.09188777953386307, |
|
"sft_loss": 0.8217814564704895, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7274196807435845, |
|
"grad_norm": 1.7557275295257568, |
|
"learning_rate": 4.307961365410118e-06, |
|
"logits/chosen": -2.790027379989624, |
|
"logits/rejected": -2.809622049331665, |
|
"logps/chosen": -0.840091347694397, |
|
"logps/rejected": -1.0152480602264404, |
|
"loss": 0.9039, |
|
"odds_ratio_loss": 0.6380866169929504, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08400914072990417, |
|
"rewards/margins": 0.017515674233436584, |
|
"rewards/rejected": -0.10152481496334076, |
|
"sft_loss": 0.840091347694397, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7435845625378864, |
|
"grad_norm": 2.8990914821624756, |
|
"learning_rate": 4.278443835358854e-06, |
|
"logits/chosen": -2.812924861907959, |
|
"logits/rejected": -2.811110734939575, |
|
"logps/chosen": -0.8139681816101074, |
|
"logps/rejected": -1.0690581798553467, |
|
"loss": 0.8748, |
|
"odds_ratio_loss": 0.6082891225814819, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.08139681816101074, |
|
"rewards/margins": 0.025509005412459373, |
|
"rewards/rejected": -0.10690581798553467, |
|
"sft_loss": 0.8139681816101074, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7597494443321883, |
|
"grad_norm": 2.2395644187927246, |
|
"learning_rate": 4.248415615843523e-06, |
|
"logits/chosen": -2.8422694206237793, |
|
"logits/rejected": -2.850648880004883, |
|
"logps/chosen": -0.8527294993400574, |
|
"logps/rejected": -1.0392307043075562, |
|
"loss": 0.9183, |
|
"odds_ratio_loss": 0.6552284359931946, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08527294546365738, |
|
"rewards/margins": 0.01865011267364025, |
|
"rewards/rejected": -0.10392306745052338, |
|
"sft_loss": 0.8527294993400574, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7759143261264903, |
|
"grad_norm": 2.0075857639312744, |
|
"learning_rate": 4.217885329624666e-06, |
|
"logits/chosen": -2.8313276767730713, |
|
"logits/rejected": -2.8245348930358887, |
|
"logps/chosen": -0.790324330329895, |
|
"logps/rejected": -1.0767412185668945, |
|
"loss": 0.8498, |
|
"odds_ratio_loss": 0.5943514108657837, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07903242856264114, |
|
"rewards/margins": 0.02864169515669346, |
|
"rewards/rejected": -0.10767412185668945, |
|
"sft_loss": 0.790324330329895, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 1.7681854963302612, |
|
"learning_rate": 4.186861743633911e-06, |
|
"logits/chosen": -2.8171868324279785, |
|
"logits/rejected": -2.8480162620544434, |
|
"logps/chosen": -0.7983497381210327, |
|
"logps/rejected": -1.1061131954193115, |
|
"loss": 0.8646, |
|
"odds_ratio_loss": 0.6626344919204712, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.07983498275279999, |
|
"rewards/margins": 0.03077634610235691, |
|
"rewards/rejected": -0.11061131954193115, |
|
"sft_loss": 0.7983497381210327, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"grad_norm": 1.5298829078674316, |
|
"learning_rate": 4.155353766456497e-06, |
|
"logits/chosen": -2.874368190765381, |
|
"logits/rejected": -2.8658576011657715, |
|
"logps/chosen": -0.8663871884346008, |
|
"logps/rejected": -1.0296813249588013, |
|
"loss": 0.93, |
|
"odds_ratio_loss": 0.636117160320282, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08663871884346008, |
|
"rewards/margins": 0.016329411417245865, |
|
"rewards/rejected": -0.10296813398599625, |
|
"sft_loss": 0.8663871884346008, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"eval_logits/chosen": -2.836843729019165, |
|
"eval_logits/rejected": -2.8441994190216064, |
|
"eval_logps/chosen": -0.8278239965438843, |
|
"eval_logps/rejected": -1.0567275285720825, |
|
"eval_loss": 0.8927881121635437, |
|
"eval_odds_ratio_loss": 0.6496399641036987, |
|
"eval_rewards/accuracies": 0.5772727131843567, |
|
"eval_rewards/chosen": -0.08278240263462067, |
|
"eval_rewards/margins": 0.02289034053683281, |
|
"eval_rewards/rejected": -0.10567274689674377, |
|
"eval_runtime": 194.5311, |
|
"eval_samples_per_second": 5.655, |
|
"eval_sft_loss": 0.8278239965438843, |
|
"eval_steps_per_second": 2.827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8244089715093958, |
|
"grad_norm": 1.6909329891204834, |
|
"learning_rate": 4.123370445773134e-06, |
|
"logits/chosen": -2.8691649436950684, |
|
"logits/rejected": -2.8811800479888916, |
|
"logps/chosen": -0.8283156156539917, |
|
"logps/rejected": -0.9291037321090698, |
|
"loss": 0.8973, |
|
"odds_ratio_loss": 0.689969539642334, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08283156156539917, |
|
"rewards/margins": 0.01007880363613367, |
|
"rewards/rejected": -0.09291036427021027, |
|
"sft_loss": 0.8283156156539917, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8405738533036977, |
|
"grad_norm": 4.33729362487793, |
|
"learning_rate": 4.090920965761906e-06, |
|
"logits/chosen": -2.808586597442627, |
|
"logits/rejected": -2.8186278343200684, |
|
"logps/chosen": -0.8606308698654175, |
|
"logps/rejected": -1.0332623720169067, |
|
"loss": 0.9284, |
|
"odds_ratio_loss": 0.6780760884284973, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08606309443712234, |
|
"rewards/margins": 0.0172631423920393, |
|
"rewards/rejected": -0.1033262237906456, |
|
"sft_loss": 0.8606308698654175, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8567387350979996, |
|
"grad_norm": 6.002406120300293, |
|
"learning_rate": 4.058014644460991e-06, |
|
"logits/chosen": -2.833061456680298, |
|
"logits/rejected": -2.8458170890808105, |
|
"logps/chosen": -0.8242424726486206, |
|
"logps/rejected": -0.9793018102645874, |
|
"loss": 0.8862, |
|
"odds_ratio_loss": 0.6198969483375549, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.08242423832416534, |
|
"rewards/margins": 0.015505945309996605, |
|
"rewards/rejected": -0.0979301929473877, |
|
"sft_loss": 0.8242424726486206, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8729036168923014, |
|
"grad_norm": 1.998780608177185, |
|
"learning_rate": 4.024660931092939e-06, |
|
"logits/chosen": -2.81856369972229, |
|
"logits/rejected": -2.8293251991271973, |
|
"logps/chosen": -0.8208298683166504, |
|
"logps/rejected": -1.0441166162490845, |
|
"loss": 0.8828, |
|
"odds_ratio_loss": 0.6198452115058899, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08208298683166504, |
|
"rewards/margins": 0.022328665480017662, |
|
"rewards/rejected": -0.10441166162490845, |
|
"sft_loss": 0.8208298683166504, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8890684986866033, |
|
"grad_norm": 2.4577414989471436, |
|
"learning_rate": 3.990869403351272e-06, |
|
"logits/chosen": -2.8507511615753174, |
|
"logits/rejected": -2.8566970825195312, |
|
"logps/chosen": -0.8117038011550903, |
|
"logps/rejected": -1.0751911401748657, |
|
"loss": 0.8674, |
|
"odds_ratio_loss": 0.5573362112045288, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08117038011550903, |
|
"rewards/margins": 0.026348743587732315, |
|
"rewards/rejected": -0.10751912742853165, |
|
"sft_loss": 0.8117038011550903, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9052333804809052, |
|
"grad_norm": 3.4686763286590576, |
|
"learning_rate": 3.956649764650206e-06, |
|
"logits/chosen": -2.881647825241089, |
|
"logits/rejected": -2.8819093704223633, |
|
"logps/chosen": -0.840446949005127, |
|
"logps/rejected": -1.052137017250061, |
|
"loss": 0.907, |
|
"odds_ratio_loss": 0.6658841967582703, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0840446949005127, |
|
"rewards/margins": 0.02116900309920311, |
|
"rewards/rejected": -0.1052137017250061, |
|
"sft_loss": 0.840446949005127, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9213982622752072, |
|
"grad_norm": 2.2446658611297607, |
|
"learning_rate": 3.92201184133826e-06, |
|
"logits/chosen": -2.864419460296631, |
|
"logits/rejected": -2.8783581256866455, |
|
"logps/chosen": -0.7979758381843567, |
|
"logps/rejected": -1.0608371496200562, |
|
"loss": 0.858, |
|
"odds_ratio_loss": 0.5999220609664917, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07979758828878403, |
|
"rewards/margins": 0.026286140084266663, |
|
"rewards/rejected": -0.1060837134718895, |
|
"sft_loss": 0.7979758381843567, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.937563144069509, |
|
"grad_norm": 1.9976744651794434, |
|
"learning_rate": 3.886965579876572e-06, |
|
"logits/chosen": -2.900329351425171, |
|
"logits/rejected": -2.90751051902771, |
|
"logps/chosen": -0.8153482675552368, |
|
"logps/rejected": -0.9346411824226379, |
|
"loss": 0.8816, |
|
"odds_ratio_loss": 0.6620460748672485, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08153482526540756, |
|
"rewards/margins": 0.011929300613701344, |
|
"rewards/rejected": -0.09346412122249603, |
|
"sft_loss": 0.8153482675552368, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9537280258638109, |
|
"grad_norm": 1.6091820001602173, |
|
"learning_rate": 3.851521043982716e-06, |
|
"logits/chosen": -2.8917582035064697, |
|
"logits/rejected": -2.902100086212158, |
|
"logps/chosen": -0.8334836959838867, |
|
"logps/rejected": -1.004950761795044, |
|
"loss": 0.9, |
|
"odds_ratio_loss": 0.6651790738105774, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0833483636379242, |
|
"rewards/margins": 0.017146697267889977, |
|
"rewards/rejected": -0.10049506276845932, |
|
"sft_loss": 0.8334836959838867, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9698929076581128, |
|
"grad_norm": 5.672989845275879, |
|
"learning_rate": 3.81568841174086e-06, |
|
"logits/chosen": -2.861603021621704, |
|
"logits/rejected": -2.876756191253662, |
|
"logps/chosen": -0.7806357145309448, |
|
"logps/rejected": -1.1542575359344482, |
|
"loss": 0.8442, |
|
"odds_ratio_loss": 0.6360144019126892, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07806357741355896, |
|
"rewards/margins": 0.03736215457320213, |
|
"rewards/rejected": -0.1154257282614708, |
|
"sft_loss": 0.7806357145309448, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9860577894524146, |
|
"grad_norm": 1.4658279418945312, |
|
"learning_rate": 3.7794779726790664e-06, |
|
"logits/chosen": -2.845876455307007, |
|
"logits/rejected": -2.8574581146240234, |
|
"logps/chosen": -0.7789396047592163, |
|
"logps/rejected": -1.1114189624786377, |
|
"loss": 0.8409, |
|
"odds_ratio_loss": 0.6194978952407837, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07789396494626999, |
|
"rewards/margins": 0.0332479402422905, |
|
"rewards/rejected": -0.11114190518856049, |
|
"sft_loss": 0.7789396047592163, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0022226712467166, |
|
"grad_norm": 2.5747179985046387, |
|
"learning_rate": 3.7429001248146096e-06, |
|
"logits/chosen": -2.8244144916534424, |
|
"logits/rejected": -2.832597494125366, |
|
"logps/chosen": -0.7860082387924194, |
|
"logps/rejected": -1.0231492519378662, |
|
"loss": 0.8435, |
|
"odds_ratio_loss": 0.5752763748168945, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07860083132982254, |
|
"rewards/margins": 0.023714100942015648, |
|
"rewards/rejected": -0.10231492668390274, |
|
"sft_loss": 0.7860082387924194, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0183875530410185, |
|
"grad_norm": 1.24222993850708, |
|
"learning_rate": 3.7059653716681227e-06, |
|
"logits/chosen": -2.8329997062683105, |
|
"logits/rejected": -2.8287994861602783, |
|
"logps/chosen": -0.8590106964111328, |
|
"logps/rejected": -1.0588136911392212, |
|
"loss": 0.9265, |
|
"odds_ratio_loss": 0.6749905347824097, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08590107411146164, |
|
"rewards/margins": 0.01998029835522175, |
|
"rewards/rejected": -0.10588137060403824, |
|
"sft_loss": 0.8590106964111328, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0345524348353203, |
|
"grad_norm": 1.6466968059539795, |
|
"learning_rate": 3.668684319247463e-06, |
|
"logits/chosen": -2.8495888710021973, |
|
"logits/rejected": -2.872880220413208, |
|
"logps/chosen": -0.7487844824790955, |
|
"logps/rejected": -1.0430450439453125, |
|
"loss": 0.8035, |
|
"odds_ratio_loss": 0.5467280149459839, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.07487844675779343, |
|
"rewards/margins": 0.02942606247961521, |
|
"rewards/rejected": -0.10430450737476349, |
|
"sft_loss": 0.7487844824790955, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0507173166296222, |
|
"grad_norm": 1.1547085046768188, |
|
"learning_rate": 3.6310676730021373e-06, |
|
"logits/chosen": -2.8986639976501465, |
|
"logits/rejected": -2.900839328765869, |
|
"logps/chosen": -0.7881689071655273, |
|
"logps/rejected": -0.9517928957939148, |
|
"loss": 0.8509, |
|
"odds_ratio_loss": 0.6268683075904846, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07881689816713333, |
|
"rewards/margins": 0.016362406313419342, |
|
"rewards/rejected": -0.09517930448055267, |
|
"sft_loss": 0.7881689071655273, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.066882198423924, |
|
"grad_norm": 3.282292604446411, |
|
"learning_rate": 3.593126234749178e-06, |
|
"logits/chosen": -2.8645131587982178, |
|
"logits/rejected": -2.898613929748535, |
|
"logps/chosen": -0.9009162187576294, |
|
"logps/rejected": -1.1612458229064941, |
|
"loss": 0.9648, |
|
"odds_ratio_loss": 0.6383681297302246, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.09009162336587906, |
|
"rewards/margins": 0.026032963767647743, |
|
"rewards/rejected": -0.11612458527088165, |
|
"sft_loss": 0.9009162187576294, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.083047080218226, |
|
"grad_norm": 1.7910722494125366, |
|
"learning_rate": 3.554870899571343e-06, |
|
"logits/chosen": -2.8563625812530518, |
|
"logits/rejected": -2.8744523525238037, |
|
"logps/chosen": -0.8285778760910034, |
|
"logps/rejected": -1.0025149583816528, |
|
"loss": 0.8927, |
|
"odds_ratio_loss": 0.6415389776229858, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08285778015851974, |
|
"rewards/margins": 0.01739371195435524, |
|
"rewards/rejected": -0.10025149583816528, |
|
"sft_loss": 0.8285778760910034, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0992119620125278, |
|
"grad_norm": 3.5774316787719727, |
|
"learning_rate": 3.5163126526885373e-06, |
|
"logits/chosen": -2.8437960147857666, |
|
"logits/rejected": -2.870513916015625, |
|
"logps/chosen": -0.7732303142547607, |
|
"logps/rejected": -1.0101302862167358, |
|
"loss": 0.8343, |
|
"odds_ratio_loss": 0.6102721095085144, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07732303440570831, |
|
"rewards/margins": 0.023690002039074898, |
|
"rewards/rejected": -0.10101302713155746, |
|
"sft_loss": 0.7732303142547607, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1153768438068297, |
|
"grad_norm": 2.30400013923645, |
|
"learning_rate": 3.4774625663033484e-06, |
|
"logits/chosen": -2.849010467529297, |
|
"logits/rejected": -2.8660061359405518, |
|
"logps/chosen": -0.7853142619132996, |
|
"logps/rejected": -0.9644325971603394, |
|
"loss": 0.8466, |
|
"odds_ratio_loss": 0.6127563714981079, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0785314291715622, |
|
"rewards/margins": 0.017911842092871666, |
|
"rewards/rejected": -0.09644327312707901, |
|
"sft_loss": 0.7853142619132996, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"grad_norm": 1.4719704389572144, |
|
"learning_rate": 3.4383317964216067e-06, |
|
"logits/chosen": -2.8511626720428467, |
|
"logits/rejected": -2.881286382675171, |
|
"logps/chosen": -0.7790023684501648, |
|
"logps/rejected": -0.9076374173164368, |
|
"loss": 0.8484, |
|
"odds_ratio_loss": 0.6935282945632935, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0779002457857132, |
|
"rewards/margins": 0.012863497249782085, |
|
"rewards/rejected": -0.09076374769210815, |
|
"sft_loss": 0.7790023684501648, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1477066073954334, |
|
"grad_norm": 2.1927425861358643, |
|
"learning_rate": 3.398931579648877e-06, |
|
"logits/chosen": -2.8756051063537598, |
|
"logits/rejected": -2.880699872970581, |
|
"logps/chosen": -0.8047206997871399, |
|
"logps/rejected": -1.1634694337844849, |
|
"loss": 0.8667, |
|
"odds_ratio_loss": 0.6202768087387085, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08047207444906235, |
|
"rewards/margins": 0.0358748659491539, |
|
"rewards/rejected": -0.11634693294763565, |
|
"sft_loss": 0.8047206997871399, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1638714891897353, |
|
"grad_norm": 1.4328726530075073, |
|
"learning_rate": 3.359273229963813e-06, |
|
"logits/chosen": -2.8490045070648193, |
|
"logits/rejected": -2.8502037525177, |
|
"logps/chosen": -0.7575694918632507, |
|
"logps/rejected": -0.9301745295524597, |
|
"loss": 0.821, |
|
"odds_ratio_loss": 0.6343931555747986, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07575695216655731, |
|
"rewards/margins": 0.017260495573282242, |
|
"rewards/rejected": -0.09301744401454926, |
|
"sft_loss": 0.7575694918632507, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1800363709840371, |
|
"grad_norm": 1.3170576095581055, |
|
"learning_rate": 3.319368135469285e-06, |
|
"logits/chosen": -2.8658504486083984, |
|
"logits/rejected": -2.8875842094421387, |
|
"logps/chosen": -0.8195670247077942, |
|
"logps/rejected": -1.1535929441452026, |
|
"loss": 0.8841, |
|
"odds_ratio_loss": 0.6450805068016052, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08195669949054718, |
|
"rewards/margins": 0.03340259566903114, |
|
"rewards/rejected": -0.11535929143428802, |
|
"sft_loss": 0.8195670247077942, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.196201252778339, |
|
"grad_norm": 4.55858850479126, |
|
"learning_rate": 3.279227755122228e-06, |
|
"logits/chosen": -2.858372211456299, |
|
"logits/rejected": -2.860966205596924, |
|
"logps/chosen": -0.7807797193527222, |
|
"logps/rejected": -1.1492526531219482, |
|
"loss": 0.839, |
|
"odds_ratio_loss": 0.5826634764671326, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.07807797938585281, |
|
"rewards/margins": 0.03684728592634201, |
|
"rewards/rejected": -0.11492526531219482, |
|
"sft_loss": 0.7807797193527222, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2123661345726409, |
|
"grad_norm": 2.330960988998413, |
|
"learning_rate": 3.2388636154431417e-06, |
|
"logits/chosen": -2.868211507797241, |
|
"logits/rejected": -2.898150682449341, |
|
"logps/chosen": -0.8243536949157715, |
|
"logps/rejected": -1.195150375366211, |
|
"loss": 0.883, |
|
"odds_ratio_loss": 0.5866126418113708, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08243536949157715, |
|
"rewards/margins": 0.03707967326045036, |
|
"rewards/rejected": -0.11951503902673721, |
|
"sft_loss": 0.8243536949157715, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2285310163669427, |
|
"grad_norm": 2.7208411693573, |
|
"learning_rate": 3.198287307206192e-06, |
|
"logits/chosen": -2.844311237335205, |
|
"logits/rejected": -2.8444716930389404, |
|
"logps/chosen": -0.7780786752700806, |
|
"logps/rejected": -0.9966138005256653, |
|
"loss": 0.8378, |
|
"odds_ratio_loss": 0.5971704721450806, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07780785858631134, |
|
"rewards/margins": 0.021853512153029442, |
|
"rewards/rejected": -0.09966136515140533, |
|
"sft_loss": 0.7780786752700806, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2446958981612446, |
|
"grad_norm": 1.3042361736297607, |
|
"learning_rate": 3.157510482110856e-06, |
|
"logits/chosen": -2.9084322452545166, |
|
"logits/rejected": -2.905463933944702, |
|
"logps/chosen": -0.7917675971984863, |
|
"logps/rejected": -1.0798178911209106, |
|
"loss": 0.8557, |
|
"odds_ratio_loss": 0.6388932466506958, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07917676120996475, |
|
"rewards/margins": 0.02880503609776497, |
|
"rewards/rejected": -0.10798178613185883, |
|
"sft_loss": 0.7917675971984863, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2608607799555465, |
|
"grad_norm": 1.315172553062439, |
|
"learning_rate": 3.116544849436077e-06, |
|
"logits/chosen": -2.828716993331909, |
|
"logits/rejected": -2.8282887935638428, |
|
"logps/chosen": -0.8439006805419922, |
|
"logps/rejected": -1.2042268514633179, |
|
"loss": 0.9037, |
|
"odds_ratio_loss": 0.5979124307632446, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0843900591135025, |
|
"rewards/margins": 0.03603263571858406, |
|
"rewards/rejected": -0.12042269855737686, |
|
"sft_loss": 0.8439006805419922, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2770256617498483, |
|
"grad_norm": 1.611197829246521, |
|
"learning_rate": 3.0754021726778848e-06, |
|
"logits/chosen": -2.84073543548584, |
|
"logits/rejected": -2.832176685333252, |
|
"logps/chosen": -0.7603198885917664, |
|
"logps/rejected": -1.202492117881775, |
|
"loss": 0.8152, |
|
"odds_ratio_loss": 0.5489572882652283, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.07603198289871216, |
|
"rewards/margins": 0.044217221438884735, |
|
"rewards/rejected": -0.1202491968870163, |
|
"sft_loss": 0.7603198885917664, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2931905435441502, |
|
"grad_norm": 1.179275631904602, |
|
"learning_rate": 3.0340942661714463e-06, |
|
"logits/chosen": -2.877725839614868, |
|
"logits/rejected": -2.891244411468506, |
|
"logps/chosen": -0.8281265497207642, |
|
"logps/rejected": -1.0409139394760132, |
|
"loss": 0.8904, |
|
"odds_ratio_loss": 0.6231717467308044, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08281265199184418, |
|
"rewards/margins": 0.021278750151395798, |
|
"rewards/rejected": -0.10409140586853027, |
|
"sft_loss": 0.8281265497207642, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3093554253384523, |
|
"grad_norm": 2.1846208572387695, |
|
"learning_rate": 2.992632991698512e-06, |
|
"logits/chosen": -2.8389461040496826, |
|
"logits/rejected": -2.85896635055542, |
|
"logps/chosen": -0.8289766311645508, |
|
"logps/rejected": -1.0603488683700562, |
|
"loss": 0.8918, |
|
"odds_ratio_loss": 0.6286410093307495, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08289766311645508, |
|
"rewards/margins": 0.02313724346458912, |
|
"rewards/rejected": -0.10603491216897964, |
|
"sft_loss": 0.8289766311645508, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3255203071327541, |
|
"grad_norm": 1.583357334136963, |
|
"learning_rate": 2.9510302550812537e-06, |
|
"logits/chosen": -2.845541000366211, |
|
"logits/rejected": -2.8782455921173096, |
|
"logps/chosen": -0.7186457514762878, |
|
"logps/rejected": -1.0902959108352661, |
|
"loss": 0.776, |
|
"odds_ratio_loss": 0.573469340801239, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07186457514762878, |
|
"rewards/margins": 0.03716501593589783, |
|
"rewards/rejected": -0.10902959108352661, |
|
"sft_loss": 0.7186457514762878, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.341685188927056, |
|
"grad_norm": 2.841128349304199, |
|
"learning_rate": 2.9092980027634325e-06, |
|
"logits/chosen": -2.8583426475524902, |
|
"logits/rejected": -2.874774217605591, |
|
"logps/chosen": -0.7276403903961182, |
|
"logps/rejected": -1.0125164985656738, |
|
"loss": 0.788, |
|
"odds_ratio_loss": 0.6034457683563232, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07276404649019241, |
|
"rewards/margins": 0.028487607836723328, |
|
"rewards/rejected": -0.10125164687633514, |
|
"sft_loss": 0.7276403903961182, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3578500707213579, |
|
"grad_norm": 1.7055377960205078, |
|
"learning_rate": 2.867448218379927e-06, |
|
"logits/chosen": -2.8610100746154785, |
|
"logits/rejected": -2.8836147785186768, |
|
"logps/chosen": -0.8485835790634155, |
|
"logps/rejected": -1.0031511783599854, |
|
"loss": 0.9172, |
|
"odds_ratio_loss": 0.6861482858657837, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08485837280750275, |
|
"rewards/margins": 0.015456756576895714, |
|
"rewards/rejected": -0.10031511634588242, |
|
"sft_loss": 0.8485835790634155, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3740149525156597, |
|
"grad_norm": 9.629118919372559, |
|
"learning_rate": 2.825492919315559e-06, |
|
"logits/chosen": -2.8479480743408203, |
|
"logits/rejected": -2.8763227462768555, |
|
"logps/chosen": -0.8768585324287415, |
|
"logps/rejected": -0.999729335308075, |
|
"loss": 0.9437, |
|
"odds_ratio_loss": 0.668052613735199, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08768586814403534, |
|
"rewards/margins": 0.012287073768675327, |
|
"rewards/rejected": -0.0999729260802269, |
|
"sft_loss": 0.8768585324287415, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3901798343099616, |
|
"grad_norm": 2.416870594024658, |
|
"learning_rate": 2.7834441532542482e-06, |
|
"logits/chosen": -2.8881735801696777, |
|
"logits/rejected": -2.9063100814819336, |
|
"logps/chosen": -0.7879316210746765, |
|
"logps/rejected": -1.023233413696289, |
|
"loss": 0.8456, |
|
"odds_ratio_loss": 0.5766496658325195, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07879316806793213, |
|
"rewards/margins": 0.023530183359980583, |
|
"rewards/rejected": -0.10232335329055786, |
|
"sft_loss": 0.7879316210746765, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4063447161042635, |
|
"grad_norm": 1.5628215074539185, |
|
"learning_rate": 2.74131399471945e-06, |
|
"logits/chosen": -2.855931520462036, |
|
"logits/rejected": -2.8686752319335938, |
|
"logps/chosen": -0.7991023063659668, |
|
"logps/rejected": -0.9908691644668579, |
|
"loss": 0.8644, |
|
"odds_ratio_loss": 0.6528818607330322, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07991023361682892, |
|
"rewards/margins": 0.01917668618261814, |
|
"rewards/rejected": -0.09908691793680191, |
|
"sft_loss": 0.7991023063659668, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4225095978985653, |
|
"grad_norm": 2.0555615425109863, |
|
"learning_rate": 2.6991145416068947e-06, |
|
"logits/chosen": -2.846782922744751, |
|
"logits/rejected": -2.8673818111419678, |
|
"logps/chosen": -0.8078680038452148, |
|
"logps/rejected": -0.9619809985160828, |
|
"loss": 0.8714, |
|
"odds_ratio_loss": 0.6356260180473328, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0807868018746376, |
|
"rewards/margins": 0.01541130244731903, |
|
"rewards/rejected": -0.09619811177253723, |
|
"sft_loss": 0.8078680038452148, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4386744796928672, |
|
"grad_norm": 0.9378024339675903, |
|
"learning_rate": 2.6568579117106143e-06, |
|
"logits/chosen": -2.8469960689544678, |
|
"logits/rejected": -2.850614070892334, |
|
"logps/chosen": -0.7744920253753662, |
|
"logps/rejected": -1.0393074750900269, |
|
"loss": 0.8347, |
|
"odds_ratio_loss": 0.6018751859664917, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07744920998811722, |
|
"rewards/margins": 0.026481550186872482, |
|
"rewards/rejected": -0.1039307564496994, |
|
"sft_loss": 0.7744920253753662, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.454839361487169, |
|
"grad_norm": 0.9352036118507385, |
|
"learning_rate": 2.6145562392432544e-06, |
|
"logits/chosen": -2.875109910964966, |
|
"logits/rejected": -2.887655735015869, |
|
"logps/chosen": -0.8057360649108887, |
|
"logps/rejected": -0.9923427700996399, |
|
"loss": 0.8708, |
|
"odds_ratio_loss": 0.6502856016159058, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08057360351085663, |
|
"rewards/margins": 0.01866067573428154, |
|
"rewards/rejected": -0.09923428297042847, |
|
"sft_loss": 0.8057360649108887, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.471004243281471, |
|
"grad_norm": 2.6385111808776855, |
|
"learning_rate": 2.5722216713516682e-06, |
|
"logits/chosen": -2.8550915718078613, |
|
"logits/rejected": -2.8972582817077637, |
|
"logps/chosen": -0.7460139989852905, |
|
"logps/rejected": -0.9863673448562622, |
|
"loss": 0.8057, |
|
"odds_ratio_loss": 0.5972028374671936, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07460139691829681, |
|
"rewards/margins": 0.024035323411226273, |
|
"rewards/rejected": -0.09863673150539398, |
|
"sft_loss": 0.7460139989852905, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4871691250757728, |
|
"grad_norm": 1.7198817729949951, |
|
"learning_rate": 2.5298663646288064e-06, |
|
"logits/chosen": -2.8807036876678467, |
|
"logits/rejected": -2.888306140899658, |
|
"logps/chosen": -0.7764211893081665, |
|
"logps/rejected": -1.0312559604644775, |
|
"loss": 0.8377, |
|
"odds_ratio_loss": 0.6123490333557129, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07764211297035217, |
|
"rewards/margins": 0.025483474135398865, |
|
"rewards/rejected": -0.10312558710575104, |
|
"sft_loss": 0.7764211893081665, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.503334006870075, |
|
"grad_norm": 2.7615318298339844, |
|
"learning_rate": 2.487502481622879e-06, |
|
"logits/chosen": -2.8637490272521973, |
|
"logits/rejected": -2.874497652053833, |
|
"logps/chosen": -0.8163179159164429, |
|
"logps/rejected": -0.9841713905334473, |
|
"loss": 0.8791, |
|
"odds_ratio_loss": 0.6274018287658691, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08163177967071533, |
|
"rewards/margins": 0.01678534969687462, |
|
"rewards/rejected": -0.09841714054346085, |
|
"sft_loss": 0.8163179159164429, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5194988886643768, |
|
"grad_norm": 1.7173594236373901, |
|
"learning_rate": 2.4451421873448253e-06, |
|
"logits/chosen": -2.8568150997161865, |
|
"logits/rejected": -2.879917621612549, |
|
"logps/chosen": -0.8009888529777527, |
|
"logps/rejected": -0.9833795428276062, |
|
"loss": 0.8678, |
|
"odds_ratio_loss": 0.667960524559021, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08009888231754303, |
|
"rewards/margins": 0.018239066004753113, |
|
"rewards/rejected": -0.09833794832229614, |
|
"sft_loss": 0.8009888529777527, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5356637704586786, |
|
"grad_norm": 3.4001808166503906, |
|
"learning_rate": 2.40279764577506e-06, |
|
"logits/chosen": -2.885816812515259, |
|
"logits/rejected": -2.9209980964660645, |
|
"logps/chosen": -0.8259257078170776, |
|
"logps/rejected": -0.9810823202133179, |
|
"loss": 0.8903, |
|
"odds_ratio_loss": 0.6437360048294067, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08259257674217224, |
|
"rewards/margins": 0.015515660867094994, |
|
"rewards/rejected": -0.09810823202133179, |
|
"sft_loss": 0.8259257078170776, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5518286522529805, |
|
"grad_norm": 3.7369155883789062, |
|
"learning_rate": 2.3604810163705242e-06, |
|
"logits/chosen": -2.878312587738037, |
|
"logits/rejected": -2.9087862968444824, |
|
"logps/chosen": -0.7468287944793701, |
|
"logps/rejected": -0.999441921710968, |
|
"loss": 0.8033, |
|
"odds_ratio_loss": 0.5650970339775085, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07468288391828537, |
|
"rewards/margins": 0.02526130899786949, |
|
"rewards/rejected": -0.09994419664144516, |
|
"sft_loss": 0.7468287944793701, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5679935340472824, |
|
"grad_norm": 1.2655407190322876, |
|
"learning_rate": 2.3182044505730364e-06, |
|
"logits/chosen": -2.872468948364258, |
|
"logits/rejected": -2.873964309692383, |
|
"logps/chosen": -0.7006109952926636, |
|
"logps/rejected": -0.9527314901351929, |
|
"loss": 0.7581, |
|
"odds_ratio_loss": 0.5752806067466736, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0700611025094986, |
|
"rewards/margins": 0.025212040171027184, |
|
"rewards/rejected": -0.09527313709259033, |
|
"sft_loss": 0.7006109952926636, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5841584158415842, |
|
"grad_norm": 2.9336001873016357, |
|
"learning_rate": 2.275980088319941e-06, |
|
"logits/chosen": -2.8779749870300293, |
|
"logits/rejected": -2.8763155937194824, |
|
"logps/chosen": -0.7721344232559204, |
|
"logps/rejected": -0.9309911727905273, |
|
"loss": 0.8406, |
|
"odds_ratio_loss": 0.6845985651016235, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07721343636512756, |
|
"rewards/margins": 0.01588568463921547, |
|
"rewards/rejected": -0.09309910982847214, |
|
"sft_loss": 0.7721344232559204, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.600323297635886, |
|
"grad_norm": 2.434041738510132, |
|
"learning_rate": 2.2338200545580577e-06, |
|
"logits/chosen": -2.849057674407959, |
|
"logits/rejected": -2.873142957687378, |
|
"logps/chosen": -0.7509113550186157, |
|
"logps/rejected": -1.0347163677215576, |
|
"loss": 0.8135, |
|
"odds_ratio_loss": 0.6254162788391113, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07509114593267441, |
|
"rewards/margins": 0.02838050201535225, |
|
"rewards/rejected": -0.10347163677215576, |
|
"sft_loss": 0.7509113550186157, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"grad_norm": 1.686830997467041, |
|
"learning_rate": 2.191736455761947e-06, |
|
"logits/chosen": -2.8971669673919678, |
|
"logits/rejected": -2.9139630794525146, |
|
"logps/chosen": -0.7013322114944458, |
|
"logps/rejected": -0.8860443234443665, |
|
"loss": 0.7571, |
|
"odds_ratio_loss": 0.5572749972343445, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07013322412967682, |
|
"rewards/margins": 0.01847122237086296, |
|
"rewards/rejected": -0.08860443532466888, |
|
"sft_loss": 0.7013322114944458, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"eval_logits/chosen": -2.8644185066223145, |
|
"eval_logits/rejected": -2.8728911876678467, |
|
"eval_logps/chosen": -0.8028324842453003, |
|
"eval_logps/rejected": -1.0336546897888184, |
|
"eval_loss": 0.8679323792457581, |
|
"eval_odds_ratio_loss": 0.6509982943534851, |
|
"eval_rewards/accuracies": 0.5699999928474426, |
|
"eval_rewards/chosen": -0.08028324693441391, |
|
"eval_rewards/margins": 0.02308221347630024, |
|
"eval_rewards/rejected": -0.1033654510974884, |
|
"eval_runtime": 194.7336, |
|
"eval_samples_per_second": 5.649, |
|
"eval_sft_loss": 0.8028324842453003, |
|
"eval_steps_per_second": 2.824, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 1.7911335229873657, |
|
"learning_rate": 2.1497413764574673e-06, |
|
"logits/chosen": -2.8975167274475098, |
|
"logits/rejected": -2.8892812728881836, |
|
"logps/chosen": -0.7816007137298584, |
|
"logps/rejected": -1.069588541984558, |
|
"loss": 0.8393, |
|
"odds_ratio_loss": 0.5774248242378235, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07816006988286972, |
|
"rewards/margins": 0.02879878506064415, |
|
"rewards/rejected": -0.10695885121822357, |
|
"sft_loss": 0.7816007137298584, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6488179430187917, |
|
"grad_norm": 1.912550687789917, |
|
"learning_rate": 2.1078468757516395e-06, |
|
"logits/chosen": -2.8402116298675537, |
|
"logits/rejected": -2.8773112297058105, |
|
"logps/chosen": -0.7441704273223877, |
|
"logps/rejected": -0.9479702115058899, |
|
"loss": 0.8035, |
|
"odds_ratio_loss": 0.5933586955070496, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07441703975200653, |
|
"rewards/margins": 0.02037998102605343, |
|
"rewards/rejected": -0.0947970300912857, |
|
"sft_loss": 0.7441704273223877, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6649828248130936, |
|
"grad_norm": 2.0232253074645996, |
|
"learning_rate": 2.0660649838698145e-06, |
|
"logits/chosen": -2.8627827167510986, |
|
"logits/rejected": -2.882736921310425, |
|
"logps/chosen": -0.7718713283538818, |
|
"logps/rejected": -1.1140234470367432, |
|
"loss": 0.832, |
|
"odds_ratio_loss": 0.6009626984596252, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.07718713581562042, |
|
"rewards/margins": 0.03421521559357643, |
|
"rewards/rejected": -0.11140235513448715, |
|
"sft_loss": 0.7718713283538818, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6811477066073954, |
|
"grad_norm": 1.9653966426849365, |
|
"learning_rate": 2.0244076987011284e-06, |
|
"logits/chosen": -2.905303716659546, |
|
"logits/rejected": -2.9009556770324707, |
|
"logps/chosen": -0.827530562877655, |
|
"logps/rejected": -1.0324897766113281, |
|
"loss": 0.8888, |
|
"odds_ratio_loss": 0.6124246716499329, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08275305479764938, |
|
"rewards/margins": 0.020495926961302757, |
|
"rewards/rejected": -0.1032489761710167, |
|
"sft_loss": 0.827530562877655, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"grad_norm": 1.4363154172897339, |
|
"learning_rate": 1.982886982353251e-06, |
|
"logits/chosen": -2.888767957687378, |
|
"logits/rejected": -2.8874547481536865, |
|
"logps/chosen": -0.7899632453918457, |
|
"logps/rejected": -1.1214802265167236, |
|
"loss": 0.8526, |
|
"odds_ratio_loss": 0.6266939640045166, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.07899631559848785, |
|
"rewards/margins": 0.03315168619155884, |
|
"rewards/rejected": -0.11214800179004669, |
|
"sft_loss": 0.7899632453918457, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7134774701959992, |
|
"grad_norm": 1.8043084144592285, |
|
"learning_rate": 1.941514757717392e-06, |
|
"logits/chosen": -2.866079330444336, |
|
"logits/rejected": -2.879364490509033, |
|
"logps/chosen": -0.8468548655509949, |
|
"logps/rejected": -1.1184252500534058, |
|
"loss": 0.9022, |
|
"odds_ratio_loss": 0.552977442741394, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.08468548208475113, |
|
"rewards/margins": 0.02715705707669258, |
|
"rewards/rejected": -0.11184253543615341, |
|
"sft_loss": 0.8468548655509949, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.729642351990301, |
|
"grad_norm": 3.669512987136841, |
|
"learning_rate": 1.9003029050445953e-06, |
|
"logits/chosen": -2.8407020568847656, |
|
"logits/rejected": -2.8639755249023438, |
|
"logps/chosen": -0.8030735850334167, |
|
"logps/rejected": -0.9715849757194519, |
|
"loss": 0.8692, |
|
"odds_ratio_loss": 0.660782516002655, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08030736446380615, |
|
"rewards/margins": 0.016851136460900307, |
|
"rewards/rejected": -0.0971585065126419, |
|
"sft_loss": 0.8030735850334167, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.745807233784603, |
|
"grad_norm": 1.9885250329971313, |
|
"learning_rate": 1.8592632585342523e-06, |
|
"logits/chosen": -2.849134922027588, |
|
"logits/rejected": -2.8679654598236084, |
|
"logps/chosen": -0.7700011730194092, |
|
"logps/rejected": -1.0313342809677124, |
|
"loss": 0.8306, |
|
"odds_ratio_loss": 0.6062373518943787, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07700012624263763, |
|
"rewards/margins": 0.02613331377506256, |
|
"rewards/rejected": -0.1031334400177002, |
|
"sft_loss": 0.7700011730194092, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7619721155789048, |
|
"grad_norm": 4.0624895095825195, |
|
"learning_rate": 1.8184076029358527e-06, |
|
"logits/chosen": -2.840611457824707, |
|
"logits/rejected": -2.8494577407836914, |
|
"logps/chosen": -0.7611902952194214, |
|
"logps/rejected": -0.9082427024841309, |
|
"loss": 0.8272, |
|
"odds_ratio_loss": 0.6598888635635376, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07611902803182602, |
|
"rewards/margins": 0.014705238863825798, |
|
"rewards/rejected": -0.09082427620887756, |
|
"sft_loss": 0.7611902952194214, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7781369973732066, |
|
"grad_norm": 1.7686785459518433, |
|
"learning_rate": 1.7777476701649318e-06, |
|
"logits/chosen": -2.8446550369262695, |
|
"logits/rejected": -2.85874342918396, |
|
"logps/chosen": -0.7774368524551392, |
|
"logps/rejected": -1.0228512287139893, |
|
"loss": 0.8388, |
|
"odds_ratio_loss": 0.6141053438186646, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07774369418621063, |
|
"rewards/margins": 0.024541418999433517, |
|
"rewards/rejected": -0.10228510946035385, |
|
"sft_loss": 0.7774368524551392, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7943018791675085, |
|
"grad_norm": 2.743757724761963, |
|
"learning_rate": 1.7372951359341925e-06, |
|
"logits/chosen": -2.8636326789855957, |
|
"logits/rejected": -2.8647377490997314, |
|
"logps/chosen": -0.750954806804657, |
|
"logps/rejected": -0.9340154528617859, |
|
"loss": 0.814, |
|
"odds_ratio_loss": 0.6307731866836548, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07509546726942062, |
|
"rewards/margins": 0.018306076526641846, |
|
"rewards/rejected": -0.09340154379606247, |
|
"sft_loss": 0.750954806804657, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8104667609618104, |
|
"grad_norm": 3.9680521488189697, |
|
"learning_rate": 1.6970616164007547e-06, |
|
"logits/chosen": -2.8542914390563965, |
|
"logits/rejected": -2.8552489280700684, |
|
"logps/chosen": -0.7380022406578064, |
|
"logps/rejected": -0.9561580419540405, |
|
"loss": 0.801, |
|
"odds_ratio_loss": 0.6301542520523071, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0738002210855484, |
|
"rewards/margins": 0.0218155849725008, |
|
"rewards/rejected": -0.09561581164598465, |
|
"sft_loss": 0.7380022406578064, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8266316427561122, |
|
"grad_norm": 2.8756582736968994, |
|
"learning_rate": 1.6570586648305276e-06, |
|
"logits/chosen": -2.8676905632019043, |
|
"logits/rejected": -2.895289897918701, |
|
"logps/chosen": -0.7943655252456665, |
|
"logps/rejected": -1.0809084177017212, |
|
"loss": 0.8591, |
|
"odds_ratio_loss": 0.6475063562393188, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07943655550479889, |
|
"rewards/margins": 0.028654297813773155, |
|
"rewards/rejected": -0.1080908551812172, |
|
"sft_loss": 0.7943655252456665, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.842796524550414, |
|
"grad_norm": 1.8805325031280518, |
|
"learning_rate": 1.6172977682806151e-06, |
|
"logits/chosen": -2.8678653240203857, |
|
"logits/rejected": -2.900193214416504, |
|
"logps/chosen": -0.7862238883972168, |
|
"logps/rejected": -1.0396199226379395, |
|
"loss": 0.8453, |
|
"odds_ratio_loss": 0.5909398198127747, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07862239331007004, |
|
"rewards/margins": 0.02533959411084652, |
|
"rewards/rejected": -0.1039619892835617, |
|
"sft_loss": 0.7862238883972168, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.858961406344716, |
|
"grad_norm": 1.586294174194336, |
|
"learning_rate": 1.5777903443007586e-06, |
|
"logits/chosen": -2.8388750553131104, |
|
"logits/rejected": -2.838686466217041, |
|
"logps/chosen": -0.7984446883201599, |
|
"logps/rejected": -1.093590497970581, |
|
"loss": 0.8601, |
|
"odds_ratio_loss": 0.6163803935050964, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07984446734189987, |
|
"rewards/margins": 0.029514577239751816, |
|
"rewards/rejected": -0.10935904830694199, |
|
"sft_loss": 0.7984446883201599, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8751262881390178, |
|
"grad_norm": 3.058032751083374, |
|
"learning_rate": 1.5385477376547226e-06, |
|
"logits/chosen": -2.853109121322632, |
|
"logits/rejected": -2.863646984100342, |
|
"logps/chosen": -0.7820562124252319, |
|
"logps/rejected": -1.004570484161377, |
|
"loss": 0.8417, |
|
"odds_ratio_loss": 0.5969026684761047, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07820562273263931, |
|
"rewards/margins": 0.022251427173614502, |
|
"rewards/rejected": -0.10045703500509262, |
|
"sft_loss": 0.7820562124252319, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.89129116993332, |
|
"grad_norm": 3.296496868133545, |
|
"learning_rate": 1.4995812170625845e-06, |
|
"logits/chosen": -2.8537023067474365, |
|
"logits/rejected": -2.8620083332061768, |
|
"logps/chosen": -0.7803040742874146, |
|
"logps/rejected": -1.1614640951156616, |
|
"loss": 0.8383, |
|
"odds_ratio_loss": 0.5798701047897339, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07803040742874146, |
|
"rewards/margins": 0.038116004317998886, |
|
"rewards/rejected": -0.11614640802145004, |
|
"sft_loss": 0.7803040742874146, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9074560517276218, |
|
"grad_norm": 2.4982151985168457, |
|
"learning_rate": 1.4609019719648666e-06, |
|
"logits/chosen": -2.8664259910583496, |
|
"logits/rejected": -2.880103826522827, |
|
"logps/chosen": -0.7934621572494507, |
|
"logps/rejected": -1.0411931276321411, |
|
"loss": 0.8522, |
|
"odds_ratio_loss": 0.5876864194869995, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07934621721506119, |
|
"rewards/margins": 0.024773094803094864, |
|
"rewards/rejected": -0.10411931574344635, |
|
"sft_loss": 0.7934621572494507, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9236209335219236, |
|
"grad_norm": 4.357522964477539, |
|
"learning_rate": 1.42252110930943e-06, |
|
"logits/chosen": -2.8305060863494873, |
|
"logits/rejected": -2.850817918777466, |
|
"logps/chosen": -0.7121320962905884, |
|
"logps/rejected": -0.97893887758255, |
|
"loss": 0.7723, |
|
"odds_ratio_loss": 0.6020933389663696, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07121320813894272, |
|
"rewards/margins": 0.02668066881597042, |
|
"rewards/rejected": -0.09789387881755829, |
|
"sft_loss": 0.7121320962905884, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9397858153162255, |
|
"grad_norm": 3.2690622806549072, |
|
"learning_rate": 1.3844496503620493e-06, |
|
"logits/chosen": -2.855846881866455, |
|
"logits/rejected": -2.885960817337036, |
|
"logps/chosen": -0.7993025779724121, |
|
"logps/rejected": -1.008312702178955, |
|
"loss": 0.8606, |
|
"odds_ratio_loss": 0.6124933362007141, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07993026077747345, |
|
"rewards/margins": 0.020901009440422058, |
|
"rewards/rejected": -0.10083127021789551, |
|
"sft_loss": 0.7993025779724121, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9559506971105274, |
|
"grad_norm": 3.07012677192688, |
|
"learning_rate": 1.3466985275416081e-06, |
|
"logits/chosen": -2.8368687629699707, |
|
"logits/rejected": -2.8513948917388916, |
|
"logps/chosen": -0.8561896085739136, |
|
"logps/rejected": -1.0195033550262451, |
|
"loss": 0.9234, |
|
"odds_ratio_loss": 0.6718183159828186, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08561895787715912, |
|
"rewards/margins": 0.016331372782588005, |
|
"rewards/rejected": -0.10195034742355347, |
|
"sft_loss": 0.8561896085739136, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9721155789048292, |
|
"grad_norm": 4.26687479019165, |
|
"learning_rate": 1.309278581280791e-06, |
|
"logits/chosen": -2.8606760501861572, |
|
"logits/rejected": -2.868224620819092, |
|
"logps/chosen": -0.7406347990036011, |
|
"logps/rejected": -1.0179945230484009, |
|
"loss": 0.7986, |
|
"odds_ratio_loss": 0.5793353319168091, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.07406347990036011, |
|
"rewards/margins": 0.02773597277700901, |
|
"rewards/rejected": -0.10179946571588516, |
|
"sft_loss": 0.7406347990036011, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.9882804606991311, |
|
"grad_norm": 1.2442247867584229, |
|
"learning_rate": 1.272200556913199e-06, |
|
"logits/chosen": -2.8689868450164795, |
|
"logits/rejected": -2.8818325996398926, |
|
"logps/chosen": -0.812061607837677, |
|
"logps/rejected": -1.029280424118042, |
|
"loss": 0.8795, |
|
"odds_ratio_loss": 0.6747404336929321, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08120616525411606, |
|
"rewards/margins": 0.02172188088297844, |
|
"rewards/rejected": -0.102928027510643, |
|
"sft_loss": 0.812061607837677, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.004445342493433, |
|
"grad_norm": 2.5222415924072266, |
|
"learning_rate": 1.2354751015877698e-06, |
|
"logits/chosen": -2.842041015625, |
|
"logits/rejected": -2.861173629760742, |
|
"logps/chosen": -0.7999058961868286, |
|
"logps/rejected": -1.1007378101348877, |
|
"loss": 0.86, |
|
"odds_ratio_loss": 0.6008915305137634, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07999058067798615, |
|
"rewards/margins": 0.030083194375038147, |
|
"rewards/rejected": -0.11007378250360489, |
|
"sft_loss": 0.7999058961868286, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.020610224287735, |
|
"grad_norm": 3.1796367168426514, |
|
"learning_rate": 1.1991127612113945e-06, |
|
"logits/chosen": -2.860217571258545, |
|
"logits/rejected": -2.8857686519622803, |
|
"logps/chosen": -0.7788959741592407, |
|
"logps/rejected": -1.0279576778411865, |
|
"loss": 0.8366, |
|
"odds_ratio_loss": 0.5771896839141846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0778895914554596, |
|
"rewards/margins": 0.02490617148578167, |
|
"rewards/rejected": -0.10279576480388641, |
|
"sft_loss": 0.7788959741592407, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.036775106082037, |
|
"grad_norm": 2.174238681793213, |
|
"learning_rate": 1.1631239774206035e-06, |
|
"logits/chosen": -2.8261468410491943, |
|
"logits/rejected": -2.8276760578155518, |
|
"logps/chosen": -0.7623487114906311, |
|
"logps/rejected": -1.0154896974563599, |
|
"loss": 0.8249, |
|
"odds_ratio_loss": 0.6253183484077454, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0762348622083664, |
|
"rewards/margins": 0.02531411312520504, |
|
"rewards/rejected": -0.10154898464679718, |
|
"sft_loss": 0.7623487114906311, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.052939987876339, |
|
"grad_norm": 3.220973253250122, |
|
"learning_rate": 1.1275190845831978e-06, |
|
"logits/chosen": -2.8474819660186768, |
|
"logits/rejected": -2.8597018718719482, |
|
"logps/chosen": -0.730771541595459, |
|
"logps/rejected": -1.0029503107070923, |
|
"loss": 0.7858, |
|
"odds_ratio_loss": 0.550129234790802, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07307715713977814, |
|
"rewards/margins": 0.02721787989139557, |
|
"rewards/rejected": -0.10029502958059311, |
|
"sft_loss": 0.730771541595459, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0691048696706407, |
|
"grad_norm": 2.44575834274292, |
|
"learning_rate": 1.0923083068306778e-06, |
|
"logits/chosen": -2.8472275733947754, |
|
"logits/rejected": -2.8387467861175537, |
|
"logps/chosen": -0.7656749486923218, |
|
"logps/rejected": -1.1094231605529785, |
|
"loss": 0.8236, |
|
"odds_ratio_loss": 0.5792102813720703, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07656749337911606, |
|
"rewards/margins": 0.03437482565641403, |
|
"rewards/rejected": -0.11094231903553009, |
|
"sft_loss": 0.7656749486923218, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0852697514649425, |
|
"grad_norm": 1.4943968057632446, |
|
"learning_rate": 1.0575017551223348e-06, |
|
"logits/chosen": -2.829378128051758, |
|
"logits/rejected": -2.8376450538635254, |
|
"logps/chosen": -0.7342156171798706, |
|
"logps/rejected": -0.9912710189819336, |
|
"loss": 0.7958, |
|
"odds_ratio_loss": 0.6156936883926392, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07342156767845154, |
|
"rewards/margins": 0.02570553496479988, |
|
"rewards/rejected": -0.09912709891796112, |
|
"sft_loss": 0.7342156171798706, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.1014346332592444, |
|
"grad_norm": 2.5311193466186523, |
|
"learning_rate": 1.023109424341833e-06, |
|
"logits/chosen": -2.8397974967956543, |
|
"logits/rejected": -2.8779385089874268, |
|
"logps/chosen": -0.7779219746589661, |
|
"logps/rejected": -1.1433827877044678, |
|
"loss": 0.8376, |
|
"odds_ratio_loss": 0.5970156192779541, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0777921974658966, |
|
"rewards/margins": 0.03654608502984047, |
|
"rewards/rejected": -0.11433827877044678, |
|
"sft_loss": 0.7779219746589661, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1175995150535463, |
|
"grad_norm": 2.6538310050964355, |
|
"learning_rate": 9.891411904271273e-07, |
|
"logits/chosen": -2.856947422027588, |
|
"logits/rejected": -2.86110782623291, |
|
"logps/chosen": -0.7499477863311768, |
|
"logps/rejected": -0.9801033139228821, |
|
"loss": 0.8093, |
|
"odds_ratio_loss": 0.593558669090271, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0749947652220726, |
|
"rewards/margins": 0.023015562444925308, |
|
"rewards/rejected": -0.0980103388428688, |
|
"sft_loss": 0.7499477863311768, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.133764396847848, |
|
"grad_norm": 1.2850011587142944, |
|
"learning_rate": 9.556068075345363e-07, |
|
"logits/chosen": -2.8736729621887207, |
|
"logits/rejected": -2.8673884868621826, |
|
"logps/chosen": -0.7692313194274902, |
|
"logps/rejected": -0.9742280840873718, |
|
"loss": 0.8271, |
|
"odds_ratio_loss": 0.5790851712226868, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07692314684391022, |
|
"rewards/margins": 0.02049967274069786, |
|
"rewards/rejected": -0.09742281585931778, |
|
"sft_loss": 0.7692313194274902, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.14992927864215, |
|
"grad_norm": 1.7034938335418701, |
|
"learning_rate": 9.225159052377838e-07, |
|
"logits/chosen": -2.834965944290161, |
|
"logits/rejected": -2.8684887886047363, |
|
"logps/chosen": -0.796667218208313, |
|
"logps/rejected": -1.1322475671768188, |
|
"loss": 0.8554, |
|
"odds_ratio_loss": 0.587177574634552, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07966671884059906, |
|
"rewards/margins": 0.03355802968144417, |
|
"rewards/rejected": -0.11322475969791412, |
|
"sft_loss": 0.796667218208313, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.166094160436452, |
|
"grad_norm": 2.5143074989318848, |
|
"learning_rate": 8.898779857628184e-07, |
|
"logits/chosen": -2.8322224617004395, |
|
"logits/rejected": -2.8632161617279053, |
|
"logps/chosen": -0.6862845420837402, |
|
"logps/rejected": -0.923437774181366, |
|
"loss": 0.7449, |
|
"odds_ratio_loss": 0.5857266783714294, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0686284601688385, |
|
"rewards/margins": 0.02371532842516899, |
|
"rewards/rejected": -0.0923437848687172, |
|
"sft_loss": 0.6862845420837402, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.1822590422307537, |
|
"grad_norm": 1.7262011766433716, |
|
"learning_rate": 8.577024212591975e-07, |
|
"logits/chosen": -2.8671224117279053, |
|
"logits/rejected": -2.867626428604126, |
|
"logps/chosen": -0.7982193231582642, |
|
"logps/rejected": -0.9524084329605103, |
|
"loss": 0.862, |
|
"odds_ratio_loss": 0.6382196545600891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07982192933559418, |
|
"rewards/margins": 0.01541891973465681, |
|
"rewards/rejected": -0.09524084627628326, |
|
"sft_loss": 0.7982193231582642, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.1984239240250556, |
|
"grad_norm": 1.9137386083602905, |
|
"learning_rate": 8.259984511088276e-07, |
|
"logits/chosen": -2.8300180435180664, |
|
"logits/rejected": -2.8534936904907227, |
|
"logps/chosen": -0.7877185940742493, |
|
"logps/rejected": -1.0415524244308472, |
|
"loss": 0.8505, |
|
"odds_ratio_loss": 0.6278126239776611, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07877186685800552, |
|
"rewards/margins": 0.025383388623595238, |
|
"rewards/rejected": -0.10415525734424591, |
|
"sft_loss": 0.7877185940742493, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2145888058193575, |
|
"grad_norm": 2.398965835571289, |
|
"learning_rate": 7.947751792728237e-07, |
|
"logits/chosen": -2.8527517318725586, |
|
"logits/rejected": -2.8384506702423096, |
|
"logps/chosen": -0.7678119540214539, |
|
"logps/rejected": -1.105531930923462, |
|
"loss": 0.8275, |
|
"odds_ratio_loss": 0.5968826413154602, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07678119093179703, |
|
"rewards/margins": 0.03377201408147812, |
|
"rewards/rejected": -0.11055320501327515, |
|
"sft_loss": 0.7678119540214539, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2307536876136593, |
|
"grad_norm": 2.101724147796631, |
|
"learning_rate": 7.640415716772626e-07, |
|
"logits/chosen": -2.8620262145996094, |
|
"logits/rejected": -2.881200075149536, |
|
"logps/chosen": -0.7912808656692505, |
|
"logps/rejected": -1.0620834827423096, |
|
"loss": 0.8546, |
|
"odds_ratio_loss": 0.6336351633071899, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07912809401750565, |
|
"rewards/margins": 0.027080247178673744, |
|
"rewards/rejected": -0.10620833933353424, |
|
"sft_loss": 0.7912808656692505, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.246918569407961, |
|
"grad_norm": 1.2350420951843262, |
|
"learning_rate": 7.338064536385722e-07, |
|
"logits/chosen": -2.839816093444824, |
|
"logits/rejected": -2.84806489944458, |
|
"logps/chosen": -0.7491471171379089, |
|
"logps/rejected": -1.098024606704712, |
|
"loss": 0.8078, |
|
"odds_ratio_loss": 0.5867569446563721, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07491471618413925, |
|
"rewards/margins": 0.03488774597644806, |
|
"rewards/rejected": -0.10980246961116791, |
|
"sft_loss": 0.7491471171379089, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"grad_norm": 3.2553515434265137, |
|
"learning_rate": 7.040785073292883e-07, |
|
"logits/chosen": -2.795974016189575, |
|
"logits/rejected": -2.812316417694092, |
|
"logps/chosen": -0.8446899652481079, |
|
"logps/rejected": -1.1183385848999023, |
|
"loss": 0.9119, |
|
"odds_ratio_loss": 0.6722968220710754, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08446899801492691, |
|
"rewards/margins": 0.02736486867070198, |
|
"rewards/rejected": -0.111833855509758, |
|
"sft_loss": 0.8446899652481079, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.279248332996565, |
|
"grad_norm": 1.5375083684921265, |
|
"learning_rate": 6.748662692849297e-07, |
|
"logits/chosen": -2.8378682136535645, |
|
"logits/rejected": -2.8527588844299316, |
|
"logps/chosen": -0.7140767574310303, |
|
"logps/rejected": -1.1210377216339111, |
|
"loss": 0.7679, |
|
"odds_ratio_loss": 0.5377554893493652, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07140768319368362, |
|
"rewards/margins": 0.04069609194993973, |
|
"rewards/rejected": -0.11210376024246216, |
|
"sft_loss": 0.7140767574310303, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.295413214790867, |
|
"grad_norm": 3.371690273284912, |
|
"learning_rate": 6.46178127952686e-07, |
|
"logits/chosen": -2.8596229553222656, |
|
"logits/rejected": -2.86143159866333, |
|
"logps/chosen": -0.7527777552604675, |
|
"logps/rejected": -1.0262553691864014, |
|
"loss": 0.8073, |
|
"odds_ratio_loss": 0.5452762842178345, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07527776062488556, |
|
"rewards/margins": 0.02734777331352234, |
|
"rewards/rejected": -0.10262554883956909, |
|
"sft_loss": 0.7527777552604675, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3115780965851687, |
|
"grad_norm": 5.5002760887146, |
|
"learning_rate": 6.180223212826289e-07, |
|
"logits/chosen": -2.8466854095458984, |
|
"logits/rejected": -2.84420108795166, |
|
"logps/chosen": -0.760028600692749, |
|
"logps/rejected": -1.0010223388671875, |
|
"loss": 0.8196, |
|
"odds_ratio_loss": 0.595847487449646, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07600285112857819, |
|
"rewards/margins": 0.024099376052618027, |
|
"rewards/rejected": -0.10010223090648651, |
|
"sft_loss": 0.760028600692749, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3277429783794705, |
|
"grad_norm": 2.094597339630127, |
|
"learning_rate": 5.904069343621443e-07, |
|
"logits/chosen": -2.8559889793395996, |
|
"logits/rejected": -2.843318462371826, |
|
"logps/chosen": -0.7583047747612, |
|
"logps/rejected": -1.0201733112335205, |
|
"loss": 0.8157, |
|
"odds_ratio_loss": 0.5739010572433472, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07583048194646835, |
|
"rewards/margins": 0.02618684433400631, |
|
"rewards/rejected": -0.10201732814311981, |
|
"sft_loss": 0.7583047747612, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3439078601737724, |
|
"grad_norm": 3.256753444671631, |
|
"learning_rate": 5.633398970942544e-07, |
|
"logits/chosen": -2.8187243938446045, |
|
"logits/rejected": -2.8463759422302246, |
|
"logps/chosen": -0.763822078704834, |
|
"logps/rejected": -0.9972942471504211, |
|
"loss": 0.8274, |
|
"odds_ratio_loss": 0.6356968283653259, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07638221234083176, |
|
"rewards/margins": 0.023347217589616776, |
|
"rewards/rejected": -0.09972943365573883, |
|
"sft_loss": 0.763822078704834, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3600727419680743, |
|
"grad_norm": 2.1988418102264404, |
|
"learning_rate": 5.368289819205069e-07, |
|
"logits/chosen": -2.8621747493743896, |
|
"logits/rejected": -2.8629798889160156, |
|
"logps/chosen": -0.699676513671875, |
|
"logps/rejected": -0.9881321787834167, |
|
"loss": 0.7602, |
|
"odds_ratio_loss": 0.6056861877441406, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.06996765732765198, |
|
"rewards/margins": 0.028845559805631638, |
|
"rewards/rejected": -0.09881322085857391, |
|
"sft_loss": 0.699676513671875, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 2.666426181793213, |
|
"learning_rate": 5.108818015890785e-07, |
|
"logits/chosen": -2.8656005859375, |
|
"logits/rejected": -2.889970302581787, |
|
"logps/chosen": -0.8437716364860535, |
|
"logps/rejected": -1.0408810377120972, |
|
"loss": 0.9052, |
|
"odds_ratio_loss": 0.6140363216400146, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08437716960906982, |
|
"rewards/margins": 0.01971094310283661, |
|
"rewards/rejected": -0.10408811271190643, |
|
"sft_loss": 0.8437716364860535, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.392402505556678, |
|
"grad_norm": 2.2777225971221924, |
|
"learning_rate": 4.855058069687291e-07, |
|
"logits/chosen": -2.834155559539795, |
|
"logits/rejected": -2.8524587154388428, |
|
"logps/chosen": -0.7329773306846619, |
|
"logps/rejected": -1.1425807476043701, |
|
"loss": 0.7861, |
|
"odds_ratio_loss": 0.5314901471138, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07329773157835007, |
|
"rewards/margins": 0.04096033796668053, |
|
"rewards/rejected": -0.11425807327032089, |
|
"sft_loss": 0.7329773306846619, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.40856738735098, |
|
"grad_norm": 2.6650478839874268, |
|
"learning_rate": 4.607082849092523e-07, |
|
"logits/chosen": -2.862356662750244, |
|
"logits/rejected": -2.864802598953247, |
|
"logps/chosen": -0.829633891582489, |
|
"logps/rejected": -1.0255271196365356, |
|
"loss": 0.8935, |
|
"odds_ratio_loss": 0.638370156288147, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08296339213848114, |
|
"rewards/margins": 0.019589336588978767, |
|
"rewards/rejected": -0.10255272686481476, |
|
"sft_loss": 0.829633891582489, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"grad_norm": 3.085514783859253, |
|
"learning_rate": 4.3649635614901405e-07, |
|
"logits/chosen": -2.8451571464538574, |
|
"logits/rejected": -2.8950095176696777, |
|
"logps/chosen": -0.7389890551567078, |
|
"logps/rejected": -0.8802745938301086, |
|
"loss": 0.8035, |
|
"odds_ratio_loss": 0.6446704864501953, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.07389890402555466, |
|
"rewards/margins": 0.014128552749752998, |
|
"rewards/rejected": -0.0880274623632431, |
|
"sft_loss": 0.7389890551567078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"eval_logits/chosen": -2.8472585678100586, |
|
"eval_logits/rejected": -2.8558220863342285, |
|
"eval_logps/chosen": -0.7975095510482788, |
|
"eval_logps/rejected": -1.0328320264816284, |
|
"eval_loss": 0.8629826903343201, |
|
"eval_odds_ratio_loss": 0.6547309160232544, |
|
"eval_rewards/accuracies": 0.5618181824684143, |
|
"eval_rewards/chosen": -0.07975095510482788, |
|
"eval_rewards/margins": 0.02353225089609623, |
|
"eval_rewards/rejected": -0.10328320413827896, |
|
"eval_runtime": 194.6849, |
|
"eval_samples_per_second": 5.65, |
|
"eval_sft_loss": 0.7975095510482788, |
|
"eval_steps_per_second": 2.825, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4408971509395836, |
|
"grad_norm": 1.7019646167755127, |
|
"learning_rate": 4.128769732701973e-07, |
|
"logits/chosen": -2.82879638671875, |
|
"logits/rejected": -2.832578420639038, |
|
"logps/chosen": -0.7700603604316711, |
|
"logps/rejected": -0.9951756596565247, |
|
"loss": 0.8304, |
|
"odds_ratio_loss": 0.6030290722846985, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.077006034553051, |
|
"rewards/margins": 0.022511538118124008, |
|
"rewards/rejected": -0.0995175689458847, |
|
"sft_loss": 0.7700603604316711, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4570620327338855, |
|
"grad_norm": 2.5611681938171387, |
|
"learning_rate": 3.8985691870233046e-07, |
|
"logits/chosen": -2.882220506668091, |
|
"logits/rejected": -2.880516529083252, |
|
"logps/chosen": -0.7692660689353943, |
|
"logps/rejected": -1.0380921363830566, |
|
"loss": 0.8284, |
|
"odds_ratio_loss": 0.5917290449142456, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07692660391330719, |
|
"rewards/margins": 0.026882609352469444, |
|
"rewards/rejected": -0.10380921512842178, |
|
"sft_loss": 0.7692660689353943, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.4732269145281873, |
|
"grad_norm": 2.6633763313293457, |
|
"learning_rate": 3.6744280277467904e-07, |
|
"logits/chosen": -2.8530020713806152, |
|
"logits/rejected": -2.8719234466552734, |
|
"logps/chosen": -0.7769867181777954, |
|
"logps/rejected": -1.0218976736068726, |
|
"loss": 0.8392, |
|
"odds_ratio_loss": 0.6218123435974121, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07769867032766342, |
|
"rewards/margins": 0.024491112679243088, |
|
"rewards/rejected": -0.10218977928161621, |
|
"sft_loss": 0.7769867181777954, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.489391796322489, |
|
"grad_norm": 2.7384212017059326, |
|
"learning_rate": 3.456410618180503e-07, |
|
"logits/chosen": -2.832824468612671, |
|
"logits/rejected": -2.856114149093628, |
|
"logps/chosen": -0.7060586810112, |
|
"logps/rejected": -1.0986192226409912, |
|
"loss": 0.7646, |
|
"odds_ratio_loss": 0.5853801965713501, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.07060587406158447, |
|
"rewards/margins": 0.03925605118274689, |
|
"rewards/rejected": -0.10986192524433136, |
|
"sft_loss": 0.7060586810112, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5055566781167915, |
|
"grad_norm": 1.9465371370315552, |
|
"learning_rate": 3.244579563165753e-07, |
|
"logits/chosen": -2.8586621284484863, |
|
"logits/rejected": -2.869255542755127, |
|
"logps/chosen": -0.7589577436447144, |
|
"logps/rejected": -1.1315686702728271, |
|
"loss": 0.8173, |
|
"odds_ratio_loss": 0.5836090445518494, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0758957713842392, |
|
"rewards/margins": 0.03726109117269516, |
|
"rewards/rejected": -0.11315685510635376, |
|
"sft_loss": 0.7589577436447144, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.521721559911093, |
|
"grad_norm": 1.2344708442687988, |
|
"learning_rate": 3.038995691099697e-07, |
|
"logits/chosen": -2.8416831493377686, |
|
"logits/rejected": -2.85313081741333, |
|
"logps/chosen": -0.7924615144729614, |
|
"logps/rejected": -1.2077696323394775, |
|
"loss": 0.8503, |
|
"odds_ratio_loss": 0.5783108472824097, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0792461559176445, |
|
"rewards/margins": 0.041530806571245193, |
|
"rewards/rejected": -0.1207769513130188, |
|
"sft_loss": 0.7924615144729614, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5378864417053952, |
|
"grad_norm": 12.726688385009766, |
|
"learning_rate": 2.839718036468192e-07, |
|
"logits/chosen": -2.8868002891540527, |
|
"logits/rejected": -2.9153692722320557, |
|
"logps/chosen": -0.884573757648468, |
|
"logps/rejected": -1.0609769821166992, |
|
"loss": 0.9513, |
|
"odds_ratio_loss": 0.6675292253494263, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0884573832154274, |
|
"rewards/margins": 0.01764032617211342, |
|
"rewards/rejected": -0.10609769821166992, |
|
"sft_loss": 0.884573757648468, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5540513234996967, |
|
"grad_norm": 2.5232503414154053, |
|
"learning_rate": 2.646803822893723e-07, |
|
"logits/chosen": -2.8850457668304443, |
|
"logits/rejected": -2.894557476043701, |
|
"logps/chosen": -0.8000026941299438, |
|
"logps/rejected": -1.0157983303070068, |
|
"loss": 0.8627, |
|
"odds_ratio_loss": 0.6269931793212891, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08000027388334274, |
|
"rewards/margins": 0.02157955802977085, |
|
"rewards/rejected": -0.10157983005046844, |
|
"sft_loss": 0.8000026941299438, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.570216205293999, |
|
"grad_norm": 2.3380508422851562, |
|
"learning_rate": 2.460308446703341e-07, |
|
"logits/chosen": -2.8933000564575195, |
|
"logits/rejected": -2.8834781646728516, |
|
"logps/chosen": -0.791167676448822, |
|
"logps/rejected": -0.9255102276802063, |
|
"loss": 0.8556, |
|
"odds_ratio_loss": 0.6445525884628296, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07911677658557892, |
|
"rewards/margins": 0.013434251770377159, |
|
"rewards/rejected": -0.09255101531744003, |
|
"sft_loss": 0.791167676448822, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.5863810870883004, |
|
"grad_norm": 3.6344377994537354, |
|
"learning_rate": 2.2802854610213143e-07, |
|
"logits/chosen": -2.8420848846435547, |
|
"logits/rejected": -2.8515543937683105, |
|
"logps/chosen": -0.6993797421455383, |
|
"logps/rejected": -1.0781666040420532, |
|
"loss": 0.7531, |
|
"odds_ratio_loss": 0.5369757413864136, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.06993797421455383, |
|
"rewards/margins": 0.03787868469953537, |
|
"rewards/rejected": -0.1078166589140892, |
|
"sft_loss": 0.6993797421455383, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6025459688826027, |
|
"grad_norm": 2.515239715576172, |
|
"learning_rate": 2.106786560391072e-07, |
|
"logits/chosen": -2.8365635871887207, |
|
"logits/rejected": -2.8803467750549316, |
|
"logps/chosen": -0.8032782673835754, |
|
"logps/rejected": -1.0168392658233643, |
|
"loss": 0.8638, |
|
"odds_ratio_loss": 0.6049396395683289, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0803278312087059, |
|
"rewards/margins": 0.021356089040637016, |
|
"rewards/rejected": -0.10168392956256866, |
|
"sft_loss": 0.8032782673835754, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6187108506769046, |
|
"grad_norm": 1.520639181137085, |
|
"learning_rate": 1.9398615659308255e-07, |
|
"logits/chosen": -2.861687183380127, |
|
"logits/rejected": -2.89752459526062, |
|
"logps/chosen": -0.7549802660942078, |
|
"logps/rejected": -0.9435558319091797, |
|
"loss": 0.8181, |
|
"odds_ratio_loss": 0.6309365034103394, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07549802213907242, |
|
"rewards/margins": 0.018857568502426147, |
|
"rewards/rejected": -0.09435557574033737, |
|
"sft_loss": 0.7549802660942078, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.6348757324712064, |
|
"grad_norm": 2.2465171813964844, |
|
"learning_rate": 1.7795584110272184e-07, |
|
"logits/chosen": -2.8905723094940186, |
|
"logits/rejected": -2.877936840057373, |
|
"logps/chosen": -0.7934287786483765, |
|
"logps/rejected": -1.0050441026687622, |
|
"loss": 0.8594, |
|
"odds_ratio_loss": 0.6593586802482605, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.07934287935495377, |
|
"rewards/margins": 0.021161522716283798, |
|
"rewards/rejected": -0.10050439834594727, |
|
"sft_loss": 0.7934287786483765, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6510406142655083, |
|
"grad_norm": 4.033486366271973, |
|
"learning_rate": 1.6259231275709636e-07, |
|
"logits/chosen": -2.8982126712799072, |
|
"logits/rejected": -2.8980660438537598, |
|
"logps/chosen": -0.7681853175163269, |
|
"logps/rejected": -0.9490568041801453, |
|
"loss": 0.8356, |
|
"odds_ratio_loss": 0.6740620732307434, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0768185406923294, |
|
"rewards/margins": 0.018087133765220642, |
|
"rewards/rejected": -0.09490568190813065, |
|
"sft_loss": 0.7681853175163269, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.66720549605981, |
|
"grad_norm": 1.5368350744247437, |
|
"learning_rate": 1.478999832738548e-07, |
|
"logits/chosen": -2.8781023025512695, |
|
"logits/rejected": -2.8767361640930176, |
|
"logps/chosen": -0.7599083185195923, |
|
"logps/rejected": -1.0983332395553589, |
|
"loss": 0.82, |
|
"odds_ratio_loss": 0.601204514503479, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07599084079265594, |
|
"rewards/margins": 0.033842481672763824, |
|
"rewards/rejected": -0.10983331501483917, |
|
"sft_loss": 0.7599083185195923, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.683370377854112, |
|
"grad_norm": 1.8103063106536865, |
|
"learning_rate": 1.338830716323769e-07, |
|
"logits/chosen": -2.8456664085388184, |
|
"logits/rejected": -2.8552403450012207, |
|
"logps/chosen": -0.8041807413101196, |
|
"logps/rejected": -0.9866863489151001, |
|
"loss": 0.8687, |
|
"odds_ratio_loss": 0.6454349756240845, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08041806519031525, |
|
"rewards/margins": 0.018250569701194763, |
|
"rewards/rejected": -0.0986686423420906, |
|
"sft_loss": 0.8041807413101196, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.699535259648414, |
|
"grad_norm": 3.796130657196045, |
|
"learning_rate": 1.205456028622723e-07, |
|
"logits/chosen": -2.8858485221862793, |
|
"logits/rejected": -2.883568286895752, |
|
"logps/chosen": -0.7273125648498535, |
|
"logps/rejected": -1.0116485357284546, |
|
"loss": 0.7835, |
|
"odds_ratio_loss": 0.5615276098251343, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.07273125648498535, |
|
"rewards/margins": 0.02843359112739563, |
|
"rewards/rejected": -0.10116485506296158, |
|
"sft_loss": 0.7273125648498535, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7157001414427158, |
|
"grad_norm": 1.619040608406067, |
|
"learning_rate": 1.0789140688756805e-07, |
|
"logits/chosen": -2.8932971954345703, |
|
"logits/rejected": -2.8933002948760986, |
|
"logps/chosen": -0.7631897926330566, |
|
"logps/rejected": -1.0072143077850342, |
|
"loss": 0.8217, |
|
"odds_ratio_loss": 0.5846946239471436, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07631897926330566, |
|
"rewards/margins": 0.024402452632784843, |
|
"rewards/rejected": -0.10072143375873566, |
|
"sft_loss": 0.7631897926330566, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.7318650232370176, |
|
"grad_norm": 4.591987133026123, |
|
"learning_rate": 9.592411742693098e-08, |
|
"logits/chosen": -2.8280813694000244, |
|
"logits/rejected": -2.832314968109131, |
|
"logps/chosen": -0.7757545709609985, |
|
"logps/rejected": -0.9772068858146667, |
|
"loss": 0.845, |
|
"odds_ratio_loss": 0.6925373673439026, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0775754451751709, |
|
"rewards/margins": 0.020145252346992493, |
|
"rewards/rejected": -0.09772069752216339, |
|
"sft_loss": 0.7757545709609985, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7480299050313195, |
|
"grad_norm": 2.0528857707977295, |
|
"learning_rate": 8.464717095022168e-08, |
|
"logits/chosen": -2.8116049766540527, |
|
"logits/rejected": -2.8237504959106445, |
|
"logps/chosen": -0.7476006746292114, |
|
"logps/rejected": -1.0309717655181885, |
|
"loss": 0.805, |
|
"odds_ratio_loss": 0.574048638343811, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0747600644826889, |
|
"rewards/margins": 0.02833711728453636, |
|
"rewards/rejected": -0.10309717804193497, |
|
"sft_loss": 0.7476006746292114, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7641947868256214, |
|
"grad_norm": 2.445467233657837, |
|
"learning_rate": 7.406380569169841e-08, |
|
"logits/chosen": -2.860349178314209, |
|
"logits/rejected": -2.8944199085235596, |
|
"logps/chosen": -0.7957582473754883, |
|
"logps/rejected": -0.9725676774978638, |
|
"loss": 0.8593, |
|
"odds_ratio_loss": 0.6357892155647278, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07957582920789719, |
|
"rewards/margins": 0.01768093928694725, |
|
"rewards/rejected": -0.09725676476955414, |
|
"sft_loss": 0.7957582473754883, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.7803596686199232, |
|
"grad_norm": 11.543617248535156, |
|
"learning_rate": 6.417706072013808e-08, |
|
"logits/chosen": -2.8683581352233887, |
|
"logits/rejected": -2.894205331802368, |
|
"logps/chosen": -0.7598998546600342, |
|
"logps/rejected": -0.9663190841674805, |
|
"loss": 0.8231, |
|
"odds_ratio_loss": 0.6316258907318115, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07598999887704849, |
|
"rewards/margins": 0.02064192108809948, |
|
"rewards/rejected": -0.09663191437721252, |
|
"sft_loss": 0.7598998546600342, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.796524550414225, |
|
"grad_norm": 3.360384941101074, |
|
"learning_rate": 5.498977506615294e-08, |
|
"logits/chosen": -2.8601443767547607, |
|
"logits/rejected": -2.898664712905884, |
|
"logps/chosen": -0.790396511554718, |
|
"logps/rejected": -0.9606446027755737, |
|
"loss": 0.8544, |
|
"odds_ratio_loss": 0.6396982073783875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07903965562582016, |
|
"rewards/margins": 0.01702481135725975, |
|
"rewards/rejected": -0.09606447070837021, |
|
"sft_loss": 0.790396511554718, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.812689432208527, |
|
"grad_norm": 2.132490873336792, |
|
"learning_rate": 4.6504586906947756e-08, |
|
"logits/chosen": -2.8836772441864014, |
|
"logits/rejected": -2.9003067016601562, |
|
"logps/chosen": -0.8166056871414185, |
|
"logps/rejected": -0.9932202100753784, |
|
"loss": 0.8767, |
|
"odds_ratio_loss": 0.6010292768478394, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08166056871414185, |
|
"rewards/margins": 0.01766144670546055, |
|
"rewards/rejected": -0.09932202100753784, |
|
"sft_loss": 0.8166056871414185, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 7.204352855682373, |
|
"learning_rate": 3.8723932808754914e-08, |
|
"logits/chosen": -2.887660503387451, |
|
"logits/rejected": -2.9059557914733887, |
|
"logps/chosen": -0.8569768667221069, |
|
"logps/rejected": -0.9907077550888062, |
|
"loss": 0.9219, |
|
"odds_ratio_loss": 0.6491862535476685, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08569768816232681, |
|
"rewards/margins": 0.01337310392409563, |
|
"rewards/rejected": -0.09907079488039017, |
|
"sft_loss": 0.8569768667221069, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8450191957971307, |
|
"grad_norm": 3.7778828144073486, |
|
"learning_rate": 3.1650047027158014e-08, |
|
"logits/chosen": -2.8876945972442627, |
|
"logits/rejected": -2.9152872562408447, |
|
"logps/chosen": -0.7689987421035767, |
|
"logps/rejected": -0.981308102607727, |
|
"loss": 0.828, |
|
"odds_ratio_loss": 0.5896368622779846, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.07689988613128662, |
|
"rewards/margins": 0.02123093418776989, |
|
"rewards/rejected": -0.09813080728054047, |
|
"sft_loss": 0.7689987421035767, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.8611840775914326, |
|
"grad_norm": 1.726138949394226, |
|
"learning_rate": 2.5284960865517848e-08, |
|
"logits/chosen": -2.851304769515991, |
|
"logits/rejected": -2.871598243713379, |
|
"logps/chosen": -0.7240949273109436, |
|
"logps/rejected": -1.0288841724395752, |
|
"loss": 0.7798, |
|
"odds_ratio_loss": 0.5571027994155884, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0724094957113266, |
|
"rewards/margins": 0.030478913336992264, |
|
"rewards/rejected": -0.10288842022418976, |
|
"sft_loss": 0.7240949273109436, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.8773489593857344, |
|
"grad_norm": 2.2119297981262207, |
|
"learning_rate": 1.9630502091670388e-08, |
|
"logits/chosen": -2.8459057807922363, |
|
"logits/rejected": -2.866259813308716, |
|
"logps/chosen": -0.7477800250053406, |
|
"logps/rejected": -1.0080687999725342, |
|
"loss": 0.8054, |
|
"odds_ratio_loss": 0.5758811235427856, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0747780054807663, |
|
"rewards/margins": 0.026028871536254883, |
|
"rewards/rejected": -0.10080687701702118, |
|
"sft_loss": 0.7477800250053406, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.8935138411800363, |
|
"grad_norm": 2.910409450531006, |
|
"learning_rate": 1.4688294413074677e-08, |
|
"logits/chosen": -2.850733757019043, |
|
"logits/rejected": -2.8780460357666016, |
|
"logps/chosen": -0.6847941279411316, |
|
"logps/rejected": -1.00661301612854, |
|
"loss": 0.7411, |
|
"odds_ratio_loss": 0.5632899403572083, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.06847941130399704, |
|
"rewards/margins": 0.03218189254403114, |
|
"rewards/rejected": -0.10066130012273788, |
|
"sft_loss": 0.6847941279411316, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.909678722974338, |
|
"grad_norm": 2.044072389602661, |
|
"learning_rate": 1.0459757010556626e-08, |
|
"logits/chosen": -2.856724262237549, |
|
"logits/rejected": -2.877833366394043, |
|
"logps/chosen": -0.7718300223350525, |
|
"logps/rejected": -0.9458082914352417, |
|
"loss": 0.8346, |
|
"odds_ratio_loss": 0.6273509860038757, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07718300819396973, |
|
"rewards/margins": 0.017397824674844742, |
|
"rewards/rejected": -0.09458083659410477, |
|
"sft_loss": 0.7718300223350525, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.92584360476864, |
|
"grad_norm": 1.9232614040374756, |
|
"learning_rate": 6.94610413078306e-09, |
|
"logits/chosen": -2.8028831481933594, |
|
"logits/rejected": -2.8568198680877686, |
|
"logps/chosen": -0.8266820907592773, |
|
"logps/rejected": -1.2092140913009644, |
|
"loss": 0.8869, |
|
"odds_ratio_loss": 0.6017346382141113, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08266820758581161, |
|
"rewards/margins": 0.03825319558382034, |
|
"rewards/rejected": -0.12092139571905136, |
|
"sft_loss": 0.8266820907592773, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.942008486562942, |
|
"grad_norm": 1.0960156917572021, |
|
"learning_rate": 4.14834473758563e-09, |
|
"logits/chosen": -2.8286824226379395, |
|
"logits/rejected": -2.838784694671631, |
|
"logps/chosen": -0.7189845442771912, |
|
"logps/rejected": -0.9857820272445679, |
|
"loss": 0.7756, |
|
"odds_ratio_loss": 0.5664829015731812, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0718984454870224, |
|
"rewards/margins": 0.02667975425720215, |
|
"rewards/rejected": -0.09857820719480515, |
|
"sft_loss": 0.7189845442771912, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9581733683572438, |
|
"grad_norm": 1.63419771194458, |
|
"learning_rate": 2.067282222230349e-09, |
|
"logits/chosen": -2.8597445487976074, |
|
"logits/rejected": -2.8696541786193848, |
|
"logps/chosen": -0.7367098331451416, |
|
"logps/rejected": -1.0127137899398804, |
|
"loss": 0.7943, |
|
"odds_ratio_loss": 0.5762413740158081, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07367098331451416, |
|
"rewards/margins": 0.027600402012467384, |
|
"rewards/rejected": -0.1012713760137558, |
|
"sft_loss": 0.7367098331451416, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9743382501515456, |
|
"grad_norm": 2.9457271099090576, |
|
"learning_rate": 7.035141727212979e-10, |
|
"logits/chosen": -2.8564071655273438, |
|
"logits/rejected": -2.8889355659484863, |
|
"logps/chosen": -0.7218343615531921, |
|
"logps/rejected": -1.0010156631469727, |
|
"loss": 0.7784, |
|
"odds_ratio_loss": 0.5654899477958679, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07218344509601593, |
|
"rewards/margins": 0.02791813388466835, |
|
"rewards/rejected": -0.10010156780481339, |
|
"sft_loss": 0.7218343615531921, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.9905031319458475, |
|
"grad_norm": 4.486654758453369, |
|
"learning_rate": 5.743220219761592e-11, |
|
"logits/chosen": -2.8505501747131348, |
|
"logits/rejected": -2.870176076889038, |
|
"logps/chosen": -0.8715106248855591, |
|
"logps/rejected": -1.054720401763916, |
|
"loss": 0.9404, |
|
"odds_ratio_loss": 0.6889584064483643, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08715107291936874, |
|
"rewards/margins": 0.0183209627866745, |
|
"rewards/rejected": -0.10547204315662384, |
|
"sft_loss": 0.8715106248855591, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.9969690846635686, |
|
"step": 1854, |
|
"total_flos": 2.1013894560546816e+18, |
|
"train_loss": 0.9013287582572352, |
|
"train_runtime": 18144.1457, |
|
"train_samples_per_second": 1.637, |
|
"train_steps_per_second": 0.102 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1854, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.1013894560546816e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|