|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994756161510225, |
|
"eval_steps": 100, |
|
"global_step": 953, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01048767697954903, |
|
"grad_norm": 281.5632535171625, |
|
"learning_rate": 7.000000000000001e-07, |
|
"log_odds_chosen": 0.14837229251861572, |
|
"log_odds_ratio": -0.7063122987747192, |
|
"logits/chosen": -2.4233744144439697, |
|
"logits/rejected": -2.3922557830810547, |
|
"logps/chosen": -1.0665283203125, |
|
"logps/rejected": -1.164435625076294, |
|
"loss": 3.7384, |
|
"nll_loss": 3.6487019062042236, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.05332641676068306, |
|
"rewards/margins": 0.004895367659628391, |
|
"rewards/rejected": -0.058221787214279175, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02097535395909806, |
|
"grad_norm": 3.6095114671977337, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"log_odds_chosen": 0.18771903216838837, |
|
"log_odds_ratio": -0.6616674661636353, |
|
"logits/chosen": -2.669743061065674, |
|
"logits/rejected": -2.6637511253356934, |
|
"logps/chosen": -0.8115625381469727, |
|
"logps/rejected": -0.9194537401199341, |
|
"loss": 0.598, |
|
"nll_loss": 0.5553613901138306, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04057813063263893, |
|
"rewards/margins": 0.005394552834331989, |
|
"rewards/rejected": -0.045972686260938644, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03146303093864709, |
|
"grad_norm": 2.6104338509446743, |
|
"learning_rate": 2.1e-06, |
|
"log_odds_chosen": 0.24361269176006317, |
|
"log_odds_ratio": -0.6484603881835938, |
|
"logits/chosen": -2.8152480125427246, |
|
"logits/rejected": -2.770486831665039, |
|
"logps/chosen": -0.7975724339485168, |
|
"logps/rejected": -0.9327106475830078, |
|
"loss": 0.539, |
|
"nll_loss": 0.4975182116031647, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03987862169742584, |
|
"rewards/margins": 0.006756913848221302, |
|
"rewards/rejected": -0.04663553088903427, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04195070791819612, |
|
"grad_norm": 2.6082713320666966, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"log_odds_chosen": 0.18453697860240936, |
|
"log_odds_ratio": -0.6863341331481934, |
|
"logits/chosen": -2.7431702613830566, |
|
"logits/rejected": -2.721076488494873, |
|
"logps/chosen": -0.7775384783744812, |
|
"logps/rejected": -0.8990561366081238, |
|
"loss": 0.5182, |
|
"nll_loss": 0.4802665710449219, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03887692838907242, |
|
"rewards/margins": 0.006075879093259573, |
|
"rewards/rejected": -0.04495280981063843, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05243838489774515, |
|
"grad_norm": 2.8319159240383356, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.2895735204219818, |
|
"log_odds_ratio": -0.6829751133918762, |
|
"logits/chosen": -2.6645712852478027, |
|
"logits/rejected": -2.6532058715820312, |
|
"logps/chosen": -0.7420316934585571, |
|
"logps/rejected": -0.92218017578125, |
|
"loss": 0.5346, |
|
"nll_loss": 0.4737791419029236, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03710158169269562, |
|
"rewards/margins": 0.009007426910102367, |
|
"rewards/rejected": -0.04610900953412056, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06292606187729417, |
|
"grad_norm": 2.702391106634465, |
|
"learning_rate": 4.2e-06, |
|
"log_odds_chosen": 0.23618292808532715, |
|
"log_odds_ratio": -0.6679760217666626, |
|
"logits/chosen": -2.7234179973602295, |
|
"logits/rejected": -2.701585292816162, |
|
"logps/chosen": -0.7408851385116577, |
|
"logps/rejected": -0.8674576878547668, |
|
"loss": 0.5296, |
|
"nll_loss": 0.5001371502876282, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03704426437616348, |
|
"rewards/margins": 0.0063286214135587215, |
|
"rewards/rejected": -0.04337288811802864, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07341373885684321, |
|
"grad_norm": 2.7579557747488237, |
|
"learning_rate": 4.9e-06, |
|
"log_odds_chosen": 0.1982727348804474, |
|
"log_odds_ratio": -0.7039018869400024, |
|
"logits/chosen": -2.716829776763916, |
|
"logits/rejected": -2.7165746688842773, |
|
"logps/chosen": -0.7602167129516602, |
|
"logps/rejected": -0.8683260679244995, |
|
"loss": 0.5179, |
|
"nll_loss": 0.5095189213752747, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03801083564758301, |
|
"rewards/margins": 0.005405469331890345, |
|
"rewards/rejected": -0.043416302651166916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08390141583639224, |
|
"grad_norm": 2.7333788754363826, |
|
"learning_rate": 5.600000000000001e-06, |
|
"log_odds_chosen": 0.19610878825187683, |
|
"log_odds_ratio": -0.6825613379478455, |
|
"logits/chosen": -2.6934926509857178, |
|
"logits/rejected": -2.6538023948669434, |
|
"logps/chosen": -0.8004279136657715, |
|
"logps/rejected": -0.9359849095344543, |
|
"loss": 0.5198, |
|
"nll_loss": 0.44797396659851074, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.040021397173404694, |
|
"rewards/margins": 0.006777846720069647, |
|
"rewards/rejected": -0.04679924249649048, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09438909281594127, |
|
"grad_norm": 2.643892428655997, |
|
"learning_rate": 6.3e-06, |
|
"log_odds_chosen": 0.32694971561431885, |
|
"log_odds_ratio": -0.6449785828590393, |
|
"logits/chosen": -2.6064088344573975, |
|
"logits/rejected": -2.600590229034424, |
|
"logps/chosen": -0.7779799699783325, |
|
"logps/rejected": -0.970491886138916, |
|
"loss": 0.5108, |
|
"nll_loss": 0.4519652724266052, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03889899700880051, |
|
"rewards/margins": 0.009625596925616264, |
|
"rewards/rejected": -0.04852459207177162, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1048767697954903, |
|
"grad_norm": 2.7386435335682178, |
|
"learning_rate": 7e-06, |
|
"log_odds_chosen": 0.24293240904808044, |
|
"log_odds_ratio": -0.65534907579422, |
|
"logits/chosen": -2.800649881362915, |
|
"logits/rejected": -2.783020257949829, |
|
"logps/chosen": -0.7912999391555786, |
|
"logps/rejected": -0.931311309337616, |
|
"loss": 0.5226, |
|
"nll_loss": 0.4863203167915344, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.03956499695777893, |
|
"rewards/margins": 0.007000570185482502, |
|
"rewards/rejected": -0.04656556248664856, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1048767697954903, |
|
"eval_log_odds_chosen": 0.2873421609401703, |
|
"eval_log_odds_ratio": -0.632556140422821, |
|
"eval_logits/chosen": -2.7859702110290527, |
|
"eval_logits/rejected": -2.758275270462036, |
|
"eval_logps/chosen": -0.7728292942047119, |
|
"eval_logps/rejected": -0.9448140263557434, |
|
"eval_loss": 0.5279971957206726, |
|
"eval_nll_loss": 0.49532046914100647, |
|
"eval_rewards/accuracies": 0.6329365372657776, |
|
"eval_rewards/chosen": -0.03864146023988724, |
|
"eval_rewards/margins": 0.008599241264164448, |
|
"eval_rewards/rejected": -0.04724070429801941, |
|
"eval_runtime": 137.6903, |
|
"eval_samples_per_second": 14.482, |
|
"eval_steps_per_second": 0.458, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11536444677503933, |
|
"grad_norm": 3.1992530570673416, |
|
"learning_rate": 6.674238124719146e-06, |
|
"log_odds_chosen": 0.34574735164642334, |
|
"log_odds_ratio": -0.612960934638977, |
|
"logits/chosen": -2.770359516143799, |
|
"logits/rejected": -2.785818099975586, |
|
"logps/chosen": -0.7360346913337708, |
|
"logps/rejected": -0.9339498281478882, |
|
"loss": 0.516, |
|
"nll_loss": 0.46663737297058105, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03680173680186272, |
|
"rewards/margins": 0.009895754046738148, |
|
"rewards/rejected": -0.04669748991727829, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12585212375458835, |
|
"grad_norm": 2.389888529611206, |
|
"learning_rate": 6.390096504226938e-06, |
|
"log_odds_chosen": 0.3332720696926117, |
|
"log_odds_ratio": -0.629552960395813, |
|
"logits/chosen": -2.765531063079834, |
|
"logits/rejected": -2.7438697814941406, |
|
"logps/chosen": -0.7498644590377808, |
|
"logps/rejected": -0.9586297273635864, |
|
"loss": 0.5424, |
|
"nll_loss": 0.5031455159187317, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03749322146177292, |
|
"rewards/margins": 0.010438265278935432, |
|
"rewards/rejected": -0.0479314923286438, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1363398007341374, |
|
"grad_norm": 2.352563456984363, |
|
"learning_rate": 6.139406135149204e-06, |
|
"log_odds_chosen": 0.22595734894275665, |
|
"log_odds_ratio": -0.6784238219261169, |
|
"logits/chosen": -2.7593860626220703, |
|
"logits/rejected": -2.743048667907715, |
|
"logps/chosen": -0.7811408042907715, |
|
"logps/rejected": -0.9164878726005554, |
|
"loss": 0.5343, |
|
"nll_loss": 0.49365147948265076, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.039057038724422455, |
|
"rewards/margins": 0.006767353508621454, |
|
"rewards/rejected": -0.04582439363002777, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14682747771368643, |
|
"grad_norm": 2.436711404156596, |
|
"learning_rate": 5.916079783099616e-06, |
|
"log_odds_chosen": 0.2472628802061081, |
|
"log_odds_ratio": -0.6597720384597778, |
|
"logits/chosen": -2.6898269653320312, |
|
"logits/rejected": -2.669379711151123, |
|
"logps/chosen": -0.8302755355834961, |
|
"logps/rejected": -0.9775524139404297, |
|
"loss": 0.5262, |
|
"nll_loss": 0.49079251289367676, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.041513778269290924, |
|
"rewards/margins": 0.007363851182162762, |
|
"rewards/rejected": -0.04887763410806656, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15731515469323545, |
|
"grad_norm": 2.622232308829729, |
|
"learning_rate": 5.715476066494083e-06, |
|
"log_odds_chosen": 0.23396515846252441, |
|
"log_odds_ratio": -0.7018890976905823, |
|
"logits/chosen": -2.6906025409698486, |
|
"logits/rejected": -2.685272455215454, |
|
"logps/chosen": -0.8395276069641113, |
|
"logps/rejected": -0.9926843643188477, |
|
"loss": 0.4873, |
|
"nll_loss": 0.4751507639884949, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.041976384818553925, |
|
"rewards/margins": 0.007657832466065884, |
|
"rewards/rejected": -0.04963421821594238, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16780283167278448, |
|
"grad_norm": 2.5349291816098587, |
|
"learning_rate": 5.533985905294663e-06, |
|
"log_odds_chosen": 0.23518291115760803, |
|
"log_odds_ratio": -0.64958655834198, |
|
"logits/chosen": -2.7026143074035645, |
|
"logits/rejected": -2.690053701400757, |
|
"logps/chosen": -0.7785183191299438, |
|
"logps/rejected": -0.9093867540359497, |
|
"loss": 0.5435, |
|
"nll_loss": 0.4887324869632721, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.038925912231206894, |
|
"rewards/margins": 0.006543423049151897, |
|
"rewards/rejected": -0.045469339936971664, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1782905086523335, |
|
"grad_norm": 2.421225073724309, |
|
"learning_rate": 5.368754921931593e-06, |
|
"log_odds_chosen": 0.3210265636444092, |
|
"log_odds_ratio": -0.6400843262672424, |
|
"logits/chosen": -2.7624573707580566, |
|
"logits/rejected": -2.7493152618408203, |
|
"logps/chosen": -0.7663661241531372, |
|
"logps/rejected": -0.9589449763298035, |
|
"loss": 0.5263, |
|
"nll_loss": 0.4972688555717468, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03831830993294716, |
|
"rewards/margins": 0.009628941304981709, |
|
"rewards/rejected": -0.047947246581315994, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18877818563188253, |
|
"grad_norm": 2.413880479048562, |
|
"learning_rate": 5.217491947499509e-06, |
|
"log_odds_chosen": 0.29789280891418457, |
|
"log_odds_ratio": -0.6485607028007507, |
|
"logits/chosen": -2.750358819961548, |
|
"logits/rejected": -2.7341530323028564, |
|
"logps/chosen": -0.8058354258537292, |
|
"logps/rejected": -0.9941579699516296, |
|
"loss": 0.5125, |
|
"nll_loss": 0.4958602488040924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04029177129268646, |
|
"rewards/margins": 0.009416128508746624, |
|
"rewards/rejected": -0.04970790073275566, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19926586261143156, |
|
"grad_norm": 2.6903547627560362, |
|
"learning_rate": 5.078333750770082e-06, |
|
"log_odds_chosen": 0.3165002167224884, |
|
"log_odds_ratio": -0.6190484762191772, |
|
"logits/chosen": -2.766507387161255, |
|
"logits/rejected": -2.747089385986328, |
|
"logps/chosen": -0.8013149499893188, |
|
"logps/rejected": -0.9806981086730957, |
|
"loss": 0.5316, |
|
"nll_loss": 0.5532199740409851, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04006574675440788, |
|
"rewards/margins": 0.008969161659479141, |
|
"rewards/rejected": -0.04903491213917732, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2097535395909806, |
|
"grad_norm": 2.1991852076726754, |
|
"learning_rate": 4.949747468305832e-06, |
|
"log_odds_chosen": 0.33575549721717834, |
|
"log_odds_ratio": -0.651211678981781, |
|
"logits/chosen": -2.7371087074279785, |
|
"logits/rejected": -2.7220566272735596, |
|
"logps/chosen": -0.7840306162834167, |
|
"logps/rejected": -1.0072247982025146, |
|
"loss": 0.5074, |
|
"nll_loss": 0.5064893960952759, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.039201535284519196, |
|
"rewards/margins": 0.011159711517393589, |
|
"rewards/rejected": -0.05036124587059021, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2097535395909806, |
|
"eval_log_odds_chosen": 0.31895044445991516, |
|
"eval_log_odds_ratio": -0.6356511116027832, |
|
"eval_logits/chosen": -2.700209140777588, |
|
"eval_logits/rejected": -2.673612594604492, |
|
"eval_logps/chosen": -0.7611523866653442, |
|
"eval_logps/rejected": -0.9565821290016174, |
|
"eval_loss": 0.5133659839630127, |
|
"eval_nll_loss": 0.47739487886428833, |
|
"eval_rewards/accuracies": 0.6408730149269104, |
|
"eval_rewards/chosen": -0.03805762156844139, |
|
"eval_rewards/margins": 0.009771487675607204, |
|
"eval_rewards/rejected": -0.04782910645008087, |
|
"eval_runtime": 136.4881, |
|
"eval_samples_per_second": 14.609, |
|
"eval_steps_per_second": 0.462, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22024121657052964, |
|
"grad_norm": 2.2979124053363367, |
|
"learning_rate": 4.830458915396479e-06, |
|
"log_odds_chosen": 0.14570581912994385, |
|
"log_odds_ratio": -0.7079066038131714, |
|
"logits/chosen": -2.6945998668670654, |
|
"logits/rejected": -2.693587064743042, |
|
"logps/chosen": -0.7664598226547241, |
|
"logps/rejected": -0.8435371518135071, |
|
"loss": 0.5092, |
|
"nll_loss": 0.47726479172706604, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03832298889756203, |
|
"rewards/margins": 0.003853868693113327, |
|
"rewards/rejected": -0.04217685014009476, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23072889355007867, |
|
"grad_norm": 2.7379211509120998, |
|
"learning_rate": 4.719399037242694e-06, |
|
"log_odds_chosen": 0.2301570177078247, |
|
"log_odds_ratio": -0.6864482164382935, |
|
"logits/chosen": -2.7330780029296875, |
|
"logits/rejected": -2.738948106765747, |
|
"logps/chosen": -0.7607365250587463, |
|
"logps/rejected": -0.902021050453186, |
|
"loss": 0.5025, |
|
"nll_loss": 0.4629960060119629, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.038036830723285675, |
|
"rewards/margins": 0.007064227946102619, |
|
"rewards/rejected": -0.04510105401277542, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2412165705296277, |
|
"grad_norm": 2.3286309701071986, |
|
"learning_rate": 4.615663313770509e-06, |
|
"log_odds_chosen": 0.30348774790763855, |
|
"log_odds_ratio": -0.6618221402168274, |
|
"logits/chosen": -2.681114673614502, |
|
"logits/rejected": -2.680468797683716, |
|
"logps/chosen": -0.8015350103378296, |
|
"logps/rejected": -0.9835436940193176, |
|
"loss": 0.5126, |
|
"nll_loss": 0.47201746702194214, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04007675126194954, |
|
"rewards/margins": 0.009100432507693768, |
|
"rewards/rejected": -0.04917718470096588, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2517042475091767, |
|
"grad_norm": 2.498755216094707, |
|
"learning_rate": 4.51848057057532e-06, |
|
"log_odds_chosen": 0.28177785873413086, |
|
"log_odds_ratio": -0.6470693945884705, |
|
"logits/chosen": -2.7920804023742676, |
|
"logits/rejected": -2.7859511375427246, |
|
"logps/chosen": -0.7856557965278625, |
|
"logps/rejected": -0.9694973826408386, |
|
"loss": 0.5227, |
|
"nll_loss": 0.49716347455978394, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03928279131650925, |
|
"rewards/margins": 0.009192083030939102, |
|
"rewards/rejected": -0.04847487062215805, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26219192448872575, |
|
"grad_norm": 2.5700569103186335, |
|
"learning_rate": 4.427188724235731e-06, |
|
"log_odds_chosen": 0.2942022681236267, |
|
"log_odds_ratio": -0.6677531003952026, |
|
"logits/chosen": -2.761166572570801, |
|
"logits/rejected": -2.763213634490967, |
|
"logps/chosen": -0.77226322889328, |
|
"logps/rejected": -0.9335973858833313, |
|
"loss": 0.4963, |
|
"nll_loss": 0.4665839672088623, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03861316293478012, |
|
"rewards/margins": 0.008066706359386444, |
|
"rewards/rejected": -0.04667987301945686, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2726796014682748, |
|
"grad_norm": 2.5460185754878415, |
|
"learning_rate": 4.341215710622295e-06, |
|
"log_odds_chosen": 0.31073135137557983, |
|
"log_odds_ratio": -0.6524397134780884, |
|
"logits/chosen": -2.721327304840088, |
|
"logits/rejected": -2.711200475692749, |
|
"logps/chosen": -0.7779613137245178, |
|
"logps/rejected": -0.9653064608573914, |
|
"loss": 0.478, |
|
"nll_loss": 0.40727710723876953, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.038898058235645294, |
|
"rewards/margins": 0.009367265738546848, |
|
"rewards/rejected": -0.048265330493450165, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2831672784478238, |
|
"grad_norm": 2.63045792619979, |
|
"learning_rate": 4.260064336151291e-06, |
|
"log_odds_chosen": 0.2511529326438904, |
|
"log_odds_ratio": -0.6676173806190491, |
|
"logits/chosen": -2.757246255874634, |
|
"logits/rejected": -2.7497289180755615, |
|
"logps/chosen": -0.8231350779533386, |
|
"logps/rejected": -0.9868103265762329, |
|
"loss": 0.5115, |
|
"nll_loss": 0.48606061935424805, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04115675389766693, |
|
"rewards/margins": 0.008183758705854416, |
|
"rewards/rejected": -0.04934050887823105, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29365495542737285, |
|
"grad_norm": 2.074128745122309, |
|
"learning_rate": 4.183300132670378e-06, |
|
"log_odds_chosen": 0.27424556016921997, |
|
"log_odds_ratio": -0.6629655361175537, |
|
"logits/chosen": -2.694702625274658, |
|
"logits/rejected": -2.695335626602173, |
|
"logps/chosen": -0.8050632476806641, |
|
"logps/rejected": -0.9577094912528992, |
|
"loss": 0.4891, |
|
"nll_loss": 0.4250563681125641, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0402531661093235, |
|
"rewards/margins": 0.007632312830537558, |
|
"rewards/rejected": -0.0478854700922966, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30414263240692185, |
|
"grad_norm": 2.818316169672816, |
|
"learning_rate": 4.110541536602925e-06, |
|
"log_odds_chosen": 0.40846139192581177, |
|
"log_odds_ratio": -0.6159543991088867, |
|
"logits/chosen": -2.689415216445923, |
|
"logits/rejected": -2.6885359287261963, |
|
"logps/chosen": -0.729388952255249, |
|
"logps/rejected": -0.9667993783950806, |
|
"loss": 0.5032, |
|
"nll_loss": 0.43972086906433105, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03646944463253021, |
|
"rewards/margins": 0.011870523914694786, |
|
"rewards/rejected": -0.04833997040987015, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3146303093864709, |
|
"grad_norm": 2.6319487345124495, |
|
"learning_rate": 4.0414518843273805e-06, |
|
"log_odds_chosen": 0.2938074767589569, |
|
"log_odds_ratio": -0.675439178943634, |
|
"logits/chosen": -2.746011257171631, |
|
"logits/rejected": -2.719851016998291, |
|
"logps/chosen": -0.7730266451835632, |
|
"logps/rejected": -0.9800483584403992, |
|
"loss": 0.5265, |
|
"nll_loss": 0.45733898878097534, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03865132853388786, |
|
"rewards/margins": 0.01035108882933855, |
|
"rewards/rejected": -0.04900241643190384, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3146303093864709, |
|
"eval_log_odds_chosen": 0.32782861590385437, |
|
"eval_log_odds_ratio": -0.6374222040176392, |
|
"eval_logits/chosen": -2.75937819480896, |
|
"eval_logits/rejected": -2.731720209121704, |
|
"eval_logps/chosen": -0.7587753534317017, |
|
"eval_logps/rejected": -0.9572128653526306, |
|
"eval_loss": 0.5012248754501343, |
|
"eval_nll_loss": 0.4652516841888428, |
|
"eval_rewards/accuracies": 0.6329365372657776, |
|
"eval_rewards/chosen": -0.037938766181468964, |
|
"eval_rewards/margins": 0.009921879507601261, |
|
"eval_rewards/rejected": -0.04786064475774765, |
|
"eval_runtime": 143.3287, |
|
"eval_samples_per_second": 13.912, |
|
"eval_steps_per_second": 0.44, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3251179863660199, |
|
"grad_norm": 2.303425231373124, |
|
"learning_rate": 3.975732839729454e-06, |
|
"log_odds_chosen": 0.23192088305950165, |
|
"log_odds_ratio": -0.6818796396255493, |
|
"logits/chosen": -2.7074503898620605, |
|
"logits/rejected": -2.673837661743164, |
|
"logps/chosen": -0.7971353530883789, |
|
"logps/rejected": -0.9301053285598755, |
|
"loss": 0.5302, |
|
"nll_loss": 0.48708105087280273, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.039856769144535065, |
|
"rewards/margins": 0.006648494862020016, |
|
"rewards/rejected": -0.046505264937877655, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33560566334556896, |
|
"grad_norm": 2.5118343787899735, |
|
"learning_rate": 3.913118960624632e-06, |
|
"log_odds_chosen": 0.3314226567745209, |
|
"log_odds_ratio": -0.6417438387870789, |
|
"logits/chosen": -2.7188448905944824, |
|
"logits/rejected": -2.7005674839019775, |
|
"logps/chosen": -0.7902022004127502, |
|
"logps/rejected": -0.9723421335220337, |
|
"loss": 0.4738, |
|
"nll_loss": 0.44032588601112366, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.03951011225581169, |
|
"rewards/margins": 0.009106996469199657, |
|
"rewards/rejected": -0.048617102205753326, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34609334032511796, |
|
"grad_norm": 2.490550595224948, |
|
"learning_rate": 3.853373177942262e-06, |
|
"log_odds_chosen": 0.29606467485427856, |
|
"log_odds_ratio": -0.6935312151908875, |
|
"logits/chosen": -2.6737678050994873, |
|
"logits/rejected": -2.6778550148010254, |
|
"logps/chosen": -0.7957532405853271, |
|
"logps/rejected": -0.9609133005142212, |
|
"loss": 0.5015, |
|
"nll_loss": 0.48406466841697693, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.039787657558918, |
|
"rewards/margins": 0.008258005604147911, |
|
"rewards/rejected": -0.04804566502571106, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.356581017304667, |
|
"grad_norm": 2.455512863241718, |
|
"learning_rate": 3.796283011826483e-06, |
|
"log_odds_chosen": 0.2068498581647873, |
|
"log_odds_ratio": -0.6988531947135925, |
|
"logits/chosen": -2.656428575515747, |
|
"logits/rejected": -2.67673659324646, |
|
"logps/chosen": -0.7645977139472961, |
|
"logps/rejected": -0.9020528793334961, |
|
"loss": 0.5161, |
|
"nll_loss": 0.46574801206588745, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.038229890167713165, |
|
"rewards/margins": 0.006872760597616434, |
|
"rewards/rejected": -0.04510264843702316, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36706869428421607, |
|
"grad_norm": 2.3906859020418243, |
|
"learning_rate": 3.7416573867739415e-06, |
|
"log_odds_chosen": 0.32536062598228455, |
|
"log_odds_ratio": -0.6628221273422241, |
|
"logits/chosen": -2.7076945304870605, |
|
"logits/rejected": -2.6763672828674316, |
|
"logps/chosen": -0.7698060274124146, |
|
"logps/rejected": -0.9597750902175903, |
|
"loss": 0.4925, |
|
"nll_loss": 0.468719482421875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03849030286073685, |
|
"rewards/margins": 0.009498453699052334, |
|
"rewards/rejected": -0.047988757491111755, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37755637126376507, |
|
"grad_norm": 2.1635991647413824, |
|
"learning_rate": 3.689323936863109e-06, |
|
"log_odds_chosen": 0.4051761031150818, |
|
"log_odds_ratio": -0.6067623496055603, |
|
"logits/chosen": -2.6350862979888916, |
|
"logits/rejected": -2.635108232498169, |
|
"logps/chosen": -0.768888533115387, |
|
"logps/rejected": -1.0009427070617676, |
|
"loss": 0.5009, |
|
"nll_loss": 0.45801717042922974, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03844442963600159, |
|
"rewards/margins": 0.011602705344557762, |
|
"rewards/rejected": -0.0500471368432045, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3880440482433141, |
|
"grad_norm": 2.3887899088845037, |
|
"learning_rate": 3.6391267143702543e-06, |
|
"log_odds_chosen": 0.4100113809108734, |
|
"log_odds_ratio": -0.6096552014350891, |
|
"logits/chosen": -2.707559108734131, |
|
"logits/rejected": -2.6750998497009277, |
|
"logps/chosen": -0.7636415362358093, |
|
"logps/rejected": -1.0189807415008545, |
|
"loss": 0.4701, |
|
"nll_loss": 0.45124197006225586, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.038182083517313004, |
|
"rewards/margins": 0.012766959145665169, |
|
"rewards/rejected": -0.050949037075042725, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3985317252228631, |
|
"grad_norm": 2.5794228625801225, |
|
"learning_rate": 3.5909242322980396e-06, |
|
"log_odds_chosen": 0.4701065421104431, |
|
"log_odds_ratio": -0.5877975821495056, |
|
"logits/chosen": -2.7147293090820312, |
|
"logits/rejected": -2.700373888015747, |
|
"logps/chosen": -0.7640558481216431, |
|
"logps/rejected": -1.0210450887680054, |
|
"loss": 0.4866, |
|
"nll_loss": 0.4662235379219055, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03820279613137245, |
|
"rewards/margins": 0.012849463149905205, |
|
"rewards/rejected": -0.05105225369334221, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4090194022024122, |
|
"grad_norm": 2.2524505662506007, |
|
"learning_rate": 3.544587784792833e-06, |
|
"log_odds_chosen": 0.15358106791973114, |
|
"log_odds_ratio": -0.6960343718528748, |
|
"logits/chosen": -2.6469695568084717, |
|
"logits/rejected": -2.6523191928863525, |
|
"logps/chosen": -0.8073819875717163, |
|
"logps/rejected": -0.9069193005561829, |
|
"loss": 0.5052, |
|
"nll_loss": 0.48589834570884705, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04036910459399223, |
|
"rewards/margins": 0.004976863972842693, |
|
"rewards/rejected": -0.0453459694981575, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4195070791819612, |
|
"grad_norm": 2.151733711875547, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.3257240355014801, |
|
"log_odds_ratio": -0.6618676781654358, |
|
"logits/chosen": -2.5556883811950684, |
|
"logits/rejected": -2.5709598064422607, |
|
"logps/chosen": -0.8370679616928101, |
|
"logps/rejected": -1.0387462377548218, |
|
"loss": 0.5194, |
|
"nll_loss": 0.471977561712265, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0418534018099308, |
|
"rewards/margins": 0.010083912871778011, |
|
"rewards/rejected": -0.05193731188774109, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4195070791819612, |
|
"eval_log_odds_chosen": 0.3606604039669037, |
|
"eval_log_odds_ratio": -0.6283872127532959, |
|
"eval_logits/chosen": -2.6973965167999268, |
|
"eval_logits/rejected": -2.664045572280884, |
|
"eval_logps/chosen": -0.7416918277740479, |
|
"eval_logps/rejected": -0.9558579921722412, |
|
"eval_loss": 0.4911641776561737, |
|
"eval_nll_loss": 0.455983966588974, |
|
"eval_rewards/accuracies": 0.6428571343421936, |
|
"eval_rewards/chosen": -0.03708459436893463, |
|
"eval_rewards/margins": 0.010708308778703213, |
|
"eval_rewards/rejected": -0.04779290035367012, |
|
"eval_runtime": 137.3177, |
|
"eval_samples_per_second": 14.521, |
|
"eval_steps_per_second": 0.459, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4299947561615102, |
|
"grad_norm": 2.234889439349526, |
|
"learning_rate": 3.457053588273564e-06, |
|
"log_odds_chosen": 0.22749297320842743, |
|
"log_odds_ratio": -0.6977051496505737, |
|
"logits/chosen": -2.6853058338165283, |
|
"logits/rejected": -2.646806001663208, |
|
"logps/chosen": -0.7714927792549133, |
|
"logps/rejected": -0.9221086502075195, |
|
"loss": 0.4951, |
|
"nll_loss": 0.43608254194259644, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.038574643433094025, |
|
"rewards/margins": 0.00753078842535615, |
|
"rewards/rejected": -0.04610542953014374, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4404824331410593, |
|
"grad_norm": 2.0285171917411766, |
|
"learning_rate": 3.4156502553198657e-06, |
|
"log_odds_chosen": 0.3810080885887146, |
|
"log_odds_ratio": -0.6389856338500977, |
|
"logits/chosen": -2.6045069694519043, |
|
"logits/rejected": -2.621366024017334, |
|
"logps/chosen": -0.7517096996307373, |
|
"logps/rejected": -0.9603899121284485, |
|
"loss": 0.4852, |
|
"nll_loss": 0.42949992418289185, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.037585485726594925, |
|
"rewards/margins": 0.01043400727212429, |
|
"rewards/rejected": -0.048019491136074066, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4509701101206083, |
|
"grad_norm": 2.508500818711511, |
|
"learning_rate": 3.375699755192885e-06, |
|
"log_odds_chosen": 0.3060067594051361, |
|
"log_odds_ratio": -0.6428481936454773, |
|
"logits/chosen": -2.6315762996673584, |
|
"logits/rejected": -2.614450216293335, |
|
"logps/chosen": -0.7450464367866516, |
|
"logps/rejected": -0.9214862585067749, |
|
"loss": 0.5054, |
|
"nll_loss": 0.4888521730899811, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03725232556462288, |
|
"rewards/margins": 0.008821990340948105, |
|
"rewards/rejected": -0.046074315905570984, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46145778710015734, |
|
"grad_norm": 2.209049048242546, |
|
"learning_rate": 3.337119062359573e-06, |
|
"log_odds_chosen": 0.2785058617591858, |
|
"log_odds_ratio": -0.6411095857620239, |
|
"logits/chosen": -2.6460564136505127, |
|
"logits/rejected": -2.6254661083221436, |
|
"logps/chosen": -0.7616952061653137, |
|
"logps/rejected": -0.9235254526138306, |
|
"loss": 0.5024, |
|
"nll_loss": 0.46845754981040955, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.038084764033555984, |
|
"rewards/margins": 0.008091514930129051, |
|
"rewards/rejected": -0.04617627337574959, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47194546407970633, |
|
"grad_norm": 2.0098987626040574, |
|
"learning_rate": 3.2998316455372222e-06, |
|
"log_odds_chosen": 0.37491756677627563, |
|
"log_odds_ratio": -0.648253321647644, |
|
"logits/chosen": -2.6618144512176514, |
|
"logits/rejected": -2.643500566482544, |
|
"logps/chosen": -0.7266156673431396, |
|
"logps/rejected": -0.9600238800048828, |
|
"loss": 0.4828, |
|
"nll_loss": 0.4462718069553375, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03633078932762146, |
|
"rewards/margins": 0.01167040504515171, |
|
"rewards/rejected": -0.04800119251012802, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4824331410592554, |
|
"grad_norm": 2.3085421987869785, |
|
"learning_rate": 3.263766828841098e-06, |
|
"log_odds_chosen": 0.2140667885541916, |
|
"log_odds_ratio": -0.6971082091331482, |
|
"logits/chosen": -2.6545071601867676, |
|
"logits/rejected": -2.6458332538604736, |
|
"logps/chosen": -0.8354724049568176, |
|
"logps/rejected": -0.9942563772201538, |
|
"loss": 0.4871, |
|
"nll_loss": 0.48358869552612305, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04177362099289894, |
|
"rewards/margins": 0.007939198985695839, |
|
"rewards/rejected": -0.04971281811594963, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4929208180388044, |
|
"grad_norm": 2.58413257051123, |
|
"learning_rate": 3.2288592281010976e-06, |
|
"log_odds_chosen": 0.30273735523223877, |
|
"log_odds_ratio": -0.6744717359542847, |
|
"logits/chosen": -2.6462035179138184, |
|
"logits/rejected": -2.6307010650634766, |
|
"logps/chosen": -0.7793454527854919, |
|
"logps/rejected": -0.9655405879020691, |
|
"loss": 0.4932, |
|
"nll_loss": 0.4597246050834656, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.038967277854681015, |
|
"rewards/margins": 0.009309760294854641, |
|
"rewards/rejected": -0.048277031630277634, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5034084950183534, |
|
"grad_norm": 2.275276830168767, |
|
"learning_rate": 3.195048252113469e-06, |
|
"log_odds_chosen": 0.25159093737602234, |
|
"log_odds_ratio": -0.6775428056716919, |
|
"logits/chosen": -2.6590356826782227, |
|
"logits/rejected": -2.649465560913086, |
|
"logps/chosen": -0.7499970197677612, |
|
"logps/rejected": -0.8869997262954712, |
|
"loss": 0.4713, |
|
"nll_loss": 0.4634857177734375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.03749985247850418, |
|
"rewards/margins": 0.006850133184343576, |
|
"rewards/rejected": -0.04434997960925102, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5138961719979025, |
|
"grad_norm": 2.134835184101472, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 0.2592507004737854, |
|
"log_odds_ratio": -0.6677337884902954, |
|
"logits/chosen": -2.638939619064331, |
|
"logits/rejected": -2.5990116596221924, |
|
"logps/chosen": -0.8319272994995117, |
|
"logps/rejected": -0.9564205408096313, |
|
"loss": 0.4941, |
|
"nll_loss": 0.4587552547454834, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.041596364229917526, |
|
"rewards/margins": 0.006224661134183407, |
|
"rewards/rejected": -0.04782102257013321, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5243838489774515, |
|
"grad_norm": 2.3707837495895494, |
|
"learning_rate": 3.1304951684997056e-06, |
|
"log_odds_chosen": 0.25932976603507996, |
|
"log_odds_ratio": -0.6785644292831421, |
|
"logits/chosen": -2.690480947494507, |
|
"logits/rejected": -2.6417829990386963, |
|
"logps/chosen": -0.7875474095344543, |
|
"logps/rejected": -0.9345542788505554, |
|
"loss": 0.5008, |
|
"nll_loss": 0.47637850046157837, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0393773689866066, |
|
"rewards/margins": 0.007350355386734009, |
|
"rewards/rejected": -0.04672772437334061, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5243838489774515, |
|
"eval_log_odds_chosen": 0.3873175382614136, |
|
"eval_log_odds_ratio": -0.6208989024162292, |
|
"eval_logits/chosen": -2.62943434715271, |
|
"eval_logits/rejected": -2.5956878662109375, |
|
"eval_logps/chosen": -0.7454984188079834, |
|
"eval_logps/rejected": -0.9786220192909241, |
|
"eval_loss": 0.4847143888473511, |
|
"eval_nll_loss": 0.44987979531288147, |
|
"eval_rewards/accuracies": 0.6507936716079712, |
|
"eval_rewards/chosen": -0.03727491945028305, |
|
"eval_rewards/margins": 0.011656176298856735, |
|
"eval_rewards/rejected": -0.04893109202384949, |
|
"eval_runtime": 138.4279, |
|
"eval_samples_per_second": 14.405, |
|
"eval_steps_per_second": 0.455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5348715259570005, |
|
"grad_norm": 1.9535668554599182, |
|
"learning_rate": 3.0996520993903337e-06, |
|
"log_odds_chosen": 0.32442158460617065, |
|
"log_odds_ratio": -0.6475775837898254, |
|
"logits/chosen": -2.6708967685699463, |
|
"logits/rejected": -2.649402141571045, |
|
"logps/chosen": -0.7484665513038635, |
|
"logps/rejected": -0.9413715600967407, |
|
"loss": 0.4786, |
|
"nll_loss": 0.48495978116989136, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03742332383990288, |
|
"rewards/margins": 0.00964525155723095, |
|
"rewards/rejected": -0.047068577259778976, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5453592029365496, |
|
"grad_norm": 1.9645096615425393, |
|
"learning_rate": 3.069703067574602e-06, |
|
"log_odds_chosen": 0.2872227430343628, |
|
"log_odds_ratio": -0.6613379716873169, |
|
"logits/chosen": -2.6058475971221924, |
|
"logits/rejected": -2.577051877975464, |
|
"logps/chosen": -0.8017369508743286, |
|
"logps/rejected": -0.9904945492744446, |
|
"loss": 0.4897, |
|
"nll_loss": 0.4331512451171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04008684307336807, |
|
"rewards/margins": 0.009437882341444492, |
|
"rewards/rejected": -0.04952472820878029, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5558468799160986, |
|
"grad_norm": 1.9526548988230616, |
|
"learning_rate": 3.0406056993414858e-06, |
|
"log_odds_chosen": 0.42971426248550415, |
|
"log_odds_ratio": -0.641510009765625, |
|
"logits/chosen": -2.6119577884674072, |
|
"logits/rejected": -2.5998666286468506, |
|
"logps/chosen": -0.7399083375930786, |
|
"logps/rejected": -1.0167956352233887, |
|
"loss": 0.4914, |
|
"nll_loss": 0.41224998235702515, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03699541836977005, |
|
"rewards/margins": 0.013844366185367107, |
|
"rewards/rejected": -0.050839781761169434, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5663345568956476, |
|
"grad_norm": 1.9884035673972174, |
|
"learning_rate": 3.012320380383546e-06, |
|
"log_odds_chosen": 0.21374063193798065, |
|
"log_odds_ratio": -0.6833196878433228, |
|
"logits/chosen": -2.6167845726013184, |
|
"logits/rejected": -2.599025011062622, |
|
"logps/chosen": -0.7700163125991821, |
|
"logps/rejected": -0.890272319316864, |
|
"loss": 0.5043, |
|
"nll_loss": 0.47903138399124146, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.03850081190466881, |
|
"rewards/margins": 0.006012803874909878, |
|
"rewards/rejected": -0.04451362043619156, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5768222338751966, |
|
"grad_norm": 2.186607185927277, |
|
"learning_rate": 2.9848100289785456e-06, |
|
"log_odds_chosen": 0.45103105902671814, |
|
"log_odds_ratio": -0.6082615852355957, |
|
"logits/chosen": -2.6567091941833496, |
|
"logits/rejected": -2.609574794769287, |
|
"logps/chosen": -0.7585142850875854, |
|
"logps/rejected": -1.0295699834823608, |
|
"loss": 0.4918, |
|
"nll_loss": 0.48958802223205566, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03792571276426315, |
|
"rewards/margins": 0.01355278305709362, |
|
"rewards/rejected": -0.05147849768400192, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5873099108547457, |
|
"grad_norm": 2.1145358879634872, |
|
"learning_rate": 2.958039891549808e-06, |
|
"log_odds_chosen": 0.2827582359313965, |
|
"log_odds_ratio": -0.6594165563583374, |
|
"logits/chosen": -2.6023669242858887, |
|
"logits/rejected": -2.574957847595215, |
|
"logps/chosen": -0.7867820858955383, |
|
"logps/rejected": -0.9555041193962097, |
|
"loss": 0.4774, |
|
"nll_loss": 0.45714274048805237, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0393391028046608, |
|
"rewards/margins": 0.008436103351414204, |
|
"rewards/rejected": -0.047775208950042725, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5977975878342947, |
|
"grad_norm": 2.3757421806444343, |
|
"learning_rate": 2.9319773580418683e-06, |
|
"log_odds_chosen": 0.2533697485923767, |
|
"log_odds_ratio": -0.6926103830337524, |
|
"logits/chosen": -2.662379264831543, |
|
"logits/rejected": -2.6397509574890137, |
|
"logps/chosen": -0.7862294316291809, |
|
"logps/rejected": -0.9584717750549316, |
|
"loss": 0.463, |
|
"nll_loss": 0.4819509983062744, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.039311472326517105, |
|
"rewards/margins": 0.00861212145537138, |
|
"rewards/rejected": -0.04792358726263046, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6082852648138437, |
|
"grad_norm": 2.172213103107974, |
|
"learning_rate": 2.906591794880899e-06, |
|
"log_odds_chosen": 0.3392280340194702, |
|
"log_odds_ratio": -0.6386864185333252, |
|
"logits/chosen": -2.6814630031585693, |
|
"logits/rejected": -2.6795036792755127, |
|
"logps/chosen": -0.7794855833053589, |
|
"logps/rejected": -1.0036094188690186, |
|
"loss": 0.4996, |
|
"nll_loss": 0.4401033818721771, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.038974277675151825, |
|
"rewards/margins": 0.011206192895770073, |
|
"rewards/rejected": -0.05018047243356705, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6187729417933928, |
|
"grad_norm": 2.0671922387658377, |
|
"learning_rate": 2.8818543935741638e-06, |
|
"log_odds_chosen": 0.3985132575035095, |
|
"log_odds_ratio": -0.6514524221420288, |
|
"logits/chosen": -2.6682472229003906, |
|
"logits/rejected": -2.679994821548462, |
|
"logps/chosen": -0.7318185567855835, |
|
"logps/rejected": -0.9744182825088501, |
|
"loss": 0.4678, |
|
"nll_loss": 0.49909916520118713, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03659093379974365, |
|
"rewards/margins": 0.012129982002079487, |
|
"rewards/rejected": -0.048720914870500565, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6292606187729418, |
|
"grad_norm": 2.1967713493078604, |
|
"learning_rate": 2.8577380332470414e-06, |
|
"log_odds_chosen": 0.35757365822792053, |
|
"log_odds_ratio": -0.6395149230957031, |
|
"logits/chosen": -2.663159132003784, |
|
"logits/rejected": -2.649722099304199, |
|
"logps/chosen": -0.7385202646255493, |
|
"logps/rejected": -0.9542753100395203, |
|
"loss": 0.4725, |
|
"nll_loss": 0.4449065625667572, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03692600876092911, |
|
"rewards/margins": 0.010787753388285637, |
|
"rewards/rejected": -0.04771377146244049, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6292606187729418, |
|
"eval_log_odds_chosen": 0.35674363374710083, |
|
"eval_log_odds_ratio": -0.631996214389801, |
|
"eval_logits/chosen": -2.647721767425537, |
|
"eval_logits/rejected": -2.6147334575653076, |
|
"eval_logps/chosen": -0.7248181104660034, |
|
"eval_logps/rejected": -0.9394434690475464, |
|
"eval_loss": 0.4794267416000366, |
|
"eval_nll_loss": 0.44346076250076294, |
|
"eval_rewards/accuracies": 0.6349206566810608, |
|
"eval_rewards/chosen": -0.03624090179800987, |
|
"eval_rewards/margins": 0.01073127705603838, |
|
"eval_rewards/rejected": -0.046972181648015976, |
|
"eval_runtime": 137.9534, |
|
"eval_samples_per_second": 14.454, |
|
"eval_steps_per_second": 0.457, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6397482957524908, |
|
"grad_norm": 2.2292431160793216, |
|
"learning_rate": 2.834217155626206e-06, |
|
"log_odds_chosen": 0.23770160973072052, |
|
"log_odds_ratio": -0.6840949654579163, |
|
"logits/chosen": -2.5699760913848877, |
|
"logits/rejected": -2.5653116703033447, |
|
"logps/chosen": -0.7841805219650269, |
|
"logps/rejected": -0.9241795539855957, |
|
"loss": 0.4832, |
|
"nll_loss": 0.4458464980125427, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.039209023118019104, |
|
"rewards/margins": 0.006999955512583256, |
|
"rewards/rejected": -0.046208981424570084, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6502359727320398, |
|
"grad_norm": 2.2910730765164247, |
|
"learning_rate": 2.811267651158746e-06, |
|
"log_odds_chosen": 0.21747846901416779, |
|
"log_odds_ratio": -0.6945130825042725, |
|
"logits/chosen": -2.724179744720459, |
|
"logits/rejected": -2.691539764404297, |
|
"logps/chosen": -0.7931413054466248, |
|
"logps/rejected": -0.943394660949707, |
|
"loss": 0.487, |
|
"nll_loss": 0.4727168679237366, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03965706750750542, |
|
"rewards/margins": 0.007512666285037994, |
|
"rewards/rejected": -0.04716973379254341, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6607236497115889, |
|
"grad_norm": 2.2609308397995616, |
|
"learning_rate": 2.788866755113585e-06, |
|
"log_odds_chosen": 0.29844212532043457, |
|
"log_odds_ratio": -0.690433919429779, |
|
"logits/chosen": -2.718883991241455, |
|
"logits/rejected": -2.7198710441589355, |
|
"logps/chosen": -0.7700183391571045, |
|
"logps/rejected": -0.9475862383842468, |
|
"loss": 0.4893, |
|
"nll_loss": 0.48064035177230835, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.038500916212797165, |
|
"rewards/margins": 0.00887839961796999, |
|
"rewards/rejected": -0.04737931489944458, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6712113266911379, |
|
"grad_norm": 2.6649009571693107, |
|
"learning_rate": 2.7669929526473316e-06, |
|
"log_odds_chosen": 0.4156903326511383, |
|
"log_odds_ratio": -0.6158550977706909, |
|
"logits/chosen": -2.7182445526123047, |
|
"logits/rejected": -2.6942853927612305, |
|
"logps/chosen": -0.7768423557281494, |
|
"logps/rejected": -1.0251133441925049, |
|
"loss": 0.4711, |
|
"nll_loss": 0.41822823882102966, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03884211927652359, |
|
"rewards/margins": 0.012413550168275833, |
|
"rewards/rejected": -0.051255665719509125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6816990036706869, |
|
"grad_norm": 2.0343884705834268, |
|
"learning_rate": 2.745625891934577e-06, |
|
"log_odds_chosen": 0.23737592995166779, |
|
"log_odds_ratio": -0.6948662996292114, |
|
"logits/chosen": -2.74450421333313, |
|
"logits/rejected": -2.7467565536499023, |
|
"logps/chosen": -0.7428392767906189, |
|
"logps/rejected": -0.8866605758666992, |
|
"loss": 0.4898, |
|
"nll_loss": 0.4688393175601959, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03714196756482124, |
|
"rewards/margins": 0.00719106663018465, |
|
"rewards/rejected": -0.04433303326368332, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6921866806502359, |
|
"grad_norm": 2.0637062426142556, |
|
"learning_rate": 2.7247463045653303e-06, |
|
"log_odds_chosen": 0.36518558859825134, |
|
"log_odds_ratio": -0.6426655650138855, |
|
"logits/chosen": -2.7563986778259277, |
|
"logits/rejected": -2.74312424659729, |
|
"logps/chosen": -0.7905346751213074, |
|
"logps/rejected": -1.0196200609207153, |
|
"loss": 0.4859, |
|
"nll_loss": 0.4443667531013489, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03952673822641373, |
|
"rewards/margins": 0.011454259976744652, |
|
"rewards/rejected": -0.05098099634051323, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.702674357629785, |
|
"grad_norm": 1.992995386941069, |
|
"learning_rate": 2.704335932501895e-06, |
|
"log_odds_chosen": 0.490286260843277, |
|
"log_odds_ratio": -0.6087489724159241, |
|
"logits/chosen": -2.72459077835083, |
|
"logits/rejected": -2.7280569076538086, |
|
"logps/chosen": -0.7373065948486328, |
|
"logps/rejected": -1.0489108562469482, |
|
"loss": 0.4831, |
|
"nll_loss": 0.42895203828811646, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03686532750725746, |
|
"rewards/margins": 0.01558021642267704, |
|
"rewards/rejected": -0.05244554951786995, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.713162034609334, |
|
"grad_norm": 2.8251895935339886, |
|
"learning_rate": 2.6843774609657963e-06, |
|
"log_odds_chosen": 0.3856969177722931, |
|
"log_odds_ratio": -0.6318041086196899, |
|
"logits/chosen": -2.7299182415008545, |
|
"logits/rejected": -2.699131488800049, |
|
"logps/chosen": -0.7913435697555542, |
|
"logps/rejected": -1.0201423168182373, |
|
"loss": 0.4669, |
|
"nll_loss": 0.45303601026535034, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03956717997789383, |
|
"rewards/margins": 0.011439927853643894, |
|
"rewards/rejected": -0.05100711062550545, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.723649711588883, |
|
"grad_norm": 2.3126283290431457, |
|
"learning_rate": 2.6648544566940834e-06, |
|
"log_odds_chosen": 0.21687667071819305, |
|
"log_odds_ratio": -0.7159269452095032, |
|
"logits/chosen": -2.7354016304016113, |
|
"logits/rejected": -2.722414493560791, |
|
"logps/chosen": -0.7863477468490601, |
|
"logps/rejected": -0.9429599046707153, |
|
"loss": 0.4903, |
|
"nll_loss": 0.5047397613525391, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03931739181280136, |
|
"rewards/margins": 0.007830603048205376, |
|
"rewards/rejected": -0.04714799299836159, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7341373885684321, |
|
"grad_norm": 2.323029961728673, |
|
"learning_rate": 2.6457513110645903e-06, |
|
"log_odds_chosen": 0.342260479927063, |
|
"log_odds_ratio": -0.6298097968101501, |
|
"logits/chosen": -2.679320812225342, |
|
"logits/rejected": -2.6582911014556885, |
|
"logps/chosen": -0.7469282746315002, |
|
"logps/rejected": -0.9541714787483215, |
|
"loss": 0.4875, |
|
"nll_loss": 0.4991229474544525, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03734641522169113, |
|
"rewards/margins": 0.010362156666815281, |
|
"rewards/rejected": -0.04770857095718384, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7341373885684321, |
|
"eval_log_odds_chosen": 0.417955607175827, |
|
"eval_log_odds_ratio": -0.6158252358436584, |
|
"eval_logits/chosen": -2.7213134765625, |
|
"eval_logits/rejected": -2.691012144088745, |
|
"eval_logps/chosen": -0.7365118861198425, |
|
"eval_logps/rejected": -0.9954525232315063, |
|
"eval_loss": 0.47666841745376587, |
|
"eval_nll_loss": 0.441643089056015, |
|
"eval_rewards/accuracies": 0.6408730149269104, |
|
"eval_rewards/chosen": -0.036825601011514664, |
|
"eval_rewards/margins": 0.01294703409075737, |
|
"eval_rewards/rejected": -0.049772635102272034, |
|
"eval_runtime": 140.8809, |
|
"eval_samples_per_second": 14.154, |
|
"eval_steps_per_second": 0.447, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7446250655479811, |
|
"grad_norm": 2.2253143227977055, |
|
"learning_rate": 2.627053187642805e-06, |
|
"log_odds_chosen": 0.31003057956695557, |
|
"log_odds_ratio": -0.6495457887649536, |
|
"logits/chosen": -2.7463955879211426, |
|
"logits/rejected": -2.7364678382873535, |
|
"logps/chosen": -0.7539780139923096, |
|
"logps/rejected": -0.9565252065658569, |
|
"loss": 0.4819, |
|
"nll_loss": 0.4394974708557129, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0376988984644413, |
|
"rewards/margins": 0.010127360001206398, |
|
"rewards/rejected": -0.047826264053583145, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7551127425275301, |
|
"grad_norm": 1.9919741933282713, |
|
"learning_rate": 2.6087459737497545e-06, |
|
"log_odds_chosen": 0.40133896470069885, |
|
"log_odds_ratio": -0.6439169645309448, |
|
"logits/chosen": -2.7264726161956787, |
|
"logits/rejected": -2.7285008430480957, |
|
"logps/chosen": -0.7132266759872437, |
|
"logps/rejected": -0.9523170590400696, |
|
"loss": 0.4904, |
|
"nll_loss": 0.42442673444747925, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03566133230924606, |
|
"rewards/margins": 0.011954517103731632, |
|
"rewards/rejected": -0.04761584475636482, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7656004195070791, |
|
"grad_norm": 2.5524316814232657, |
|
"learning_rate": 2.5908162356916185e-06, |
|
"log_odds_chosen": 0.1571163833141327, |
|
"log_odds_ratio": -0.7166911363601685, |
|
"logits/chosen": -2.805894613265991, |
|
"logits/rejected": -2.7996468544006348, |
|
"logps/chosen": -0.7540133595466614, |
|
"logps/rejected": -0.8382581472396851, |
|
"loss": 0.4937, |
|
"nll_loss": 0.4598192572593689, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03770066425204277, |
|
"rewards/margins": 0.004212243482470512, |
|
"rewards/rejected": -0.041912905871868134, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7760880964866282, |
|
"grad_norm": 2.1353118528501684, |
|
"learning_rate": 2.5732511773283276e-06, |
|
"log_odds_chosen": 0.35292255878448486, |
|
"log_odds_ratio": -0.625573992729187, |
|
"logits/chosen": -2.8535656929016113, |
|
"logits/rejected": -2.8482494354248047, |
|
"logps/chosen": -0.7254922389984131, |
|
"logps/rejected": -0.9415895342826843, |
|
"loss": 0.4903, |
|
"nll_loss": 0.4391508996486664, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.036274611949920654, |
|
"rewards/margins": 0.010804859921336174, |
|
"rewards/rejected": -0.04707947373390198, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7865757734661772, |
|
"grad_norm": 2.076299852744321, |
|
"learning_rate": 2.556038601690775e-06, |
|
"log_odds_chosen": 0.27716293931007385, |
|
"log_odds_ratio": -0.6662799119949341, |
|
"logits/chosen": -2.8263370990753174, |
|
"logits/rejected": -2.8200631141662598, |
|
"logps/chosen": -0.7884274125099182, |
|
"logps/rejected": -0.9425498843193054, |
|
"loss": 0.5033, |
|
"nll_loss": 0.460857093334198, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03942137211561203, |
|
"rewards/margins": 0.00770611921325326, |
|
"rewards/rejected": -0.04712748900055885, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7970634504457262, |
|
"grad_norm": 2.144911846283459, |
|
"learning_rate": 2.539166875385041e-06, |
|
"log_odds_chosen": 0.28878992795944214, |
|
"log_odds_ratio": -0.6523956060409546, |
|
"logits/chosen": -2.827876567840576, |
|
"logits/rejected": -2.818580389022827, |
|
"logps/chosen": -0.7346550226211548, |
|
"logps/rejected": -0.9111967086791992, |
|
"loss": 0.4719, |
|
"nll_loss": 0.3698672354221344, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0367327556014061, |
|
"rewards/margins": 0.008827080950140953, |
|
"rewards/rejected": -0.0455598309636116, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8075511274252754, |
|
"grad_norm": 2.457074288822972, |
|
"learning_rate": 2.522624895547565e-06, |
|
"log_odds_chosen": 0.2632114589214325, |
|
"log_odds_ratio": -0.6844597458839417, |
|
"logits/chosen": -2.785381317138672, |
|
"logits/rejected": -2.7871222496032715, |
|
"logps/chosen": -0.796169102191925, |
|
"logps/rejected": -0.9764283895492554, |
|
"loss": 0.4935, |
|
"nll_loss": 0.4608798921108246, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.039808452129364014, |
|
"rewards/margins": 0.009012967348098755, |
|
"rewards/rejected": -0.048821426928043365, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8180388044048243, |
|
"grad_norm": 2.1250851855347417, |
|
"learning_rate": 2.506402059138015e-06, |
|
"log_odds_chosen": 0.2769099771976471, |
|
"log_odds_ratio": -0.6522020101547241, |
|
"logits/chosen": -2.8049657344818115, |
|
"logits/rejected": -2.8198862075805664, |
|
"logps/chosen": -0.7881239056587219, |
|
"logps/rejected": -0.9357802271842957, |
|
"loss": 0.5049, |
|
"nll_loss": 0.5033601522445679, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.039406199008226395, |
|
"rewards/margins": 0.0073828138411045074, |
|
"rewards/rejected": -0.0467890128493309, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8285264813843733, |
|
"grad_norm": 2.1157883450641966, |
|
"learning_rate": 2.49048823437687e-06, |
|
"log_odds_chosen": 0.4010138511657715, |
|
"log_odds_ratio": -0.6229840517044067, |
|
"logits/chosen": -2.8338706493377686, |
|
"logits/rejected": -2.8394291400909424, |
|
"logps/chosen": -0.7245864272117615, |
|
"logps/rejected": -0.9661226272583008, |
|
"loss": 0.4661, |
|
"nll_loss": 0.4065842032432556, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03622932359576225, |
|
"rewards/margins": 0.01207680907100439, |
|
"rewards/rejected": -0.04830613359808922, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8390141583639223, |
|
"grad_norm": 2.3895076758034515, |
|
"learning_rate": 2.474873734152916e-06, |
|
"log_odds_chosen": 0.48685508966445923, |
|
"log_odds_ratio": -0.5867618918418884, |
|
"logits/chosen": -2.813389301300049, |
|
"logits/rejected": -2.7975525856018066, |
|
"logps/chosen": -0.6979315876960754, |
|
"logps/rejected": -1.0023411512374878, |
|
"loss": 0.4796, |
|
"nll_loss": 0.3860110640525818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03489658236503601, |
|
"rewards/margins": 0.015220480971038342, |
|
"rewards/rejected": -0.050117067992687225, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8390141583639223, |
|
"eval_log_odds_chosen": 0.4362943768501282, |
|
"eval_log_odds_ratio": -0.6168639063835144, |
|
"eval_logits/chosen": -2.8114309310913086, |
|
"eval_logits/rejected": -2.791295289993286, |
|
"eval_logps/chosen": -0.7415919303894043, |
|
"eval_logps/rejected": -1.016213297843933, |
|
"eval_loss": 0.4739992916584015, |
|
"eval_nll_loss": 0.4396199584007263, |
|
"eval_rewards/accuracies": 0.6507936716079712, |
|
"eval_rewards/chosen": -0.037079595029354095, |
|
"eval_rewards/margins": 0.013731070794165134, |
|
"eval_rewards/rejected": -0.050810668617486954, |
|
"eval_runtime": 137.8725, |
|
"eval_samples_per_second": 14.463, |
|
"eval_steps_per_second": 0.457, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8495018353434715, |
|
"grad_norm": 2.2171962411607398, |
|
"learning_rate": 2.459549291242073e-06, |
|
"log_odds_chosen": 0.4064277708530426, |
|
"log_odds_ratio": -0.6227105259895325, |
|
"logits/chosen": -2.8798890113830566, |
|
"logits/rejected": -2.8490796089172363, |
|
"logps/chosen": -0.729169487953186, |
|
"logps/rejected": -0.9680086970329285, |
|
"loss": 0.4744, |
|
"nll_loss": 0.4338308870792389, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03645847737789154, |
|
"rewards/margins": 0.011941960081458092, |
|
"rewards/rejected": -0.04840043932199478, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8599895123230205, |
|
"grad_norm": 2.607409368726623, |
|
"learning_rate": 2.4445060351935238e-06, |
|
"log_odds_chosen": 0.3091586232185364, |
|
"log_odds_ratio": -0.6474903225898743, |
|
"logits/chosen": -2.820725679397583, |
|
"logits/rejected": -2.804964303970337, |
|
"logps/chosen": -0.7581018805503845, |
|
"logps/rejected": -0.9343080520629883, |
|
"loss": 0.4661, |
|
"nll_loss": 0.3911210894584656, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03790510073304176, |
|
"rewards/margins": 0.00881030224263668, |
|
"rewards/rejected": -0.046715401113033295, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8704771893025695, |
|
"grad_norm": 2.6267861444652034, |
|
"learning_rate": 2.4297354707521817e-06, |
|
"log_odds_chosen": 0.21734324097633362, |
|
"log_odds_ratio": -0.7081775069236755, |
|
"logits/chosen": -2.805722236633301, |
|
"logits/rejected": -2.8377511501312256, |
|
"logps/chosen": -0.777400553226471, |
|
"logps/rejected": -0.915818989276886, |
|
"loss": 0.4873, |
|
"nll_loss": 0.4305228292942047, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.03887002915143967, |
|
"rewards/margins": 0.0069209253415465355, |
|
"rewards/rejected": -0.04579095169901848, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8809648662821186, |
|
"grad_norm": 2.1614161917289363, |
|
"learning_rate": 2.4152294576982395e-06, |
|
"log_odds_chosen": 0.21988508105278015, |
|
"log_odds_ratio": -0.6872502565383911, |
|
"logits/chosen": -2.8258466720581055, |
|
"logits/rejected": -2.8268680572509766, |
|
"logps/chosen": -0.7874829769134521, |
|
"logps/rejected": -0.9251054525375366, |
|
"loss": 0.4733, |
|
"nll_loss": 0.4440709054470062, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.03937415033578873, |
|
"rewards/margins": 0.006881123874336481, |
|
"rewards/rejected": -0.04625527560710907, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8914525432616676, |
|
"grad_norm": 2.2102319814571074, |
|
"learning_rate": 2.4009801919951233e-06, |
|
"log_odds_chosen": 0.3129335641860962, |
|
"log_odds_ratio": -0.6348214149475098, |
|
"logits/chosen": -2.8568568229675293, |
|
"logits/rejected": -2.865201473236084, |
|
"logps/chosen": -0.749543309211731, |
|
"logps/rejected": -0.9329560399055481, |
|
"loss": 0.466, |
|
"nll_loss": 0.4490523934364319, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03747716546058655, |
|
"rewards/margins": 0.009170634672045708, |
|
"rewards/rejected": -0.046647801995277405, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9019402202412166, |
|
"grad_norm": 2.082847476776939, |
|
"learning_rate": 2.3869801881466573e-06, |
|
"log_odds_chosen": 0.2860751152038574, |
|
"log_odds_ratio": -0.6700129508972168, |
|
"logits/chosen": -2.825407028198242, |
|
"logits/rejected": -2.8392233848571777, |
|
"logps/chosen": -0.7431017756462097, |
|
"logps/rejected": -0.9103603363037109, |
|
"loss": 0.4884, |
|
"nll_loss": 0.4357692301273346, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.037155088037252426, |
|
"rewards/margins": 0.008362922817468643, |
|
"rewards/rejected": -0.045518018305301666, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9124278972207656, |
|
"grad_norm": 2.188429034443825, |
|
"learning_rate": 2.3732222626728365e-06, |
|
"log_odds_chosen": 0.3270949423313141, |
|
"log_odds_ratio": -0.6543049812316895, |
|
"logits/chosen": -2.8709769248962402, |
|
"logits/rejected": -2.888324022293091, |
|
"logps/chosen": -0.7763268947601318, |
|
"logps/rejected": -0.9964207410812378, |
|
"loss": 0.454, |
|
"nll_loss": 0.4407920837402344, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03881634771823883, |
|
"rewards/margins": 0.011004697531461716, |
|
"rewards/rejected": -0.04982104152441025, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9229155742003147, |
|
"grad_norm": 1.8451620085670009, |
|
"learning_rate": 2.359699518621347e-06, |
|
"log_odds_chosen": 0.3485734164714813, |
|
"log_odds_ratio": -0.6351412534713745, |
|
"logits/chosen": -2.9025185108184814, |
|
"logits/rejected": -2.8809902667999268, |
|
"logps/chosen": -0.7233132719993591, |
|
"logps/rejected": -0.9310896992683411, |
|
"loss": 0.4524, |
|
"nll_loss": 0.4024543762207031, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03616566210985184, |
|
"rewards/margins": 0.010388821363449097, |
|
"rewards/rejected": -0.046554479748010635, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9334032511798637, |
|
"grad_norm": 1.9306573871485972, |
|
"learning_rate": 2.3464053310389682e-06, |
|
"log_odds_chosen": 0.3904303014278412, |
|
"log_odds_ratio": -0.623832106590271, |
|
"logits/chosen": -2.84079909324646, |
|
"logits/rejected": -2.8426525592803955, |
|
"logps/chosen": -0.7186557650566101, |
|
"logps/rejected": -0.9262601137161255, |
|
"loss": 0.4565, |
|
"nll_loss": 0.42616167664527893, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.035932786762714386, |
|
"rewards/margins": 0.010380217805504799, |
|
"rewards/rejected": -0.046313002705574036, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9438909281594127, |
|
"grad_norm": 2.157911532280212, |
|
"learning_rate": 2.333333333333333e-06, |
|
"log_odds_chosen": 0.3039458692073822, |
|
"log_odds_ratio": -0.6423442959785461, |
|
"logits/chosen": -2.896359920501709, |
|
"logits/rejected": -2.9049692153930664, |
|
"logps/chosen": -0.6981052756309509, |
|
"logps/rejected": -0.8672422170639038, |
|
"loss": 0.4851, |
|
"nll_loss": 0.428159236907959, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03490526229143143, |
|
"rewards/margins": 0.008456850424408913, |
|
"rewards/rejected": -0.04336211457848549, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9438909281594127, |
|
"eval_log_odds_chosen": 0.36685651540756226, |
|
"eval_log_odds_ratio": -0.6244728565216064, |
|
"eval_logits/chosen": -2.969223976135254, |
|
"eval_logits/rejected": -2.9542508125305176, |
|
"eval_logps/chosen": -0.7142534852027893, |
|
"eval_logps/rejected": -0.9323597550392151, |
|
"eval_loss": 0.47141149640083313, |
|
"eval_nll_loss": 0.4360823631286621, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.035712677985429764, |
|
"eval_rewards/margins": 0.01090531051158905, |
|
"eval_rewards/rejected": -0.046617984771728516, |
|
"eval_runtime": 138.0948, |
|
"eval_samples_per_second": 14.439, |
|
"eval_steps_per_second": 0.456, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9543786051389617, |
|
"grad_norm": 2.4004822961845957, |
|
"learning_rate": 2.3204774044612855e-06, |
|
"log_odds_chosen": 0.4948676526546478, |
|
"log_odds_ratio": -0.626745343208313, |
|
"logits/chosen": -2.963355302810669, |
|
"logits/rejected": -2.9515814781188965, |
|
"logps/chosen": -0.7483548521995544, |
|
"logps/rejected": -1.0602718591690063, |
|
"loss": 0.4776, |
|
"nll_loss": 0.42798590660095215, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03741774708032608, |
|
"rewards/margins": 0.015595847740769386, |
|
"rewards/rejected": -0.05301359295845032, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9648662821185108, |
|
"grad_norm": 2.154391749062073, |
|
"learning_rate": 2.3078316568852547e-06, |
|
"log_odds_chosen": 0.3418871760368347, |
|
"log_odds_ratio": -0.6459903717041016, |
|
"logits/chosen": -2.8877079486846924, |
|
"logits/rejected": -2.9023048877716064, |
|
"logps/chosen": -0.7208271622657776, |
|
"logps/rejected": -0.9329261779785156, |
|
"loss": 0.4496, |
|
"nll_loss": 0.39838844537734985, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03604135662317276, |
|
"rewards/margins": 0.010604949668049812, |
|
"rewards/rejected": -0.04664631187915802, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9753539590980598, |
|
"grad_norm": 2.4150467379552776, |
|
"learning_rate": 2.2953904252438353e-06, |
|
"log_odds_chosen": 0.31212860345840454, |
|
"log_odds_ratio": -0.6628017425537109, |
|
"logits/chosen": -2.9404473304748535, |
|
"logits/rejected": -2.935260772705078, |
|
"logps/chosen": -0.7885305285453796, |
|
"logps/rejected": -1.0043061971664429, |
|
"loss": 0.4752, |
|
"nll_loss": 0.48344022035598755, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0394265279173851, |
|
"rewards/margins": 0.010788780637085438, |
|
"rewards/rejected": -0.05021531134843826, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9858416360776088, |
|
"grad_norm": 2.2491855597526786, |
|
"learning_rate": 2.2831482556870475e-06, |
|
"log_odds_chosen": 0.2697109580039978, |
|
"log_odds_ratio": -0.6924097537994385, |
|
"logits/chosen": -2.9477505683898926, |
|
"logits/rejected": -2.9367494583129883, |
|
"logps/chosen": -0.7188832759857178, |
|
"logps/rejected": -0.8695234060287476, |
|
"loss": 0.4739, |
|
"nll_loss": 0.44516521692276, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.03594416007399559, |
|
"rewards/margins": 0.007532012648880482, |
|
"rewards/rejected": -0.0434761717915535, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9963293130571579, |
|
"grad_norm": 2.438616188075854, |
|
"learning_rate": 2.2710998958306758e-06, |
|
"log_odds_chosen": 0.26511335372924805, |
|
"log_odds_ratio": -0.6899660229682922, |
|
"logits/chosen": -2.9427490234375, |
|
"logits/rejected": -2.945517063140869, |
|
"logps/chosen": -0.7803043127059937, |
|
"logps/rejected": -0.9409860372543335, |
|
"loss": 0.4993, |
|
"nll_loss": 0.4652082026004791, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03901521861553192, |
|
"rewards/margins": 0.008034082129597664, |
|
"rewards/rejected": -0.047049302607774734, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9994756161510225, |
|
"step": 953, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5301580581685054, |
|
"train_runtime": 20737.8205, |
|
"train_samples_per_second": 2.942, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 953, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|