|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7386245727539062, |
|
"logits/rejected": -2.7273669242858887, |
|
"logps/chosen": -262.8376159667969, |
|
"logps/rejected": -255.88758850097656, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.7419047355651855, |
|
"logits/rejected": -2.7360031604766846, |
|
"logps/chosen": -305.9395446777344, |
|
"logps/rejected": -270.57177734375, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.0002741153002716601, |
|
"rewards/margins": 0.0006307306466624141, |
|
"rewards/rejected": -0.00035661537549458444, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.7987372875213623, |
|
"logits/rejected": -2.779291868209839, |
|
"logps/chosen": -296.0432434082031, |
|
"logps/rejected": -258.17041015625, |
|
"loss": 0.1055, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 2.8045265935361385e-05, |
|
"rewards/margins": 0.0010506389662623405, |
|
"rewards/rejected": -0.001022593816742301, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.83036470413208, |
|
"logits/rejected": -2.802358627319336, |
|
"logps/chosen": -300.7704162597656, |
|
"logps/rejected": -259.5246276855469, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0002994390088133514, |
|
"rewards/margins": 0.0064557394944131374, |
|
"rewards/rejected": -0.006755178328603506, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.7836341857910156, |
|
"logits/rejected": -2.757286310195923, |
|
"logps/chosen": -257.14385986328125, |
|
"logps/rejected": -248.82925415039062, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.001975560560822487, |
|
"rewards/margins": 0.018592100590467453, |
|
"rewards/rejected": -0.02056765928864479, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.773463726043701, |
|
"logits/rejected": -2.75862979888916, |
|
"logps/chosen": -255.62783813476562, |
|
"logps/rejected": -247.96707153320312, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.023576391860842705, |
|
"rewards/margins": 0.053500402718782425, |
|
"rewards/rejected": -0.07707679271697998, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.7168498039245605, |
|
"logits/rejected": -2.684145450592041, |
|
"logps/chosen": -265.1424255371094, |
|
"logps/rejected": -254.54867553710938, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.08215885609388351, |
|
"rewards/margins": 0.07760664075613022, |
|
"rewards/rejected": -0.15976549685001373, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.741403341293335, |
|
"logits/rejected": -2.7200100421905518, |
|
"logps/chosen": -289.6435852050781, |
|
"logps/rejected": -287.03662109375, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1611556112766266, |
|
"rewards/margins": 0.1475805938243866, |
|
"rewards/rejected": -0.3087361752986908, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.7668230533599854, |
|
"logits/rejected": -2.7418100833892822, |
|
"logps/chosen": -306.257568359375, |
|
"logps/rejected": -304.8079833984375, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28854140639305115, |
|
"rewards/margins": 0.16743852198123932, |
|
"rewards/rejected": -0.45597997307777405, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.7547733783721924, |
|
"logits/rejected": -2.7255868911743164, |
|
"logps/chosen": -280.0272216796875, |
|
"logps/rejected": -278.74127197265625, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3458347022533417, |
|
"rewards/margins": 0.24216556549072266, |
|
"rewards/rejected": -0.5880002975463867, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.6905529499053955, |
|
"logits/rejected": -2.6901133060455322, |
|
"logps/chosen": -340.79461669921875, |
|
"logps/rejected": -338.3218688964844, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6042592525482178, |
|
"rewards/margins": 0.2673302888870239, |
|
"rewards/rejected": -0.8715896606445312, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.732572317123413, |
|
"eval_logits/rejected": -2.717289686203003, |
|
"eval_logps/chosen": -324.24517822265625, |
|
"eval_logps/rejected": -360.8448791503906, |
|
"eval_loss": 0.04314277693629265, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -0.6720553636550903, |
|
"eval_rewards/margins": 0.3628607988357544, |
|
"eval_rewards/rejected": -1.0349161624908447, |
|
"eval_runtime": 53.2647, |
|
"eval_samples_per_second": 37.548, |
|
"eval_steps_per_second": 0.601, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.720991373062134, |
|
"logits/rejected": -2.688431978225708, |
|
"logps/chosen": -372.272705078125, |
|
"logps/rejected": -368.93316650390625, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7657500505447388, |
|
"rewards/margins": 0.34390324354171753, |
|
"rewards/rejected": -1.109653353691101, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.6478002071380615, |
|
"logits/rejected": -2.6571507453918457, |
|
"logps/chosen": -302.8161315917969, |
|
"logps/rejected": -345.29022216796875, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7161726951599121, |
|
"rewards/margins": 0.4097130298614502, |
|
"rewards/rejected": -1.1258857250213623, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.6481852531433105, |
|
"logits/rejected": -2.6323132514953613, |
|
"logps/chosen": -315.4286804199219, |
|
"logps/rejected": -349.3882751464844, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7872930765151978, |
|
"rewards/margins": 0.33242180943489075, |
|
"rewards/rejected": -1.1197148561477661, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.6520018577575684, |
|
"logits/rejected": -2.6314806938171387, |
|
"logps/chosen": -349.292236328125, |
|
"logps/rejected": -358.19696044921875, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8543764352798462, |
|
"rewards/margins": 0.41757732629776, |
|
"rewards/rejected": -1.271953821182251, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.5678088665008545, |
|
"logits/rejected": -2.561540126800537, |
|
"logps/chosen": -360.6986999511719, |
|
"logps/rejected": -373.29876708984375, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9629270434379578, |
|
"rewards/margins": 0.4645315110683441, |
|
"rewards/rejected": -1.4274585247039795, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.510655641555786, |
|
"logits/rejected": -2.5043094158172607, |
|
"logps/chosen": -390.89556884765625, |
|
"logps/rejected": -418.712646484375, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.16335129737854, |
|
"rewards/margins": 0.5290186405181885, |
|
"rewards/rejected": -1.692370057106018, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.5995492935180664, |
|
"logits/rejected": -2.5737595558166504, |
|
"logps/chosen": -412.41259765625, |
|
"logps/rejected": -393.64605712890625, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8894079327583313, |
|
"rewards/margins": 0.48425012826919556, |
|
"rewards/rejected": -1.3736579418182373, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.5762248039245605, |
|
"logits/rejected": -2.5727803707122803, |
|
"logps/chosen": -357.29132080078125, |
|
"logps/rejected": -409.08453369140625, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9876778721809387, |
|
"rewards/margins": 0.4671412408351898, |
|
"rewards/rejected": -1.4548190832138062, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.4623260498046875, |
|
"logits/rejected": -2.4131171703338623, |
|
"logps/chosen": -391.2403564453125, |
|
"logps/rejected": -413.74554443359375, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1796742677688599, |
|
"rewards/margins": 0.40752944350242615, |
|
"rewards/rejected": -1.5872037410736084, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.451063394546509, |
|
"logits/rejected": -2.4568967819213867, |
|
"logps/chosen": -348.5596618652344, |
|
"logps/rejected": -400.4520263671875, |
|
"loss": 0.027, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1439893245697021, |
|
"rewards/margins": 0.4998703896999359, |
|
"rewards/rejected": -1.64385986328125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.5091135501861572, |
|
"eval_logits/rejected": -2.4957656860351562, |
|
"eval_logps/chosen": -366.6208190917969, |
|
"eval_logps/rejected": -425.7963562011719, |
|
"eval_loss": 0.02966611087322235, |
|
"eval_rewards/accuracies": 0.6953125, |
|
"eval_rewards/chosen": -1.0958118438720703, |
|
"eval_rewards/margins": 0.5886186957359314, |
|
"eval_rewards/rejected": -1.6844305992126465, |
|
"eval_runtime": 53.1867, |
|
"eval_samples_per_second": 37.603, |
|
"eval_steps_per_second": 0.602, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.4863812923431396, |
|
"logits/rejected": -2.407597780227661, |
|
"logps/chosen": -417.18634033203125, |
|
"logps/rejected": -401.4176025390625, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.203452467918396, |
|
"rewards/margins": 0.41743287444114685, |
|
"rewards/rejected": -1.6208854913711548, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.5017166137695312, |
|
"logits/rejected": -2.444180965423584, |
|
"logps/chosen": -391.02996826171875, |
|
"logps/rejected": -405.67987060546875, |
|
"loss": 0.029, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1130907535552979, |
|
"rewards/margins": 0.5619007349014282, |
|
"rewards/rejected": -1.6749913692474365, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.5100924968719482, |
|
"logits/rejected": -2.4785008430480957, |
|
"logps/chosen": -392.33062744140625, |
|
"logps/rejected": -428.2881774902344, |
|
"loss": 0.029, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0476332902908325, |
|
"rewards/margins": 0.6075866222381592, |
|
"rewards/rejected": -1.6552197933197021, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.4946725368499756, |
|
"logits/rejected": -2.490581512451172, |
|
"logps/chosen": -392.0195007324219, |
|
"logps/rejected": -388.649169921875, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.875754177570343, |
|
"rewards/margins": 0.5238613486289978, |
|
"rewards/rejected": -1.3996155261993408, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.431591272354126, |
|
"logits/rejected": -2.410667896270752, |
|
"logps/chosen": -353.4183654785156, |
|
"logps/rejected": -399.400146484375, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0008418560028076, |
|
"rewards/margins": 0.5946453809738159, |
|
"rewards/rejected": -1.595487356185913, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.4323439598083496, |
|
"logits/rejected": -2.4124226570129395, |
|
"logps/chosen": -417.405029296875, |
|
"logps/rejected": -445.43707275390625, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1992970705032349, |
|
"rewards/margins": 0.47717300057411194, |
|
"rewards/rejected": -1.676470160484314, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.475048542022705, |
|
"logits/rejected": -2.4506657123565674, |
|
"logps/chosen": -392.7640075683594, |
|
"logps/rejected": -430.6897888183594, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.3913904428482056, |
|
"rewards/margins": 0.3153776526451111, |
|
"rewards/rejected": -1.7067680358886719, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.4361844062805176, |
|
"logits/rejected": -2.4216103553771973, |
|
"logps/chosen": -388.96063232421875, |
|
"logps/rejected": -430.0042419433594, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2630523443222046, |
|
"rewards/margins": 0.5633870363235474, |
|
"rewards/rejected": -1.8264392614364624, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.500619411468506, |
|
"logits/rejected": -2.46304988861084, |
|
"logps/chosen": -409.1497497558594, |
|
"logps/rejected": -441.10198974609375, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0858951807022095, |
|
"rewards/margins": 0.46196287870407104, |
|
"rewards/rejected": -1.5478579998016357, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.4326109886169434, |
|
"logits/rejected": -2.4290225505828857, |
|
"logps/chosen": -392.1640930175781, |
|
"logps/rejected": -414.16351318359375, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1948238611221313, |
|
"rewards/margins": 0.47188109159469604, |
|
"rewards/rejected": -1.6667048931121826, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.471911668777466, |
|
"eval_logits/rejected": -2.458660840988159, |
|
"eval_logps/chosen": -367.22003173828125, |
|
"eval_logps/rejected": -428.8892517089844, |
|
"eval_loss": 0.02865579165518284, |
|
"eval_rewards/accuracies": 0.734375, |
|
"eval_rewards/chosen": -1.1018041372299194, |
|
"eval_rewards/margins": 0.6135556101799011, |
|
"eval_rewards/rejected": -1.7153598070144653, |
|
"eval_runtime": 53.2357, |
|
"eval_samples_per_second": 37.569, |
|
"eval_steps_per_second": 0.601, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.4415957927703857, |
|
"logits/rejected": -2.4284932613372803, |
|
"logps/chosen": -407.1797790527344, |
|
"logps/rejected": -432.66436767578125, |
|
"loss": 0.0263, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1042468547821045, |
|
"rewards/margins": 0.6383775472640991, |
|
"rewards/rejected": -1.7426245212554932, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.4184253215789795, |
|
"logits/rejected": -2.3910305500030518, |
|
"logps/chosen": -378.49432373046875, |
|
"logps/rejected": -415.8314514160156, |
|
"loss": 0.0269, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.379817247390747, |
|
"rewards/margins": 0.4037790894508362, |
|
"rewards/rejected": -1.783596396446228, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.390575885772705, |
|
"logits/rejected": -2.3807907104492188, |
|
"logps/chosen": -402.01800537109375, |
|
"logps/rejected": -452.57598876953125, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1732884645462036, |
|
"rewards/margins": 0.4634523391723633, |
|
"rewards/rejected": -1.6367409229278564, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.4380133152008057, |
|
"logits/rejected": -2.4201126098632812, |
|
"logps/chosen": -409.64093017578125, |
|
"logps/rejected": -423.3724670410156, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.243157982826233, |
|
"rewards/margins": 0.5037888288497925, |
|
"rewards/rejected": -1.7469466924667358, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.4198803901672363, |
|
"logits/rejected": -2.369533061981201, |
|
"logps/chosen": -408.26910400390625, |
|
"logps/rejected": -487.27435302734375, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5223808288574219, |
|
"rewards/margins": 0.7387748956680298, |
|
"rewards/rejected": -2.261155843734741, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.3843138217926025, |
|
"logits/rejected": -2.3418803215026855, |
|
"logps/chosen": -431.44854736328125, |
|
"logps/rejected": -472.52813720703125, |
|
"loss": 0.0225, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4586023092269897, |
|
"rewards/margins": 0.6385560035705566, |
|
"rewards/rejected": -2.097158432006836, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.4029147624969482, |
|
"logits/rejected": -2.3779168128967285, |
|
"logps/chosen": -383.9462890625, |
|
"logps/rejected": -458.59735107421875, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.391872763633728, |
|
"rewards/margins": 0.7170418500900269, |
|
"rewards/rejected": -2.108914375305176, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.3961706161499023, |
|
"logits/rejected": -2.36126708984375, |
|
"logps/chosen": -409.074951171875, |
|
"logps/rejected": -450.57452392578125, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4532839059829712, |
|
"rewards/margins": 0.6719815731048584, |
|
"rewards/rejected": -2.125265598297119, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.337531328201294, |
|
"logits/rejected": -2.3046772480010986, |
|
"logps/chosen": -442.0828552246094, |
|
"logps/rejected": -477.5122985839844, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.542133092880249, |
|
"rewards/margins": 0.5396715402603149, |
|
"rewards/rejected": -2.0818047523498535, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.4024269580841064, |
|
"logits/rejected": -2.3905534744262695, |
|
"logps/chosen": -429.47369384765625, |
|
"logps/rejected": -499.0704650878906, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6260488033294678, |
|
"rewards/margins": 0.7209797501564026, |
|
"rewards/rejected": -2.3470287322998047, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.407222270965576, |
|
"eval_logits/rejected": -2.3938333988189697, |
|
"eval_logps/chosen": -404.4031677246094, |
|
"eval_logps/rejected": -479.36236572265625, |
|
"eval_loss": 0.022896816954016685, |
|
"eval_rewards/accuracies": 0.71484375, |
|
"eval_rewards/chosen": -1.4736356735229492, |
|
"eval_rewards/margins": 0.746455192565918, |
|
"eval_rewards/rejected": -2.220090866088867, |
|
"eval_runtime": 53.1792, |
|
"eval_samples_per_second": 37.609, |
|
"eval_steps_per_second": 0.602, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.379589796066284, |
|
"logits/rejected": -2.371739387512207, |
|
"logps/chosen": -441.45648193359375, |
|
"logps/rejected": -446.16265869140625, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5874555110931396, |
|
"rewards/margins": 0.516998291015625, |
|
"rewards/rejected": -2.1044538021087646, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.4584357738494873, |
|
"logits/rejected": -2.4399354457855225, |
|
"logps/chosen": -441.65179443359375, |
|
"logps/rejected": -459.64208984375, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.469987392425537, |
|
"rewards/margins": 0.5242463946342468, |
|
"rewards/rejected": -1.9942338466644287, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.4622554779052734, |
|
"logits/rejected": -2.4207379817962646, |
|
"logps/chosen": -428.8905334472656, |
|
"logps/rejected": -432.05108642578125, |
|
"loss": 0.021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.411439299583435, |
|
"rewards/margins": 0.6273307800292969, |
|
"rewards/rejected": -2.0387701988220215, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.443328857421875, |
|
"logits/rejected": -2.4111621379852295, |
|
"logps/chosen": -445.6102600097656, |
|
"logps/rejected": -446.654052734375, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4215877056121826, |
|
"rewards/margins": 0.5511332154273987, |
|
"rewards/rejected": -1.972720742225647, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.457451581954956, |
|
"logits/rejected": -2.4286131858825684, |
|
"logps/chosen": -411.8194885253906, |
|
"logps/rejected": -525.404296875, |
|
"loss": 0.0243, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3488930463790894, |
|
"rewards/margins": 0.9843934774398804, |
|
"rewards/rejected": -2.333286762237549, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.392413377761841, |
|
"logits/rejected": -2.3623125553131104, |
|
"logps/chosen": -452.897216796875, |
|
"logps/rejected": -460.3353576660156, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4256120920181274, |
|
"rewards/margins": 0.5736899375915527, |
|
"rewards/rejected": -1.9993021488189697, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.4287571907043457, |
|
"logits/rejected": -2.387329578399658, |
|
"logps/chosen": -451.8321228027344, |
|
"logps/rejected": -473.54388427734375, |
|
"loss": 0.0243, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4879666566848755, |
|
"rewards/margins": 0.6231549978256226, |
|
"rewards/rejected": -2.111121654510498, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03937680171373998, |
|
"train_runtime": 4352.8265, |
|
"train_samples_per_second": 14.045, |
|
"train_steps_per_second": 0.11 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|