|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2907, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.603872060775757, |
|
"logits/rejected": -2.555414915084839, |
|
"logps/chosen": -134.93458557128906, |
|
"logps/rejected": -134.85955810546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.729396343231201, |
|
"logits/rejected": -2.6624577045440674, |
|
"logps/chosen": -303.16815185546875, |
|
"logps/rejected": -252.68316650390625, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0021914299577474594, |
|
"rewards/margins": 0.004278275184333324, |
|
"rewards/rejected": -0.0020868456922471523, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.747986078262329, |
|
"logits/rejected": -2.6914308071136475, |
|
"logps/chosen": -328.33062744140625, |
|
"logps/rejected": -288.1149597167969, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.009702973067760468, |
|
"rewards/margins": -0.0010181829566136003, |
|
"rewards/rejected": -0.008684789761900902, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.769310712814331, |
|
"logits/rejected": -2.707155466079712, |
|
"logps/chosen": -337.4163513183594, |
|
"logps/rejected": -276.3641662597656, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.010683250613510609, |
|
"rewards/margins": -0.006997206248342991, |
|
"rewards/rejected": -0.0036860438995063305, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.7873435020446777, |
|
"logits/rejected": -2.7888503074645996, |
|
"logps/chosen": -382.8026123046875, |
|
"logps/rejected": -322.3409729003906, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0037022079341113567, |
|
"rewards/margins": 0.011960670351982117, |
|
"rewards/rejected": -0.008258461952209473, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.660407781600952, |
|
"logits/rejected": -2.6068549156188965, |
|
"logps/chosen": -396.0019836425781, |
|
"logps/rejected": -251.8150177001953, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.004951278679072857, |
|
"rewards/margins": 0.004014839418232441, |
|
"rewards/rejected": 0.0009364400175400078, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.669360399246216, |
|
"logits/rejected": -2.6709377765655518, |
|
"logps/chosen": -312.83160400390625, |
|
"logps/rejected": -266.94866943359375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007568955421447754, |
|
"rewards/margins": 0.01419537328183651, |
|
"rewards/rejected": -0.0066264173947274685, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.8407368659973145, |
|
"logits/rejected": -2.717714309692383, |
|
"logps/chosen": -418.32666015625, |
|
"logps/rejected": -295.38348388671875, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00732302526012063, |
|
"rewards/margins": 0.0100878169760108, |
|
"rewards/rejected": -0.0027647926472127438, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.7577030658721924, |
|
"logits/rejected": -2.6890294551849365, |
|
"logps/chosen": -340.7475891113281, |
|
"logps/rejected": -262.4281311035156, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02396738901734352, |
|
"rewards/margins": 0.032635144889354706, |
|
"rewards/rejected": -0.00866775494068861, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.7611238956451416, |
|
"logits/rejected": -2.721151828765869, |
|
"logps/chosen": -333.66485595703125, |
|
"logps/rejected": -255.80520629882812, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.045851536095142365, |
|
"rewards/margins": 0.06294064968824387, |
|
"rewards/rejected": -0.017089109867811203, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.646711826324463, |
|
"logits/rejected": -2.6414952278137207, |
|
"logps/chosen": -351.0623474121094, |
|
"logps/rejected": -238.76431274414062, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06126219779253006, |
|
"rewards/margins": 0.06746932864189148, |
|
"rewards/rejected": -0.0062071289867162704, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.832993984222412, |
|
"eval_logits/rejected": -2.7623188495635986, |
|
"eval_logps/chosen": -369.2313232421875, |
|
"eval_logps/rejected": -285.8178405761719, |
|
"eval_loss": 0.6668010950088501, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": 0.06240652874112129, |
|
"eval_rewards/margins": 0.07296741008758545, |
|
"eval_rewards/rejected": -0.010560884140431881, |
|
"eval_runtime": 94.0983, |
|
"eval_samples_per_second": 21.254, |
|
"eval_steps_per_second": 0.67, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.738068103790283, |
|
"logits/rejected": -2.659058094024658, |
|
"logps/chosen": -306.91400146484375, |
|
"logps/rejected": -279.0736083984375, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04395968094468117, |
|
"rewards/margins": 0.057025860995054245, |
|
"rewards/rejected": -0.01306617446243763, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.696042776107788, |
|
"logits/rejected": -2.673532009124756, |
|
"logps/chosen": -397.77996826171875, |
|
"logps/rejected": -285.76092529296875, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08170491456985474, |
|
"rewards/margins": 0.08871547877788544, |
|
"rewards/rejected": -0.0070105730555951595, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.7733514308929443, |
|
"logits/rejected": -2.7449357509613037, |
|
"logps/chosen": -363.5209045410156, |
|
"logps/rejected": -293.55010986328125, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08804867416620255, |
|
"rewards/margins": 0.12048058211803436, |
|
"rewards/rejected": -0.03243190422654152, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.7430038452148438, |
|
"logits/rejected": -2.6958515644073486, |
|
"logps/chosen": -346.5725402832031, |
|
"logps/rejected": -263.58282470703125, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.14620031416416168, |
|
"rewards/margins": 0.18137127161026, |
|
"rewards/rejected": -0.03517094999551773, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.718210220336914, |
|
"logits/rejected": -2.722475528717041, |
|
"logps/chosen": -345.8394470214844, |
|
"logps/rejected": -273.95721435546875, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.09584580361843109, |
|
"rewards/margins": 0.1124863252043724, |
|
"rewards/rejected": -0.016640519723296165, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.86755633354187, |
|
"logits/rejected": -2.796311855316162, |
|
"logps/chosen": -387.51531982421875, |
|
"logps/rejected": -289.4734802246094, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.09817598015069962, |
|
"rewards/margins": 0.20854738354682922, |
|
"rewards/rejected": -0.11037138849496841, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.7607944011688232, |
|
"logits/rejected": -2.7413511276245117, |
|
"logps/chosen": -345.589599609375, |
|
"logps/rejected": -276.03033447265625, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.08899159729480743, |
|
"rewards/margins": 0.13915829360485077, |
|
"rewards/rejected": -0.05016670748591423, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.7164695262908936, |
|
"logits/rejected": -2.703003406524658, |
|
"logps/chosen": -275.18792724609375, |
|
"logps/rejected": -236.8255615234375, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.09344159066677094, |
|
"rewards/margins": 0.23309943079948425, |
|
"rewards/rejected": -0.1396578848361969, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.7676727771759033, |
|
"logits/rejected": -2.69238018989563, |
|
"logps/chosen": -346.99774169921875, |
|
"logps/rejected": -237.373046875, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.1757623851299286, |
|
"rewards/margins": 0.30902284383773804, |
|
"rewards/rejected": -0.13326042890548706, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.6943726539611816, |
|
"logits/rejected": -2.667006015777588, |
|
"logps/chosen": -292.3045959472656, |
|
"logps/rejected": -197.8688201904297, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1381949484348297, |
|
"rewards/margins": 0.3062794804573059, |
|
"rewards/rejected": -0.1680845469236374, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.8180224895477295, |
|
"eval_logits/rejected": -2.7513797283172607, |
|
"eval_logps/chosen": -368.2522277832031, |
|
"eval_logps/rejected": -287.6386413574219, |
|
"eval_loss": 0.5995281934738159, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": 0.16031166911125183, |
|
"eval_rewards/margins": 0.35295677185058594, |
|
"eval_rewards/rejected": -0.1926451176404953, |
|
"eval_runtime": 93.7328, |
|
"eval_samples_per_second": 21.337, |
|
"eval_steps_per_second": 0.672, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.729691743850708, |
|
"logits/rejected": -2.665153980255127, |
|
"logps/chosen": -385.1280517578125, |
|
"logps/rejected": -253.19482421875, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.13179190456867218, |
|
"rewards/margins": 0.3481922745704651, |
|
"rewards/rejected": -0.2164003849029541, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.6447486877441406, |
|
"logits/rejected": -2.5924179553985596, |
|
"logps/chosen": -278.9236145019531, |
|
"logps/rejected": -248.78488159179688, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.10307928174734116, |
|
"rewards/margins": 0.3263222277164459, |
|
"rewards/rejected": -0.22324290871620178, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.627690553665161, |
|
"logits/rejected": -2.6008286476135254, |
|
"logps/chosen": -311.2276306152344, |
|
"logps/rejected": -265.7589416503906, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.10903775691986084, |
|
"rewards/margins": 0.3014870285987854, |
|
"rewards/rejected": -0.19244930148124695, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.7416341304779053, |
|
"logits/rejected": -2.696104049682617, |
|
"logps/chosen": -388.8735046386719, |
|
"logps/rejected": -255.23745727539062, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.14113590121269226, |
|
"rewards/margins": 0.35784265398979187, |
|
"rewards/rejected": -0.21670672297477722, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.6901490688323975, |
|
"logits/rejected": -2.6809473037719727, |
|
"logps/chosen": -334.62066650390625, |
|
"logps/rejected": -278.7101745605469, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.13628730177879333, |
|
"rewards/margins": 0.3466246426105499, |
|
"rewards/rejected": -0.21033735573291779, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.7171425819396973, |
|
"logits/rejected": -2.661024332046509, |
|
"logps/chosen": -344.5673828125, |
|
"logps/rejected": -245.82632446289062, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0464027114212513, |
|
"rewards/margins": 0.39084523916244507, |
|
"rewards/rejected": -0.3444425165653229, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.7654361724853516, |
|
"logits/rejected": -2.7372591495513916, |
|
"logps/chosen": -363.6313171386719, |
|
"logps/rejected": -281.4266662597656, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.012831795029342175, |
|
"rewards/margins": 0.3564152419567108, |
|
"rewards/rejected": -0.34358346462249756, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.698306083679199, |
|
"logits/rejected": -2.617525577545166, |
|
"logps/chosen": -346.89898681640625, |
|
"logps/rejected": -266.34173583984375, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1134389191865921, |
|
"rewards/margins": 0.4053812026977539, |
|
"rewards/rejected": -0.291942298412323, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.7834925651550293, |
|
"logits/rejected": -2.7064177989959717, |
|
"logps/chosen": -352.7433166503906, |
|
"logps/rejected": -262.48480224609375, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2624431550502777, |
|
"rewards/margins": 0.535737156867981, |
|
"rewards/rejected": -0.27329397201538086, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982798165137615e-07, |
|
"logits/chosen": -2.594956398010254, |
|
"logits/rejected": -2.5584628582000732, |
|
"logps/chosen": -290.9082946777344, |
|
"logps/rejected": -268.2087707519531, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.15836095809936523, |
|
"rewards/margins": 0.3992231786251068, |
|
"rewards/rejected": -0.24086228013038635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -2.795222282409668, |
|
"eval_logits/rejected": -2.7304515838623047, |
|
"eval_logps/chosen": -367.5863952636719, |
|
"eval_logps/rejected": -288.8868408203125, |
|
"eval_loss": 0.5643562078475952, |
|
"eval_rewards/accuracies": 0.6904761791229248, |
|
"eval_rewards/chosen": 0.22689564526081085, |
|
"eval_rewards/margins": 0.5443611145019531, |
|
"eval_rewards/rejected": -0.3174654543399811, |
|
"eval_runtime": 93.6627, |
|
"eval_samples_per_second": 21.353, |
|
"eval_steps_per_second": 0.673, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963685015290519e-07, |
|
"logits/chosen": -2.7573087215423584, |
|
"logits/rejected": -2.7624802589416504, |
|
"logps/chosen": -391.13470458984375, |
|
"logps/rejected": -319.02850341796875, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.17492187023162842, |
|
"rewards/margins": 0.39395055174827576, |
|
"rewards/rejected": -0.21902871131896973, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944571865443424e-07, |
|
"logits/chosen": -2.627657175064087, |
|
"logits/rejected": -2.559328317642212, |
|
"logps/chosen": -325.2025451660156, |
|
"logps/rejected": -215.1479949951172, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.17855343222618103, |
|
"rewards/margins": 0.6376410722732544, |
|
"rewards/rejected": -0.459087610244751, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92545871559633e-07, |
|
"logits/chosen": -2.6837377548217773, |
|
"logits/rejected": -2.681138515472412, |
|
"logps/chosen": -402.97576904296875, |
|
"logps/rejected": -295.7830810546875, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2082200050354004, |
|
"rewards/margins": 0.48193687200546265, |
|
"rewards/rejected": -0.27371686697006226, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906345565749235e-07, |
|
"logits/chosen": -2.5517609119415283, |
|
"logits/rejected": -2.561235189437866, |
|
"logps/chosen": -300.0711364746094, |
|
"logps/rejected": -265.93634033203125, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.04220394045114517, |
|
"rewards/margins": 0.34089353680610657, |
|
"rewards/rejected": -0.2986895442008972, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.88723241590214e-07, |
|
"logits/chosen": -2.6442813873291016, |
|
"logits/rejected": -2.589465856552124, |
|
"logps/chosen": -380.798583984375, |
|
"logps/rejected": -283.53033447265625, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.29559534788131714, |
|
"rewards/margins": 0.628325343132019, |
|
"rewards/rejected": -0.3327299952507019, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868119266055046e-07, |
|
"logits/chosen": -2.6972787380218506, |
|
"logits/rejected": -2.6877760887145996, |
|
"logps/chosen": -358.97381591796875, |
|
"logps/rejected": -316.7508239746094, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.135372132062912, |
|
"rewards/margins": 0.6987503170967102, |
|
"rewards/rejected": -0.5633782148361206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.849006116207951e-07, |
|
"logits/chosen": -2.7523036003112793, |
|
"logits/rejected": -2.6924734115600586, |
|
"logps/chosen": -357.83648681640625, |
|
"logps/rejected": -301.96282958984375, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.14637404680252075, |
|
"rewards/margins": 0.7356597781181335, |
|
"rewards/rejected": -0.5892857909202576, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829892966360856e-07, |
|
"logits/chosen": -2.696434736251831, |
|
"logits/rejected": -2.6464056968688965, |
|
"logps/chosen": -391.2596740722656, |
|
"logps/rejected": -322.278564453125, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.20244574546813965, |
|
"rewards/margins": 0.6445008516311646, |
|
"rewards/rejected": -0.4420550763607025, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810779816513762e-07, |
|
"logits/chosen": -2.6600687503814697, |
|
"logits/rejected": -2.6298000812530518, |
|
"logps/chosen": -298.4252014160156, |
|
"logps/rejected": -267.12408447265625, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.009316349402070045, |
|
"rewards/margins": 0.48411574959754944, |
|
"rewards/rejected": -0.47479939460754395, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -2.6604857444763184, |
|
"logits/rejected": -2.611046552658081, |
|
"logps/chosen": -340.8629455566406, |
|
"logps/rejected": -230.5844268798828, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2387767732143402, |
|
"rewards/margins": 0.7385751008987427, |
|
"rewards/rejected": -0.4997982978820801, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -2.7724692821502686, |
|
"eval_logits/rejected": -2.709975004196167, |
|
"eval_logps/chosen": -367.6446533203125, |
|
"eval_logps/rejected": -290.0246276855469, |
|
"eval_loss": 0.5475609302520752, |
|
"eval_rewards/accuracies": 0.7103174328804016, |
|
"eval_rewards/chosen": 0.22107475996017456, |
|
"eval_rewards/margins": 0.6523182392120361, |
|
"eval_rewards/rejected": -0.4312434792518616, |
|
"eval_runtime": 93.5919, |
|
"eval_samples_per_second": 21.369, |
|
"eval_steps_per_second": 0.673, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772553516819572e-07, |
|
"logits/chosen": -2.6370530128479004, |
|
"logits/rejected": -2.622492551803589, |
|
"logps/chosen": -367.6494445800781, |
|
"logps/rejected": -319.9653625488281, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.20758125185966492, |
|
"rewards/margins": 0.6940993070602417, |
|
"rewards/rejected": -0.4865179657936096, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753440366972477e-07, |
|
"logits/chosen": -2.714916706085205, |
|
"logits/rejected": -2.6454455852508545, |
|
"logps/chosen": -287.719970703125, |
|
"logps/rejected": -275.41241455078125, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04725194722414017, |
|
"rewards/margins": 0.4273280203342438, |
|
"rewards/rejected": -0.38007602095603943, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7343272171253825e-07, |
|
"logits/chosen": -2.6437063217163086, |
|
"logits/rejected": -2.648123264312744, |
|
"logps/chosen": -305.0550537109375, |
|
"logps/rejected": -266.63525390625, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.03232135996222496, |
|
"rewards/margins": 0.46253544092178345, |
|
"rewards/rejected": -0.4302140772342682, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.715214067278288e-07, |
|
"logits/chosen": -2.6272292137145996, |
|
"logits/rejected": -2.6141581535339355, |
|
"logps/chosen": -357.75750732421875, |
|
"logps/rejected": -256.28765869140625, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0530555360019207, |
|
"rewards/margins": 0.6745840311050415, |
|
"rewards/rejected": -0.6215284466743469, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.696100917431192e-07, |
|
"logits/chosen": -2.6640782356262207, |
|
"logits/rejected": -2.661160707473755, |
|
"logps/chosen": -395.8831481933594, |
|
"logps/rejected": -325.2682800292969, |
|
"loss": 0.5776, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.17429330945014954, |
|
"rewards/margins": 0.49202069640159607, |
|
"rewards/rejected": -0.3177274167537689, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6769877675840974e-07, |
|
"logits/chosen": -2.633216619491577, |
|
"logits/rejected": -2.6277575492858887, |
|
"logps/chosen": -319.4475402832031, |
|
"logps/rejected": -281.19732666015625, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.060733817517757416, |
|
"rewards/margins": 0.5816834568977356, |
|
"rewards/rejected": -0.52094966173172, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6578746177370027e-07, |
|
"logits/chosen": -2.5858516693115234, |
|
"logits/rejected": -2.562450647354126, |
|
"logps/chosen": -253.75051879882812, |
|
"logps/rejected": -221.74124145507812, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.15773826837539673, |
|
"rewards/margins": 0.6320953965187073, |
|
"rewards/rejected": -0.47435712814331055, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.638761467889908e-07, |
|
"logits/chosen": -2.6325182914733887, |
|
"logits/rejected": -2.6154775619506836, |
|
"logps/chosen": -358.68377685546875, |
|
"logps/rejected": -252.25399780273438, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.12426266819238663, |
|
"rewards/margins": 0.7980905175209045, |
|
"rewards/rejected": -0.6738277673721313, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196483180428133e-07, |
|
"logits/chosen": -2.694267511367798, |
|
"logits/rejected": -2.6052515506744385, |
|
"logps/chosen": -343.802490234375, |
|
"logps/rejected": -267.68292236328125, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.17378923296928406, |
|
"rewards/margins": 0.6964474320411682, |
|
"rewards/rejected": -0.5226581692695618, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.600535168195718e-07, |
|
"logits/chosen": -2.5756261348724365, |
|
"logits/rejected": -2.5577731132507324, |
|
"logps/chosen": -269.1250305175781, |
|
"logps/rejected": -254.8256072998047, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.12170092016458511, |
|
"rewards/margins": 0.486471951007843, |
|
"rewards/rejected": -0.3647710084915161, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.7543344497680664, |
|
"eval_logits/rejected": -2.6918773651123047, |
|
"eval_logps/chosen": -367.15386962890625, |
|
"eval_logps/rejected": -290.25469970703125, |
|
"eval_loss": 0.5388306975364685, |
|
"eval_rewards/accuracies": 0.6984127163887024, |
|
"eval_rewards/chosen": 0.2701512277126312, |
|
"eval_rewards/margins": 0.7244044542312622, |
|
"eval_rewards/rejected": -0.45425331592559814, |
|
"eval_runtime": 93.436, |
|
"eval_samples_per_second": 21.405, |
|
"eval_steps_per_second": 0.674, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5814220183486234e-07, |
|
"logits/chosen": -2.5832715034484863, |
|
"logits/rejected": -2.5189692974090576, |
|
"logps/chosen": -355.7542419433594, |
|
"logps/rejected": -309.67449951171875, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03541551157832146, |
|
"rewards/margins": 0.4255645275115967, |
|
"rewards/rejected": -0.39014899730682373, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.562308868501529e-07, |
|
"logits/chosen": -2.6220099925994873, |
|
"logits/rejected": -2.5353286266326904, |
|
"logps/chosen": -334.10565185546875, |
|
"logps/rejected": -302.77496337890625, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.17356789112091064, |
|
"rewards/margins": 0.6331679224967957, |
|
"rewards/rejected": -0.459600031375885, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543195718654434e-07, |
|
"logits/chosen": -2.5556163787841797, |
|
"logits/rejected": -2.600917100906372, |
|
"logps/chosen": -282.23663330078125, |
|
"logps/rejected": -241.3870086669922, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.048130545765161514, |
|
"rewards/margins": 0.40956515073776245, |
|
"rewards/rejected": -0.36143457889556885, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5240825688073394e-07, |
|
"logits/chosen": -2.601301908493042, |
|
"logits/rejected": -2.5350029468536377, |
|
"logps/chosen": -348.8049011230469, |
|
"logps/rejected": -257.8331298828125, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.32524555921554565, |
|
"rewards/margins": 0.8066871762275696, |
|
"rewards/rejected": -0.48144158720970154, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504969418960244e-07, |
|
"logits/chosen": -2.665917158126831, |
|
"logits/rejected": -2.491896867752075, |
|
"logps/chosen": -324.9315490722656, |
|
"logps/rejected": -314.4938049316406, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.21840611100196838, |
|
"rewards/margins": 0.5038217306137085, |
|
"rewards/rejected": -0.2854156792163849, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4858562691131495e-07, |
|
"logits/chosen": -2.666839599609375, |
|
"logits/rejected": -2.6258294582366943, |
|
"logps/chosen": -349.91668701171875, |
|
"logps/rejected": -336.1511535644531, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2688800096511841, |
|
"rewards/margins": 0.6696378588676453, |
|
"rewards/rejected": -0.40075787901878357, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.466743119266055e-07, |
|
"logits/chosen": -2.6923105716705322, |
|
"logits/rejected": -2.6402506828308105, |
|
"logps/chosen": -340.2569274902344, |
|
"logps/rejected": -279.79827880859375, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.08635896444320679, |
|
"rewards/margins": 0.5042712688446045, |
|
"rewards/rejected": -0.4179123342037201, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.44762996941896e-07, |
|
"logits/chosen": -2.6503794193267822, |
|
"logits/rejected": -2.6499364376068115, |
|
"logps/chosen": -359.00439453125, |
|
"logps/rejected": -300.85906982421875, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3588196337223053, |
|
"rewards/margins": 0.8982511758804321, |
|
"rewards/rejected": -0.5394314527511597, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4285168195718655e-07, |
|
"logits/chosen": -2.5828802585601807, |
|
"logits/rejected": -2.551640033721924, |
|
"logps/chosen": -295.88592529296875, |
|
"logps/rejected": -282.87530517578125, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.11026722192764282, |
|
"rewards/margins": 0.5960223078727722, |
|
"rewards/rejected": -0.4857551157474518, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40940366972477e-07, |
|
"logits/chosen": -2.7075865268707275, |
|
"logits/rejected": -2.653106689453125, |
|
"logps/chosen": -296.15606689453125, |
|
"logps/rejected": -260.5872802734375, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1281842589378357, |
|
"rewards/margins": 0.4070327877998352, |
|
"rewards/rejected": -0.2788485288619995, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.7595744132995605, |
|
"eval_logits/rejected": -2.69767689704895, |
|
"eval_logps/chosen": -366.69464111328125, |
|
"eval_logps/rejected": -290.02459716796875, |
|
"eval_loss": 0.5326046347618103, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": 0.31607678532600403, |
|
"eval_rewards/margins": 0.7473164200782776, |
|
"eval_rewards/rejected": -0.4312395751476288, |
|
"eval_runtime": 93.591, |
|
"eval_samples_per_second": 21.37, |
|
"eval_steps_per_second": 0.673, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3902905198776756e-07, |
|
"logits/chosen": -2.6507742404937744, |
|
"logits/rejected": -2.6280486583709717, |
|
"logps/chosen": -353.1117248535156, |
|
"logps/rejected": -269.77056884765625, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.27281785011291504, |
|
"rewards/margins": 0.7093775272369385, |
|
"rewards/rejected": -0.43655967712402344, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371177370030581e-07, |
|
"logits/chosen": -2.660865306854248, |
|
"logits/rejected": -2.587761163711548, |
|
"logps/chosen": -347.13623046875, |
|
"logps/rejected": -274.3702697753906, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.25370901823043823, |
|
"rewards/margins": 0.7508881688117981, |
|
"rewards/rejected": -0.4971791207790375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.352064220183486e-07, |
|
"logits/chosen": -2.582853078842163, |
|
"logits/rejected": -2.5666868686676025, |
|
"logps/chosen": -275.5951232910156, |
|
"logps/rejected": -243.16189575195312, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.18081825971603394, |
|
"rewards/margins": 0.5823779106140137, |
|
"rewards/rejected": -0.4015596807003021, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3329510703363915e-07, |
|
"logits/chosen": -2.689371109008789, |
|
"logits/rejected": -2.618781566619873, |
|
"logps/chosen": -328.7333984375, |
|
"logps/rejected": -252.730712890625, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.21093907952308655, |
|
"rewards/margins": 0.7845473885536194, |
|
"rewards/rejected": -0.5736082792282104, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313837920489297e-07, |
|
"logits/chosen": -2.5996086597442627, |
|
"logits/rejected": -2.6289618015289307, |
|
"logps/chosen": -293.96502685546875, |
|
"logps/rejected": -266.9768981933594, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2461429089307785, |
|
"rewards/margins": 0.7484090924263, |
|
"rewards/rejected": -0.5022662878036499, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2947247706422016e-07, |
|
"logits/chosen": -2.575277328491211, |
|
"logits/rejected": -2.627274751663208, |
|
"logps/chosen": -324.3821105957031, |
|
"logps/rejected": -280.3650817871094, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.10573621094226837, |
|
"rewards/margins": 0.7033983469009399, |
|
"rewards/rejected": -0.5976621508598328, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.275611620795107e-07, |
|
"logits/chosen": -2.6651196479797363, |
|
"logits/rejected": -2.5892019271850586, |
|
"logps/chosen": -396.19073486328125, |
|
"logps/rejected": -298.2142333984375, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.17894011735916138, |
|
"rewards/margins": 0.7362491488456726, |
|
"rewards/rejected": -0.5573090314865112, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2564984709480123e-07, |
|
"logits/chosen": -2.682351589202881, |
|
"logits/rejected": -2.6517555713653564, |
|
"logps/chosen": -359.2548828125, |
|
"logps/rejected": -272.7398986816406, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.16825886070728302, |
|
"rewards/margins": 0.7031353712081909, |
|
"rewards/rejected": -0.5348765254020691, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2373853211009176e-07, |
|
"logits/chosen": -2.652602195739746, |
|
"logits/rejected": -2.6651628017425537, |
|
"logps/chosen": -333.54949951171875, |
|
"logps/rejected": -288.21405029296875, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.35042139887809753, |
|
"rewards/margins": 0.7546127438545227, |
|
"rewards/rejected": -0.40419134497642517, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2182721712538224e-07, |
|
"logits/chosen": -2.690903902053833, |
|
"logits/rejected": -2.5803565979003906, |
|
"logps/chosen": -371.2886657714844, |
|
"logps/rejected": -264.31353759765625, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4146875739097595, |
|
"rewards/margins": 0.7678929567337036, |
|
"rewards/rejected": -0.35320529341697693, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -2.7563326358795166, |
|
"eval_logits/rejected": -2.6959750652313232, |
|
"eval_logps/chosen": -366.72216796875, |
|
"eval_logps/rejected": -290.1435852050781, |
|
"eval_loss": 0.5295916795730591, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": 0.31332191824913025, |
|
"eval_rewards/margins": 0.7564623355865479, |
|
"eval_rewards/rejected": -0.4431404173374176, |
|
"eval_runtime": 93.2482, |
|
"eval_samples_per_second": 21.448, |
|
"eval_steps_per_second": 0.676, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199159021406727e-07, |
|
"logits/chosen": -2.6533806324005127, |
|
"logits/rejected": -2.539382219314575, |
|
"logps/chosen": -305.8080139160156, |
|
"logps/rejected": -260.7340393066406, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2109827995300293, |
|
"rewards/margins": 0.7150342464447021, |
|
"rewards/rejected": -0.5040515065193176, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1800458715596325e-07, |
|
"logits/chosen": -2.651778221130371, |
|
"logits/rejected": -2.5990653038024902, |
|
"logps/chosen": -392.23675537109375, |
|
"logps/rejected": -305.5450439453125, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2182011604309082, |
|
"rewards/margins": 0.5736824870109558, |
|
"rewards/rejected": -0.35548135638237, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.160932721712538e-07, |
|
"logits/chosen": -2.6589245796203613, |
|
"logits/rejected": -2.6071112155914307, |
|
"logps/chosen": -344.4822692871094, |
|
"logps/rejected": -308.5715026855469, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2045893371105194, |
|
"rewards/margins": 0.5438891649246216, |
|
"rewards/rejected": -0.339299738407135, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.141819571865443e-07, |
|
"logits/chosen": -2.6027140617370605, |
|
"logits/rejected": -2.5531044006347656, |
|
"logps/chosen": -308.57275390625, |
|
"logps/rejected": -227.69961547851562, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.24511054158210754, |
|
"rewards/margins": 0.7321642637252808, |
|
"rewards/rejected": -0.4870537221431732, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1227064220183485e-07, |
|
"logits/chosen": -2.593599796295166, |
|
"logits/rejected": -2.551053524017334, |
|
"logps/chosen": -329.5063171386719, |
|
"logps/rejected": -271.2137756347656, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.058064889162778854, |
|
"rewards/margins": 0.7626748085021973, |
|
"rewards/rejected": -0.7046098113059998, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103593272171253e-07, |
|
"logits/chosen": -2.5975751876831055, |
|
"logits/rejected": -2.611335515975952, |
|
"logps/chosen": -338.27105712890625, |
|
"logps/rejected": -318.44110107421875, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.15539416670799255, |
|
"rewards/margins": 0.6962918639183044, |
|
"rewards/rejected": -0.5408977270126343, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0844801223241586e-07, |
|
"logits/chosen": -2.5972952842712402, |
|
"logits/rejected": -2.5851306915283203, |
|
"logps/chosen": -358.0859375, |
|
"logps/rejected": -290.6552734375, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3178876042366028, |
|
"rewards/margins": 0.6411694288253784, |
|
"rewards/rejected": -0.3232818841934204, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.065366972477064e-07, |
|
"logits/chosen": -2.6409897804260254, |
|
"logits/rejected": -2.620407819747925, |
|
"logps/chosen": -362.7335510253906, |
|
"logps/rejected": -288.424072265625, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21879442036151886, |
|
"rewards/margins": 0.7934145927429199, |
|
"rewards/rejected": -0.574620246887207, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.046253822629969e-07, |
|
"logits/chosen": -2.6204047203063965, |
|
"logits/rejected": -2.575136661529541, |
|
"logps/chosen": -335.6562194824219, |
|
"logps/rejected": -271.09759521484375, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2648167610168457, |
|
"rewards/margins": 0.7104849815368652, |
|
"rewards/rejected": -0.44566813111305237, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0271406727828745e-07, |
|
"logits/chosen": -2.650829792022705, |
|
"logits/rejected": -2.639927864074707, |
|
"logps/chosen": -326.4569091796875, |
|
"logps/rejected": -280.28082275390625, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.19319041073322296, |
|
"rewards/margins": 0.7100410461425781, |
|
"rewards/rejected": -0.5168507099151611, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -2.7454872131347656, |
|
"eval_logits/rejected": -2.6863038539886475, |
|
"eval_logps/chosen": -366.76788330078125, |
|
"eval_logps/rejected": -290.7203063964844, |
|
"eval_loss": 0.5235481858253479, |
|
"eval_rewards/accuracies": 0.7182539701461792, |
|
"eval_rewards/chosen": 0.3087466061115265, |
|
"eval_rewards/margins": 0.8095585107803345, |
|
"eval_rewards/rejected": -0.5008119344711304, |
|
"eval_runtime": 93.6963, |
|
"eval_samples_per_second": 21.346, |
|
"eval_steps_per_second": 0.672, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.00802752293578e-07, |
|
"logits/chosen": -2.605900526046753, |
|
"logits/rejected": -2.555630922317505, |
|
"logps/chosen": -345.3916931152344, |
|
"logps/rejected": -285.39794921875, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.22336134314537048, |
|
"rewards/margins": 0.6163698434829712, |
|
"rewards/rejected": -0.3930084705352783, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9889143730886847e-07, |
|
"logits/chosen": -2.7067675590515137, |
|
"logits/rejected": -2.6543965339660645, |
|
"logps/chosen": -391.22271728515625, |
|
"logps/rejected": -301.2769775390625, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2929418683052063, |
|
"rewards/margins": 0.7365559339523315, |
|
"rewards/rejected": -0.44361400604248047, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.96980122324159e-07, |
|
"logits/chosen": -2.6316628456115723, |
|
"logits/rejected": -2.583803653717041, |
|
"logps/chosen": -328.14593505859375, |
|
"logps/rejected": -247.4641876220703, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.29103153944015503, |
|
"rewards/margins": 0.9226154088973999, |
|
"rewards/rejected": -0.6315839886665344, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9506880733944953e-07, |
|
"logits/chosen": -2.62970232963562, |
|
"logits/rejected": -2.5595858097076416, |
|
"logps/chosen": -312.72320556640625, |
|
"logps/rejected": -264.06927490234375, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.2118232697248459, |
|
"rewards/margins": 0.8576194047927856, |
|
"rewards/rejected": -0.6457961797714233, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9315749235474006e-07, |
|
"logits/chosen": -2.5976932048797607, |
|
"logits/rejected": -2.6474385261535645, |
|
"logps/chosen": -352.48345947265625, |
|
"logps/rejected": -319.6570739746094, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2210746556520462, |
|
"rewards/margins": 0.6267276406288147, |
|
"rewards/rejected": -0.4056529402732849, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.912461773700306e-07, |
|
"logits/chosen": -2.7138149738311768, |
|
"logits/rejected": -2.6776392459869385, |
|
"logps/chosen": -351.1930236816406, |
|
"logps/rejected": -328.28466796875, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.16447851061820984, |
|
"rewards/margins": 0.7863180637359619, |
|
"rewards/rejected": -0.6218395233154297, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8933486238532107e-07, |
|
"logits/chosen": -2.493483304977417, |
|
"logits/rejected": -2.503645420074463, |
|
"logps/chosen": -380.6748046875, |
|
"logps/rejected": -284.23272705078125, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2832086980342865, |
|
"rewards/margins": 0.8768035173416138, |
|
"rewards/rejected": -0.5935948491096497, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874235474006116e-07, |
|
"logits/chosen": -2.607677936553955, |
|
"logits/rejected": -2.5798025131225586, |
|
"logps/chosen": -332.20635986328125, |
|
"logps/rejected": -271.32281494140625, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.297354131937027, |
|
"rewards/margins": 0.7697429656982422, |
|
"rewards/rejected": -0.4723888337612152, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8551223241590214e-07, |
|
"logits/chosen": -2.6368677616119385, |
|
"logits/rejected": -2.571981906890869, |
|
"logps/chosen": -321.2016906738281, |
|
"logps/rejected": -274.25274658203125, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2311544120311737, |
|
"rewards/margins": 0.679220974445343, |
|
"rewards/rejected": -0.4480666220188141, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8360091743119267e-07, |
|
"logits/chosen": -2.6375088691711426, |
|
"logits/rejected": -2.5301380157470703, |
|
"logps/chosen": -313.16973876953125, |
|
"logps/rejected": -271.7088623046875, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.27663588523864746, |
|
"rewards/margins": 0.7922950983047485, |
|
"rewards/rejected": -0.5156592130661011, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -2.753173351287842, |
|
"eval_logits/rejected": -2.694375991821289, |
|
"eval_logps/chosen": -366.52520751953125, |
|
"eval_logps/rejected": -290.4763488769531, |
|
"eval_loss": 0.5231475830078125, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.3330167233943939, |
|
"eval_rewards/margins": 0.809434175491333, |
|
"eval_rewards/rejected": -0.4764174818992615, |
|
"eval_runtime": 93.6005, |
|
"eval_samples_per_second": 21.367, |
|
"eval_steps_per_second": 0.673, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.816896024464832e-07, |
|
"logits/chosen": -2.628507614135742, |
|
"logits/rejected": -2.6525845527648926, |
|
"logps/chosen": -281.3057861328125, |
|
"logps/rejected": -249.5850067138672, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2345593422651291, |
|
"rewards/margins": 0.6988226771354675, |
|
"rewards/rejected": -0.464263379573822, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797782874617737e-07, |
|
"logits/chosen": -2.667789936065674, |
|
"logits/rejected": -2.6839444637298584, |
|
"logps/chosen": -340.7831115722656, |
|
"logps/rejected": -275.1526794433594, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.1838233470916748, |
|
"rewards/margins": 0.4514841139316559, |
|
"rewards/rejected": -0.2676607668399811, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778669724770642e-07, |
|
"logits/chosen": -2.5922303199768066, |
|
"logits/rejected": -2.578035831451416, |
|
"logps/chosen": -319.7664489746094, |
|
"logps/rejected": -242.1009063720703, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1281779408454895, |
|
"rewards/margins": 0.6992352604866028, |
|
"rewards/rejected": -0.5710573792457581, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7595565749235474e-07, |
|
"logits/chosen": -2.7459988594055176, |
|
"logits/rejected": -2.6785151958465576, |
|
"logps/chosen": -357.2470703125, |
|
"logps/rejected": -257.43853759765625, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.44489067792892456, |
|
"rewards/margins": 0.813278317451477, |
|
"rewards/rejected": -0.3683876097202301, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.740443425076452e-07, |
|
"logits/chosen": -2.6817257404327393, |
|
"logits/rejected": -2.6576712131500244, |
|
"logps/chosen": -350.62200927734375, |
|
"logps/rejected": -299.66448974609375, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2472398281097412, |
|
"rewards/margins": 0.6820510625839233, |
|
"rewards/rejected": -0.4348112642765045, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7213302752293575e-07, |
|
"logits/chosen": -2.646120309829712, |
|
"logits/rejected": -2.607883930206299, |
|
"logps/chosen": -338.7344665527344, |
|
"logps/rejected": -275.9519958496094, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.32568275928497314, |
|
"rewards/margins": 0.5753262639045715, |
|
"rewards/rejected": -0.24964356422424316, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.702217125382263e-07, |
|
"logits/chosen": -2.649812698364258, |
|
"logits/rejected": -2.5741024017333984, |
|
"logps/chosen": -369.88861083984375, |
|
"logps/rejected": -302.81536865234375, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.188663512468338, |
|
"rewards/margins": 0.8093651533126831, |
|
"rewards/rejected": -0.6207016110420227, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6831039755351677e-07, |
|
"logits/chosen": -2.6051228046417236, |
|
"logits/rejected": -2.6672070026397705, |
|
"logps/chosen": -333.53570556640625, |
|
"logps/rejected": -285.21490478515625, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.42203760147094727, |
|
"rewards/margins": 1.160388708114624, |
|
"rewards/rejected": -0.7383512258529663, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663990825688073e-07, |
|
"logits/chosen": -2.6160953044891357, |
|
"logits/rejected": -2.553997755050659, |
|
"logps/chosen": -319.94171142578125, |
|
"logps/rejected": -277.0104064941406, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2790919244289398, |
|
"rewards/margins": 0.8934859037399292, |
|
"rewards/rejected": -0.6143940687179565, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6448776758409783e-07, |
|
"logits/chosen": -2.7189905643463135, |
|
"logits/rejected": -2.594120979309082, |
|
"logps/chosen": -345.72369384765625, |
|
"logps/rejected": -298.8501281738281, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.45513787865638733, |
|
"rewards/margins": 0.9279818534851074, |
|
"rewards/rejected": -0.4728439450263977, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": -2.7465925216674805, |
|
"eval_logits/rejected": -2.688951253890991, |
|
"eval_logps/chosen": -366.4130554199219, |
|
"eval_logps/rejected": -290.52691650390625, |
|
"eval_loss": 0.521090567111969, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": 0.3442315459251404, |
|
"eval_rewards/margins": 0.8257037997245789, |
|
"eval_rewards/rejected": -0.48147228360176086, |
|
"eval_runtime": 93.2289, |
|
"eval_samples_per_second": 21.453, |
|
"eval_steps_per_second": 0.676, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6257645259938836e-07, |
|
"logits/chosen": -2.6213080883026123, |
|
"logits/rejected": -2.6167352199554443, |
|
"logps/chosen": -311.80169677734375, |
|
"logps/rejected": -280.1716613769531, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.372317373752594, |
|
"rewards/margins": 0.9983639717102051, |
|
"rewards/rejected": -0.6260465979576111, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.606651376146789e-07, |
|
"logits/chosen": -2.497844696044922, |
|
"logits/rejected": -2.5120177268981934, |
|
"logps/chosen": -311.4458312988281, |
|
"logps/rejected": -230.55569458007812, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3275541663169861, |
|
"rewards/margins": 0.951290488243103, |
|
"rewards/rejected": -0.6237363815307617, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5875382262996937e-07, |
|
"logits/chosen": -2.5676560401916504, |
|
"logits/rejected": -2.576164722442627, |
|
"logps/chosen": -336.22955322265625, |
|
"logps/rejected": -327.7220153808594, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.4555473327636719, |
|
"rewards/margins": 0.9157599210739136, |
|
"rewards/rejected": -0.4602126181125641, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568425076452599e-07, |
|
"logits/chosen": -2.683770179748535, |
|
"logits/rejected": -2.5734055042266846, |
|
"logps/chosen": -348.1385192871094, |
|
"logps/rejected": -290.2071838378906, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2006378471851349, |
|
"rewards/margins": 0.8747081756591797, |
|
"rewards/rejected": -0.6740702986717224, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5493119266055044e-07, |
|
"logits/chosen": -2.529114246368408, |
|
"logits/rejected": -2.5104918479919434, |
|
"logps/chosen": -326.69842529296875, |
|
"logps/rejected": -234.59725952148438, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2911790907382965, |
|
"rewards/margins": 1.0217406749725342, |
|
"rewards/rejected": -0.7305616140365601, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5301987767584097e-07, |
|
"logits/chosen": -2.5719220638275146, |
|
"logits/rejected": -2.503883123397827, |
|
"logps/chosen": -332.20758056640625, |
|
"logps/rejected": -299.2622375488281, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.3985035717487335, |
|
"rewards/margins": 1.1057422161102295, |
|
"rewards/rejected": -0.7072386145591736, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511085626911315e-07, |
|
"logits/chosen": -2.6049246788024902, |
|
"logits/rejected": -2.4959778785705566, |
|
"logps/chosen": -388.32818603515625, |
|
"logps/rejected": -289.9732971191406, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.37873345613479614, |
|
"rewards/margins": 1.0958116054534912, |
|
"rewards/rejected": -0.7170782089233398, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.49197247706422e-07, |
|
"logits/chosen": -2.5549204349517822, |
|
"logits/rejected": -2.5598814487457275, |
|
"logps/chosen": -276.199951171875, |
|
"logps/rejected": -245.2752685546875, |
|
"loss": 0.4359, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.11906807124614716, |
|
"rewards/margins": 0.9462801218032837, |
|
"rewards/rejected": -0.8272121548652649, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.472859327217125e-07, |
|
"logits/chosen": -2.477860450744629, |
|
"logits/rejected": -2.413344144821167, |
|
"logps/chosen": -355.36309814453125, |
|
"logps/rejected": -283.76123046875, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.2770375609397888, |
|
"rewards/margins": 1.1055151224136353, |
|
"rewards/rejected": -0.8284775614738464, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4537461773700304e-07, |
|
"logits/chosen": -2.53197979927063, |
|
"logits/rejected": -2.5624704360961914, |
|
"logps/chosen": -252.6802978515625, |
|
"logps/rejected": -211.03775024414062, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.12486497312784195, |
|
"rewards/margins": 1.0134916305541992, |
|
"rewards/rejected": -0.8886265754699707, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -2.73252272605896, |
|
"eval_logits/rejected": -2.6769962310791016, |
|
"eval_logps/chosen": -367.01220703125, |
|
"eval_logps/rejected": -291.74310302734375, |
|
"eval_loss": 0.5197181105613708, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.28431636095046997, |
|
"eval_rewards/margins": 0.8874091506004333, |
|
"eval_rewards/rejected": -0.6030928492546082, |
|
"eval_runtime": 93.7509, |
|
"eval_samples_per_second": 21.333, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.434633027522936e-07, |
|
"logits/chosen": -2.629892587661743, |
|
"logits/rejected": -2.5663561820983887, |
|
"logps/chosen": -359.1500549316406, |
|
"logps/rejected": -279.5158386230469, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.4036155641078949, |
|
"rewards/margins": 0.9569507837295532, |
|
"rewards/rejected": -0.5533351898193359, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.415519877675841e-07, |
|
"logits/chosen": -2.5857465267181396, |
|
"logits/rejected": -2.565286636352539, |
|
"logps/chosen": -307.4760437011719, |
|
"logps/rejected": -296.2510070800781, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.31315886974334717, |
|
"rewards/margins": 0.915540874004364, |
|
"rewards/rejected": -0.6023820638656616, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3964067278287464e-07, |
|
"logits/chosen": -2.638646364212036, |
|
"logits/rejected": -2.590546131134033, |
|
"logps/chosen": -354.6069641113281, |
|
"logps/rejected": -261.17510986328125, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3109307289123535, |
|
"rewards/margins": 1.081446647644043, |
|
"rewards/rejected": -0.7705159187316895, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377293577981651e-07, |
|
"logits/chosen": -2.5345458984375, |
|
"logits/rejected": -2.5311672687530518, |
|
"logps/chosen": -288.7183532714844, |
|
"logps/rejected": -270.4634704589844, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.10796526819467545, |
|
"rewards/margins": 1.147101640701294, |
|
"rewards/rejected": -1.0391361713409424, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3581804281345565e-07, |
|
"logits/chosen": -2.6268527507781982, |
|
"logits/rejected": -2.539853572845459, |
|
"logps/chosen": -364.0658874511719, |
|
"logps/rejected": -269.86798095703125, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.19580404460430145, |
|
"rewards/margins": 0.9232183694839478, |
|
"rewards/rejected": -0.7274142503738403, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.339067278287462e-07, |
|
"logits/chosen": -2.4458861351013184, |
|
"logits/rejected": -2.478278875350952, |
|
"logps/chosen": -316.80975341796875, |
|
"logps/rejected": -268.9418640136719, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.21194472908973694, |
|
"rewards/margins": 0.9436850547790527, |
|
"rewards/rejected": -0.731740415096283, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.319954128440367e-07, |
|
"logits/chosen": -2.6692590713500977, |
|
"logits/rejected": -2.565462827682495, |
|
"logps/chosen": -363.7535400390625, |
|
"logps/rejected": -267.73858642578125, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2941077649593353, |
|
"rewards/margins": 1.0993130207061768, |
|
"rewards/rejected": -0.8052051663398743, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3008409785932725e-07, |
|
"logits/chosen": -2.55599308013916, |
|
"logits/rejected": -2.5489039421081543, |
|
"logps/chosen": -317.7440490722656, |
|
"logps/rejected": -261.28021240234375, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.14147642254829407, |
|
"rewards/margins": 1.0507941246032715, |
|
"rewards/rejected": -0.909317672252655, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2817278287461773e-07, |
|
"logits/chosen": -2.5962812900543213, |
|
"logits/rejected": -2.5714190006256104, |
|
"logps/chosen": -291.72479248046875, |
|
"logps/rejected": -285.7821350097656, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2120455503463745, |
|
"rewards/margins": 1.1750547885894775, |
|
"rewards/rejected": -0.963009238243103, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262614678899082e-07, |
|
"logits/chosen": -2.585196018218994, |
|
"logits/rejected": -2.582301378250122, |
|
"logps/chosen": -314.81842041015625, |
|
"logps/rejected": -294.3887023925781, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.03839557617902756, |
|
"rewards/margins": 0.9020207524299622, |
|
"rewards/rejected": -0.863625168800354, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": -2.7277021408081055, |
|
"eval_logits/rejected": -2.672938108444214, |
|
"eval_logps/chosen": -367.73968505859375, |
|
"eval_logps/rejected": -292.8726806640625, |
|
"eval_loss": 0.5184498429298401, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": 0.21156975626945496, |
|
"eval_rewards/margins": 0.9276205897331238, |
|
"eval_rewards/rejected": -0.716050922870636, |
|
"eval_runtime": 93.6697, |
|
"eval_samples_per_second": 21.352, |
|
"eval_steps_per_second": 0.673, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2435015290519874e-07, |
|
"logits/chosen": -2.5604007244110107, |
|
"logits/rejected": -2.522665023803711, |
|
"logps/chosen": -278.86138916015625, |
|
"logps/rejected": -238.6088104248047, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.17497147619724274, |
|
"rewards/margins": 1.1593296527862549, |
|
"rewards/rejected": -0.9843581318855286, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2243883792048927e-07, |
|
"logits/chosen": -2.6064276695251465, |
|
"logits/rejected": -2.6227469444274902, |
|
"logps/chosen": -359.59637451171875, |
|
"logps/rejected": -339.55731201171875, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2552815079689026, |
|
"rewards/margins": 1.0309327840805054, |
|
"rewards/rejected": -0.7756513357162476, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.205275229357798e-07, |
|
"logits/chosen": -2.6077446937561035, |
|
"logits/rejected": -2.5000662803649902, |
|
"logps/chosen": -361.3202819824219, |
|
"logps/rejected": -317.98504638671875, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.11873336881399155, |
|
"rewards/margins": 0.9340359568595886, |
|
"rewards/rejected": -0.8153026700019836, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186162079510703e-07, |
|
"logits/chosen": -2.6293277740478516, |
|
"logits/rejected": -2.5831921100616455, |
|
"logps/chosen": -347.88690185546875, |
|
"logps/rejected": -309.56500244140625, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2751849293708801, |
|
"rewards/margins": 1.015895962715149, |
|
"rewards/rejected": -0.7407109141349792, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.167048929663608e-07, |
|
"logits/chosen": -2.574522018432617, |
|
"logits/rejected": -2.5256123542785645, |
|
"logps/chosen": -294.55194091796875, |
|
"logps/rejected": -239.9694061279297, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2018485963344574, |
|
"rewards/margins": 1.1163266897201538, |
|
"rewards/rejected": -0.9144781827926636, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1479357798165134e-07, |
|
"logits/chosen": -2.658360242843628, |
|
"logits/rejected": -2.649301290512085, |
|
"logps/chosen": -352.422607421875, |
|
"logps/rejected": -288.5453186035156, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.26997870206832886, |
|
"rewards/margins": 1.0690968036651611, |
|
"rewards/rejected": -0.7991181015968323, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.128822629969419e-07, |
|
"logits/chosen": -2.658994197845459, |
|
"logits/rejected": -2.6330344676971436, |
|
"logps/chosen": -349.5880432128906, |
|
"logps/rejected": -295.90374755859375, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.22147627174854279, |
|
"rewards/margins": 1.249265193939209, |
|
"rewards/rejected": -1.0277888774871826, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.109709480122324e-07, |
|
"logits/chosen": -2.5518264770507812, |
|
"logits/rejected": -2.5967154502868652, |
|
"logps/chosen": -294.47418212890625, |
|
"logps/rejected": -272.46380615234375, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.15300384163856506, |
|
"rewards/margins": 0.9860183596611023, |
|
"rewards/rejected": -0.8330146074295044, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0905963302752294e-07, |
|
"logits/chosen": -2.5392801761627197, |
|
"logits/rejected": -2.513974666595459, |
|
"logps/chosen": -308.92633056640625, |
|
"logps/rejected": -276.67645263671875, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.11630193144083023, |
|
"rewards/margins": 1.0185792446136475, |
|
"rewards/rejected": -0.9022773504257202, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.071483180428134e-07, |
|
"logits/chosen": -2.665621757507324, |
|
"logits/rejected": -2.6524558067321777, |
|
"logps/chosen": -398.4109802246094, |
|
"logps/rejected": -275.1439514160156, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.23084072768688202, |
|
"rewards/margins": 0.8804351091384888, |
|
"rewards/rejected": -0.6495944261550903, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_logits/chosen": -2.727848768234253, |
|
"eval_logits/rejected": -2.6739702224731445, |
|
"eval_logps/chosen": -367.76025390625, |
|
"eval_logps/rejected": -292.675048828125, |
|
"eval_loss": 0.5186832547187805, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": 0.20951056480407715, |
|
"eval_rewards/margins": 0.9057956337928772, |
|
"eval_rewards/rejected": -0.6962851285934448, |
|
"eval_runtime": 93.7475, |
|
"eval_samples_per_second": 21.334, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0523700305810395e-07, |
|
"logits/chosen": -2.6590070724487305, |
|
"logits/rejected": -2.571068286895752, |
|
"logps/chosen": -349.6532287597656, |
|
"logps/rejected": -309.3611145019531, |
|
"loss": 0.4328, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.35963043570518494, |
|
"rewards/margins": 1.3134393692016602, |
|
"rewards/rejected": -0.9538089632987976, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.033256880733945e-07, |
|
"logits/chosen": -2.5749783515930176, |
|
"logits/rejected": -2.5548958778381348, |
|
"logps/chosen": -308.1100769042969, |
|
"logps/rejected": -252.40725708007812, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0827903300523758, |
|
"rewards/margins": 0.9593188166618347, |
|
"rewards/rejected": -0.8765283823013306, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01414373088685e-07, |
|
"logits/chosen": -2.5704667568206787, |
|
"logits/rejected": -2.5135350227355957, |
|
"logps/chosen": -360.36517333984375, |
|
"logps/rejected": -275.2553405761719, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.13265377283096313, |
|
"rewards/margins": 1.178371548652649, |
|
"rewards/rejected": -1.0457178354263306, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9950305810397555e-07, |
|
"logits/chosen": -2.686350107192993, |
|
"logits/rejected": -2.5901427268981934, |
|
"logps/chosen": -342.09918212890625, |
|
"logps/rejected": -274.7805480957031, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.21238569915294647, |
|
"rewards/margins": 1.2895253896713257, |
|
"rewards/rejected": -1.0771398544311523, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9759174311926603e-07, |
|
"logits/chosen": -2.6003451347351074, |
|
"logits/rejected": -2.5708560943603516, |
|
"logps/chosen": -331.8475341796875, |
|
"logps/rejected": -273.82281494140625, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.016350137069821358, |
|
"rewards/margins": 0.9871395826339722, |
|
"rewards/rejected": -1.0034897327423096, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9568042813455656e-07, |
|
"logits/chosen": -2.581772804260254, |
|
"logits/rejected": -2.5919508934020996, |
|
"logps/chosen": -348.9093933105469, |
|
"logps/rejected": -245.66946411132812, |
|
"loss": 0.4551, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.1543104499578476, |
|
"rewards/margins": 1.031022310256958, |
|
"rewards/rejected": -0.8767116665840149, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937691131498471e-07, |
|
"logits/chosen": -2.6790993213653564, |
|
"logits/rejected": -2.57248854637146, |
|
"logps/chosen": -364.57208251953125, |
|
"logps/rejected": -291.68194580078125, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1870456337928772, |
|
"rewards/margins": 1.2713440656661987, |
|
"rewards/rejected": -1.0842984914779663, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.918577981651376e-07, |
|
"logits/chosen": -2.595109224319458, |
|
"logits/rejected": -2.5366103649139404, |
|
"logps/chosen": -301.737548828125, |
|
"logps/rejected": -275.08648681640625, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.10519032180309296, |
|
"rewards/margins": 0.8715200424194336, |
|
"rewards/rejected": -0.7663297653198242, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.8994648318042816e-07, |
|
"logits/chosen": -2.621727466583252, |
|
"logits/rejected": -2.5707826614379883, |
|
"logps/chosen": -336.1698913574219, |
|
"logps/rejected": -265.3817443847656, |
|
"loss": 0.4192, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.37031644582748413, |
|
"rewards/margins": 1.0992939472198486, |
|
"rewards/rejected": -0.7289775013923645, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8803516819571863e-07, |
|
"logits/chosen": -2.695518970489502, |
|
"logits/rejected": -2.606881618499756, |
|
"logps/chosen": -396.0542297363281, |
|
"logps/rejected": -309.7992248535156, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.2515701651573181, |
|
"rewards/margins": 1.056049108505249, |
|
"rewards/rejected": -0.8044789433479309, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/chosen": -2.7264468669891357, |
|
"eval_logits/rejected": -2.671557903289795, |
|
"eval_logps/chosen": -367.6226806640625, |
|
"eval_logps/rejected": -292.1658630371094, |
|
"eval_loss": 0.5153725147247314, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": 0.22327157855033875, |
|
"eval_rewards/margins": 0.8686384558677673, |
|
"eval_rewards/rejected": -0.645366907119751, |
|
"eval_runtime": 93.411, |
|
"eval_samples_per_second": 21.411, |
|
"eval_steps_per_second": 0.674, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8612385321100917e-07, |
|
"logits/chosen": -2.505145311355591, |
|
"logits/rejected": -2.462804079055786, |
|
"logps/chosen": -304.18524169921875, |
|
"logps/rejected": -281.14593505859375, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.17369304597377777, |
|
"rewards/margins": 0.9988886117935181, |
|
"rewards/rejected": -0.8251956105232239, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842125382262997e-07, |
|
"logits/chosen": -2.5498111248016357, |
|
"logits/rejected": -2.4971938133239746, |
|
"logps/chosen": -320.25738525390625, |
|
"logps/rejected": -273.7497863769531, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.2570057809352875, |
|
"rewards/margins": 1.0560768842697144, |
|
"rewards/rejected": -0.799071192741394, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8230122324159023e-07, |
|
"logits/chosen": -2.496542453765869, |
|
"logits/rejected": -2.508702516555786, |
|
"logps/chosen": -366.1961975097656, |
|
"logps/rejected": -331.0256652832031, |
|
"loss": 0.4079, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.43470677733421326, |
|
"rewards/margins": 1.43040132522583, |
|
"rewards/rejected": -0.9956945180892944, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8038990825688076e-07, |
|
"logits/chosen": -2.560516357421875, |
|
"logits/rejected": -2.5475456714630127, |
|
"logps/chosen": -283.05596923828125, |
|
"logps/rejected": -232.9197998046875, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.13742788136005402, |
|
"rewards/margins": 0.8214957118034363, |
|
"rewards/rejected": -0.6840678453445435, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.784785932721712e-07, |
|
"logits/chosen": -2.58160662651062, |
|
"logits/rejected": -2.504528045654297, |
|
"logps/chosen": -342.2316589355469, |
|
"logps/rejected": -277.2345275878906, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.18114377558231354, |
|
"rewards/margins": 1.0414986610412598, |
|
"rewards/rejected": -0.8603549003601074, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.765672782874617e-07, |
|
"logits/chosen": -2.579395294189453, |
|
"logits/rejected": -2.503072738647461, |
|
"logps/chosen": -343.522216796875, |
|
"logps/rejected": -258.1620178222656, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.22307118773460388, |
|
"rewards/margins": 0.9056950807571411, |
|
"rewards/rejected": -0.6826238632202148, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7465596330275225e-07, |
|
"logits/chosen": -2.5702877044677734, |
|
"logits/rejected": -2.550537109375, |
|
"logps/chosen": -358.40081787109375, |
|
"logps/rejected": -223.83908081054688, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.20269151031970978, |
|
"rewards/margins": 1.1862519979476929, |
|
"rewards/rejected": -0.9835604429244995, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.727446483180428e-07, |
|
"logits/chosen": -2.630905866622925, |
|
"logits/rejected": -2.5621542930603027, |
|
"logps/chosen": -324.4598083496094, |
|
"logps/rejected": -284.69232177734375, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05295666307210922, |
|
"rewards/margins": 0.926548182964325, |
|
"rewards/rejected": -0.8735915422439575, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -2.4956183433532715, |
|
"logits/rejected": -2.499742031097412, |
|
"logps/chosen": -307.091552734375, |
|
"logps/rejected": -282.85186767578125, |
|
"loss": 0.4138, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.10083981603384018, |
|
"rewards/margins": 1.0654906034469604, |
|
"rewards/rejected": -0.9646507501602173, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6892201834862385e-07, |
|
"logits/chosen": -2.535083770751953, |
|
"logits/rejected": -2.5149922370910645, |
|
"logps/chosen": -342.21600341796875, |
|
"logps/rejected": -258.53076171875, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.20716246962547302, |
|
"rewards/margins": 1.2411352396011353, |
|
"rewards/rejected": -1.033972978591919, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_logits/chosen": -2.724445104598999, |
|
"eval_logits/rejected": -2.6694822311401367, |
|
"eval_logps/chosen": -367.8693542480469, |
|
"eval_logps/rejected": -292.79150390625, |
|
"eval_loss": 0.5157531499862671, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.19860157370567322, |
|
"eval_rewards/margins": 0.9065337777137756, |
|
"eval_rewards/rejected": -0.7079322934150696, |
|
"eval_runtime": 93.8811, |
|
"eval_samples_per_second": 21.304, |
|
"eval_steps_per_second": 0.671, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6701070336391433e-07, |
|
"logits/chosen": -2.611865282058716, |
|
"logits/rejected": -2.6318869590759277, |
|
"logps/chosen": -405.0105895996094, |
|
"logps/rejected": -297.174560546875, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.23556602001190186, |
|
"rewards/margins": 0.9569095373153687, |
|
"rewards/rejected": -0.7213433980941772, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6509938837920486e-07, |
|
"logits/chosen": -2.6025757789611816, |
|
"logits/rejected": -2.5256543159484863, |
|
"logps/chosen": -291.0908508300781, |
|
"logps/rejected": -270.0452575683594, |
|
"loss": 0.4221, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.2756959795951843, |
|
"rewards/margins": 1.0691989660263062, |
|
"rewards/rejected": -0.793502926826477, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.631880733944954e-07, |
|
"logits/chosen": -2.6069562435150146, |
|
"logits/rejected": -2.571159839630127, |
|
"logps/chosen": -325.5701599121094, |
|
"logps/rejected": -268.36822509765625, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.26621466875076294, |
|
"rewards/margins": 0.8793444633483887, |
|
"rewards/rejected": -0.613129734992981, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.612767584097859e-07, |
|
"logits/chosen": -2.629546880722046, |
|
"logits/rejected": -2.572211742401123, |
|
"logps/chosen": -323.9189453125, |
|
"logps/rejected": -257.16033935546875, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.28304505348205566, |
|
"rewards/margins": 0.9886387586593628, |
|
"rewards/rejected": -0.7055937051773071, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936544342507646e-07, |
|
"logits/chosen": -2.647836208343506, |
|
"logits/rejected": -2.506730556488037, |
|
"logps/chosen": -352.39337158203125, |
|
"logps/rejected": -284.34246826171875, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.32185670733451843, |
|
"rewards/margins": 1.156570553779602, |
|
"rewards/rejected": -0.8347137570381165, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5745412844036693e-07, |
|
"logits/chosen": -2.5880885124206543, |
|
"logits/rejected": -2.5855612754821777, |
|
"logps/chosen": -352.1588134765625, |
|
"logps/rejected": -263.62005615234375, |
|
"loss": 0.4388, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.24963721632957458, |
|
"rewards/margins": 1.1166155338287354, |
|
"rewards/rejected": -0.8669784665107727, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5554281345565747e-07, |
|
"logits/chosen": -2.6382758617401123, |
|
"logits/rejected": -2.5842955112457275, |
|
"logps/chosen": -328.3256530761719, |
|
"logps/rejected": -290.60968017578125, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.29845088720321655, |
|
"rewards/margins": 1.0773983001708984, |
|
"rewards/rejected": -0.7789472937583923, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.53631498470948e-07, |
|
"logits/chosen": -2.6656248569488525, |
|
"logits/rejected": -2.6021933555603027, |
|
"logps/chosen": -358.393310546875, |
|
"logps/rejected": -268.1978454589844, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.34928596019744873, |
|
"rewards/margins": 1.214766263961792, |
|
"rewards/rejected": -0.8654803037643433, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5172018348623853e-07, |
|
"logits/chosen": -2.5791847705841064, |
|
"logits/rejected": -2.5275607109069824, |
|
"logps/chosen": -348.0746765136719, |
|
"logps/rejected": -269.4404602050781, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.1630961000919342, |
|
"rewards/margins": 1.0848236083984375, |
|
"rewards/rejected": -0.9217275381088257, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4980886850152906e-07, |
|
"logits/chosen": -2.624077796936035, |
|
"logits/rejected": -2.550253391265869, |
|
"logps/chosen": -353.8025817871094, |
|
"logps/rejected": -263.6338806152344, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2791728377342224, |
|
"rewards/margins": 1.2417490482330322, |
|
"rewards/rejected": -0.9625762104988098, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/chosen": -2.718714475631714, |
|
"eval_logits/rejected": -2.6639087200164795, |
|
"eval_logps/chosen": -367.8188171386719, |
|
"eval_logps/rejected": -292.5534973144531, |
|
"eval_loss": 0.5148204565048218, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.20365335047245026, |
|
"eval_rewards/margins": 0.8877854347229004, |
|
"eval_rewards/rejected": -0.6841320395469666, |
|
"eval_runtime": 93.9834, |
|
"eval_samples_per_second": 21.28, |
|
"eval_steps_per_second": 0.67, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478975535168196e-07, |
|
"logits/chosen": -2.595499038696289, |
|
"logits/rejected": -2.548311948776245, |
|
"logps/chosen": -330.22088623046875, |
|
"logps/rejected": -250.4892120361328, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.27131223678588867, |
|
"rewards/margins": 1.1109501123428345, |
|
"rewards/rejected": -0.8396379351615906, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.459862385321101e-07, |
|
"logits/chosen": -2.7138476371765137, |
|
"logits/rejected": -2.624547243118286, |
|
"logps/chosen": -395.07122802734375, |
|
"logps/rejected": -291.44482421875, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.198801189661026, |
|
"rewards/margins": 0.8508071899414062, |
|
"rewards/rejected": -0.6520059108734131, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.440749235474006e-07, |
|
"logits/chosen": -2.651254653930664, |
|
"logits/rejected": -2.5482094287872314, |
|
"logps/chosen": -360.50970458984375, |
|
"logps/rejected": -281.64727783203125, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.36835628747940063, |
|
"rewards/margins": 1.4067318439483643, |
|
"rewards/rejected": -1.0383756160736084, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421636085626911e-07, |
|
"logits/chosen": -2.5148165225982666, |
|
"logits/rejected": -2.4600250720977783, |
|
"logps/chosen": -334.34246826171875, |
|
"logps/rejected": -256.6380310058594, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3089974820613861, |
|
"rewards/margins": 0.9867910146713257, |
|
"rewards/rejected": -0.6777936220169067, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.402522935779816e-07, |
|
"logits/chosen": -2.645481586456299, |
|
"logits/rejected": -2.6082756519317627, |
|
"logps/chosen": -361.30474853515625, |
|
"logps/rejected": -317.3288879394531, |
|
"loss": 0.4247, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.28494271636009216, |
|
"rewards/margins": 1.162066102027893, |
|
"rewards/rejected": -0.8771233558654785, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3834097859327215e-07, |
|
"logits/chosen": -2.5973002910614014, |
|
"logits/rejected": -2.552525281906128, |
|
"logps/chosen": -309.293701171875, |
|
"logps/rejected": -275.47344970703125, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.2845771014690399, |
|
"rewards/margins": 1.1519324779510498, |
|
"rewards/rejected": -0.8673553466796875, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3642966360856268e-07, |
|
"logits/chosen": -2.644890546798706, |
|
"logits/rejected": -2.6000285148620605, |
|
"logps/chosen": -344.3207702636719, |
|
"logps/rejected": -269.1863708496094, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.44529637694358826, |
|
"rewards/margins": 1.219225525856018, |
|
"rewards/rejected": -0.7739290595054626, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345183486238532e-07, |
|
"logits/chosen": -2.603766918182373, |
|
"logits/rejected": -2.548722743988037, |
|
"logps/chosen": -322.1404724121094, |
|
"logps/rejected": -316.88507080078125, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1287519633769989, |
|
"rewards/margins": 0.9171475172042847, |
|
"rewards/rejected": -0.7883955836296082, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3260703363914372e-07, |
|
"logits/chosen": -2.6459596157073975, |
|
"logits/rejected": -2.615452766418457, |
|
"logps/chosen": -381.41748046875, |
|
"logps/rejected": -300.71905517578125, |
|
"loss": 0.4087, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.216604083776474, |
|
"rewards/margins": 1.141821026802063, |
|
"rewards/rejected": -0.9252168536186218, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3069571865443425e-07, |
|
"logits/chosen": -2.63240122795105, |
|
"logits/rejected": -2.6180896759033203, |
|
"logps/chosen": -291.97161865234375, |
|
"logps/rejected": -283.84661865234375, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.04391371086239815, |
|
"rewards/margins": 0.8607124090194702, |
|
"rewards/rejected": -0.9046260714530945, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_logits/chosen": -2.718451976776123, |
|
"eval_logits/rejected": -2.6635725498199463, |
|
"eval_logps/chosen": -368.5582275390625, |
|
"eval_logps/rejected": -293.53076171875, |
|
"eval_loss": 0.5146352052688599, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": 0.1297152191400528, |
|
"eval_rewards/margins": 0.9115698337554932, |
|
"eval_rewards/rejected": -0.781854510307312, |
|
"eval_runtime": 93.2472, |
|
"eval_samples_per_second": 21.448, |
|
"eval_steps_per_second": 0.676, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2878440366972476e-07, |
|
"logits/chosen": -2.6936182975769043, |
|
"logits/rejected": -2.6290268898010254, |
|
"logps/chosen": -355.96124267578125, |
|
"logps/rejected": -341.81549072265625, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.23748740553855896, |
|
"rewards/margins": 1.278755784034729, |
|
"rewards/rejected": -1.0412683486938477, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.268730886850153e-07, |
|
"logits/chosen": -2.5507352352142334, |
|
"logits/rejected": -2.453554391860962, |
|
"logps/chosen": -344.82452392578125, |
|
"logps/rejected": -273.4502868652344, |
|
"loss": 0.4313, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.22865517437458038, |
|
"rewards/margins": 0.964372992515564, |
|
"rewards/rejected": -0.7357178330421448, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.249617737003058e-07, |
|
"logits/chosen": -2.6256864070892334, |
|
"logits/rejected": -2.5964672565460205, |
|
"logps/chosen": -369.0529479980469, |
|
"logps/rejected": -331.5936279296875, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.12068464607000351, |
|
"rewards/margins": 1.1605781316757202, |
|
"rewards/rejected": -1.039893388748169, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2305045871559633e-07, |
|
"logits/chosen": -2.565049171447754, |
|
"logits/rejected": -2.510228157043457, |
|
"logps/chosen": -359.94671630859375, |
|
"logps/rejected": -297.53460693359375, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.034449171274900436, |
|
"rewards/margins": 0.8863730430603027, |
|
"rewards/rejected": -0.920822262763977, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113914373088686e-07, |
|
"logits/chosen": -2.656843662261963, |
|
"logits/rejected": -2.610401153564453, |
|
"logps/chosen": -363.86944580078125, |
|
"logps/rejected": -314.6811218261719, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.2026744782924652, |
|
"rewards/margins": 1.1955655813217163, |
|
"rewards/rejected": -0.9928911328315735, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1922782874617736e-07, |
|
"logits/chosen": -2.5376534461975098, |
|
"logits/rejected": -2.570894956588745, |
|
"logps/chosen": -300.3117980957031, |
|
"logps/rejected": -328.97003173828125, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.10297539085149765, |
|
"rewards/margins": 1.1623529195785522, |
|
"rewards/rejected": -1.0593774318695068, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1731651376146787e-07, |
|
"logits/chosen": -2.545215129852295, |
|
"logits/rejected": -2.538797616958618, |
|
"logps/chosen": -290.8783874511719, |
|
"logps/rejected": -284.0111999511719, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.048707880079746246, |
|
"rewards/margins": 0.9897578358650208, |
|
"rewards/rejected": -0.9410500526428223, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.154051987767584e-07, |
|
"logits/chosen": -2.6110103130340576, |
|
"logits/rejected": -2.5820937156677246, |
|
"logps/chosen": -335.0147705078125, |
|
"logps/rejected": -273.63763427734375, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0762074887752533, |
|
"rewards/margins": 1.0348106622695923, |
|
"rewards/rejected": -0.9586030840873718, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.134938837920489e-07, |
|
"logits/chosen": -2.545682430267334, |
|
"logits/rejected": -2.560351848602295, |
|
"logps/chosen": -249.69970703125, |
|
"logps/rejected": -240.6475067138672, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14385946094989777, |
|
"rewards/margins": 1.0989620685577393, |
|
"rewards/rejected": -0.9551024436950684, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1158256880733944e-07, |
|
"logits/chosen": -2.6015000343322754, |
|
"logits/rejected": -2.561143636703491, |
|
"logps/chosen": -373.9018859863281, |
|
"logps/rejected": -261.9052429199219, |
|
"loss": 0.4128, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.21687176823616028, |
|
"rewards/margins": 1.2524230480194092, |
|
"rewards/rejected": -1.0355513095855713, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -2.7194206714630127, |
|
"eval_logits/rejected": -2.665125608444214, |
|
"eval_logps/chosen": -368.43719482421875, |
|
"eval_logps/rejected": -293.5338439941406, |
|
"eval_loss": 0.5128700137138367, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.14181897044181824, |
|
"eval_rewards/margins": 0.9239831566810608, |
|
"eval_rewards/rejected": -0.7821642160415649, |
|
"eval_runtime": 93.6913, |
|
"eval_samples_per_second": 21.347, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0967125382262994e-07, |
|
"logits/chosen": -2.5647854804992676, |
|
"logits/rejected": -2.549739360809326, |
|
"logps/chosen": -345.7157287597656, |
|
"logps/rejected": -280.04290771484375, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.2801140248775482, |
|
"rewards/margins": 1.1526819467544556, |
|
"rewards/rejected": -0.8725678324699402, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775993883792048e-07, |
|
"logits/chosen": -2.5792086124420166, |
|
"logits/rejected": -2.5236520767211914, |
|
"logps/chosen": -358.15264892578125, |
|
"logps/rejected": -264.4053955078125, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.24478964507579803, |
|
"rewards/margins": 1.050995111465454, |
|
"rewards/rejected": -0.8062052726745605, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.05848623853211e-07, |
|
"logits/chosen": -2.627408504486084, |
|
"logits/rejected": -2.6247506141662598, |
|
"logps/chosen": -375.7535400390625, |
|
"logps/rejected": -278.0137023925781, |
|
"loss": 0.4119, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.061488620936870575, |
|
"rewards/margins": 1.0074807405471802, |
|
"rewards/rejected": -0.945992112159729, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0393730886850151e-07, |
|
"logits/chosen": -2.6022982597351074, |
|
"logits/rejected": -2.514789581298828, |
|
"logps/chosen": -372.36346435546875, |
|
"logps/rejected": -270.7804260253906, |
|
"loss": 0.4066, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.11869986355304718, |
|
"rewards/margins": 1.225434422492981, |
|
"rewards/rejected": -1.1067346334457397, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0202599388379205e-07, |
|
"logits/chosen": -2.643916130065918, |
|
"logits/rejected": -2.6036553382873535, |
|
"logps/chosen": -346.41107177734375, |
|
"logps/rejected": -342.21392822265625, |
|
"loss": 0.3963, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.09456288814544678, |
|
"rewards/margins": 1.0665562152862549, |
|
"rewards/rejected": -0.9719934463500977, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011467889908258e-07, |
|
"logits/chosen": -2.6812074184417725, |
|
"logits/rejected": -2.5795161724090576, |
|
"logps/chosen": -375.649658203125, |
|
"logps/rejected": -224.4620361328125, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3432249426841736, |
|
"rewards/margins": 1.4275915622711182, |
|
"rewards/rejected": -1.0843665599822998, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9820336391437308e-07, |
|
"logits/chosen": -2.569981575012207, |
|
"logits/rejected": -2.5870277881622314, |
|
"logps/chosen": -320.9058837890625, |
|
"logps/rejected": -265.43145751953125, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09469388425350189, |
|
"rewards/margins": 0.9917991757392883, |
|
"rewards/rejected": -0.89710533618927, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9629204892966362e-07, |
|
"logits/chosen": -2.670983076095581, |
|
"logits/rejected": -2.632185459136963, |
|
"logps/chosen": -357.2303466796875, |
|
"logps/rejected": -270.21966552734375, |
|
"loss": 0.4084, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3034873604774475, |
|
"rewards/margins": 1.3726078271865845, |
|
"rewards/rejected": -1.0691205263137817, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.943807339449541e-07, |
|
"logits/chosen": -2.5820472240448, |
|
"logits/rejected": -2.480771780014038, |
|
"logps/chosen": -325.0736389160156, |
|
"logps/rejected": -292.05291748046875, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.13287527859210968, |
|
"rewards/margins": 1.379725456237793, |
|
"rewards/rejected": -1.2468502521514893, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9246941896024463e-07, |
|
"logits/chosen": -2.5788347721099854, |
|
"logits/rejected": -2.6083314418792725, |
|
"logps/chosen": -331.9407958984375, |
|
"logps/rejected": -301.12176513671875, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.006614005658775568, |
|
"rewards/margins": 0.950186550617218, |
|
"rewards/rejected": -0.9435726404190063, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -2.7248175144195557, |
|
"eval_logits/rejected": -2.6709485054016113, |
|
"eval_logps/chosen": -368.8879089355469, |
|
"eval_logps/rejected": -293.96771240234375, |
|
"eval_loss": 0.5125181078910828, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": 0.096745066344738, |
|
"eval_rewards/margins": 0.9222998023033142, |
|
"eval_rewards/rejected": -0.8255547881126404, |
|
"eval_runtime": 93.5081, |
|
"eval_samples_per_second": 21.389, |
|
"eval_steps_per_second": 0.674, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9055810397553516e-07, |
|
"logits/chosen": -2.6049692630767822, |
|
"logits/rejected": -2.5768351554870605, |
|
"logps/chosen": -335.76861572265625, |
|
"logps/rejected": -283.60418701171875, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1541665941476822, |
|
"rewards/margins": 1.058864951133728, |
|
"rewards/rejected": -0.904698371887207, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8864678899082566e-07, |
|
"logits/chosen": -2.58392333984375, |
|
"logits/rejected": -2.5964534282684326, |
|
"logps/chosen": -334.490966796875, |
|
"logps/rejected": -253.0091552734375, |
|
"loss": 0.4227, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.1425321400165558, |
|
"rewards/margins": 1.186306357383728, |
|
"rewards/rejected": -1.0437742471694946, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.867354740061162e-07, |
|
"logits/chosen": -2.5509891510009766, |
|
"logits/rejected": -2.574993371963501, |
|
"logps/chosen": -318.6093444824219, |
|
"logps/rejected": -283.29803466796875, |
|
"loss": 0.4151, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.036455459892749786, |
|
"rewards/margins": 1.2164264917373657, |
|
"rewards/rejected": -1.1799709796905518, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8482415902140673e-07, |
|
"logits/chosen": -2.606382131576538, |
|
"logits/rejected": -2.597195625305176, |
|
"logps/chosen": -324.63934326171875, |
|
"logps/rejected": -269.2276306152344, |
|
"loss": 0.4031, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2206292450428009, |
|
"rewards/margins": 1.2134065628051758, |
|
"rewards/rejected": -0.9927773475646973, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8291284403669723e-07, |
|
"logits/chosen": -2.585014820098877, |
|
"logits/rejected": -2.621039628982544, |
|
"logps/chosen": -318.5997314453125, |
|
"logps/rejected": -273.28533935546875, |
|
"loss": 0.3465, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2141040861606598, |
|
"rewards/margins": 1.1809941530227661, |
|
"rewards/rejected": -0.9668900370597839, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8100152905198777e-07, |
|
"logits/chosen": -2.4339516162872314, |
|
"logits/rejected": -2.442187547683716, |
|
"logps/chosen": -335.56097412109375, |
|
"logps/rejected": -327.67803955078125, |
|
"loss": 0.3465, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2534107565879822, |
|
"rewards/margins": 1.4552501440048218, |
|
"rewards/rejected": -1.2018392086029053, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7909021406727827e-07, |
|
"logits/chosen": -2.5640928745269775, |
|
"logits/rejected": -2.6635804176330566, |
|
"logps/chosen": -324.06988525390625, |
|
"logps/rejected": -259.5931091308594, |
|
"loss": 0.3612, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.24693092703819275, |
|
"rewards/margins": 1.3844406604766846, |
|
"rewards/rejected": -1.1375097036361694, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771788990825688e-07, |
|
"logits/chosen": -2.653184652328491, |
|
"logits/rejected": -2.581162929534912, |
|
"logps/chosen": -341.0339050292969, |
|
"logps/rejected": -292.974853515625, |
|
"loss": 0.3735, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.31022703647613525, |
|
"rewards/margins": 1.3054556846618652, |
|
"rewards/rejected": -0.99522864818573, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7526758409785934e-07, |
|
"logits/chosen": -2.681022882461548, |
|
"logits/rejected": -2.5809178352355957, |
|
"logps/chosen": -386.1690368652344, |
|
"logps/rejected": -304.6114807128906, |
|
"loss": 0.3469, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.222796231508255, |
|
"rewards/margins": 1.5472052097320557, |
|
"rewards/rejected": -1.324409008026123, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7335626911314984e-07, |
|
"logits/chosen": -2.6000771522521973, |
|
"logits/rejected": -2.522247314453125, |
|
"logps/chosen": -287.4322509765625, |
|
"logps/rejected": -245.08792114257812, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.005565536208450794, |
|
"rewards/margins": 1.2278538942337036, |
|
"rewards/rejected": -1.2222882509231567, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_logits/chosen": -2.721067428588867, |
|
"eval_logits/rejected": -2.6688947677612305, |
|
"eval_logps/chosen": -369.22808837890625, |
|
"eval_logps/rejected": -294.6591491699219, |
|
"eval_loss": 0.5129652619361877, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": 0.06272637844085693, |
|
"eval_rewards/margins": 0.9574272036552429, |
|
"eval_rewards/rejected": -0.8947007060050964, |
|
"eval_runtime": 93.5294, |
|
"eval_samples_per_second": 21.384, |
|
"eval_steps_per_second": 0.674, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7144495412844037e-07, |
|
"logits/chosen": -2.6307573318481445, |
|
"logits/rejected": -2.566410541534424, |
|
"logps/chosen": -383.6712341308594, |
|
"logps/rejected": -295.6425476074219, |
|
"loss": 0.3426, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.12579414248466492, |
|
"rewards/margins": 1.5774627923965454, |
|
"rewards/rejected": -1.4516685009002686, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6953363914373088e-07, |
|
"logits/chosen": -2.708552837371826, |
|
"logits/rejected": -2.5603816509246826, |
|
"logps/chosen": -383.33270263671875, |
|
"logps/rejected": -278.8658752441406, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.13873082399368286, |
|
"rewards/margins": 1.3727829456329346, |
|
"rewards/rejected": -1.234052062034607, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6762232415902138e-07, |
|
"logits/chosen": -2.590285539627075, |
|
"logits/rejected": -2.574758768081665, |
|
"logps/chosen": -320.2268371582031, |
|
"logps/rejected": -267.88092041015625, |
|
"loss": 0.3694, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0574222207069397, |
|
"rewards/margins": 1.3185408115386963, |
|
"rewards/rejected": -1.2611186504364014, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6571100917431192e-07, |
|
"logits/chosen": -2.6858019828796387, |
|
"logits/rejected": -2.6346988677978516, |
|
"logps/chosen": -340.21624755859375, |
|
"logps/rejected": -254.05859375, |
|
"loss": 0.3503, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2275957614183426, |
|
"rewards/margins": 1.4201277494430542, |
|
"rewards/rejected": -1.1925320625305176, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6379969418960242e-07, |
|
"logits/chosen": -2.6139819622039795, |
|
"logits/rejected": -2.611030101776123, |
|
"logps/chosen": -396.6602783203125, |
|
"logps/rejected": -331.1925048828125, |
|
"loss": 0.3537, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.23956382274627686, |
|
"rewards/margins": 1.449977159500122, |
|
"rewards/rejected": -1.2104132175445557, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6188837920489295e-07, |
|
"logits/chosen": -2.6991240978240967, |
|
"logits/rejected": -2.5340638160705566, |
|
"logps/chosen": -367.3908386230469, |
|
"logps/rejected": -262.90155029296875, |
|
"loss": 0.3552, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.16707102954387665, |
|
"rewards/margins": 1.3888185024261475, |
|
"rewards/rejected": -1.2217473983764648, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5997706422018349e-07, |
|
"logits/chosen": -2.729551315307617, |
|
"logits/rejected": -2.634174346923828, |
|
"logps/chosen": -347.735107421875, |
|
"logps/rejected": -352.8389892578125, |
|
"loss": 0.3628, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3393300473690033, |
|
"rewards/margins": 1.329689860343933, |
|
"rewards/rejected": -0.990359902381897, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.58065749235474e-07, |
|
"logits/chosen": -2.6678922176361084, |
|
"logits/rejected": -2.5802249908447266, |
|
"logps/chosen": -405.42071533203125, |
|
"logps/rejected": -323.5943298339844, |
|
"loss": 0.363, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.4442678391933441, |
|
"rewards/margins": 1.3085780143737793, |
|
"rewards/rejected": -0.8643101453781128, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5615443425076452e-07, |
|
"logits/chosen": -2.5303499698638916, |
|
"logits/rejected": -2.5398049354553223, |
|
"logps/chosen": -333.7301025390625, |
|
"logps/rejected": -273.6751403808594, |
|
"loss": 0.3423, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2193554937839508, |
|
"rewards/margins": 1.4085685014724731, |
|
"rewards/rejected": -1.1892130374908447, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424311926605506e-07, |
|
"logits/chosen": -2.5035347938537598, |
|
"logits/rejected": -2.4870362281799316, |
|
"logps/chosen": -297.1385498046875, |
|
"logps/rejected": -234.04006958007812, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.21916255354881287, |
|
"rewards/margins": 1.5722987651824951, |
|
"rewards/rejected": -1.3531363010406494, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_logits/chosen": -2.7217917442321777, |
|
"eval_logits/rejected": -2.6708853244781494, |
|
"eval_logps/chosen": -369.4024963378906, |
|
"eval_logps/rejected": -295.1770324707031, |
|
"eval_loss": 0.5123242139816284, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": 0.045285556465387344, |
|
"eval_rewards/margins": 0.9917705059051514, |
|
"eval_rewards/rejected": -0.9464850425720215, |
|
"eval_runtime": 93.4525, |
|
"eval_samples_per_second": 21.401, |
|
"eval_steps_per_second": 0.674, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5233180428134556e-07, |
|
"logits/chosen": -2.6595582962036133, |
|
"logits/rejected": -2.6285529136657715, |
|
"logps/chosen": -347.6577453613281, |
|
"logps/rejected": -276.10015869140625, |
|
"loss": 0.347, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2700585722923279, |
|
"rewards/margins": 1.5010900497436523, |
|
"rewards/rejected": -1.2310314178466797, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.504204892966361e-07, |
|
"logits/chosen": -2.5997138023376465, |
|
"logits/rejected": -2.5408456325531006, |
|
"logps/chosen": -377.7714538574219, |
|
"logps/rejected": -303.34222412109375, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.1600765436887741, |
|
"rewards/margins": 1.3487540483474731, |
|
"rewards/rejected": -1.1886775493621826, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.485091743119266e-07, |
|
"logits/chosen": -2.7113072872161865, |
|
"logits/rejected": -2.543774127960205, |
|
"logps/chosen": -316.48150634765625, |
|
"logps/rejected": -299.80230712890625, |
|
"loss": 0.3588, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.09655191749334335, |
|
"rewards/margins": 1.3552639484405518, |
|
"rewards/rejected": -1.2587120532989502, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.465978593272171e-07, |
|
"logits/chosen": -2.590290069580078, |
|
"logits/rejected": -2.5230295658111572, |
|
"logps/chosen": -252.84683227539062, |
|
"logps/rejected": -184.59024047851562, |
|
"loss": 0.3436, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.08013294637203217, |
|
"rewards/margins": 1.3275083303451538, |
|
"rewards/rejected": -1.2473753690719604, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4468654434250764e-07, |
|
"logits/chosen": -2.646439790725708, |
|
"logits/rejected": -2.597733736038208, |
|
"logps/chosen": -399.18341064453125, |
|
"logps/rejected": -342.6891174316406, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.27550849318504333, |
|
"rewards/margins": 1.4095003604888916, |
|
"rewards/rejected": -1.133992075920105, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4277522935779814e-07, |
|
"logits/chosen": -2.5901618003845215, |
|
"logits/rejected": -2.635596990585327, |
|
"logps/chosen": -347.9039001464844, |
|
"logps/rejected": -300.8187255859375, |
|
"loss": 0.3482, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2792564630508423, |
|
"rewards/margins": 1.5975362062454224, |
|
"rewards/rejected": -1.318279504776001, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4086391437308867e-07, |
|
"logits/chosen": -2.662043333053589, |
|
"logits/rejected": -2.637568712234497, |
|
"logps/chosen": -365.7566223144531, |
|
"logps/rejected": -337.00518798828125, |
|
"loss": 0.3335, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.11271584033966064, |
|
"rewards/margins": 1.511805534362793, |
|
"rewards/rejected": -1.3990895748138428, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.389525993883792e-07, |
|
"logits/chosen": -2.6112186908721924, |
|
"logits/rejected": -2.5363845825195312, |
|
"logps/chosen": -290.4798583984375, |
|
"logps/rejected": -244.0265350341797, |
|
"loss": 0.3783, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.04764432832598686, |
|
"rewards/margins": 1.378379225730896, |
|
"rewards/rejected": -1.3307349681854248, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.370412844036697e-07, |
|
"logits/chosen": -2.583376407623291, |
|
"logits/rejected": -2.6340744495391846, |
|
"logps/chosen": -318.22344970703125, |
|
"logps/rejected": -290.05194091796875, |
|
"loss": 0.3579, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.15198606252670288, |
|
"rewards/margins": 1.2958182096481323, |
|
"rewards/rejected": -1.1438322067260742, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3512996941896024e-07, |
|
"logits/chosen": -2.5554592609405518, |
|
"logits/rejected": -2.5260140895843506, |
|
"logps/chosen": -280.261962890625, |
|
"logps/rejected": -238.7654571533203, |
|
"loss": 0.362, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10016796737909317, |
|
"rewards/margins": 1.0856527090072632, |
|
"rewards/rejected": -1.1858208179473877, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_logits/chosen": -2.7140095233917236, |
|
"eval_logits/rejected": -2.662822723388672, |
|
"eval_logps/chosen": -369.68109130859375, |
|
"eval_logps/rejected": -295.4860534667969, |
|
"eval_loss": 0.512495756149292, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": 0.017426857724785805, |
|
"eval_rewards/margins": 0.994812548160553, |
|
"eval_rewards/rejected": -0.9773856401443481, |
|
"eval_runtime": 93.8775, |
|
"eval_samples_per_second": 21.304, |
|
"eval_steps_per_second": 0.671, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3321865443425075e-07, |
|
"logits/chosen": -2.5705647468566895, |
|
"logits/rejected": -2.408140182495117, |
|
"logps/chosen": -349.8976135253906, |
|
"logps/rejected": -282.80047607421875, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.13661615550518036, |
|
"rewards/margins": 1.5523512363433838, |
|
"rewards/rejected": -1.4157350063323975, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3130733944954128e-07, |
|
"logits/chosen": -2.656877279281616, |
|
"logits/rejected": -2.573462963104248, |
|
"logps/chosen": -363.28594970703125, |
|
"logps/rejected": -304.9879455566406, |
|
"loss": 0.3338, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.17011162638664246, |
|
"rewards/margins": 1.5930747985839844, |
|
"rewards/rejected": -1.42296302318573, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.293960244648318e-07, |
|
"logits/chosen": -2.62562894821167, |
|
"logits/rejected": -2.5972142219543457, |
|
"logps/chosen": -373.9268798828125, |
|
"logps/rejected": -270.20172119140625, |
|
"loss": 0.3315, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2031903713941574, |
|
"rewards/margins": 1.5349807739257812, |
|
"rewards/rejected": -1.3317903280258179, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2748470948012232e-07, |
|
"logits/chosen": -2.5800623893737793, |
|
"logits/rejected": -2.5364508628845215, |
|
"logps/chosen": -343.7547302246094, |
|
"logps/rejected": -314.71014404296875, |
|
"loss": 0.3554, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.10606318712234497, |
|
"rewards/margins": 1.4685537815093994, |
|
"rewards/rejected": -1.3624905347824097, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557339449541285e-07, |
|
"logits/chosen": -2.6662609577178955, |
|
"logits/rejected": -2.6039960384368896, |
|
"logps/chosen": -349.7870788574219, |
|
"logps/rejected": -287.8039245605469, |
|
"loss": 0.3576, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.060789804905653, |
|
"rewards/margins": 1.2319402694702148, |
|
"rewards/rejected": -1.1711504459381104, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2366207951070336e-07, |
|
"logits/chosen": -2.6050796508789062, |
|
"logits/rejected": -2.6385793685913086, |
|
"logps/chosen": -326.8080139160156, |
|
"logps/rejected": -300.26678466796875, |
|
"loss": 0.3724, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1627347767353058, |
|
"rewards/margins": 1.3461954593658447, |
|
"rewards/rejected": -1.1834605932235718, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.217507645259939e-07, |
|
"logits/chosen": -2.6019394397735596, |
|
"logits/rejected": -2.507327079772949, |
|
"logps/chosen": -340.3645935058594, |
|
"logps/rejected": -275.542236328125, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.09731924533843994, |
|
"rewards/margins": 1.3692798614501953, |
|
"rewards/rejected": -1.2719604969024658, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.198394495412844e-07, |
|
"logits/chosen": -2.582803726196289, |
|
"logits/rejected": -2.577101707458496, |
|
"logps/chosen": -358.19354248046875, |
|
"logps/rejected": -295.2372131347656, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.15065257251262665, |
|
"rewards/margins": 1.3026378154754639, |
|
"rewards/rejected": -1.1519852876663208, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1792813455657493e-07, |
|
"logits/chosen": -2.5862011909484863, |
|
"logits/rejected": -2.521061420440674, |
|
"logps/chosen": -332.89520263671875, |
|
"logps/rejected": -267.4942626953125, |
|
"loss": 0.3365, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04419634863734245, |
|
"rewards/margins": 1.277284026145935, |
|
"rewards/rejected": -1.233087420463562, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1601681957186543e-07, |
|
"logits/chosen": -2.5955777168273926, |
|
"logits/rejected": -2.5993008613586426, |
|
"logps/chosen": -338.1742858886719, |
|
"logps/rejected": -275.82666015625, |
|
"loss": 0.354, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.2074280083179474, |
|
"rewards/margins": 1.3635247945785522, |
|
"rewards/rejected": -1.1560968160629272, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_logits/chosen": -2.707024574279785, |
|
"eval_logits/rejected": -2.656226634979248, |
|
"eval_logps/chosen": -369.8023986816406, |
|
"eval_logps/rejected": -295.6311340332031, |
|
"eval_loss": 0.5147502422332764, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": 0.005297229625284672, |
|
"eval_rewards/margins": 0.9971935749053955, |
|
"eval_rewards/rejected": -0.9918965101242065, |
|
"eval_runtime": 93.4767, |
|
"eval_samples_per_second": 21.396, |
|
"eval_steps_per_second": 0.674, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1410550458715595e-07, |
|
"logits/chosen": -2.5364441871643066, |
|
"logits/rejected": -2.574249744415283, |
|
"logps/chosen": -332.3714904785156, |
|
"logps/rejected": -306.153076171875, |
|
"loss": 0.3856, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.059642672538757324, |
|
"rewards/margins": 1.174212098121643, |
|
"rewards/rejected": -1.1145694255828857, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1219418960244648e-07, |
|
"logits/chosen": -2.5725884437561035, |
|
"logits/rejected": -2.5257275104522705, |
|
"logps/chosen": -284.27044677734375, |
|
"logps/rejected": -307.5198669433594, |
|
"loss": 0.3389, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.01629200577735901, |
|
"rewards/margins": 1.2315080165863037, |
|
"rewards/rejected": -1.2478001117706299, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.10282874617737e-07, |
|
"logits/chosen": -2.639014720916748, |
|
"logits/rejected": -2.545886993408203, |
|
"logps/chosen": -304.1851501464844, |
|
"logps/rejected": -265.3128356933594, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.015858350321650505, |
|
"rewards/margins": 1.3480350971221924, |
|
"rewards/rejected": -1.3321768045425415, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0837155963302752e-07, |
|
"logits/chosen": -2.4680936336517334, |
|
"logits/rejected": -2.4684300422668457, |
|
"logps/chosen": -273.1257019042969, |
|
"logps/rejected": -297.2413635253906, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0904908999800682, |
|
"rewards/margins": 1.409021258354187, |
|
"rewards/rejected": -1.318530559539795, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0646024464831804e-07, |
|
"logits/chosen": -2.5631706714630127, |
|
"logits/rejected": -2.48854398727417, |
|
"logps/chosen": -346.63446044921875, |
|
"logps/rejected": -282.98809814453125, |
|
"loss": 0.3582, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.005453610327094793, |
|
"rewards/margins": 1.4462357759475708, |
|
"rewards/rejected": -1.4407821893692017, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0454892966360856e-07, |
|
"logits/chosen": -2.535862922668457, |
|
"logits/rejected": -2.526308536529541, |
|
"logps/chosen": -324.5105285644531, |
|
"logps/rejected": -272.35546875, |
|
"loss": 0.3577, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.08594958484172821, |
|
"rewards/margins": 1.5383434295654297, |
|
"rewards/rejected": -1.4523937702178955, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0263761467889908e-07, |
|
"logits/chosen": -2.477186679840088, |
|
"logits/rejected": -2.449218273162842, |
|
"logps/chosen": -333.4910888671875, |
|
"logps/rejected": -292.4876403808594, |
|
"loss": 0.3588, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.16426347196102142, |
|
"rewards/margins": 1.3494445085525513, |
|
"rewards/rejected": -1.185180902481079, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.007262996941896e-07, |
|
"logits/chosen": -2.559156894683838, |
|
"logits/rejected": -2.4658923149108887, |
|
"logps/chosen": -340.9134216308594, |
|
"logps/rejected": -260.46282958984375, |
|
"loss": 0.3482, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.06518319994211197, |
|
"rewards/margins": 1.252594232559204, |
|
"rewards/rejected": -1.317777395248413, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.881498470948011e-08, |
|
"logits/chosen": -2.5094966888427734, |
|
"logits/rejected": -2.458124876022339, |
|
"logps/chosen": -348.07220458984375, |
|
"logps/rejected": -263.6317443847656, |
|
"loss": 0.3545, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.03768324851989746, |
|
"rewards/margins": 1.507036805152893, |
|
"rewards/rejected": -1.4693536758422852, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.690366972477065e-08, |
|
"logits/chosen": -2.616698741912842, |
|
"logits/rejected": -2.6090917587280273, |
|
"logps/chosen": -346.8231506347656, |
|
"logps/rejected": -270.77679443359375, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.07578043639659882, |
|
"rewards/margins": 1.1882234811782837, |
|
"rewards/rejected": -1.1124428510665894, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": -2.707021951675415, |
|
"eval_logits/rejected": -2.6557092666625977, |
|
"eval_logps/chosen": -369.9039001464844, |
|
"eval_logps/rejected": -295.6993713378906, |
|
"eval_loss": 0.5143767595291138, |
|
"eval_rewards/accuracies": 0.738095223903656, |
|
"eval_rewards/chosen": -0.004853170830756426, |
|
"eval_rewards/margins": 0.9938662052154541, |
|
"eval_rewards/rejected": -0.9987194538116455, |
|
"eval_runtime": 93.6622, |
|
"eval_samples_per_second": 21.353, |
|
"eval_steps_per_second": 0.673, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.499235474006116e-08, |
|
"logits/chosen": -2.618323802947998, |
|
"logits/rejected": -2.5537703037261963, |
|
"logps/chosen": -379.9132080078125, |
|
"logps/rejected": -270.57196044921875, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.21965615451335907, |
|
"rewards/margins": 1.6616443395614624, |
|
"rewards/rejected": -1.441988229751587, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.308103975535168e-08, |
|
"logits/chosen": -2.5823190212249756, |
|
"logits/rejected": -2.561429262161255, |
|
"logps/chosen": -348.19024658203125, |
|
"logps/rejected": -310.19281005859375, |
|
"loss": 0.3793, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07228744775056839, |
|
"rewards/margins": 1.1632258892059326, |
|
"rewards/rejected": -1.0909385681152344, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.116972477064219e-08, |
|
"logits/chosen": -2.554273843765259, |
|
"logits/rejected": -2.5581376552581787, |
|
"logps/chosen": -361.60723876953125, |
|
"logps/rejected": -329.79510498046875, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.07210446894168854, |
|
"rewards/margins": 1.3393795490264893, |
|
"rewards/rejected": -1.267275094985962, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.925840978593272e-08, |
|
"logits/chosen": -2.616187572479248, |
|
"logits/rejected": -2.5371768474578857, |
|
"logps/chosen": -270.7501525878906, |
|
"logps/rejected": -303.85870361328125, |
|
"loss": 0.3541, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.1579769253730774, |
|
"rewards/margins": 1.3280938863754272, |
|
"rewards/rejected": -1.1701170206069946, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.734709480122324e-08, |
|
"logits/chosen": -2.507927656173706, |
|
"logits/rejected": -2.532486915588379, |
|
"logps/chosen": -318.96746826171875, |
|
"logps/rejected": -295.8729248046875, |
|
"loss": 0.3571, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.030292898416519165, |
|
"rewards/margins": 1.09835684299469, |
|
"rewards/rejected": -1.1286494731903076, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.543577981651376e-08, |
|
"logits/chosen": -2.575026273727417, |
|
"logits/rejected": -2.5608019828796387, |
|
"logps/chosen": -447.73883056640625, |
|
"logps/rejected": -321.723876953125, |
|
"loss": 0.356, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.23453247547149658, |
|
"rewards/margins": 1.5171207189559937, |
|
"rewards/rejected": -1.2825883626937866, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.352446483180428e-08, |
|
"logits/chosen": -2.6058762073516846, |
|
"logits/rejected": -2.586782932281494, |
|
"logps/chosen": -354.8528137207031, |
|
"logps/rejected": -262.26611328125, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14801561832427979, |
|
"rewards/margins": 1.3185254335403442, |
|
"rewards/rejected": -1.170509696006775, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.161314984709481e-08, |
|
"logits/chosen": -2.6247718334198, |
|
"logits/rejected": -2.605537176132202, |
|
"logps/chosen": -337.4981994628906, |
|
"logps/rejected": -287.82305908203125, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01781139336526394, |
|
"rewards/margins": 1.2502973079681396, |
|
"rewards/rejected": -1.2324861288070679, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.970183486238531e-08, |
|
"logits/chosen": -2.5559067726135254, |
|
"logits/rejected": -2.536808729171753, |
|
"logps/chosen": -323.12469482421875, |
|
"logps/rejected": -272.9129943847656, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2089882791042328, |
|
"rewards/margins": 1.4187358617782593, |
|
"rewards/rejected": -1.209747552871704, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.779051987767583e-08, |
|
"logits/chosen": -2.652679204940796, |
|
"logits/rejected": -2.620300769805908, |
|
"logps/chosen": -362.5030822753906, |
|
"logps/rejected": -288.15765380859375, |
|
"loss": 0.3374, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.24906206130981445, |
|
"rewards/margins": 1.4693329334259033, |
|
"rewards/rejected": -1.2202708721160889, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_logits/chosen": -2.7127866744995117, |
|
"eval_logits/rejected": -2.6615724563598633, |
|
"eval_logps/chosen": -369.87030029296875, |
|
"eval_logps/rejected": -295.8825988769531, |
|
"eval_loss": 0.5143249034881592, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.001490301568992436, |
|
"eval_rewards/margins": 1.0155508518218994, |
|
"eval_rewards/rejected": -1.0170412063598633, |
|
"eval_runtime": 93.6481, |
|
"eval_samples_per_second": 21.357, |
|
"eval_steps_per_second": 0.673, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.587920489296635e-08, |
|
"logits/chosen": -2.6370816230773926, |
|
"logits/rejected": -2.6355397701263428, |
|
"logps/chosen": -350.7646484375, |
|
"logps/rejected": -293.7305908203125, |
|
"loss": 0.3595, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.11017981916666031, |
|
"rewards/margins": 1.314969539642334, |
|
"rewards/rejected": -1.2047897577285767, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.396788990825688e-08, |
|
"logits/chosen": -2.687561273574829, |
|
"logits/rejected": -2.678396701812744, |
|
"logps/chosen": -331.09088134765625, |
|
"logps/rejected": -267.24847412109375, |
|
"loss": 0.3673, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.19342190027236938, |
|
"rewards/margins": 1.5418986082077026, |
|
"rewards/rejected": -1.3484766483306885, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.20565749235474e-08, |
|
"logits/chosen": -2.540743827819824, |
|
"logits/rejected": -2.5151162147521973, |
|
"logps/chosen": -314.4068298339844, |
|
"logps/rejected": -241.954833984375, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.1343233287334442, |
|
"rewards/margins": 1.3502509593963623, |
|
"rewards/rejected": -1.2159278392791748, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.014525993883792e-08, |
|
"logits/chosen": -2.506711483001709, |
|
"logits/rejected": -2.5368690490722656, |
|
"logps/chosen": -344.0050354003906, |
|
"logps/rejected": -280.0585021972656, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.20728519558906555, |
|
"rewards/margins": 1.4358912706375122, |
|
"rewards/rejected": -1.2286062240600586, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.823394495412843e-08, |
|
"logits/chosen": -2.5110056400299072, |
|
"logits/rejected": -2.5191993713378906, |
|
"logps/chosen": -336.6435852050781, |
|
"logps/rejected": -278.8410339355469, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.15368029475212097, |
|
"rewards/margins": 1.5688577890396118, |
|
"rewards/rejected": -1.415177583694458, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.632262996941895e-08, |
|
"logits/chosen": -2.514180898666382, |
|
"logits/rejected": -2.583381414413452, |
|
"logps/chosen": -319.88494873046875, |
|
"logps/rejected": -288.6555480957031, |
|
"loss": 0.3667, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.12983320653438568, |
|
"rewards/margins": 1.418076753616333, |
|
"rewards/rejected": -1.2882434129714966, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.441131498470948e-08, |
|
"logits/chosen": -2.551957607269287, |
|
"logits/rejected": -2.5424647331237793, |
|
"logps/chosen": -311.9202575683594, |
|
"logps/rejected": -276.4605407714844, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.14209267497062683, |
|
"rewards/margins": 1.3686944246292114, |
|
"rewards/rejected": -1.2266016006469727, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.5870742797851562, |
|
"logits/rejected": -2.5902791023254395, |
|
"logps/chosen": -392.72308349609375, |
|
"logps/rejected": -302.57098388671875, |
|
"loss": 0.3451, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.22271943092346191, |
|
"rewards/margins": 1.5374951362609863, |
|
"rewards/rejected": -1.3147757053375244, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.058868501529052e-08, |
|
"logits/chosen": -2.636500597000122, |
|
"logits/rejected": -2.6023454666137695, |
|
"logps/chosen": -374.46954345703125, |
|
"logps/rejected": -298.9087219238281, |
|
"loss": 0.3449, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.11469938606023788, |
|
"rewards/margins": 1.2926369905471802, |
|
"rewards/rejected": -1.177937626838684, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.8677370030581035e-08, |
|
"logits/chosen": -2.5693795680999756, |
|
"logits/rejected": -2.5163960456848145, |
|
"logps/chosen": -333.29364013671875, |
|
"logps/rejected": -262.6988525390625, |
|
"loss": 0.3417, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.1946088969707489, |
|
"rewards/margins": 1.3346517086029053, |
|
"rewards/rejected": -1.1400429010391235, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_logits/chosen": -2.7118382453918457, |
|
"eval_logits/rejected": -2.660539150238037, |
|
"eval_logps/chosen": -369.8551330566406, |
|
"eval_logps/rejected": -295.7532958984375, |
|
"eval_loss": 0.5136774778366089, |
|
"eval_rewards/accuracies": 0.7341269850730896, |
|
"eval_rewards/chosen": 2.2300651835394092e-05, |
|
"eval_rewards/margins": 1.0041303634643555, |
|
"eval_rewards/rejected": -1.004108190536499, |
|
"eval_runtime": 93.7402, |
|
"eval_samples_per_second": 21.336, |
|
"eval_steps_per_second": 0.672, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6766055045871554e-08, |
|
"logits/chosen": -2.571103572845459, |
|
"logits/rejected": -2.6057116985321045, |
|
"logps/chosen": -340.5564880371094, |
|
"logps/rejected": -302.25445556640625, |
|
"loss": 0.3308, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.1304255723953247, |
|
"rewards/margins": 1.32243013381958, |
|
"rewards/rejected": -1.192004680633545, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.485474006116208e-08, |
|
"logits/chosen": -2.660834550857544, |
|
"logits/rejected": -2.553541660308838, |
|
"logps/chosen": -352.4358825683594, |
|
"logps/rejected": -271.14251708984375, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.19767490029335022, |
|
"rewards/margins": 1.4519338607788086, |
|
"rewards/rejected": -1.2542589902877808, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.294342507645259e-08, |
|
"logits/chosen": -2.588761806488037, |
|
"logits/rejected": -2.5700109004974365, |
|
"logps/chosen": -398.5816955566406, |
|
"logps/rejected": -275.89947509765625, |
|
"loss": 0.3466, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.3069090247154236, |
|
"rewards/margins": 1.7067188024520874, |
|
"rewards/rejected": -1.3998098373413086, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1032110091743117e-08, |
|
"logits/chosen": -2.434288263320923, |
|
"logits/rejected": -2.549377202987671, |
|
"logps/chosen": -280.3855895996094, |
|
"logps/rejected": -268.7284240722656, |
|
"loss": 0.3512, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.07931343466043472, |
|
"rewards/margins": 1.2083343267440796, |
|
"rewards/rejected": -1.2876479625701904, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.9120795107033635e-08, |
|
"logits/chosen": -2.586907148361206, |
|
"logits/rejected": -2.5731730461120605, |
|
"logps/chosen": -355.03466796875, |
|
"logps/rejected": -296.54437255859375, |
|
"loss": 0.3381, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.157174751162529, |
|
"rewards/margins": 1.4374758005142212, |
|
"rewards/rejected": -1.2803009748458862, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7209480122324154e-08, |
|
"logits/chosen": -2.6315078735351562, |
|
"logits/rejected": -2.595000743865967, |
|
"logps/chosen": -337.0852966308594, |
|
"logps/rejected": -308.55670166015625, |
|
"loss": 0.3403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06230630353093147, |
|
"rewards/margins": 1.4226971864700317, |
|
"rewards/rejected": -1.3603907823562622, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.529816513761467e-08, |
|
"logits/chosen": -2.5866010189056396, |
|
"logits/rejected": -2.6100072860717773, |
|
"logps/chosen": -385.6864013671875, |
|
"logps/rejected": -349.44439697265625, |
|
"loss": 0.346, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.2403152734041214, |
|
"rewards/margins": 1.5250906944274902, |
|
"rewards/rejected": -1.2847753763198853, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33868501529052e-08, |
|
"logits/chosen": -2.6556694507598877, |
|
"logits/rejected": -2.5988481044769287, |
|
"logps/chosen": -300.122802734375, |
|
"logps/rejected": -253.27743530273438, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.014878131449222565, |
|
"rewards/margins": 1.4045097827911377, |
|
"rewards/rejected": -1.3896316289901733, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.147553516819572e-08, |
|
"logits/chosen": -2.528102159500122, |
|
"logits/rejected": -2.514404773712158, |
|
"logps/chosen": -293.94830322265625, |
|
"logps/rejected": -272.3764343261719, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.14863362908363342, |
|
"rewards/margins": 1.587708830833435, |
|
"rewards/rejected": -1.439075231552124, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.9564220183486236e-08, |
|
"logits/chosen": -2.5406494140625, |
|
"logits/rejected": -2.4887073040008545, |
|
"logps/chosen": -334.87615966796875, |
|
"logps/rejected": -244.68984985351562, |
|
"loss": 0.3312, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.24883659183979034, |
|
"rewards/margins": 1.5429491996765137, |
|
"rewards/rejected": -1.2941125631332397, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_logits/chosen": -2.7070810794830322, |
|
"eval_logits/rejected": -2.6563408374786377, |
|
"eval_logps/chosen": -370.0519104003906, |
|
"eval_logps/rejected": -295.9976501464844, |
|
"eval_loss": 0.5139505863189697, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": -0.019654909148812294, |
|
"eval_rewards/margins": 1.0088927745819092, |
|
"eval_rewards/rejected": -1.0285476446151733, |
|
"eval_runtime": 93.6172, |
|
"eval_samples_per_second": 21.364, |
|
"eval_steps_per_second": 0.673, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7652905198776755e-08, |
|
"logits/chosen": -2.6289658546447754, |
|
"logits/rejected": -2.5545971393585205, |
|
"logps/chosen": -374.7056579589844, |
|
"logps/rejected": -280.23291015625, |
|
"loss": 0.3592, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.028684472665190697, |
|
"rewards/margins": 1.139135718345642, |
|
"rewards/rejected": -1.1104512214660645, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.574159021406728e-08, |
|
"logits/chosen": -2.576387882232666, |
|
"logits/rejected": -2.578197956085205, |
|
"logps/chosen": -348.49371337890625, |
|
"logps/rejected": -368.74249267578125, |
|
"loss": 0.3494, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.12000823020935059, |
|
"rewards/margins": 1.18352210521698, |
|
"rewards/rejected": -1.3035303354263306, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.383027522935779e-08, |
|
"logits/chosen": -2.6115736961364746, |
|
"logits/rejected": -2.5528273582458496, |
|
"logps/chosen": -349.59222412109375, |
|
"logps/rejected": -296.5248107910156, |
|
"loss": 0.3612, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1196526288986206, |
|
"rewards/margins": 1.4233744144439697, |
|
"rewards/rejected": -1.3037217855453491, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.191896024464832e-08, |
|
"logits/chosen": -2.579690456390381, |
|
"logits/rejected": -2.5436129570007324, |
|
"logps/chosen": -358.9446105957031, |
|
"logps/rejected": -281.4949645996094, |
|
"loss": 0.3526, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.12253328412771225, |
|
"rewards/margins": 1.364234209060669, |
|
"rewards/rejected": -1.2417008876800537, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0007645259938836e-08, |
|
"logits/chosen": -2.5429506301879883, |
|
"logits/rejected": -2.530900478363037, |
|
"logps/chosen": -280.2276916503906, |
|
"logps/rejected": -265.14361572265625, |
|
"loss": 0.3422, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0878460705280304, |
|
"rewards/margins": 1.5060606002807617, |
|
"rewards/rejected": -1.4182145595550537, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.809633027522936e-08, |
|
"logits/chosen": -2.5236594676971436, |
|
"logits/rejected": -2.5627801418304443, |
|
"logps/chosen": -307.91845703125, |
|
"logps/rejected": -264.10601806640625, |
|
"loss": 0.353, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.025409594178199768, |
|
"rewards/margins": 1.2293641567230225, |
|
"rewards/rejected": -1.2547738552093506, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6185015290519877e-08, |
|
"logits/chosen": -2.4620065689086914, |
|
"logits/rejected": -2.5668463706970215, |
|
"logps/chosen": -343.9793701171875, |
|
"logps/rejected": -317.62261962890625, |
|
"loss": 0.3397, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.43111473321914673, |
|
"rewards/margins": 1.7761211395263672, |
|
"rewards/rejected": -1.3450063467025757, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4273700305810396e-08, |
|
"logits/chosen": -2.6023755073547363, |
|
"logits/rejected": -2.5225300788879395, |
|
"logps/chosen": -391.25018310546875, |
|
"logps/rejected": -317.033203125, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.30640777945518494, |
|
"rewards/margins": 1.6793296337127686, |
|
"rewards/rejected": -1.3729219436645508, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2362385321100918e-08, |
|
"logits/chosen": -2.6185410022735596, |
|
"logits/rejected": -2.570345878601074, |
|
"logps/chosen": -363.7101135253906, |
|
"logps/rejected": -276.9412536621094, |
|
"loss": 0.3368, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.16394543647766113, |
|
"rewards/margins": 1.499637246131897, |
|
"rewards/rejected": -1.3356918096542358, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0451070336391437e-08, |
|
"logits/chosen": -2.499922513961792, |
|
"logits/rejected": -2.4994664192199707, |
|
"logps/chosen": -341.2127990722656, |
|
"logps/rejected": -247.0023956298828, |
|
"loss": 0.3643, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.09523084759712219, |
|
"rewards/margins": 1.2245643138885498, |
|
"rewards/rejected": -1.129333734512329, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_logits/chosen": -2.706326723098755, |
|
"eval_logits/rejected": -2.655164957046509, |
|
"eval_logps/chosen": -370.08856201171875, |
|
"eval_logps/rejected": -295.9974060058594, |
|
"eval_loss": 0.5145964622497559, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": -0.02332073263823986, |
|
"eval_rewards/margins": 1.0052008628845215, |
|
"eval_rewards/rejected": -1.0285216569900513, |
|
"eval_runtime": 93.8959, |
|
"eval_samples_per_second": 21.3, |
|
"eval_steps_per_second": 0.671, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8539755351681956e-08, |
|
"logits/chosen": -2.5391743183135986, |
|
"logits/rejected": -2.5156824588775635, |
|
"logps/chosen": -338.16632080078125, |
|
"logps/rejected": -303.3216552734375, |
|
"loss": 0.3278, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.011912358924746513, |
|
"rewards/margins": 1.2774769067764282, |
|
"rewards/rejected": -1.2893892526626587, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6628440366972478e-08, |
|
"logits/chosen": -2.6104888916015625, |
|
"logits/rejected": -2.520526885986328, |
|
"logps/chosen": -363.6390380859375, |
|
"logps/rejected": -289.16900634765625, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2589784264564514, |
|
"rewards/margins": 1.5904781818389893, |
|
"rewards/rejected": -1.3314998149871826, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4717125382262997e-08, |
|
"logits/chosen": -2.596144914627075, |
|
"logits/rejected": -2.5945043563842773, |
|
"logps/chosen": -344.29937744140625, |
|
"logps/rejected": -305.58819580078125, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24774548411369324, |
|
"rewards/margins": 1.5245163440704346, |
|
"rewards/rejected": -1.276770830154419, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2805810397553517e-08, |
|
"logits/chosen": -2.595520496368408, |
|
"logits/rejected": -2.5853350162506104, |
|
"logps/chosen": -347.48846435546875, |
|
"logps/rejected": -286.7652282714844, |
|
"loss": 0.3707, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.09414155781269073, |
|
"rewards/margins": 1.1994467973709106, |
|
"rewards/rejected": -1.1053051948547363, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0894495412844038e-08, |
|
"logits/chosen": -2.53163480758667, |
|
"logits/rejected": -2.5577445030212402, |
|
"logps/chosen": -323.544677734375, |
|
"logps/rejected": -286.0074462890625, |
|
"loss": 0.3423, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.10755489021539688, |
|
"rewards/margins": 1.2161157131195068, |
|
"rewards/rejected": -1.1085608005523682, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.983180428134555e-09, |
|
"logits/chosen": -2.5790271759033203, |
|
"logits/rejected": -2.506789207458496, |
|
"logps/chosen": -333.0215148925781, |
|
"logps/rejected": -236.8868408203125, |
|
"loss": 0.3549, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.001955956220626831, |
|
"rewards/margins": 1.2684563398361206, |
|
"rewards/rejected": -1.2665002346038818, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.071865443425076e-09, |
|
"logits/chosen": -2.5556507110595703, |
|
"logits/rejected": -2.4685635566711426, |
|
"logps/chosen": -344.0811767578125, |
|
"logps/rejected": -286.39019775390625, |
|
"loss": 0.3413, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.07706295698881149, |
|
"rewards/margins": 1.4479432106018066, |
|
"rewards/rejected": -1.370880365371704, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1605504587155965e-09, |
|
"logits/chosen": -2.5983853340148926, |
|
"logits/rejected": -2.6669363975524902, |
|
"logps/chosen": -354.70013427734375, |
|
"logps/rejected": -296.3901672363281, |
|
"loss": 0.3418, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.048993874341249466, |
|
"rewards/margins": 1.3146908283233643, |
|
"rewards/rejected": -1.2656971216201782, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.249235474006116e-09, |
|
"logits/chosen": -2.588571071624756, |
|
"logits/rejected": -2.5282468795776367, |
|
"logps/chosen": -345.6400146484375, |
|
"logps/rejected": -299.24481201171875, |
|
"loss": 0.3617, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.17157995700836182, |
|
"rewards/margins": 1.4679298400878906, |
|
"rewards/rejected": -1.2963498830795288, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3379204892966359e-09, |
|
"logits/chosen": -2.513842821121216, |
|
"logits/rejected": -2.489457368850708, |
|
"logps/chosen": -330.482177734375, |
|
"logps/rejected": -260.58203125, |
|
"loss": 0.3322, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.15938715636730194, |
|
"rewards/margins": 1.6171241998672485, |
|
"rewards/rejected": -1.4577369689941406, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_logits/chosen": -2.707925319671631, |
|
"eval_logits/rejected": -2.657289981842041, |
|
"eval_logps/chosen": -370.14801025390625, |
|
"eval_logps/rejected": -296.0495910644531, |
|
"eval_loss": 0.5141671299934387, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": -0.029261818155646324, |
|
"eval_rewards/margins": 1.0044795274734497, |
|
"eval_rewards/rejected": -1.0337414741516113, |
|
"eval_runtime": 93.5443, |
|
"eval_samples_per_second": 21.38, |
|
"eval_steps_per_second": 0.673, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2907, |
|
"total_flos": 0.0, |
|
"train_loss": 0.45207962556766157, |
|
"train_runtime": 21062.2956, |
|
"train_samples_per_second": 8.826, |
|
"train_steps_per_second": 0.138 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|