{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984168865435357, |
|
"eval_steps": 400, |
|
"global_step": 473, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021108179419525065, |
|
"grad_norm": 3.7888171889145084, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -1.7614977359771729, |
|
"logits/rejected": -2.1336593627929688, |
|
"logps/chosen": -258.78717041015625, |
|
"logps/rejected": -241.137451171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010554089709762533, |
|
"grad_norm": 5.486005258108119, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -1.652553915977478, |
|
"logits/rejected": -1.944653868675232, |
|
"logps/chosen": -254.9417724609375, |
|
"logps/rejected": -233.73040771484375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.3359375, |
|
"rewards/chosen": 0.0009400760754942894, |
|
"rewards/margins": -0.00012203870574012399, |
|
"rewards/rejected": 0.0010621148394420743, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021108179419525065, |
|
"grad_norm": 4.961389255891659, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.7172822952270508, |
|
"logits/rejected": -1.9224716424942017, |
|
"logps/chosen": -285.58203125, |
|
"logps/rejected": -271.65899658203125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0001836848387029022, |
|
"rewards/margins": 5.627591235679574e-05, |
|
"rewards/rejected": 0.00012740897363983095, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0316622691292876, |
|
"grad_norm": 4.465341637207026, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.7000751495361328, |
|
"logits/rejected": -2.006362199783325, |
|
"logps/chosen": -294.66119384765625, |
|
"logps/rejected": -266.40240478515625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003800996346399188, |
|
"rewards/margins": 0.0003609915147535503, |
|
"rewards/rejected": 0.003440004540607333, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04221635883905013, |
|
"grad_norm": 4.29839711906534, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.5813852548599243, |
|
"logits/rejected": -1.917645812034607, |
|
"logps/chosen": -269.6716003417969, |
|
"logps/rejected": -243.76126098632812, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004329930525273085, |
|
"rewards/margins": 0.0014942068373784423, |
|
"rewards/rejected": 0.002835723338648677, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.052770448548812667, |
|
"grad_norm": 4.1974406559327, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -1.4200265407562256, |
|
"logits/rejected": -1.6618592739105225, |
|
"logps/chosen": -277.4543762207031, |
|
"logps/rejected": -256.47283935546875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.011705084703862667, |
|
"rewards/margins": 0.001989929471164942, |
|
"rewards/rejected": 0.009715155698359013, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0633245382585752, |
|
"grad_norm": 4.063491497272294, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.5429295301437378, |
|
"logits/rejected": -1.7798576354980469, |
|
"logps/chosen": -282.87689208984375, |
|
"logps/rejected": -262.7992858886719, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.019914668053388596, |
|
"rewards/margins": 0.004695773124694824, |
|
"rewards/rejected": 0.015218895860016346, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07387862796833773, |
|
"grad_norm": 4.21419727711893, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -1.5838125944137573, |
|
"logits/rejected": -1.8180118799209595, |
|
"logps/chosen": -261.9321594238281, |
|
"logps/rejected": -255.01626586914062, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.029436618089675903, |
|
"rewards/margins": 0.007458895444869995, |
|
"rewards/rejected": 0.021977724507451057, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08443271767810026, |
|
"grad_norm": 3.992903303419019, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.5156856775283813, |
|
"logits/rejected": -1.7749900817871094, |
|
"logps/chosen": -263.44287109375, |
|
"logps/rejected": -244.74044799804688, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.03905363008379936, |
|
"rewards/margins": 0.01673820987343788, |
|
"rewards/rejected": 0.02231542207300663, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09498680738786279, |
|
"grad_norm": 4.26522333339902, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.6547425985336304, |
|
"logits/rejected": -1.8507578372955322, |
|
"logps/chosen": -268.19354248046875, |
|
"logps/rejected": -257.1205139160156, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04456416517496109, |
|
"rewards/margins": 0.022208593785762787, |
|
"rewards/rejected": 0.0223555751144886, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10554089709762533, |
|
"grad_norm": 4.338593813384902, |
|
"learning_rate": 4.999726797933858e-07, |
|
"logits/chosen": -1.6332728862762451, |
|
"logits/rejected": -1.850756049156189, |
|
"logps/chosen": -263.47998046875, |
|
"logps/rejected": -249.18734741210938, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.037815388292074203, |
|
"rewards/margins": 0.02914128080010414, |
|
"rewards/rejected": 0.008674108423292637, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11609498680738786, |
|
"grad_norm": 4.4734059999380635, |
|
"learning_rate": 4.99665396039775e-07, |
|
"logits/chosen": -1.6244800090789795, |
|
"logits/rejected": -1.8389520645141602, |
|
"logps/chosen": -280.68115234375, |
|
"logps/rejected": -266.77935791015625, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01766197383403778, |
|
"rewards/margins": 0.03439956158399582, |
|
"rewards/rejected": -0.016737591475248337, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1266490765171504, |
|
"grad_norm": 5.180495838186906, |
|
"learning_rate": 4.99017099386437e-07, |
|
"logits/chosen": -1.7377235889434814, |
|
"logits/rejected": -1.9698741436004639, |
|
"logps/chosen": -276.61029052734375, |
|
"logps/rejected": -265.09356689453125, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04501429200172424, |
|
"rewards/margins": 0.047541338950395584, |
|
"rewards/rejected": -0.09255563467741013, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13720316622691292, |
|
"grad_norm": 5.0555182208034335, |
|
"learning_rate": 4.980286753286194e-07, |
|
"logits/chosen": -1.7377593517303467, |
|
"logits/rejected": -1.9555679559707642, |
|
"logps/chosen": -297.94842529296875, |
|
"logps/rejected": -286.69110107421875, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.013160338625311852, |
|
"rewards/margins": 0.06764128059148788, |
|
"rewards/rejected": -0.08080162107944489, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14775725593667546, |
|
"grad_norm": 5.593861280995437, |
|
"learning_rate": 4.967014739346915e-07, |
|
"logits/chosen": -1.902021050453186, |
|
"logits/rejected": -2.1676580905914307, |
|
"logps/chosen": -274.930908203125, |
|
"logps/rejected": -265.46917724609375, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.07597370445728302, |
|
"rewards/margins": 0.06604303419589996, |
|
"rewards/rejected": -0.14201673865318298, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.158311345646438, |
|
"grad_norm": 5.729170497012147, |
|
"learning_rate": 4.950373080021136e-07, |
|
"logits/chosen": -1.8614518642425537, |
|
"logits/rejected": -2.113079786300659, |
|
"logps/chosen": -286.76824951171875, |
|
"logps/rejected": -274.01043701171875, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07419217377901077, |
|
"rewards/margins": 0.07467035204172134, |
|
"rewards/rejected": -0.14886252582073212, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16886543535620052, |
|
"grad_norm": 5.494684099688743, |
|
"learning_rate": 4.930384505813737e-07, |
|
"logits/chosen": -1.923152208328247, |
|
"logits/rejected": -2.1438252925872803, |
|
"logps/chosen": -284.2359619140625, |
|
"logps/rejected": -276.147705078125, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11922915279865265, |
|
"rewards/margins": 0.0638352707028389, |
|
"rewards/rejected": -0.18306441605091095, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17941952506596306, |
|
"grad_norm": 5.815028665022688, |
|
"learning_rate": 4.907076318712738e-07, |
|
"logits/chosen": -1.9811105728149414, |
|
"logits/rejected": -2.159453868865967, |
|
"logps/chosen": -286.17047119140625, |
|
"logps/rejected": -275.25762939453125, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10111021995544434, |
|
"rewards/margins": 0.09180058538913727, |
|
"rewards/rejected": -0.1929108202457428, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18997361477572558, |
|
"grad_norm": 6.05860390265305, |
|
"learning_rate": 4.88048035489807e-07, |
|
"logits/chosen": -1.8606590032577515, |
|
"logits/rejected": -2.207517147064209, |
|
"logps/chosen": -288.4847106933594, |
|
"logps/rejected": -272.2112731933594, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.16333934664726257, |
|
"rewards/margins": 0.09367315471172333, |
|
"rewards/rejected": -0.2570124864578247, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20052770448548812, |
|
"grad_norm": 5.9220580962205105, |
|
"learning_rate": 4.85063294125718e-07, |
|
"logits/chosen": -1.9957729578018188, |
|
"logits/rejected": -2.102470636367798, |
|
"logps/chosen": -306.8893127441406, |
|
"logps/rejected": -315.8654479980469, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.189494326710701, |
|
"rewards/margins": 0.06697932630777359, |
|
"rewards/rejected": -0.2564736604690552, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21108179419525067, |
|
"grad_norm": 7.106879633726346, |
|
"learning_rate": 4.817574845766874e-07, |
|
"logits/chosen": -1.914390206336975, |
|
"logits/rejected": -2.158510446548462, |
|
"logps/chosen": -312.228271484375, |
|
"logps/rejected": -307.2701416015625, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23975618183612823, |
|
"rewards/margins": 0.13584721088409424, |
|
"rewards/rejected": -0.37560343742370605, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22163588390501318, |
|
"grad_norm": 6.261854070868125, |
|
"learning_rate": 4.781351221809166e-07, |
|
"logits/chosen": -2.121222496032715, |
|
"logits/rejected": -2.3385891914367676, |
|
"logps/chosen": -288.9300231933594, |
|
"logps/rejected": -287.0550537109375, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34203463792800903, |
|
"rewards/margins": 0.1223745122551918, |
|
"rewards/rejected": -0.4644091725349426, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23218997361477572, |
|
"grad_norm": 6.2411593338388816, |
|
"learning_rate": 4.742011546497182e-07, |
|
"logits/chosen": -1.9769681692123413, |
|
"logits/rejected": -2.1361823081970215, |
|
"logps/chosen": -309.54766845703125, |
|
"logps/rejected": -307.20306396484375, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28532418608665466, |
|
"rewards/margins": 0.1274307519197464, |
|
"rewards/rejected": -0.41275492310523987, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24274406332453827, |
|
"grad_norm": 6.782103703812521, |
|
"learning_rate": 4.6996095530953875e-07, |
|
"logits/chosen": -1.9189682006835938, |
|
"logits/rejected": -2.1745872497558594, |
|
"logps/chosen": -314.22308349609375, |
|
"logps/rejected": -309.86859130859375, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3373129367828369, |
|
"rewards/margins": 0.11438401788473129, |
|
"rewards/rejected": -0.4516969621181488, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2532981530343008, |
|
"grad_norm": 7.845105191338386, |
|
"learning_rate": 4.654203157626399e-07, |
|
"logits/chosen": -2.0927116870880127, |
|
"logits/rejected": -2.4226441383361816, |
|
"logps/chosen": -330.85467529296875, |
|
"logps/rejected": -319.5343933105469, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.41750985383987427, |
|
"rewards/margins": 0.1433776617050171, |
|
"rewards/rejected": -0.5608875155448914, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2638522427440633, |
|
"grad_norm": 7.838145177076444, |
|
"learning_rate": 4.605854379764673e-07, |
|
"logits/chosen": -2.088397264480591, |
|
"logits/rejected": -2.309814453125, |
|
"logps/chosen": -321.032958984375, |
|
"logps/rejected": -316.51812744140625, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.44016337394714355, |
|
"rewards/margins": 0.12760691344738007, |
|
"rewards/rejected": -0.5677703619003296, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27440633245382584, |
|
"grad_norm": 7.175313209394211, |
|
"learning_rate": 4.5546292581250857e-07, |
|
"logits/chosen": -2.1430201530456543, |
|
"logits/rejected": -2.3672008514404297, |
|
"logps/chosen": -320.6697692871094, |
|
"logps/rejected": -315.40594482421875, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.46516576409339905, |
|
"rewards/margins": 0.20094823837280273, |
|
"rewards/rejected": -0.6661140322685242, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2849604221635884, |
|
"grad_norm": 8.139621502884008, |
|
"learning_rate": 4.5005977600621275e-07, |
|
"logits/chosen": -2.157411813735962, |
|
"logits/rejected": -2.422761917114258, |
|
"logps/chosen": -334.2851867675781, |
|
"logps/rejected": -331.96240234375, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5050655007362366, |
|
"rewards/margins": 0.16327856481075287, |
|
"rewards/rejected": -0.6683440208435059, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2955145118733509, |
|
"grad_norm": 8.66968270113885, |
|
"learning_rate": 4.443833686102919e-07, |
|
"logits/chosen": -2.18753981590271, |
|
"logits/rejected": -2.4200239181518555, |
|
"logps/chosen": -351.04388427734375, |
|
"logps/rejected": -355.5639953613281, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6600111126899719, |
|
"rewards/margins": 0.2067330777645111, |
|
"rewards/rejected": -0.8667442202568054, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30606860158311344, |
|
"grad_norm": 8.486691938463958, |
|
"learning_rate": 4.384414569144561e-07, |
|
"logits/chosen": -2.2690327167510986, |
|
"logits/rejected": -2.467618227005005, |
|
"logps/chosen": -345.2842102050781, |
|
"logps/rejected": -351.8019104003906, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6318648457527161, |
|
"rewards/margins": 0.21810145676136017, |
|
"rewards/rejected": -0.8499662280082703, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.316622691292876, |
|
"grad_norm": 10.799601481281647, |
|
"learning_rate": 4.3224215685535287e-07, |
|
"logits/chosen": -2.1107537746429443, |
|
"logits/rejected": -2.275696039199829, |
|
"logps/chosen": -330.2477111816406, |
|
"logps/rejected": -332.95306396484375, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6694210767745972, |
|
"rewards/margins": 0.16177809238433838, |
|
"rewards/rejected": -0.8311992883682251, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32717678100263853, |
|
"grad_norm": 9.91414676698451, |
|
"learning_rate": 4.2579393593117364e-07, |
|
"logits/chosen": -2.109783887863159, |
|
"logits/rejected": -2.3675389289855957, |
|
"logps/chosen": -360.96612548828125, |
|
"logps/rejected": -354.8112487792969, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7774096727371216, |
|
"rewards/margins": 0.20474901795387268, |
|
"rewards/rejected": -0.9821586608886719, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33773087071240104, |
|
"grad_norm": 9.84160004233017, |
|
"learning_rate": 4.191056016360699e-07, |
|
"logits/chosen": -2.1540074348449707, |
|
"logits/rejected": -2.363142728805542, |
|
"logps/chosen": -353.576416015625, |
|
"logps/rejected": -356.2342834472656, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8168758153915405, |
|
"rewards/margins": 0.1971598118543625, |
|
"rewards/rejected": -1.014035701751709, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3482849604221636, |
|
"grad_norm": 10.137761591125681, |
|
"learning_rate": 4.121862894301754e-07, |
|
"logits/chosen": -2.1386771202087402, |
|
"logits/rejected": -2.463273286819458, |
|
"logps/chosen": -378.07904052734375, |
|
"logps/rejected": -362.71893310546875, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.91053307056427, |
|
"rewards/margins": 0.17932763695716858, |
|
"rewards/rejected": -1.0898606777191162, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35883905013192613, |
|
"grad_norm": 12.186665200345084, |
|
"learning_rate": 4.050454502616667e-07, |
|
"logits/chosen": -2.1371123790740967, |
|
"logits/rejected": -2.453059673309326, |
|
"logps/chosen": -393.2257080078125, |
|
"logps/rejected": -389.81195068359375, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0101797580718994, |
|
"rewards/margins": 0.24000540375709534, |
|
"rewards/rejected": -1.2501851320266724, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36939313984168864, |
|
"grad_norm": 7.8151280249971276, |
|
"learning_rate": 3.976928376579047e-07, |
|
"logits/chosen": -2.1572844982147217, |
|
"logits/rejected": -2.5259194374084473, |
|
"logps/chosen": -371.3215026855469, |
|
"logps/rejected": -361.8147277832031, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8133503794670105, |
|
"rewards/margins": 0.20597751438617706, |
|
"rewards/rejected": -1.0193278789520264, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37994722955145116, |
|
"grad_norm": 10.09035062532825, |
|
"learning_rate": 3.9013849440328945e-07, |
|
"logits/chosen": -2.11098051071167, |
|
"logits/rejected": -2.379697799682617, |
|
"logps/chosen": -331.082763671875, |
|
"logps/rejected": -332.23443603515625, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7421666979789734, |
|
"rewards/margins": 0.18613779544830322, |
|
"rewards/rejected": -0.9283044934272766, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39050131926121373, |
|
"grad_norm": 11.160942548142629, |
|
"learning_rate": 3.8239273882202473e-07, |
|
"logits/chosen": -2.120657444000244, |
|
"logits/rejected": -2.317275285720825, |
|
"logps/chosen": -406.42041015625, |
|
"logps/rejected": -405.0521545410156, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1358160972595215, |
|
"rewards/margins": 0.21516843140125275, |
|
"rewards/rejected": -1.3509845733642578, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40105540897097625, |
|
"grad_norm": 10.026177303858306, |
|
"learning_rate": 3.7446615068452804e-07, |
|
"logits/chosen": -2.2416388988494873, |
|
"logits/rejected": -2.50757098197937, |
|
"logps/chosen": -402.29205322265625, |
|
"logps/rejected": -400.1927795410156, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1884238719940186, |
|
"rewards/margins": 0.24852600693702698, |
|
"rewards/rejected": -1.4369499683380127, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41160949868073876, |
|
"grad_norm": 9.915852139948614, |
|
"learning_rate": 3.6636955675673743e-07, |
|
"logits/chosen": -2.292942762374878, |
|
"logits/rejected": -2.5384058952331543, |
|
"logps/chosen": -383.97418212890625, |
|
"logps/rejected": -393.2140197753906, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.096928358078003, |
|
"rewards/margins": 0.28661248087882996, |
|
"rewards/rejected": -1.3835408687591553, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42216358839050133, |
|
"grad_norm": 14.20330931182508, |
|
"learning_rate": 3.5811401601205093e-07, |
|
"logits/chosen": -2.2763895988464355, |
|
"logits/rejected": -2.5465030670166016, |
|
"logps/chosen": -393.99267578125, |
|
"logps/rejected": -405.69268798828125, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2335624694824219, |
|
"rewards/margins": 0.20860306918621063, |
|
"rewards/rejected": -1.442165732383728, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43271767810026385, |
|
"grad_norm": 9.857364268590045, |
|
"learning_rate": 3.497108045260995e-07, |
|
"logits/chosen": -2.2732205390930176, |
|
"logits/rejected": -2.512218713760376, |
|
"logps/chosen": -384.2240905761719, |
|
"logps/rejected": -387.35052490234375, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2368552684783936, |
|
"rewards/margins": 0.2555080056190491, |
|
"rewards/rejected": -1.492363452911377, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44327176781002636, |
|
"grad_norm": 10.31880014400564, |
|
"learning_rate": 3.411714000749838e-07, |
|
"logits/chosen": -2.3171160221099854, |
|
"logits/rejected": -2.6205945014953613, |
|
"logps/chosen": -408.45916748046875, |
|
"logps/rejected": -408.751220703125, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2992274761199951, |
|
"rewards/margins": 0.2618922293186188, |
|
"rewards/rejected": -1.5611199140548706, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45382585751978893, |
|
"grad_norm": 12.303776927971242, |
|
"learning_rate": 3.3250746645801287e-07, |
|
"logits/chosen": -2.199439525604248, |
|
"logits/rejected": -2.4404823780059814, |
|
"logps/chosen": -443.233642578125, |
|
"logps/rejected": -461.68597412109375, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.6428359746932983, |
|
"rewards/margins": 0.345574289560318, |
|
"rewards/rejected": -1.9884103536605835, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46437994722955145, |
|
"grad_norm": 12.23954935903092, |
|
"learning_rate": 3.237308375663571e-07, |
|
"logits/chosen": -2.234389305114746, |
|
"logits/rejected": -2.4988560676574707, |
|
"logps/chosen": -442.395751953125, |
|
"logps/rejected": -463.5758361816406, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.693788766860962, |
|
"rewards/margins": 0.34694477915763855, |
|
"rewards/rejected": -2.040733575820923, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47493403693931396, |
|
"grad_norm": 14.593463763552798, |
|
"learning_rate": 3.148535012193767e-07, |
|
"logits/chosen": -2.2539751529693604, |
|
"logits/rejected": -2.492187023162842, |
|
"logps/chosen": -510.832275390625, |
|
"logps/rejected": -525.7044677734375, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1941466331481934, |
|
"rewards/margins": 0.3329901695251465, |
|
"rewards/rejected": -2.5271365642547607, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48548812664907653, |
|
"grad_norm": 10.847822783700963, |
|
"learning_rate": 3.0588758279070183e-07, |
|
"logits/chosen": -2.233119249343872, |
|
"logits/rejected": -2.4563241004943848, |
|
"logps/chosen": -432.57635498046875, |
|
"logps/rejected": -434.28399658203125, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.6671392917633057, |
|
"rewards/margins": 0.20822450518608093, |
|
"rewards/rejected": -1.8753639459609985, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49604221635883905, |
|
"grad_norm": 9.907519766746905, |
|
"learning_rate": 2.968453286464312e-07, |
|
"logits/chosen": -2.2562708854675293, |
|
"logits/rejected": -2.479283094406128, |
|
"logps/chosen": -388.1554260253906, |
|
"logps/rejected": -398.91656494140625, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3175015449523926, |
|
"rewards/margins": 0.24448028206825256, |
|
"rewards/rejected": -1.5619816780090332, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5065963060686016, |
|
"grad_norm": 11.385701610802691, |
|
"learning_rate": 2.8773908941806877e-07, |
|
"logits/chosen": -2.2707817554473877, |
|
"logits/rejected": -2.499936103820801, |
|
"logps/chosen": -438.36834716796875, |
|
"logps/rejected": -435.4722595214844, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5956835746765137, |
|
"rewards/margins": 0.22251293063163757, |
|
"rewards/rejected": -1.8181965351104736, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5171503957783641, |
|
"grad_norm": 13.386635563356508, |
|
"learning_rate": 2.785813031330473e-07, |
|
"logits/chosen": -2.2956652641296387, |
|
"logits/rejected": -2.5434913635253906, |
|
"logps/chosen": -469.11676025390625, |
|
"logps/rejected": -465.9659118652344, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9268583059310913, |
|
"rewards/margins": 0.21980834007263184, |
|
"rewards/rejected": -2.1466667652130127, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5277044854881267, |
|
"grad_norm": 10.39915818076319, |
|
"learning_rate": 2.693844782258779e-07, |
|
"logits/chosen": -2.3407020568847656, |
|
"logits/rejected": -2.5239195823669434, |
|
"logps/chosen": -459.4602966308594, |
|
"logps/rejected": -466.40966796875, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.813126564025879, |
|
"rewards/margins": 0.22814805805683136, |
|
"rewards/rejected": -2.0412745475769043, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5382585751978892, |
|
"grad_norm": 13.963581040356289, |
|
"learning_rate": 2.601611764531342e-07, |
|
"logits/chosen": -2.2778186798095703, |
|
"logits/rejected": -2.4619011878967285, |
|
"logps/chosen": -394.53631591796875, |
|
"logps/rejected": -416.8975524902344, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4522285461425781, |
|
"rewards/margins": 0.3218362331390381, |
|
"rewards/rejected": -1.7740647792816162, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5488126649076517, |
|
"grad_norm": 9.618957459005115, |
|
"learning_rate": 2.5092399573560323e-07, |
|
"logits/chosen": -2.219548463821411, |
|
"logits/rejected": -2.35976243019104, |
|
"logps/chosen": -442.07470703125, |
|
"logps/rejected": -447.6449279785156, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.6692779064178467, |
|
"rewards/margins": 0.20155127346515656, |
|
"rewards/rejected": -1.8708292245864868, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5593667546174143, |
|
"grad_norm": 13.385422305434652, |
|
"learning_rate": 2.4168555295104124e-07, |
|
"logits/chosen": -2.215520143508911, |
|
"logits/rejected": -2.2993171215057373, |
|
"logps/chosen": -438.2977600097656, |
|
"logps/rejected": -455.9774475097656, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5749682188034058, |
|
"rewards/margins": 0.3128505051136017, |
|
"rewards/rejected": -1.8878189325332642, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5699208443271768, |
|
"grad_norm": 20.732510246650026, |
|
"learning_rate": 2.3245846670103626e-07, |
|
"logits/chosen": -2.383749008178711, |
|
"logits/rejected": -2.67887806892395, |
|
"logps/chosen": -489.03680419921875, |
|
"logps/rejected": -513.4251708984375, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8371191024780273, |
|
"rewards/margins": 0.4673503041267395, |
|
"rewards/rejected": -2.304469347000122, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5804749340369393, |
|
"grad_norm": 21.85869665384531, |
|
"learning_rate": 2.232553400755159e-07, |
|
"logits/chosen": -2.5215096473693848, |
|
"logits/rejected": -2.706601619720459, |
|
"logps/chosen": -494.96881103515625, |
|
"logps/rejected": -513.2868041992188, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0746359825134277, |
|
"rewards/margins": 0.40606698393821716, |
|
"rewards/rejected": -2.4807028770446777, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5910290237467019, |
|
"grad_norm": 11.697047808438843, |
|
"learning_rate": 2.1408874343844294e-07, |
|
"logits/chosen": -2.4797873497009277, |
|
"logits/rejected": -2.675075054168701, |
|
"logps/chosen": -458.46148681640625, |
|
"logps/rejected": -470.7688903808594, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7771122455596924, |
|
"rewards/margins": 0.36666423082351685, |
|
"rewards/rejected": -2.1437766551971436, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6015831134564644, |
|
"grad_norm": 12.782798316845536, |
|
"learning_rate": 2.049711972582101e-07, |
|
"logits/chosen": -2.287956714630127, |
|
"logits/rejected": -2.528700590133667, |
|
"logps/chosen": -435.39569091796875, |
|
"logps/rejected": -443.40240478515625, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6082191467285156, |
|
"rewards/margins": 0.2757379412651062, |
|
"rewards/rejected": -1.8839571475982666, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6121372031662269, |
|
"grad_norm": 15.368915084454407, |
|
"learning_rate": 1.9591515500618588e-07, |
|
"logits/chosen": -2.276632070541382, |
|
"logits/rejected": -2.4731783866882324, |
|
"logps/chosen": -473.08233642578125, |
|
"logps/rejected": -492.99774169921875, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.886792540550232, |
|
"rewards/margins": 0.30836355686187744, |
|
"rewards/rejected": -2.1951560974121094, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6226912928759895, |
|
"grad_norm": 12.7131825042862, |
|
"learning_rate": 1.8693298614677112e-07, |
|
"logits/chosen": -2.141019821166992, |
|
"logits/rejected": -2.3793327808380127, |
|
"logps/chosen": -507.8687438964844, |
|
"logps/rejected": -522.310546875, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.982000708580017, |
|
"rewards/margins": 0.3759006857872009, |
|
"rewards/rejected": -2.3579015731811523, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.633245382585752, |
|
"grad_norm": 16.694511883816396, |
|
"learning_rate": 1.7803695924219814e-07, |
|
"logits/chosen": -2.2722671031951904, |
|
"logits/rejected": -2.486273765563965, |
|
"logps/chosen": -485.3500061035156, |
|
"logps/rejected": -501.1082458496094, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0275466442108154, |
|
"rewards/margins": 0.3500698506832123, |
|
"rewards/rejected": -2.3776164054870605, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6437994722955145, |
|
"grad_norm": 12.047975502833824, |
|
"learning_rate": 1.6923922519515067e-07, |
|
"logits/chosen": -2.2678744792938232, |
|
"logits/rejected": -2.4072413444519043, |
|
"logps/chosen": -485.83709716796875, |
|
"logps/rejected": -510.50628662109375, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0933148860931396, |
|
"rewards/margins": 0.37190961837768555, |
|
"rewards/rejected": -2.465224504470825, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6543535620052771, |
|
"grad_norm": 15.563285458971103, |
|
"learning_rate": 1.605518006520924e-07, |
|
"logits/chosen": -2.2849347591400146, |
|
"logits/rejected": -2.5423800945281982, |
|
"logps/chosen": -502.2515563964844, |
|
"logps/rejected": -523.35400390625, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3487892150878906, |
|
"rewards/margins": 0.39526933431625366, |
|
"rewards/rejected": -2.744058609008789, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6649076517150396, |
|
"grad_norm": 12.024205485012896, |
|
"learning_rate": 1.519865515899731e-07, |
|
"logits/chosen": -2.3724029064178467, |
|
"logits/rejected": -2.584688901901245, |
|
"logps/chosen": -492.44183349609375, |
|
"logps/rejected": -507.952880859375, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.114487409591675, |
|
"rewards/margins": 0.37069326639175415, |
|
"rewards/rejected": -2.485180616378784, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6754617414248021, |
|
"grad_norm": 12.928310439067417, |
|
"learning_rate": 1.4355517710873182e-07, |
|
"logits/chosen": -2.332822322845459, |
|
"logits/rejected": -2.580765724182129, |
|
"logps/chosen": -478.24560546875, |
|
"logps/rejected": -496.3634338378906, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.112123966217041, |
|
"rewards/margins": 0.3569082021713257, |
|
"rewards/rejected": -2.4690322875976562, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6860158311345647, |
|
"grad_norm": 14.108193064852378, |
|
"learning_rate": 1.3526919345173318e-07, |
|
"logits/chosen": -2.4187912940979004, |
|
"logits/rejected": -2.564967632293701, |
|
"logps/chosen": -518.6376953125, |
|
"logps/rejected": -543.2145385742188, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3633389472961426, |
|
"rewards/margins": 0.39262861013412476, |
|
"rewards/rejected": -2.755967378616333, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6965699208443272, |
|
"grad_norm": 19.566691765082115, |
|
"learning_rate": 1.2713991827596443e-07, |
|
"logits/chosen": -2.466085195541382, |
|
"logits/rejected": -2.6994807720184326, |
|
"logps/chosen": -525.8836669921875, |
|
"logps/rejected": -555.58154296875, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.558525800704956, |
|
"rewards/margins": 0.42205095291137695, |
|
"rewards/rejected": -2.980576753616333, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7071240105540897, |
|
"grad_norm": 16.838554493879652, |
|
"learning_rate": 1.191784551934773e-07, |
|
"logits/chosen": -2.502603054046631, |
|
"logits/rejected": -2.686891794204712, |
|
"logps/chosen": -503.7294006347656, |
|
"logps/rejected": -527.2174072265625, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.4917781352996826, |
|
"rewards/margins": 0.3848406672477722, |
|
"rewards/rejected": -2.8766188621520996, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7176781002638523, |
|
"grad_norm": 14.504581595303138, |
|
"learning_rate": 1.1139567860518953e-07, |
|
"logits/chosen": -2.369147777557373, |
|
"logits/rejected": -2.5709891319274902, |
|
"logps/chosen": -490.6739807128906, |
|
"logps/rejected": -508.79486083984375, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.1547963619232178, |
|
"rewards/margins": 0.4393877387046814, |
|
"rewards/rejected": -2.594184160232544, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7282321899736148, |
|
"grad_norm": 17.356368091641894, |
|
"learning_rate": 1.0380221884776128e-07, |
|
"logits/chosen": -2.3545467853546143, |
|
"logits/rejected": -2.6243462562561035, |
|
"logps/chosen": -524.93896484375, |
|
"logps/rejected": -539.4567260742188, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.282606840133667, |
|
"rewards/margins": 0.3932141065597534, |
|
"rewards/rejected": -2.675821304321289, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7387862796833773, |
|
"grad_norm": 12.791178420216584, |
|
"learning_rate": 9.640844767383405e-08, |
|
"logits/chosen": -2.3955166339874268, |
|
"logits/rejected": -2.7560970783233643, |
|
"logps/chosen": -512.841552734375, |
|
"logps/rejected": -530.08203125, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2674124240875244, |
|
"rewards/margins": 0.4363299012184143, |
|
"rewards/rejected": -2.703742504119873, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7493403693931399, |
|
"grad_norm": 15.246389662934607, |
|
"learning_rate": 8.922446408546378e-08, |
|
"logits/chosen": -2.1915884017944336, |
|
"logits/rejected": -2.4269826412200928, |
|
"logps/chosen": -500.866943359375, |
|
"logps/rejected": -525.7913208007812, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0900752544403076, |
|
"rewards/margins": 0.4145973324775696, |
|
"rewards/rejected": -2.5046725273132324, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7598944591029023, |
|
"grad_norm": 13.547069603435855, |
|
"learning_rate": 8.22600805400994e-08, |
|
"logits/chosen": -2.177799940109253, |
|
"logits/rejected": -2.3907604217529297, |
|
"logps/chosen": -484.6221618652344, |
|
"logps/rejected": -505.7215270996094, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.995901346206665, |
|
"rewards/margins": 0.3997262418270111, |
|
"rewards/rejected": -2.395627498626709, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7704485488126649, |
|
"grad_norm": 13.801750243778415, |
|
"learning_rate": 7.552480954794558e-08, |
|
"logits/chosen": -2.4111971855163574, |
|
"logits/rejected": -2.541329860687256, |
|
"logps/chosen": -483.94940185546875, |
|
"logps/rejected": -505.018798828125, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1984305381774902, |
|
"rewards/margins": 0.30352845788002014, |
|
"rewards/rejected": -2.5019590854644775, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7810026385224275, |
|
"grad_norm": 14.514442087318445, |
|
"learning_rate": 6.902785067901854e-08, |
|
"logits/chosen": -2.3650124073028564, |
|
"logits/rejected": -2.6508941650390625, |
|
"logps/chosen": -493.879150390625, |
|
"logps/rejected": -509.673828125, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1922976970672607, |
|
"rewards/margins": 0.36951905488967896, |
|
"rewards/rejected": -2.561816930770874, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7915567282321899, |
|
"grad_norm": 17.145765706359093, |
|
"learning_rate": 6.277807799763973e-08, |
|
"logits/chosen": -2.3265914916992188, |
|
"logits/rejected": -2.5552210807800293, |
|
"logps/chosen": -559.2115478515625, |
|
"logps/rejected": -583.4457397460938, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.589047908782959, |
|
"rewards/margins": 0.4866320490837097, |
|
"rewards/rejected": -3.0756797790527344, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8021108179419525, |
|
"grad_norm": 12.09091453196307, |
|
"learning_rate": 5.678402794153145e-08, |
|
"logits/chosen": -2.2694175243377686, |
|
"logits/rejected": -2.5305798053741455, |
|
"logps/chosen": -522.9309692382812, |
|
"logps/rejected": -551.8192749023438, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.3409042358398438, |
|
"rewards/margins": 0.4437629282474518, |
|
"rewards/rejected": -2.7846672534942627, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8126649076517151, |
|
"grad_norm": 13.834538465076683, |
|
"learning_rate": 5.105388766206969e-08, |
|
"logits/chosen": -2.3926773071289062, |
|
"logits/rejected": -2.5119540691375732, |
|
"logps/chosen": -503.90850830078125, |
|
"logps/rejected": -526.0748901367188, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2285289764404297, |
|
"rewards/margins": 0.36956310272216797, |
|
"rewards/rejected": -2.5980920791625977, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8232189973614775, |
|
"grad_norm": 12.082575884438302, |
|
"learning_rate": 4.5595483841620484e-08, |
|
"logits/chosen": -2.219971179962158, |
|
"logits/rejected": -2.4546897411346436, |
|
"logps/chosen": -487.01544189453125, |
|
"logps/rejected": -499.76495361328125, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0403149127960205, |
|
"rewards/margins": 0.41723886132240295, |
|
"rewards/rejected": -2.4575533866882324, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8337730870712401, |
|
"grad_norm": 14.209125492445594, |
|
"learning_rate": 4.0416272003232526e-08, |
|
"logits/chosen": -2.3332419395446777, |
|
"logits/rejected": -2.5107414722442627, |
|
"logps/chosen": -479.9403381347656, |
|
"logps/rejected": -502.7928771972656, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.998462438583374, |
|
"rewards/margins": 0.44837865233421326, |
|
"rewards/rejected": -2.446840763092041, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8443271767810027, |
|
"grad_norm": 13.431649672455546, |
|
"learning_rate": 3.552332632729041e-08, |
|
"logits/chosen": -2.3189663887023926, |
|
"logits/rejected": -2.414161205291748, |
|
"logps/chosen": -480.2158203125, |
|
"logps/rejected": -508.34466552734375, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9780933856964111, |
|
"rewards/margins": 0.3599149286746979, |
|
"rewards/rejected": -2.338008403778076, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8443271767810027, |
|
"eval_logits/chosen": -2.860567092895508, |
|
"eval_logits/rejected": -2.755436420440674, |
|
"eval_logps/chosen": -475.5936279296875, |
|
"eval_logps/rejected": -511.86138916015625, |
|
"eval_loss": 0.6271286606788635, |
|
"eval_rewards/accuracies": 0.6350806355476379, |
|
"eval_rewards/chosen": -2.127014636993408, |
|
"eval_rewards/margins": 0.2526260018348694, |
|
"eval_rewards/rejected": -2.379640579223633, |
|
"eval_runtime": 325.3184, |
|
"eval_samples_per_second": 6.074, |
|
"eval_steps_per_second": 0.381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8548812664907651, |
|
"grad_norm": 14.805379177265603, |
|
"learning_rate": 3.092332998903416e-08, |
|
"logits/chosen": -2.3201870918273926, |
|
"logits/rejected": -2.5070722103118896, |
|
"logps/chosen": -494.46075439453125, |
|
"logps/rejected": -522.0347900390625, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.1851017475128174, |
|
"rewards/margins": 0.4036984443664551, |
|
"rewards/rejected": -2.5887999534606934, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8654353562005277, |
|
"grad_norm": 14.004584602296926, |
|
"learning_rate": 2.6622566030146455e-08, |
|
"logits/chosen": -2.2431082725524902, |
|
"logits/rejected": -2.432163715362549, |
|
"logps/chosen": -490.9959411621094, |
|
"logps/rejected": -510.72088623046875, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1516032218933105, |
|
"rewards/margins": 0.3738686442375183, |
|
"rewards/rejected": -2.5254716873168945, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8759894459102903, |
|
"grad_norm": 12.549317692341273, |
|
"learning_rate": 2.26269087768734e-08, |
|
"logits/chosen": -2.362277030944824, |
|
"logits/rejected": -2.535696029663086, |
|
"logps/chosen": -504.3929748535156, |
|
"logps/rejected": -532.2606201171875, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.364351511001587, |
|
"rewards/margins": 0.4068065285682678, |
|
"rewards/rejected": -2.771157741546631, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8865435356200527, |
|
"grad_norm": 17.291644458564797, |
|
"learning_rate": 1.894181581640106e-08, |
|
"logits/chosen": -2.474963903427124, |
|
"logits/rejected": -2.697309970855713, |
|
"logps/chosen": -524.9340209960938, |
|
"logps/rejected": -546.8604125976562, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.467923641204834, |
|
"rewards/margins": 0.43783673644065857, |
|
"rewards/rejected": -2.9057605266571045, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8970976253298153, |
|
"grad_norm": 14.61494685094214, |
|
"learning_rate": 1.5572320542448143e-08, |
|
"logits/chosen": -2.338838577270508, |
|
"logits/rejected": -2.5825653076171875, |
|
"logps/chosen": -540.0514526367188, |
|
"logps/rejected": -563.7896118164062, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.444960832595825, |
|
"rewards/margins": 0.40859413146972656, |
|
"rewards/rejected": -2.8535547256469727, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9076517150395779, |
|
"grad_norm": 11.41595324078826, |
|
"learning_rate": 1.2523025280255729e-08, |
|
"logits/chosen": -2.3540937900543213, |
|
"logits/rejected": -2.604950428009033, |
|
"logps/chosen": -529.1434326171875, |
|
"logps/rejected": -546.1678466796875, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.464961051940918, |
|
"rewards/margins": 0.4290400445461273, |
|
"rewards/rejected": -2.894001007080078, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9182058047493403, |
|
"grad_norm": 15.41017745385185, |
|
"learning_rate": 9.798095000364214e-09, |
|
"logits/chosen": -2.409531831741333, |
|
"logits/rejected": -2.6002402305603027, |
|
"logps/chosen": -515.3450317382812, |
|
"logps/rejected": -548.9906005859375, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4495291709899902, |
|
"rewards/margins": 0.48690468072891235, |
|
"rewards/rejected": -2.936434030532837, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9287598944591029, |
|
"grad_norm": 15.067700929396372, |
|
"learning_rate": 7.401251629764876e-09, |
|
"logits/chosen": -2.5300445556640625, |
|
"logits/rejected": -2.7091293334960938, |
|
"logps/chosen": -556.22607421875, |
|
"logps/rejected": -582.80126953125, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.7479639053344727, |
|
"rewards/margins": 0.405862033367157, |
|
"rewards/rejected": -3.1538259983062744, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9393139841688655, |
|
"grad_norm": 15.58719326370491, |
|
"learning_rate": 5.335768968195098e-09, |
|
"logits/chosen": -2.4501638412475586, |
|
"logits/rejected": -2.711761713027954, |
|
"logps/chosen": -547.3215942382812, |
|
"logps/rejected": -560.46142578125, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.6382341384887695, |
|
"rewards/margins": 0.393528550863266, |
|
"rewards/rejected": -3.0317625999450684, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9498680738786279, |
|
"grad_norm": 18.222145597670412, |
|
"learning_rate": 3.604468216521883e-09, |
|
"logits/chosen": -2.5248587131500244, |
|
"logits/rejected": -2.665889024734497, |
|
"logps/chosen": -544.0384521484375, |
|
"logps/rejected": -558.4286499023438, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.6324782371520996, |
|
"rewards/margins": 0.36458876729011536, |
|
"rewards/rejected": -2.9970669746398926, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9604221635883905, |
|
"grad_norm": 18.763031291028923, |
|
"learning_rate": 2.2097141233206884e-09, |
|
"logits/chosen": -2.4280190467834473, |
|
"logits/rejected": -2.6637661457061768, |
|
"logps/chosen": -531.4348754882812, |
|
"logps/rejected": -560.4275512695312, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5917065143585205, |
|
"rewards/margins": 0.4408086836338043, |
|
"rewards/rejected": -3.032515287399292, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9709762532981531, |
|
"grad_norm": 13.86977592293856, |
|
"learning_rate": 1.1534117549133472e-09, |
|
"logits/chosen": -2.514380931854248, |
|
"logits/rejected": -2.7799925804138184, |
|
"logps/chosen": -552.8911743164062, |
|
"logps/rejected": -577.2693481445312, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.666104793548584, |
|
"rewards/margins": 0.5134469270706177, |
|
"rewards/rejected": -3.179551601409912, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9815303430079155, |
|
"grad_norm": 15.545025613547882, |
|
"learning_rate": 4.3700389327672173e-10, |
|
"logits/chosen": -2.3933727741241455, |
|
"logits/rejected": -2.630448818206787, |
|
"logps/chosen": -566.9051513671875, |
|
"logps/rejected": -592.8246459960938, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.771134614944458, |
|
"rewards/margins": 0.4589918553829193, |
|
"rewards/rejected": -3.230126142501831, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9920844327176781, |
|
"grad_norm": 13.156905753144738, |
|
"learning_rate": 6.146906537587982e-11, |
|
"logits/chosen": -2.4273524284362793, |
|
"logits/rejected": -2.5787882804870605, |
|
"logps/chosen": -544.8056640625, |
|
"logps/rejected": -564.3406982421875, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.588836908340454, |
|
"rewards/margins": 0.3985593914985657, |
|
"rewards/rejected": -2.987395763397217, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9984168865435357, |
|
"step": 473, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6128583030015167, |
|
"train_runtime": 20192.8387, |
|
"train_samples_per_second": 3.003, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 473, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |