|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9993222089532967, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -4.324154853820801, |
|
"logits/rejected": -4.269870758056641, |
|
"logps/chosen": -367.06219482421875, |
|
"logps/rejected": -317.6511535644531, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -4.277963638305664, |
|
"logits/rejected": -4.137287616729736, |
|
"logps/chosen": -423.3011779785156, |
|
"logps/rejected": -322.6611633300781, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.3958333432674408, |
|
"rewards/chosen": -0.0024322373792529106, |
|
"rewards/margins": -0.0025027708616107702, |
|
"rewards/rejected": 7.053340232232586e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -4.263833045959473, |
|
"logits/rejected": -4.1435723304748535, |
|
"logps/chosen": -392.3028259277344, |
|
"logps/rejected": -317.58099365234375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00013974684407003224, |
|
"rewards/margins": 0.00036675756564363837, |
|
"rewards/rejected": -0.0002270108088850975, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -4.267035961151123, |
|
"logits/rejected": -4.1559858322143555, |
|
"logps/chosen": -406.5338134765625, |
|
"logps/rejected": -325.1300354003906, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.0006199823692440987, |
|
"rewards/margins": 0.001958064269274473, |
|
"rewards/rejected": -0.0013380816671997309, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -4.252381324768066, |
|
"logits/rejected": -4.157735824584961, |
|
"logps/chosen": -376.9677429199219, |
|
"logps/rejected": -313.22186279296875, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": 0.00031176567426882684, |
|
"rewards/margins": -0.00012173606228316203, |
|
"rewards/rejected": 0.0004335021658334881, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -4.273421287536621, |
|
"logits/rejected": -4.16197395324707, |
|
"logps/chosen": -397.47222900390625, |
|
"logps/rejected": -314.4212341308594, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.002201077062636614, |
|
"rewards/margins": 0.0010374437551945448, |
|
"rewards/rejected": -0.0032385208178311586, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -4.286251544952393, |
|
"logits/rejected": -4.157068252563477, |
|
"logps/chosen": -398.3650207519531, |
|
"logps/rejected": -320.15008544921875, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.45781248807907104, |
|
"rewards/chosen": -0.004078245721757412, |
|
"rewards/margins": -0.0025454089045524597, |
|
"rewards/rejected": -0.0015328375156968832, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -4.272886753082275, |
|
"logits/rejected": -4.148139953613281, |
|
"logps/chosen": -401.9974060058594, |
|
"logps/rejected": -301.5581970214844, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": -0.0007527429843321443, |
|
"rewards/margins": 0.0018155823927372694, |
|
"rewards/rejected": -0.0025683254934847355, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -4.266884803771973, |
|
"logits/rejected": -4.140568256378174, |
|
"logps/chosen": -422.1355895996094, |
|
"logps/rejected": -317.8870849609375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.0010083441156893969, |
|
"rewards/margins": 0.003068871796131134, |
|
"rewards/rejected": -0.004077216610312462, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -4.275304317474365, |
|
"logits/rejected": -4.130114555358887, |
|
"logps/chosen": -405.22271728515625, |
|
"logps/rejected": -321.6945495605469, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": -0.0015558091690763831, |
|
"rewards/margins": 0.00022566183179151267, |
|
"rewards/rejected": -0.001781471073627472, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -4.3047099113464355, |
|
"logits/rejected": -4.166022300720215, |
|
"logps/chosen": -401.2640075683594, |
|
"logps/rejected": -308.84307861328125, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": -0.0015420484123751521, |
|
"rewards/margins": -0.0010955848265439272, |
|
"rewards/rejected": -0.0004464638768695295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -4.256144046783447, |
|
"logits/rejected": -4.105890274047852, |
|
"logps/chosen": -420.77423095703125, |
|
"logps/rejected": -302.2908935546875, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.4859375059604645, |
|
"rewards/chosen": 9.21973041840829e-05, |
|
"rewards/margins": -0.0004623614368028939, |
|
"rewards/rejected": 0.0005545587628148496, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -4.286948204040527, |
|
"logits/rejected": -4.167834281921387, |
|
"logps/chosen": -405.80596923828125, |
|
"logps/rejected": -323.16510009765625, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": -0.0016373072285205126, |
|
"rewards/margins": -0.001050219521857798, |
|
"rewards/rejected": -0.0005870877066627145, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -4.278336524963379, |
|
"logits/rejected": -4.132115840911865, |
|
"logps/chosen": -401.5750732421875, |
|
"logps/rejected": -298.68670654296875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.49531251192092896, |
|
"rewards/chosen": -0.002432642038911581, |
|
"rewards/margins": 0.0018511947710067034, |
|
"rewards/rejected": -0.004283837042748928, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -4.276402473449707, |
|
"logits/rejected": -4.149945259094238, |
|
"logps/chosen": -412.5904846191406, |
|
"logps/rejected": -305.0515441894531, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.49531251192092896, |
|
"rewards/chosen": -0.0023457477800548077, |
|
"rewards/margins": -0.0008829582366161048, |
|
"rewards/rejected": -0.0014627889031544328, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -4.2490129470825195, |
|
"logits/rejected": -4.137168884277344, |
|
"logps/chosen": -381.56756591796875, |
|
"logps/rejected": -299.19793701171875, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0032607235480099916, |
|
"rewards/margins": -0.0023342289496213198, |
|
"rewards/rejected": -0.0009264945983886719, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -4.262160778045654, |
|
"logits/rejected": -4.115349769592285, |
|
"logps/chosen": -399.4332580566406, |
|
"logps/rejected": -311.04608154296875, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -0.0022923736833035946, |
|
"rewards/margins": -0.0016422644257545471, |
|
"rewards/rejected": -0.0006501094321720302, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -4.254582405090332, |
|
"logits/rejected": -4.094132423400879, |
|
"logps/chosen": -419.3282775878906, |
|
"logps/rejected": -314.56829833984375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4859375059604645, |
|
"rewards/chosen": -0.0008115085074678063, |
|
"rewards/margins": 0.0018295502522960305, |
|
"rewards/rejected": -0.002641058526933193, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -4.262505531311035, |
|
"logits/rejected": -4.1487321853637695, |
|
"logps/chosen": -413.31011962890625, |
|
"logps/rejected": -324.69805908203125, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -0.0014527825405821204, |
|
"rewards/margins": -0.00011156280379509553, |
|
"rewards/rejected": -0.0013412194093689322, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -4.2973856925964355, |
|
"logits/rejected": -4.131929397583008, |
|
"logps/chosen": -417.13568115234375, |
|
"logps/rejected": -314.27001953125, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0009450524812564254, |
|
"rewards/margins": -0.0020798335317522287, |
|
"rewards/rejected": 0.0011347811669111252, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -4.277375221252441, |
|
"logits/rejected": -4.1441168785095215, |
|
"logps/chosen": -381.47296142578125, |
|
"logps/rejected": -301.980224609375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.002059588208794594, |
|
"rewards/margins": 0.00202515278942883, |
|
"rewards/rejected": 3.443551395321265e-05, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -4.272757530212402, |
|
"logits/rejected": -4.1654133796691895, |
|
"logps/chosen": -407.0859069824219, |
|
"logps/rejected": -331.93328857421875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0008089464390650392, |
|
"rewards/margins": 0.0022251014597713947, |
|
"rewards/rejected": -0.0014161552535369992, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -4.265755653381348, |
|
"logits/rejected": -4.139852523803711, |
|
"logps/chosen": -396.70904541015625, |
|
"logps/rejected": -311.2632141113281, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0018544609192758799, |
|
"rewards/margins": -0.0020773629657924175, |
|
"rewards/rejected": 0.00022290220658760518, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -4.260704040527344, |
|
"logits/rejected": -4.126727104187012, |
|
"logps/chosen": -408.3927917480469, |
|
"logps/rejected": -319.992919921875, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4859375059604645, |
|
"rewards/chosen": -0.0022286553867161274, |
|
"rewards/margins": -0.000920031510759145, |
|
"rewards/rejected": -0.001308623468503356, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -4.259461879730225, |
|
"logits/rejected": -4.144876956939697, |
|
"logps/chosen": -409.2859802246094, |
|
"logps/rejected": -328.0377502441406, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.504687488079071, |
|
"rewards/chosen": -0.0005639836890622973, |
|
"rewards/margins": -0.00022939601331017911, |
|
"rewards/rejected": -0.0003345878212712705, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -4.266787052154541, |
|
"logits/rejected": -4.1633710861206055, |
|
"logps/chosen": -403.5596923828125, |
|
"logps/rejected": -319.5048522949219, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0008266723598353565, |
|
"rewards/margins": -0.0015376238152384758, |
|
"rewards/rejected": 0.0007109515718184412, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -4.2665534019470215, |
|
"logits/rejected": -4.13022518157959, |
|
"logps/chosen": -379.11322021484375, |
|
"logps/rejected": -289.3961486816406, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0011679441668093204, |
|
"rewards/margins": 0.00039926558383740485, |
|
"rewards/rejected": -0.0015672097215428948, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -4.271460056304932, |
|
"logits/rejected": -4.139186859130859, |
|
"logps/chosen": -424.3821716308594, |
|
"logps/rejected": -318.91943359375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.00013967350241728127, |
|
"rewards/margins": 0.0009566223016008735, |
|
"rewards/rejected": -0.0010962963569909334, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -4.285706520080566, |
|
"logits/rejected": -4.135653018951416, |
|
"logps/chosen": -408.2174377441406, |
|
"logps/rejected": -306.91937255859375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 2.2016651200829074e-05, |
|
"rewards/margins": 0.0013473120052367449, |
|
"rewards/rejected": -0.001325295539572835, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -4.282963752746582, |
|
"logits/rejected": -4.156318664550781, |
|
"logps/chosen": -383.9125671386719, |
|
"logps/rejected": -296.3202819824219, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4828124940395355, |
|
"rewards/chosen": -0.0008248983067460358, |
|
"rewards/margins": -0.0010658926330506802, |
|
"rewards/rejected": 0.00024099461734294891, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -4.276602268218994, |
|
"logits/rejected": -4.160402774810791, |
|
"logps/chosen": -395.6986083984375, |
|
"logps/rejected": -308.46368408203125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00020179541024845093, |
|
"rewards/margins": 0.00023063849948812276, |
|
"rewards/rejected": -0.00043243388063274324, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -4.262394905090332, |
|
"logits/rejected": -4.137896537780762, |
|
"logps/chosen": -405.48583984375, |
|
"logps/rejected": -316.9510498046875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0025507945101708174, |
|
"rewards/margins": 0.0003848490596283227, |
|
"rewards/rejected": -0.0029356435406953096, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -4.2743682861328125, |
|
"logits/rejected": -4.132224082946777, |
|
"logps/chosen": -406.7742614746094, |
|
"logps/rejected": -300.9280700683594, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5171874761581421, |
|
"rewards/chosen": -0.0014929536264389753, |
|
"rewards/margins": -0.00023388855333905667, |
|
"rewards/rejected": -0.0012590645346790552, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -4.293769836425781, |
|
"logits/rejected": -4.170851230621338, |
|
"logps/chosen": -406.20074462890625, |
|
"logps/rejected": -320.6856384277344, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.47968751192092896, |
|
"rewards/chosen": -3.2803043268359033e-06, |
|
"rewards/margins": -0.0023293071426451206, |
|
"rewards/rejected": 0.0023260267917066813, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -4.264895439147949, |
|
"logits/rejected": -4.144906044006348, |
|
"logps/chosen": -382.7566223144531, |
|
"logps/rejected": -307.6598205566406, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.001648748992010951, |
|
"rewards/margins": 0.002527676522731781, |
|
"rewards/rejected": -0.004176425281912088, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -4.280018329620361, |
|
"logits/rejected": -4.165085792541504, |
|
"logps/chosen": -396.96026611328125, |
|
"logps/rejected": -309.4230651855469, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.0016236413503065705, |
|
"rewards/margins": 0.0022962945513427258, |
|
"rewards/rejected": -0.000672653317451477, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -4.289803504943848, |
|
"logits/rejected": -4.140151500701904, |
|
"logps/chosen": -405.28973388671875, |
|
"logps/rejected": -312.529541015625, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0023417104966938496, |
|
"rewards/margins": -0.0007966022822074592, |
|
"rewards/rejected": -0.0015451073413714767, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -4.268817901611328, |
|
"logits/rejected": -4.147084712982178, |
|
"logps/chosen": -399.0814514160156, |
|
"logps/rejected": -325.91363525390625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 3.723411282408051e-05, |
|
"rewards/margins": 0.0022587967105209827, |
|
"rewards/rejected": -0.0022215619683265686, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -4.2609758377075195, |
|
"logits/rejected": -4.148962020874023, |
|
"logps/chosen": -393.0679016113281, |
|
"logps/rejected": -317.9642028808594, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.0006004157476127148, |
|
"rewards/margins": 0.0016590984305366874, |
|
"rewards/rejected": -0.001058683032169938, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -4.2723493576049805, |
|
"logits/rejected": -4.122799873352051, |
|
"logps/chosen": -397.3229675292969, |
|
"logps/rejected": -304.338623046875, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.5328124761581421, |
|
"rewards/chosen": 0.0007614147616550326, |
|
"rewards/margins": 7.879303666413762e-06, |
|
"rewards/rejected": 0.0007535360055044293, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -4.290783405303955, |
|
"logits/rejected": -4.144261360168457, |
|
"logps/chosen": -412.53375244140625, |
|
"logps/rejected": -313.51739501953125, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0025499488692730665, |
|
"rewards/margins": -0.002634689910337329, |
|
"rewards/rejected": 8.474113565171137e-05, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -4.280355930328369, |
|
"logits/rejected": -4.188906669616699, |
|
"logps/chosen": -399.37750244140625, |
|
"logps/rejected": -324.8134765625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0012526216451078653, |
|
"rewards/margins": 0.0029473325703293085, |
|
"rewards/rejected": -0.0016947109252214432, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -4.2888898849487305, |
|
"logits/rejected": -4.1478071212768555, |
|
"logps/chosen": -418.41351318359375, |
|
"logps/rejected": -319.01507568359375, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4984374940395355, |
|
"rewards/chosen": 0.0015372170601040125, |
|
"rewards/margins": -0.0005680068279616535, |
|
"rewards/rejected": 0.0021052241791039705, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -4.254868984222412, |
|
"logits/rejected": -4.136019706726074, |
|
"logps/chosen": -384.3445739746094, |
|
"logps/rejected": -295.01312255859375, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.4828124940395355, |
|
"rewards/chosen": -0.0011382882948964834, |
|
"rewards/margins": -0.0026994033250957727, |
|
"rewards/rejected": 0.0015611147973686457, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -4.2907233238220215, |
|
"logits/rejected": -4.132693290710449, |
|
"logps/chosen": -414.918212890625, |
|
"logps/rejected": -315.94573974609375, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0015450514620169997, |
|
"rewards/margins": -0.0002846633142326027, |
|
"rewards/rejected": 0.001829715445637703, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -4.255721092224121, |
|
"logits/rejected": -4.1633100509643555, |
|
"logps/chosen": -400.84967041015625, |
|
"logps/rejected": -327.2038269042969, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.0014290885301306844, |
|
"rewards/margins": 0.0006102249026298523, |
|
"rewards/rejected": -0.0020393135491758585, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -4.247704029083252, |
|
"logits/rejected": -4.119411468505859, |
|
"logps/chosen": -394.64178466796875, |
|
"logps/rejected": -308.6086120605469, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0018646775279194117, |
|
"rewards/margins": 0.0020817045588046312, |
|
"rewards/rejected": -0.00021702758385799825, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -4.270275592803955, |
|
"logits/rejected": -4.133027076721191, |
|
"logps/chosen": -419.8226623535156, |
|
"logps/rejected": -323.5798645019531, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -0.0003835520183201879, |
|
"rewards/margins": -0.00283462880179286, |
|
"rewards/rejected": 0.0024510768707841635, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -4.274611473083496, |
|
"logits/rejected": -4.136614799499512, |
|
"logps/chosen": -400.21673583984375, |
|
"logps/rejected": -314.0077209472656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4828124940395355, |
|
"rewards/chosen": -0.00010184728307649493, |
|
"rewards/margins": 0.0012176515301689506, |
|
"rewards/rejected": -0.0013194989878684282, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -4.25107479095459, |
|
"logits/rejected": -4.151733875274658, |
|
"logps/chosen": -397.98016357421875, |
|
"logps/rejected": -308.0941467285156, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.4828124940395355, |
|
"rewards/chosen": -0.0004709061176981777, |
|
"rewards/margins": -0.0007273858063854277, |
|
"rewards/rejected": 0.00025647960137575865, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -4.2503204345703125, |
|
"logits/rejected": -4.1171112060546875, |
|
"logps/chosen": -412.30450439453125, |
|
"logps/rejected": -309.49481201171875, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.0027156358119100332, |
|
"rewards/margins": 0.003060466842725873, |
|
"rewards/rejected": -0.0003448307979851961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -4.279356002807617, |
|
"logits/rejected": -4.125931262969971, |
|
"logps/chosen": -409.6204528808594, |
|
"logps/rejected": -298.81329345703125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.0007973018218763173, |
|
"rewards/margins": 0.003946124110370874, |
|
"rewards/rejected": -0.0031488225795328617, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -4.253719329833984, |
|
"logits/rejected": -4.118457794189453, |
|
"logps/chosen": -408.22882080078125, |
|
"logps/rejected": -319.6842346191406, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.00437967898324132, |
|
"rewards/margins": 0.004070502705872059, |
|
"rewards/rejected": 0.00030917683034203947, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -4.232258319854736, |
|
"logits/rejected": -4.110759258270264, |
|
"logps/chosen": -414.5962829589844, |
|
"logps/rejected": -323.87078857421875, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.002581060165539384, |
|
"rewards/margins": 0.0032051261514425278, |
|
"rewards/rejected": -0.0006240661023184657, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -4.25144624710083, |
|
"logits/rejected": -4.156504154205322, |
|
"logps/chosen": -395.7941589355469, |
|
"logps/rejected": -313.2027893066406, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.4859375059604645, |
|
"rewards/chosen": 0.0006000929279252887, |
|
"rewards/margins": -0.0023031379096210003, |
|
"rewards/rejected": 0.0029032311867922544, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -4.2781877517700195, |
|
"logits/rejected": -4.152641296386719, |
|
"logps/chosen": -400.3907470703125, |
|
"logps/rejected": -323.36663818359375, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.47968751192092896, |
|
"rewards/chosen": 0.0021930981893092394, |
|
"rewards/margins": -0.0007275763782672584, |
|
"rewards/rejected": 0.0029206746257841587, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -4.2500386238098145, |
|
"logits/rejected": -4.107308387756348, |
|
"logps/chosen": -411.8309020996094, |
|
"logps/rejected": -319.95989990234375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 0.0027205091901123524, |
|
"rewards/margins": 0.004122564569115639, |
|
"rewards/rejected": -0.0014020560774952173, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -4.307834625244141, |
|
"logits/rejected": -4.123293399810791, |
|
"logps/chosen": -412.34625244140625, |
|
"logps/rejected": -310.9077453613281, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00016926185344345868, |
|
"rewards/margins": 0.0005053894128650427, |
|
"rewards/rejected": -0.0006746514118276536, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -4.272593021392822, |
|
"logits/rejected": -4.129204750061035, |
|
"logps/chosen": -411.84161376953125, |
|
"logps/rejected": -321.55072021484375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.003927945625036955, |
|
"rewards/margins": 0.0026295329444110394, |
|
"rewards/rejected": 0.0012984138447791338, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -4.280831813812256, |
|
"logits/rejected": -4.155787467956543, |
|
"logps/chosen": -408.56256103515625, |
|
"logps/rejected": -312.60577392578125, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -9.327723819296807e-05, |
|
"rewards/margins": -0.00040280382381752133, |
|
"rewards/rejected": 0.0003095265128649771, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -4.285837650299072, |
|
"logits/rejected": -4.156912803649902, |
|
"logps/chosen": -404.223876953125, |
|
"logps/rejected": -322.22894287109375, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": 0.00110340875107795, |
|
"rewards/margins": -0.0020270957611501217, |
|
"rewards/rejected": 0.003130504861474037, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -4.270712852478027, |
|
"logits/rejected": -4.1485466957092285, |
|
"logps/chosen": -427.80010986328125, |
|
"logps/rejected": -334.1997375488281, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.0012709179427474737, |
|
"rewards/margins": 0.00037813876406289637, |
|
"rewards/rejected": 0.0008927792077884078, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -4.2818217277526855, |
|
"logits/rejected": -4.141083240509033, |
|
"logps/chosen": -397.087646484375, |
|
"logps/rejected": -299.8023986816406, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0015098925214260817, |
|
"rewards/margins": 0.003647155361250043, |
|
"rewards/rejected": -0.0021372628398239613, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -4.2828288078308105, |
|
"logits/rejected": -4.15810489654541, |
|
"logps/chosen": -408.72857666015625, |
|
"logps/rejected": -310.77764892578125, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.00010033079888671637, |
|
"rewards/margins": -0.0017872953321784735, |
|
"rewards/rejected": 0.0018876262474805117, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -4.249554634094238, |
|
"logits/rejected": -4.136783599853516, |
|
"logps/chosen": -414.71624755859375, |
|
"logps/rejected": -330.455810546875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.006457502953708172, |
|
"rewards/margins": 0.0046369172632694244, |
|
"rewards/rejected": 0.0018205851083621383, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -4.281416893005371, |
|
"logits/rejected": -4.156689643859863, |
|
"logps/chosen": -419.05401611328125, |
|
"logps/rejected": -319.1665954589844, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.0033603243064135313, |
|
"rewards/margins": 0.0027967148926109076, |
|
"rewards/rejected": 0.0005636097048409283, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -4.239119529724121, |
|
"logits/rejected": -4.116203308105469, |
|
"logps/chosen": -421.9933166503906, |
|
"logps/rejected": -327.5665588378906, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": 0.0006358650280162692, |
|
"rewards/margins": -3.5358032619114965e-05, |
|
"rewards/rejected": 0.0006712229805998504, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -4.252030849456787, |
|
"logits/rejected": -4.138897895812988, |
|
"logps/chosen": -402.931884765625, |
|
"logps/rejected": -311.1913757324219, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": 0.003976965788751841, |
|
"rewards/margins": 0.0018042316660284996, |
|
"rewards/rejected": 0.002172734122723341, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -4.269718170166016, |
|
"logits/rejected": -4.140122413635254, |
|
"logps/chosen": -402.2257385253906, |
|
"logps/rejected": -318.7126770019531, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.00315951113589108, |
|
"rewards/margins": 0.0014611692167818546, |
|
"rewards/rejected": 0.0016983415698632598, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -4.262238025665283, |
|
"logits/rejected": -4.1432600021362305, |
|
"logps/chosen": -405.7474670410156, |
|
"logps/rejected": -317.77447509765625, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.005235121585428715, |
|
"rewards/margins": 0.004777342546731234, |
|
"rewards/rejected": 0.00045777950435876846, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -4.308034420013428, |
|
"logits/rejected": -4.176297187805176, |
|
"logps/chosen": -417.35626220703125, |
|
"logps/rejected": -313.90887451171875, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0032903787214308977, |
|
"rewards/margins": 0.003389782505109906, |
|
"rewards/rejected": -9.9403434433043e-05, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -4.285686492919922, |
|
"logits/rejected": -4.153790473937988, |
|
"logps/chosen": -423.37127685546875, |
|
"logps/rejected": -331.2875671386719, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.542187511920929, |
|
"rewards/chosen": 0.005327778868377209, |
|
"rewards/margins": 0.005328441970050335, |
|
"rewards/rejected": -6.637536102971353e-07, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -4.279539108276367, |
|
"logits/rejected": -4.177431106567383, |
|
"logps/chosen": -379.5310363769531, |
|
"logps/rejected": -300.36602783203125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.0025111553259193897, |
|
"rewards/margins": 0.0021530953235924244, |
|
"rewards/rejected": 0.00035806017694994807, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -4.239541530609131, |
|
"logits/rejected": -4.1288628578186035, |
|
"logps/chosen": -375.0932312011719, |
|
"logps/rejected": -298.7073974609375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0017489530146121979, |
|
"rewards/margins": 0.0001466287358198315, |
|
"rewards/rejected": 0.0016023240750655532, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -4.258530616760254, |
|
"logits/rejected": -4.131335258483887, |
|
"logps/chosen": -378.4256591796875, |
|
"logps/rejected": -288.19842529296875, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.49531251192092896, |
|
"rewards/chosen": 0.0009945884812623262, |
|
"rewards/margins": -0.0008082139538601041, |
|
"rewards/rejected": 0.0018028020858764648, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -4.268132209777832, |
|
"logits/rejected": -4.133342742919922, |
|
"logps/chosen": -402.5184631347656, |
|
"logps/rejected": -310.98651123046875, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0009071314707398415, |
|
"rewards/margins": -0.001490770373493433, |
|
"rewards/rejected": 0.0023979023098945618, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -4.277812480926514, |
|
"logits/rejected": -4.182877063751221, |
|
"logps/chosen": -408.18048095703125, |
|
"logps/rejected": -319.3320617675781, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.004439138807356358, |
|
"rewards/margins": 0.0019760008435696363, |
|
"rewards/rejected": 0.0024631377309560776, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -4.280663967132568, |
|
"logits/rejected": -4.152825355529785, |
|
"logps/chosen": -401.7829895019531, |
|
"logps/rejected": -306.715576171875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004907527472823858, |
|
"rewards/margins": 0.00137388426810503, |
|
"rewards/rejected": 0.003533643204718828, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -4.289612770080566, |
|
"logits/rejected": -4.144918918609619, |
|
"logps/chosen": -399.4092712402344, |
|
"logps/rejected": -304.46209716796875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.002732563531026244, |
|
"rewards/margins": 0.0014565556775778532, |
|
"rewards/rejected": 0.0012760077370330691, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -4.240975856781006, |
|
"logits/rejected": -4.128232479095459, |
|
"logps/chosen": -413.4056701660156, |
|
"logps/rejected": -328.46240234375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.526562511920929, |
|
"rewards/chosen": 0.003439761698246002, |
|
"rewards/margins": 0.0025056053418666124, |
|
"rewards/rejected": 0.0009341565892100334, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -4.260972023010254, |
|
"logits/rejected": -4.160162448883057, |
|
"logps/chosen": -404.38824462890625, |
|
"logps/rejected": -330.4800109863281, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.5015624761581421, |
|
"rewards/chosen": 0.0018609057879075408, |
|
"rewards/margins": -0.000567351933568716, |
|
"rewards/rejected": 0.0024282578378915787, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -4.292420387268066, |
|
"logits/rejected": -4.155418395996094, |
|
"logps/chosen": -402.5275573730469, |
|
"logps/rejected": -311.781982421875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0035346276126801968, |
|
"rewards/margins": 0.002268751384690404, |
|
"rewards/rejected": 0.0012658759951591492, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -4.276908874511719, |
|
"logits/rejected": -4.151911735534668, |
|
"logps/chosen": -408.8199768066406, |
|
"logps/rejected": -321.52215576171875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.00492482166737318, |
|
"rewards/margins": 0.0010390502866357565, |
|
"rewards/rejected": 0.003885771380737424, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -4.282100677490234, |
|
"logits/rejected": -4.158295631408691, |
|
"logps/chosen": -399.35430908203125, |
|
"logps/rejected": -316.38165283203125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.0036009408067911863, |
|
"rewards/margins": 0.0014611782971769571, |
|
"rewards/rejected": 0.002139762043952942, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -4.265560150146484, |
|
"logits/rejected": -4.16861629486084, |
|
"logps/chosen": -384.11151123046875, |
|
"logps/rejected": -317.0963134765625, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.49531251192092896, |
|
"rewards/chosen": 0.0022504080552607775, |
|
"rewards/margins": -0.001352280960418284, |
|
"rewards/rejected": 0.0036026891320943832, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -4.278510093688965, |
|
"logits/rejected": -4.114365577697754, |
|
"logps/chosen": -409.4468688964844, |
|
"logps/rejected": -319.8404541015625, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0014671286335214972, |
|
"rewards/margins": -0.001252708025276661, |
|
"rewards/rejected": 0.0027198365423828363, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -4.275809288024902, |
|
"logits/rejected": -4.195669651031494, |
|
"logps/chosen": -395.3207702636719, |
|
"logps/rejected": -331.13372802734375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.0037080198526382446, |
|
"rewards/margins": 0.0016179044032469392, |
|
"rewards/rejected": 0.002090116497129202, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -4.255551815032959, |
|
"logits/rejected": -4.139596462249756, |
|
"logps/chosen": -402.43768310546875, |
|
"logps/rejected": -321.7333984375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0047720326110720634, |
|
"rewards/margins": 0.003143607871606946, |
|
"rewards/rejected": 0.0016284246230497956, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -4.2646284103393555, |
|
"logits/rejected": -4.149605751037598, |
|
"logps/chosen": -387.3090515136719, |
|
"logps/rejected": -301.6280212402344, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.004609361290931702, |
|
"rewards/margins": 0.0038493976462632418, |
|
"rewards/rejected": 0.0007599632954224944, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -4.2500715255737305, |
|
"logits/rejected": -4.115664958953857, |
|
"logps/chosen": -425.0071716308594, |
|
"logps/rejected": -319.0146484375, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": 0.004975964780896902, |
|
"rewards/margins": 0.0043433718383312225, |
|
"rewards/rejected": 0.0006325935246422887, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -4.254255294799805, |
|
"logits/rejected": -4.149744510650635, |
|
"logps/chosen": -409.4825134277344, |
|
"logps/rejected": -328.2546691894531, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0023609010968357325, |
|
"rewards/margins": 0.0011571452487260103, |
|
"rewards/rejected": 0.0012037558481097221, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -4.244211673736572, |
|
"logits/rejected": -4.126115322113037, |
|
"logps/chosen": -405.63763427734375, |
|
"logps/rejected": -319.128662109375, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4984374940395355, |
|
"rewards/chosen": 0.002668022643774748, |
|
"rewards/margins": -0.00015769092715345323, |
|
"rewards/rejected": 0.0028257134836167097, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -4.292696952819824, |
|
"logits/rejected": -4.156879425048828, |
|
"logps/chosen": -415.4306640625, |
|
"logps/rejected": -324.4388732910156, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0032460917718708515, |
|
"rewards/margins": 0.0033036619424819946, |
|
"rewards/rejected": -5.7570869103074074e-05, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -4.28294038772583, |
|
"logits/rejected": -4.117633819580078, |
|
"logps/chosen": -415.66290283203125, |
|
"logps/rejected": -305.69390869140625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": 0.004189764615148306, |
|
"rewards/margins": 0.0031828763894736767, |
|
"rewards/rejected": 0.0010068879928439856, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -4.260636806488037, |
|
"logits/rejected": -4.170042037963867, |
|
"logps/chosen": -407.6413879394531, |
|
"logps/rejected": -327.45574951171875, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5765625238418579, |
|
"rewards/chosen": 0.00892933551222086, |
|
"rewards/margins": 0.008162255398929119, |
|
"rewards/rejected": 0.0007670802297070622, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -4.2640604972839355, |
|
"logits/rejected": -4.171642303466797, |
|
"logps/chosen": -400.89373779296875, |
|
"logps/rejected": -322.080078125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.00451917527243495, |
|
"rewards/margins": 0.0021246224641799927, |
|
"rewards/rejected": 0.002394552808254957, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -4.275049209594727, |
|
"logits/rejected": -4.145798683166504, |
|
"logps/chosen": -429.56634521484375, |
|
"logps/rejected": -331.715576171875, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.526562511920929, |
|
"rewards/chosen": 0.0037804250605404377, |
|
"rewards/margins": 0.003932067193090916, |
|
"rewards/rejected": -0.00015164251090027392, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -4.191330432891846, |
|
"eval_logits/rejected": -4.081260681152344, |
|
"eval_logps/chosen": -402.61639404296875, |
|
"eval_logps/rejected": -315.7343444824219, |
|
"eval_loss": 0.6920775771141052, |
|
"eval_rewards/accuracies": 0.5070000290870667, |
|
"eval_rewards/chosen": 0.003913247026503086, |
|
"eval_rewards/margins": 0.002829314675182104, |
|
"eval_rewards/rejected": 0.0010839327005669475, |
|
"eval_runtime": 762.8033, |
|
"eval_samples_per_second": 2.622, |
|
"eval_steps_per_second": 0.655, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -4.291719436645508, |
|
"logits/rejected": -4.159844875335693, |
|
"logps/chosen": -410.930419921875, |
|
"logps/rejected": -320.3424377441406, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.004596616607159376, |
|
"rewards/margins": 3.339797694934532e-05, |
|
"rewards/rejected": 0.004563218913972378, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -4.274192810058594, |
|
"logits/rejected": -4.13837194442749, |
|
"logps/chosen": -401.00213623046875, |
|
"logps/rejected": -310.458251953125, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": 0.0076770298182964325, |
|
"rewards/margins": 0.005642565432935953, |
|
"rewards/rejected": 0.0020344643853604794, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -4.276472091674805, |
|
"logits/rejected": -4.1441144943237305, |
|
"logps/chosen": -413.98760986328125, |
|
"logps/rejected": -310.71051025390625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.010534636676311493, |
|
"rewards/margins": 0.00471758097410202, |
|
"rewards/rejected": 0.005817054770886898, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -4.2762131690979, |
|
"logits/rejected": -4.135240077972412, |
|
"logps/chosen": -420.061767578125, |
|
"logps/rejected": -330.8781433105469, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.00425821915268898, |
|
"rewards/margins": -0.00019165253615938127, |
|
"rewards/rejected": 0.004449871368706226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -4.270270824432373, |
|
"logits/rejected": -4.130080223083496, |
|
"logps/chosen": -392.8804626464844, |
|
"logps/rejected": -304.02294921875, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.004289795644581318, |
|
"rewards/margins": 0.0029843891970813274, |
|
"rewards/rejected": 0.0013054062146693468, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -4.268284797668457, |
|
"logits/rejected": -4.145885944366455, |
|
"logps/chosen": -408.9548645019531, |
|
"logps/rejected": -317.30560302734375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.007414447609335184, |
|
"rewards/margins": 0.0030614163260906935, |
|
"rewards/rejected": 0.004353031050413847, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -4.300627708435059, |
|
"logits/rejected": -4.120467185974121, |
|
"logps/chosen": -422.968994140625, |
|
"logps/rejected": -307.66473388671875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00493131997063756, |
|
"rewards/margins": 0.0012998055899515748, |
|
"rewards/rejected": 0.0036315140314400196, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -4.260711669921875, |
|
"logits/rejected": -4.147767066955566, |
|
"logps/chosen": -389.6875305175781, |
|
"logps/rejected": -305.23516845703125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.005956425331532955, |
|
"rewards/margins": 0.002882971428334713, |
|
"rewards/rejected": 0.003073454136028886, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -4.267011642456055, |
|
"logits/rejected": -4.122623443603516, |
|
"logps/chosen": -403.59942626953125, |
|
"logps/rejected": -312.45770263671875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.006427975837141275, |
|
"rewards/margins": 0.0026332822162657976, |
|
"rewards/rejected": 0.0037946938537061214, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -4.294173717498779, |
|
"logits/rejected": -4.156063556671143, |
|
"logps/chosen": -417.2015686035156, |
|
"logps/rejected": -329.7525634765625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": 0.009924950078129768, |
|
"rewards/margins": 0.0070407153107225895, |
|
"rewards/rejected": 0.002884234767407179, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -4.2720513343811035, |
|
"logits/rejected": -4.118578910827637, |
|
"logps/chosen": -408.00701904296875, |
|
"logps/rejected": -299.61981201171875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.504687488079071, |
|
"rewards/chosen": 0.0056653618812561035, |
|
"rewards/margins": 0.0025350514333695173, |
|
"rewards/rejected": 0.003130309982225299, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -4.258962154388428, |
|
"logits/rejected": -4.1505255699157715, |
|
"logps/chosen": -391.7621154785156, |
|
"logps/rejected": -303.1611022949219, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.007706860546022654, |
|
"rewards/margins": 0.004729891195893288, |
|
"rewards/rejected": 0.002976970048621297, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -4.25510311126709, |
|
"logits/rejected": -4.135909080505371, |
|
"logps/chosen": -400.17205810546875, |
|
"logps/rejected": -307.4388427734375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00958174280822277, |
|
"rewards/margins": 0.006473850458860397, |
|
"rewards/rejected": 0.0031078937463462353, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -4.276521682739258, |
|
"logits/rejected": -4.158177852630615, |
|
"logps/chosen": -395.39215087890625, |
|
"logps/rejected": -310.30010986328125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": 0.009448185563087463, |
|
"rewards/margins": 0.005911382380872965, |
|
"rewards/rejected": 0.003536803647875786, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -4.291528224945068, |
|
"logits/rejected": -4.155767917633057, |
|
"logps/chosen": -423.88177490234375, |
|
"logps/rejected": -330.54730224609375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.009671617299318314, |
|
"rewards/margins": 0.0064759948290884495, |
|
"rewards/rejected": 0.003195622470229864, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -4.27802848815918, |
|
"logits/rejected": -4.153050422668457, |
|
"logps/chosen": -406.92523193359375, |
|
"logps/rejected": -305.8375244140625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.007676907815039158, |
|
"rewards/margins": 0.0026743696071207523, |
|
"rewards/rejected": 0.0050025382079184055, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -4.263790607452393, |
|
"logits/rejected": -4.140618801116943, |
|
"logps/chosen": -417.0517578125, |
|
"logps/rejected": -325.2332763671875, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": 0.007303275167942047, |
|
"rewards/margins": 0.004130188841372728, |
|
"rewards/rejected": 0.0031730863265693188, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -4.309510707855225, |
|
"logits/rejected": -4.183161735534668, |
|
"logps/chosen": -388.7647705078125, |
|
"logps/rejected": -309.83673095703125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.526562511920929, |
|
"rewards/chosen": 0.009641969576478004, |
|
"rewards/margins": 0.006214521359652281, |
|
"rewards/rejected": 0.0034274482168257236, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -4.253263473510742, |
|
"logits/rejected": -4.152525424957275, |
|
"logps/chosen": -387.7099609375, |
|
"logps/rejected": -302.3329162597656, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.510937511920929, |
|
"rewards/chosen": 0.0074900491163134575, |
|
"rewards/margins": 0.0033420673571527004, |
|
"rewards/rejected": 0.00414798129349947, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -4.244006156921387, |
|
"logits/rejected": -4.125102996826172, |
|
"logps/chosen": -400.85107421875, |
|
"logps/rejected": -304.1709289550781, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.007216416299343109, |
|
"rewards/margins": 0.002480756724253297, |
|
"rewards/rejected": 0.004735658876597881, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -4.282382488250732, |
|
"logits/rejected": -4.156040191650391, |
|
"logps/chosen": -409.8832092285156, |
|
"logps/rejected": -317.5098571777344, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.008845189586281776, |
|
"rewards/margins": 0.0021160345058888197, |
|
"rewards/rejected": 0.006729154847562313, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -4.294495582580566, |
|
"logits/rejected": -4.211082458496094, |
|
"logps/chosen": -398.4643249511719, |
|
"logps/rejected": -337.3882141113281, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005116584710776806, |
|
"rewards/margins": 0.0013333541573956609, |
|
"rewards/rejected": 0.003783230436965823, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -4.265214443206787, |
|
"logits/rejected": -4.16311502456665, |
|
"logps/chosen": -387.9737243652344, |
|
"logps/rejected": -306.5306091308594, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5015624761581421, |
|
"rewards/chosen": 0.005055157467722893, |
|
"rewards/margins": 0.0023398033808916807, |
|
"rewards/rejected": 0.002715354086831212, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -4.283998489379883, |
|
"logits/rejected": -4.149415016174316, |
|
"logps/chosen": -393.43609619140625, |
|
"logps/rejected": -300.15667724609375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.007636996451765299, |
|
"rewards/margins": 0.004209595732390881, |
|
"rewards/rejected": 0.0034273997880518436, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -4.261348724365234, |
|
"logits/rejected": -4.140153884887695, |
|
"logps/chosen": -393.04046630859375, |
|
"logps/rejected": -311.3672180175781, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.008078986778855324, |
|
"rewards/margins": 0.003632976207882166, |
|
"rewards/rejected": 0.0044460115022957325, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -4.243834972381592, |
|
"logits/rejected": -4.141684055328369, |
|
"logps/chosen": -408.1537170410156, |
|
"logps/rejected": -314.95404052734375, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.009249814786016941, |
|
"rewards/margins": 0.0036557712592184544, |
|
"rewards/rejected": 0.005594043061137199, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -4.290652275085449, |
|
"logits/rejected": -4.172784328460693, |
|
"logps/chosen": -404.22247314453125, |
|
"logps/rejected": -311.638916015625, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": 0.008968379348516464, |
|
"rewards/margins": 0.001797800650820136, |
|
"rewards/rejected": 0.007170577999204397, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -4.221989631652832, |
|
"logits/rejected": -4.126450061798096, |
|
"logps/chosen": -362.48773193359375, |
|
"logps/rejected": -310.7374572753906, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.007350596599280834, |
|
"rewards/margins": 0.004948228131979704, |
|
"rewards/rejected": 0.0024023696314543486, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -4.273645877838135, |
|
"logits/rejected": -4.147796630859375, |
|
"logps/chosen": -397.3808288574219, |
|
"logps/rejected": -298.5054626464844, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": 0.008822308853268623, |
|
"rewards/margins": 0.0060681127943098545, |
|
"rewards/rejected": 0.0027541951276361942, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -4.290090560913086, |
|
"logits/rejected": -4.17457389831543, |
|
"logps/chosen": -405.3314208984375, |
|
"logps/rejected": -332.5986633300781, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.008818728849291801, |
|
"rewards/margins": 0.0060805464163422585, |
|
"rewards/rejected": 0.0027381826657801867, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -4.275857448577881, |
|
"logits/rejected": -4.184884548187256, |
|
"logps/chosen": -413.36260986328125, |
|
"logps/rejected": -335.1319274902344, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.011068764142692089, |
|
"rewards/margins": 0.006428800523281097, |
|
"rewards/rejected": 0.004639963153749704, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -4.262097358703613, |
|
"logits/rejected": -4.138064384460449, |
|
"logps/chosen": -420.592041015625, |
|
"logps/rejected": -336.1610107421875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.008383669890463352, |
|
"rewards/margins": 0.0029027739074081182, |
|
"rewards/rejected": 0.00548089575022459, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -4.273635387420654, |
|
"logits/rejected": -4.136135578155518, |
|
"logps/chosen": -419.8043518066406, |
|
"logps/rejected": -326.3934631347656, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.008629587478935719, |
|
"rewards/margins": 0.0051121762953698635, |
|
"rewards/rejected": 0.003517411183565855, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -4.25814962387085, |
|
"logits/rejected": -4.133566856384277, |
|
"logps/chosen": -388.8675231933594, |
|
"logps/rejected": -307.92169189453125, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": 0.010921096429228783, |
|
"rewards/margins": 0.0063910940662026405, |
|
"rewards/rejected": 0.004530002363026142, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -4.258055210113525, |
|
"logits/rejected": -4.142639636993408, |
|
"logps/chosen": -417.714599609375, |
|
"logps/rejected": -331.1921691894531, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.011006483808159828, |
|
"rewards/margins": 0.007266665808856487, |
|
"rewards/rejected": 0.0037398170679807663, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -4.290154457092285, |
|
"logits/rejected": -4.134154796600342, |
|
"logps/chosen": -419.7810974121094, |
|
"logps/rejected": -316.3617248535156, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.012462007813155651, |
|
"rewards/margins": 0.0071654594503343105, |
|
"rewards/rejected": 0.005296547897160053, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -4.25482702255249, |
|
"logits/rejected": -4.102695941925049, |
|
"logps/chosen": -426.0753479003906, |
|
"logps/rejected": -321.2982177734375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.00866289995610714, |
|
"rewards/margins": 0.0026752217672765255, |
|
"rewards/rejected": 0.005987677723169327, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -4.27578067779541, |
|
"logits/rejected": -4.178536415100098, |
|
"logps/chosen": -399.89422607421875, |
|
"logps/rejected": -309.16204833984375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0074403537437319756, |
|
"rewards/margins": 0.0027673656586557627, |
|
"rewards/rejected": 0.004672987386584282, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -4.252071380615234, |
|
"logits/rejected": -4.123923301696777, |
|
"logps/chosen": -402.0321044921875, |
|
"logps/rejected": -311.18463134765625, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.007632538676261902, |
|
"rewards/margins": 0.003616305533796549, |
|
"rewards/rejected": 0.004016232676804066, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -4.282023906707764, |
|
"logits/rejected": -4.184301853179932, |
|
"logps/chosen": -389.57904052734375, |
|
"logps/rejected": -322.18389892578125, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00903762225061655, |
|
"rewards/margins": 0.0019117307383567095, |
|
"rewards/rejected": 0.007125890348106623, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -4.287262916564941, |
|
"logits/rejected": -4.1361284255981445, |
|
"logps/chosen": -420.2037658691406, |
|
"logps/rejected": -316.2614440917969, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.009709215722978115, |
|
"rewards/margins": 0.005225184373557568, |
|
"rewards/rejected": 0.0044840313494205475, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -4.279843807220459, |
|
"logits/rejected": -4.154780864715576, |
|
"logps/chosen": -405.2857971191406, |
|
"logps/rejected": -318.71368408203125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 0.007445839233696461, |
|
"rewards/margins": 0.0028315638191998005, |
|
"rewards/rejected": 0.00461427541449666, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -4.297855377197266, |
|
"logits/rejected": -4.1429829597473145, |
|
"logps/chosen": -411.401123046875, |
|
"logps/rejected": -309.50469970703125, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.010837659239768982, |
|
"rewards/margins": 0.007386946585029364, |
|
"rewards/rejected": 0.003450712887570262, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -4.270615577697754, |
|
"logits/rejected": -4.156781196594238, |
|
"logps/chosen": -402.6678161621094, |
|
"logps/rejected": -313.5867614746094, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.012565011158585548, |
|
"rewards/margins": 0.004020330961793661, |
|
"rewards/rejected": 0.008544680662453175, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -4.293181896209717, |
|
"logits/rejected": -4.163121223449707, |
|
"logps/chosen": -383.950927734375, |
|
"logps/rejected": -298.0428161621094, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.009886051528155804, |
|
"rewards/margins": 0.004015837796032429, |
|
"rewards/rejected": 0.0058702146634459496, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -4.2656683921813965, |
|
"logits/rejected": -4.139580249786377, |
|
"logps/chosen": -408.76361083984375, |
|
"logps/rejected": -313.99298095703125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.011651566252112389, |
|
"rewards/margins": 0.006123474799096584, |
|
"rewards/rejected": 0.005528091918677092, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -4.313319206237793, |
|
"logits/rejected": -4.1831374168396, |
|
"logps/chosen": -416.11773681640625, |
|
"logps/rejected": -317.92279052734375, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.01188584603369236, |
|
"rewards/margins": 0.005925232544541359, |
|
"rewards/rejected": 0.005960613489151001, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -4.274300575256348, |
|
"logits/rejected": -4.167074203491211, |
|
"logps/chosen": -395.77117919921875, |
|
"logps/rejected": -322.4521179199219, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": 0.013639995828270912, |
|
"rewards/margins": 0.007328096777200699, |
|
"rewards/rejected": 0.006311898585408926, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -4.246912956237793, |
|
"logits/rejected": -4.1353559494018555, |
|
"logps/chosen": -402.06451416015625, |
|
"logps/rejected": -326.43328857421875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.008587488904595375, |
|
"rewards/margins": 0.0023642387241125107, |
|
"rewards/rejected": 0.006223250180482864, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -4.270732879638672, |
|
"logits/rejected": -4.132948398590088, |
|
"logps/chosen": -413.1165466308594, |
|
"logps/rejected": -322.5657653808594, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.542187511920929, |
|
"rewards/chosen": 0.012603357434272766, |
|
"rewards/margins": 0.006664451211690903, |
|
"rewards/rejected": 0.005938907153904438, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -4.273732662200928, |
|
"logits/rejected": -4.119269371032715, |
|
"logps/chosen": -390.9085998535156, |
|
"logps/rejected": -302.38348388671875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.008494021371006966, |
|
"rewards/margins": 0.0025641201063990593, |
|
"rewards/rejected": 0.005929900798946619, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -4.267933368682861, |
|
"logits/rejected": -4.161561012268066, |
|
"logps/chosen": -390.6529235839844, |
|
"logps/rejected": -303.3753662109375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.010899425484240055, |
|
"rewards/margins": 0.005612888839095831, |
|
"rewards/rejected": 0.005286536645144224, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -4.304495811462402, |
|
"logits/rejected": -4.120760440826416, |
|
"logps/chosen": -413.19744873046875, |
|
"logps/rejected": -302.12603759765625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.011145773343741894, |
|
"rewards/margins": 0.006089083384722471, |
|
"rewards/rejected": 0.005056688562035561, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -4.27075719833374, |
|
"logits/rejected": -4.125774383544922, |
|
"logps/chosen": -398.0982360839844, |
|
"logps/rejected": -307.02325439453125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5328124761581421, |
|
"rewards/chosen": 0.010808114893734455, |
|
"rewards/margins": 0.005621565040200949, |
|
"rewards/rejected": 0.005186550319194794, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -4.2627692222595215, |
|
"logits/rejected": -4.124849796295166, |
|
"logps/chosen": -400.72039794921875, |
|
"logps/rejected": -309.2858581542969, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.00958535261452198, |
|
"rewards/margins": 0.004684613086283207, |
|
"rewards/rejected": 0.004900740925222635, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -4.256047248840332, |
|
"logits/rejected": -4.143467903137207, |
|
"logps/chosen": -395.4497985839844, |
|
"logps/rejected": -311.4337463378906, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.542187511920929, |
|
"rewards/chosen": 0.012033809907734394, |
|
"rewards/margins": 0.006985441781580448, |
|
"rewards/rejected": 0.005048368591815233, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -4.2988762855529785, |
|
"logits/rejected": -4.160223960876465, |
|
"logps/chosen": -421.30926513671875, |
|
"logps/rejected": -330.73834228515625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.012272657826542854, |
|
"rewards/margins": 0.004651675932109356, |
|
"rewards/rejected": 0.007620981428772211, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -4.249758720397949, |
|
"logits/rejected": -4.107924938201904, |
|
"logps/chosen": -394.63458251953125, |
|
"logps/rejected": -304.65570068359375, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.014264127239584923, |
|
"rewards/margins": 0.008482937701046467, |
|
"rewards/rejected": 0.005781189538538456, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -4.296584606170654, |
|
"logits/rejected": -4.137936592102051, |
|
"logps/chosen": -426.6543884277344, |
|
"logps/rejected": -326.82086181640625, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.013136537745594978, |
|
"rewards/margins": 0.0072318254970014095, |
|
"rewards/rejected": 0.005904710851609707, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -4.269163608551025, |
|
"logits/rejected": -4.147532939910889, |
|
"logps/chosen": -418.16259765625, |
|
"logps/rejected": -319.60687255859375, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.015316249802708626, |
|
"rewards/margins": 0.00932287611067295, |
|
"rewards/rejected": 0.005993373692035675, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -4.262753963470459, |
|
"logits/rejected": -4.136817932128906, |
|
"logps/chosen": -414.9779357910156, |
|
"logps/rejected": -330.0815734863281, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.013297900557518005, |
|
"rewards/margins": 0.007364665158092976, |
|
"rewards/rejected": 0.00593323539942503, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -4.293523788452148, |
|
"logits/rejected": -4.158999443054199, |
|
"logps/chosen": -413.1465759277344, |
|
"logps/rejected": -309.12017822265625, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.013114909641444683, |
|
"rewards/margins": 0.005343536846339703, |
|
"rewards/rejected": 0.007771371863782406, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -4.268404960632324, |
|
"logits/rejected": -4.135837554931641, |
|
"logps/chosen": -394.46466064453125, |
|
"logps/rejected": -313.1191101074219, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.01660170406103134, |
|
"rewards/margins": 0.011182994581758976, |
|
"rewards/rejected": 0.005418709013611078, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -4.279505252838135, |
|
"logits/rejected": -4.153802871704102, |
|
"logps/chosen": -396.4070739746094, |
|
"logps/rejected": -320.4418029785156, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.009519520215690136, |
|
"rewards/margins": 0.0036930330097675323, |
|
"rewards/rejected": 0.005826488137245178, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -4.265553951263428, |
|
"logits/rejected": -4.148402214050293, |
|
"logps/chosen": -419.32171630859375, |
|
"logps/rejected": -328.2819519042969, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.018884066492319107, |
|
"rewards/margins": 0.00994439609348774, |
|
"rewards/rejected": 0.008939670398831367, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -4.287269115447998, |
|
"logits/rejected": -4.127593040466309, |
|
"logps/chosen": -386.55499267578125, |
|
"logps/rejected": -294.11505126953125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.013925912790000439, |
|
"rewards/margins": 0.008140355348587036, |
|
"rewards/rejected": 0.005785556975752115, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -4.2739386558532715, |
|
"logits/rejected": -4.1460676193237305, |
|
"logps/chosen": -406.03900146484375, |
|
"logps/rejected": -317.59918212890625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.526562511920929, |
|
"rewards/chosen": 0.013792428188025951, |
|
"rewards/margins": 0.004698522854596376, |
|
"rewards/rejected": 0.009093904867768288, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -4.243307113647461, |
|
"logits/rejected": -4.1257524490356445, |
|
"logps/chosen": -389.49627685546875, |
|
"logps/rejected": -310.25921630859375, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.01434385310858488, |
|
"rewards/margins": 0.007951314561069012, |
|
"rewards/rejected": 0.006392539478838444, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -4.276772975921631, |
|
"logits/rejected": -4.147823333740234, |
|
"logps/chosen": -384.64141845703125, |
|
"logps/rejected": -305.95355224609375, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.010843750089406967, |
|
"rewards/margins": 0.003594112815335393, |
|
"rewards/rejected": 0.0072496384382247925, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -4.259124279022217, |
|
"logits/rejected": -4.154895782470703, |
|
"logps/chosen": -404.73663330078125, |
|
"logps/rejected": -332.0699462890625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.010340576991438866, |
|
"rewards/margins": 0.002993339207023382, |
|
"rewards/rejected": 0.007347238250076771, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -4.2700676918029785, |
|
"logits/rejected": -4.160883903503418, |
|
"logps/chosen": -406.1430358886719, |
|
"logps/rejected": -311.64459228515625, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.012120211496949196, |
|
"rewards/margins": 0.006281781941652298, |
|
"rewards/rejected": 0.005838429089635611, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -4.2109479904174805, |
|
"logits/rejected": -4.121271133422852, |
|
"logps/chosen": -390.29046630859375, |
|
"logps/rejected": -306.08843994140625, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.012644372880458832, |
|
"rewards/margins": 0.0077982256188988686, |
|
"rewards/rejected": 0.004846146795898676, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -4.242735385894775, |
|
"logits/rejected": -4.120673179626465, |
|
"logps/chosen": -396.1617431640625, |
|
"logps/rejected": -299.79345703125, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.013311442919075489, |
|
"rewards/margins": 0.006340789142996073, |
|
"rewards/rejected": 0.006970655173063278, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -4.255246162414551, |
|
"logits/rejected": -4.130453109741211, |
|
"logps/chosen": -393.3377380371094, |
|
"logps/rejected": -320.06109619140625, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.014351250603795052, |
|
"rewards/margins": 0.00757851917296648, |
|
"rewards/rejected": 0.006772731896489859, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -4.267707347869873, |
|
"logits/rejected": -4.147231101989746, |
|
"logps/chosen": -414.69256591796875, |
|
"logps/rejected": -318.17291259765625, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.542187511920929, |
|
"rewards/chosen": 0.012826653197407722, |
|
"rewards/margins": 0.005257748067378998, |
|
"rewards/rejected": 0.00756890419870615, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -4.258088111877441, |
|
"logits/rejected": -4.129515647888184, |
|
"logps/chosen": -401.8673400878906, |
|
"logps/rejected": -315.34417724609375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.017096903175115585, |
|
"rewards/margins": 0.009566163644194603, |
|
"rewards/rejected": 0.0075307427905499935, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -4.2839155197143555, |
|
"logits/rejected": -4.152881622314453, |
|
"logps/chosen": -420.7337951660156, |
|
"logps/rejected": -324.15985107421875, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.013899828307330608, |
|
"rewards/margins": 0.006815521512180567, |
|
"rewards/rejected": 0.00708430539816618, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -4.261233329772949, |
|
"logits/rejected": -4.125912666320801, |
|
"logps/chosen": -408.70989990234375, |
|
"logps/rejected": -311.13983154296875, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.018134312704205513, |
|
"rewards/margins": 0.011861599050462246, |
|
"rewards/rejected": 0.00627271318808198, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -4.2318902015686035, |
|
"logits/rejected": -4.125788688659668, |
|
"logps/chosen": -421.2041015625, |
|
"logps/rejected": -331.3543701171875, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.01709870807826519, |
|
"rewards/margins": 0.005781983956694603, |
|
"rewards/rejected": 0.011316723190248013, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -4.270097732543945, |
|
"logits/rejected": -4.112766265869141, |
|
"logps/chosen": -415.7230529785156, |
|
"logps/rejected": -309.1680603027344, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.016786256805062294, |
|
"rewards/margins": 0.010718188248574734, |
|
"rewards/rejected": 0.006068066693842411, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -4.254898548126221, |
|
"logits/rejected": -4.1117353439331055, |
|
"logps/chosen": -406.07330322265625, |
|
"logps/rejected": -307.8787841796875, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.016035914421081543, |
|
"rewards/margins": 0.009246991947293282, |
|
"rewards/rejected": 0.006788922939449549, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -4.308491230010986, |
|
"logits/rejected": -4.162188529968262, |
|
"logps/chosen": -414.5621643066406, |
|
"logps/rejected": -300.40106201171875, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.019205499440431595, |
|
"rewards/margins": 0.009419824928045273, |
|
"rewards/rejected": 0.009785676375031471, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -4.263808250427246, |
|
"logits/rejected": -4.1408891677856445, |
|
"logps/chosen": -409.64141845703125, |
|
"logps/rejected": -318.11041259765625, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5296875238418579, |
|
"rewards/chosen": 0.014620177447795868, |
|
"rewards/margins": 0.007014470640569925, |
|
"rewards/rejected": 0.007605706341564655, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -4.306565284729004, |
|
"logits/rejected": -4.195437431335449, |
|
"logps/chosen": -398.6597595214844, |
|
"logps/rejected": -313.44366455078125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5328124761581421, |
|
"rewards/chosen": 0.013173435814678669, |
|
"rewards/margins": 0.004629576578736305, |
|
"rewards/rejected": 0.008543858304619789, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -4.287682056427002, |
|
"logits/rejected": -4.170054912567139, |
|
"logps/chosen": -413.82818603515625, |
|
"logps/rejected": -336.8646545410156, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.016411561518907547, |
|
"rewards/margins": 0.005662465933710337, |
|
"rewards/rejected": 0.0107490923255682, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -4.2755303382873535, |
|
"logits/rejected": -4.166233062744141, |
|
"logps/chosen": -372.7901306152344, |
|
"logps/rejected": -299.3600158691406, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.015290270559489727, |
|
"rewards/margins": 0.007844468578696251, |
|
"rewards/rejected": 0.0074458010494709015, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -4.297582149505615, |
|
"logits/rejected": -4.127498149871826, |
|
"logps/chosen": -397.54498291015625, |
|
"logps/rejected": -304.5601501464844, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.01460904348641634, |
|
"rewards/margins": 0.0060775866732001305, |
|
"rewards/rejected": 0.00853145681321621, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -4.267261028289795, |
|
"logits/rejected": -4.143542289733887, |
|
"logps/chosen": -438.79937744140625, |
|
"logps/rejected": -327.8368225097656, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.017624245956540108, |
|
"rewards/margins": 0.009726700372993946, |
|
"rewards/rejected": 0.007897543720901012, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -4.271047115325928, |
|
"logits/rejected": -4.188268661499023, |
|
"logps/chosen": -386.79815673828125, |
|
"logps/rejected": -312.2374572753906, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.013870956376194954, |
|
"rewards/margins": 0.004729996435344219, |
|
"rewards/rejected": 0.00914095900952816, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -4.247762203216553, |
|
"logits/rejected": -4.124339580535889, |
|
"logps/chosen": -399.97686767578125, |
|
"logps/rejected": -318.4253845214844, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.01522988360375166, |
|
"rewards/margins": 0.008996319025754929, |
|
"rewards/rejected": 0.0062335641123354435, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -4.297530174255371, |
|
"logits/rejected": -4.151538372039795, |
|
"logps/chosen": -411.59027099609375, |
|
"logps/rejected": -309.5844421386719, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.01565638557076454, |
|
"rewards/margins": 0.008750095032155514, |
|
"rewards/rejected": 0.006906290538609028, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -4.267110347747803, |
|
"logits/rejected": -4.1285834312438965, |
|
"logps/chosen": -399.1042785644531, |
|
"logps/rejected": -314.04193115234375, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5640624761581421, |
|
"rewards/chosen": 0.01778355799615383, |
|
"rewards/margins": 0.010103506036102772, |
|
"rewards/rejected": 0.007680053357034922, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -4.260018825531006, |
|
"logits/rejected": -4.148495197296143, |
|
"logps/chosen": -393.2950744628906, |
|
"logps/rejected": -308.5839538574219, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.01795302703976631, |
|
"rewards/margins": 0.009579015895724297, |
|
"rewards/rejected": 0.008374011144042015, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -4.2746124267578125, |
|
"logits/rejected": -4.14363956451416, |
|
"logps/chosen": -423.131103515625, |
|
"logps/rejected": -334.2845458984375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": 0.01808355748653412, |
|
"rewards/margins": 0.008949248120188713, |
|
"rewards/rejected": 0.009134308435022831, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -4.257375717163086, |
|
"logits/rejected": -4.1283063888549805, |
|
"logps/chosen": -409.4438171386719, |
|
"logps/rejected": -327.1435852050781, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.016801532357931137, |
|
"rewards/margins": 0.007867367006838322, |
|
"rewards/rejected": 0.008934165351092815, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -4.274910926818848, |
|
"logits/rejected": -4.14418888092041, |
|
"logps/chosen": -400.5626220703125, |
|
"logps/rejected": -311.94171142578125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.013942083343863487, |
|
"rewards/margins": 0.006330497562885284, |
|
"rewards/rejected": 0.0076115853153169155, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -4.279542446136475, |
|
"logits/rejected": -4.158401012420654, |
|
"logps/chosen": -405.467529296875, |
|
"logps/rejected": -329.11602783203125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.018447261303663254, |
|
"rewards/margins": 0.006302011664956808, |
|
"rewards/rejected": 0.012145251035690308, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -4.192010879516602, |
|
"eval_logits/rejected": -4.082387447357178, |
|
"eval_logps/chosen": -402.46429443359375, |
|
"eval_logps/rejected": -315.65875244140625, |
|
"eval_loss": 0.6883671879768372, |
|
"eval_rewards/accuracies": 0.5569999814033508, |
|
"eval_rewards/chosen": 0.019122228026390076, |
|
"eval_rewards/margins": 0.010481986217200756, |
|
"eval_rewards/rejected": 0.00864024180918932, |
|
"eval_runtime": 765.0828, |
|
"eval_samples_per_second": 2.614, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -4.2764363288879395, |
|
"logits/rejected": -4.1841230392456055, |
|
"logps/chosen": -391.84844970703125, |
|
"logps/rejected": -318.74786376953125, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.017281491309404373, |
|
"rewards/margins": 0.008870037272572517, |
|
"rewards/rejected": 0.008411452174186707, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -4.290091514587402, |
|
"logits/rejected": -4.141688346862793, |
|
"logps/chosen": -413.09112548828125, |
|
"logps/rejected": -315.75860595703125, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.01853874884545803, |
|
"rewards/margins": 0.010042714886367321, |
|
"rewards/rejected": 0.008496033027768135, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -4.286593437194824, |
|
"logits/rejected": -4.1507697105407715, |
|
"logps/chosen": -389.1523132324219, |
|
"logps/rejected": -307.9405822753906, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.018895253539085388, |
|
"rewards/margins": 0.011058597825467587, |
|
"rewards/rejected": 0.007836655713617802, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -4.287877559661865, |
|
"logits/rejected": -4.147292613983154, |
|
"logps/chosen": -416.8997497558594, |
|
"logps/rejected": -327.71551513671875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.018623776733875275, |
|
"rewards/margins": 0.009957761503756046, |
|
"rewards/rejected": 0.008666014298796654, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -4.2642436027526855, |
|
"logits/rejected": -4.148723125457764, |
|
"logps/chosen": -388.62054443359375, |
|
"logps/rejected": -313.1318359375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.01719742640852928, |
|
"rewards/margins": 0.005715816281735897, |
|
"rewards/rejected": 0.011481606401503086, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -4.268927097320557, |
|
"logits/rejected": -4.110062599182129, |
|
"logps/chosen": -428.08935546875, |
|
"logps/rejected": -315.6456604003906, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0186283178627491, |
|
"rewards/margins": 0.010059954598546028, |
|
"rewards/rejected": 0.008568364195525646, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -4.257794380187988, |
|
"logits/rejected": -4.151054859161377, |
|
"logps/chosen": -390.84942626953125, |
|
"logps/rejected": -318.6888732910156, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.016992371529340744, |
|
"rewards/margins": 0.00896035972982645, |
|
"rewards/rejected": 0.008032011799514294, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -4.262537956237793, |
|
"logits/rejected": -4.151637077331543, |
|
"logps/chosen": -397.4742431640625, |
|
"logps/rejected": -319.90289306640625, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.017107700929045677, |
|
"rewards/margins": 0.00891804601997137, |
|
"rewards/rejected": 0.008189653977751732, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -4.258824348449707, |
|
"logits/rejected": -4.141880989074707, |
|
"logps/chosen": -393.25311279296875, |
|
"logps/rejected": -307.5327453613281, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.01972380466759205, |
|
"rewards/margins": 0.008552981540560722, |
|
"rewards/rejected": 0.011170822195708752, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -4.245623588562012, |
|
"logits/rejected": -4.144326686859131, |
|
"logps/chosen": -403.1796569824219, |
|
"logps/rejected": -326.97894287109375, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.01679617539048195, |
|
"rewards/margins": 0.004923067055642605, |
|
"rewards/rejected": 0.011873109266161919, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -4.275112152099609, |
|
"logits/rejected": -4.147688388824463, |
|
"logps/chosen": -394.7803039550781, |
|
"logps/rejected": -311.6099548339844, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.01747475564479828, |
|
"rewards/margins": 0.00970934983342886, |
|
"rewards/rejected": 0.00776540394872427, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -4.274272918701172, |
|
"logits/rejected": -4.157819747924805, |
|
"logps/chosen": -378.563720703125, |
|
"logps/rejected": -302.5975036621094, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": 0.01942756399512291, |
|
"rewards/margins": 0.008909397758543491, |
|
"rewards/rejected": 0.010518166236579418, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -4.260178565979004, |
|
"logits/rejected": -4.149471759796143, |
|
"logps/chosen": -418.959716796875, |
|
"logps/rejected": -332.4044494628906, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.017765840515494347, |
|
"rewards/margins": 0.008554233238101006, |
|
"rewards/rejected": 0.009211607277393341, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -4.2685041427612305, |
|
"logits/rejected": -4.117525100708008, |
|
"logps/chosen": -430.4039001464844, |
|
"logps/rejected": -319.9500732421875, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.022361256182193756, |
|
"rewards/margins": 0.01272774301469326, |
|
"rewards/rejected": 0.009633513167500496, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -4.2741522789001465, |
|
"logits/rejected": -4.18676233291626, |
|
"logps/chosen": -379.95263671875, |
|
"logps/rejected": -313.4046936035156, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.01609444059431553, |
|
"rewards/margins": 0.009009727276861668, |
|
"rewards/rejected": 0.007084711454808712, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -4.279686450958252, |
|
"logits/rejected": -4.155128479003906, |
|
"logps/chosen": -416.2315368652344, |
|
"logps/rejected": -321.23992919921875, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.01688700169324875, |
|
"rewards/margins": 0.005368704441934824, |
|
"rewards/rejected": 0.011518299579620361, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -4.279124736785889, |
|
"logits/rejected": -4.1183061599731445, |
|
"logps/chosen": -428.96142578125, |
|
"logps/rejected": -316.525146484375, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.019147472456097603, |
|
"rewards/margins": 0.008967303670942783, |
|
"rewards/rejected": 0.01018016878515482, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -4.268794059753418, |
|
"logits/rejected": -4.155481815338135, |
|
"logps/chosen": -411.06573486328125, |
|
"logps/rejected": -329.96044921875, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.020965853706002235, |
|
"rewards/margins": 0.009673124179244041, |
|
"rewards/rejected": 0.011292731389403343, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -4.276661396026611, |
|
"logits/rejected": -4.141108512878418, |
|
"logps/chosen": -391.82391357421875, |
|
"logps/rejected": -302.3670349121094, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.02172374725341797, |
|
"rewards/margins": 0.012819238007068634, |
|
"rewards/rejected": 0.008904511108994484, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -4.27203369140625, |
|
"logits/rejected": -4.1426167488098145, |
|
"logps/chosen": -420.7378845214844, |
|
"logps/rejected": -312.4131774902344, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.018989499658346176, |
|
"rewards/margins": 0.009152286686003208, |
|
"rewards/rejected": 0.009837212972342968, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -4.257569789886475, |
|
"logits/rejected": -4.129474639892578, |
|
"logps/chosen": -409.93609619140625, |
|
"logps/rejected": -317.70989990234375, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5765625238418579, |
|
"rewards/chosen": 0.019030291587114334, |
|
"rewards/margins": 0.01004733331501484, |
|
"rewards/rejected": 0.008982958272099495, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -4.251282691955566, |
|
"logits/rejected": -4.162935256958008, |
|
"logps/chosen": -396.5443420410156, |
|
"logps/rejected": -335.5440368652344, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.020548541098833084, |
|
"rewards/margins": 0.009832927957177162, |
|
"rewards/rejected": 0.010715610347688198, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -4.25177526473999, |
|
"logits/rejected": -4.121354579925537, |
|
"logps/chosen": -406.2722473144531, |
|
"logps/rejected": -319.5417175292969, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.022125694900751114, |
|
"rewards/margins": 0.00926921982318163, |
|
"rewards/rejected": 0.01285647600889206, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -4.285470485687256, |
|
"logits/rejected": -4.150871276855469, |
|
"logps/chosen": -422.54071044921875, |
|
"logps/rejected": -323.01458740234375, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.023092512041330338, |
|
"rewards/margins": 0.010742807760834694, |
|
"rewards/rejected": 0.012349705211818218, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -4.2822346687316895, |
|
"logits/rejected": -4.1521124839782715, |
|
"logps/chosen": -414.7969665527344, |
|
"logps/rejected": -330.4766540527344, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": 0.021155862137675285, |
|
"rewards/margins": 0.010015945881605148, |
|
"rewards/rejected": 0.011139917187392712, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -4.249444961547852, |
|
"logits/rejected": -4.152978897094727, |
|
"logps/chosen": -389.48052978515625, |
|
"logps/rejected": -311.3460693359375, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.020100217312574387, |
|
"rewards/margins": 0.01170959509909153, |
|
"rewards/rejected": 0.008390624076128006, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -4.282201290130615, |
|
"logits/rejected": -4.174456596374512, |
|
"logps/chosen": -396.2969665527344, |
|
"logps/rejected": -308.55548095703125, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.020026249811053276, |
|
"rewards/margins": 0.008474086411297321, |
|
"rewards/rejected": 0.01155216433107853, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -4.26694393157959, |
|
"logits/rejected": -4.153203010559082, |
|
"logps/chosen": -390.5587158203125, |
|
"logps/rejected": -312.5738830566406, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.01699787937104702, |
|
"rewards/margins": 0.009081227704882622, |
|
"rewards/rejected": 0.007916653528809547, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -4.26552677154541, |
|
"logits/rejected": -4.145693778991699, |
|
"logps/chosen": -413.443115234375, |
|
"logps/rejected": -327.2570495605469, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.022313930094242096, |
|
"rewards/margins": 0.009759850800037384, |
|
"rewards/rejected": 0.012554079294204712, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -4.264178276062012, |
|
"logits/rejected": -4.172913551330566, |
|
"logps/chosen": -407.66339111328125, |
|
"logps/rejected": -321.8123779296875, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.023416642099618912, |
|
"rewards/margins": 0.010417203418910503, |
|
"rewards/rejected": 0.012999439612030983, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -4.250650405883789, |
|
"logits/rejected": -4.140833854675293, |
|
"logps/chosen": -374.83990478515625, |
|
"logps/rejected": -302.1290588378906, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.019031699746847153, |
|
"rewards/margins": 0.008602599613368511, |
|
"rewards/rejected": 0.010429101064801216, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -4.270743370056152, |
|
"logits/rejected": -4.151588439941406, |
|
"logps/chosen": -443.17791748046875, |
|
"logps/rejected": -332.28302001953125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.01945783570408821, |
|
"rewards/margins": 0.004704002290964127, |
|
"rewards/rejected": 0.014753831550478935, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -4.275900840759277, |
|
"logits/rejected": -4.1336822509765625, |
|
"logps/chosen": -416.0301208496094, |
|
"logps/rejected": -323.3976135253906, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5640624761581421, |
|
"rewards/chosen": 0.021596388891339302, |
|
"rewards/margins": 0.012395900674164295, |
|
"rewards/rejected": 0.009200489148497581, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -4.26046085357666, |
|
"logits/rejected": -4.138212203979492, |
|
"logps/chosen": -397.1346740722656, |
|
"logps/rejected": -319.06781005859375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.021542087197303772, |
|
"rewards/margins": 0.009193692356348038, |
|
"rewards/rejected": 0.012348394840955734, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -4.280481815338135, |
|
"logits/rejected": -4.179018974304199, |
|
"logps/chosen": -407.4460754394531, |
|
"logps/rejected": -328.47021484375, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.01951112225651741, |
|
"rewards/margins": 0.011365312151610851, |
|
"rewards/rejected": 0.008145810104906559, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -4.224648952484131, |
|
"logits/rejected": -4.105835914611816, |
|
"logps/chosen": -381.49658203125, |
|
"logps/rejected": -303.1542663574219, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.020115623250603676, |
|
"rewards/margins": 0.009458022192120552, |
|
"rewards/rejected": 0.010657599195837975, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -4.270878791809082, |
|
"logits/rejected": -4.1364850997924805, |
|
"logps/chosen": -407.75909423828125, |
|
"logps/rejected": -300.38336181640625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.019836071878671646, |
|
"rewards/margins": 0.010231700725853443, |
|
"rewards/rejected": 0.009604370221495628, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -4.28275203704834, |
|
"logits/rejected": -4.161673545837402, |
|
"logps/chosen": -389.09051513671875, |
|
"logps/rejected": -298.67401123046875, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.022033553570508957, |
|
"rewards/margins": 0.010990725830197334, |
|
"rewards/rejected": 0.011042827740311623, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -4.2564802169799805, |
|
"logits/rejected": -4.108375549316406, |
|
"logps/chosen": -383.16119384765625, |
|
"logps/rejected": -286.8339538574219, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.019884765148162842, |
|
"rewards/margins": 0.007881352677941322, |
|
"rewards/rejected": 0.012003413401544094, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -4.2984795570373535, |
|
"logits/rejected": -4.14174747467041, |
|
"logps/chosen": -417.61273193359375, |
|
"logps/rejected": -321.7851867675781, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.023209992796182632, |
|
"rewards/margins": 0.013400438241660595, |
|
"rewards/rejected": 0.009809553623199463, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -4.233872413635254, |
|
"logits/rejected": -4.129950046539307, |
|
"logps/chosen": -393.71453857421875, |
|
"logps/rejected": -327.7266845703125, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.526562511920929, |
|
"rewards/chosen": 0.017995553091168404, |
|
"rewards/margins": 0.006606035865843296, |
|
"rewards/rejected": 0.011389517225325108, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -4.279402732849121, |
|
"logits/rejected": -4.134713172912598, |
|
"logps/chosen": -390.8346862792969, |
|
"logps/rejected": -306.3309020996094, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.02169058658182621, |
|
"rewards/margins": 0.008909964933991432, |
|
"rewards/rejected": 0.012780621647834778, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -4.276646614074707, |
|
"logits/rejected": -4.130280017852783, |
|
"logps/chosen": -404.8982849121094, |
|
"logps/rejected": -310.2183532714844, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": 0.022115709260106087, |
|
"rewards/margins": 0.012477119453251362, |
|
"rewards/rejected": 0.00963858887553215, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -4.257068634033203, |
|
"logits/rejected": -4.119657516479492, |
|
"logps/chosen": -405.00750732421875, |
|
"logps/rejected": -309.30023193359375, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.021267231553792953, |
|
"rewards/margins": 0.007724496070295572, |
|
"rewards/rejected": 0.013542735949158669, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -4.301741600036621, |
|
"logits/rejected": -4.1736650466918945, |
|
"logps/chosen": -388.27923583984375, |
|
"logps/rejected": -308.6962890625, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.01904493011534214, |
|
"rewards/margins": 0.008867397904396057, |
|
"rewards/rejected": 0.010177532210946083, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -4.262604236602783, |
|
"logits/rejected": -4.160672187805176, |
|
"logps/chosen": -396.7488708496094, |
|
"logps/rejected": -320.4295654296875, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.027415934950113297, |
|
"rewards/margins": 0.016748551279306412, |
|
"rewards/rejected": 0.010667381808161736, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -4.258942604064941, |
|
"logits/rejected": -4.159635543823242, |
|
"logps/chosen": -410.4764099121094, |
|
"logps/rejected": -334.953369140625, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.021485131233930588, |
|
"rewards/margins": 0.00812376569956541, |
|
"rewards/rejected": 0.013361366465687752, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -4.264720916748047, |
|
"logits/rejected": -4.117271900177002, |
|
"logps/chosen": -385.35052490234375, |
|
"logps/rejected": -290.49114990234375, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.018992017954587936, |
|
"rewards/margins": 0.008459472097456455, |
|
"rewards/rejected": 0.01053254771977663, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -4.276088714599609, |
|
"logits/rejected": -4.1584858894348145, |
|
"logps/chosen": -397.9452209472656, |
|
"logps/rejected": -306.89739990234375, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.020109858363866806, |
|
"rewards/margins": 0.011120992712676525, |
|
"rewards/rejected": 0.008988862857222557, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -4.260004997253418, |
|
"logits/rejected": -4.13455867767334, |
|
"logps/chosen": -389.76397705078125, |
|
"logps/rejected": -304.0987548828125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.589062511920929, |
|
"rewards/chosen": 0.021830763667821884, |
|
"rewards/margins": 0.013007350265979767, |
|
"rewards/rejected": 0.008823414333164692, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -4.2787675857543945, |
|
"logits/rejected": -4.1580352783203125, |
|
"logps/chosen": -410.0357971191406, |
|
"logps/rejected": -329.86334228515625, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02292051911354065, |
|
"rewards/margins": 0.008661197498440742, |
|
"rewards/rejected": 0.014259323477745056, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -4.27437686920166, |
|
"logits/rejected": -4.147732734680176, |
|
"logps/chosen": -401.8902587890625, |
|
"logps/rejected": -296.3741760253906, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": 0.024159640073776245, |
|
"rewards/margins": 0.01200934313237667, |
|
"rewards/rejected": 0.012150297872722149, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -4.271141529083252, |
|
"logits/rejected": -4.141169548034668, |
|
"logps/chosen": -401.7086181640625, |
|
"logps/rejected": -319.8025817871094, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.023453358560800552, |
|
"rewards/margins": 0.01219714991748333, |
|
"rewards/rejected": 0.011256209574639797, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -4.2775139808654785, |
|
"logits/rejected": -4.150107383728027, |
|
"logps/chosen": -426.84576416015625, |
|
"logps/rejected": -320.8899230957031, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.02233419567346573, |
|
"rewards/margins": 0.011261718347668648, |
|
"rewards/rejected": 0.011072477325797081, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -4.264378547668457, |
|
"logits/rejected": -4.172327995300293, |
|
"logps/chosen": -404.49420166015625, |
|
"logps/rejected": -316.01739501953125, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.5765625238418579, |
|
"rewards/chosen": 0.02468792162835598, |
|
"rewards/margins": 0.014898866415023804, |
|
"rewards/rejected": 0.009789055213332176, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -4.274040222167969, |
|
"logits/rejected": -4.133544921875, |
|
"logps/chosen": -412.9867248535156, |
|
"logps/rejected": -303.045166015625, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5609375238418579, |
|
"rewards/chosen": 0.024420084431767464, |
|
"rewards/margins": 0.011113069020211697, |
|
"rewards/rejected": 0.013307017274200916, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -4.268971920013428, |
|
"logits/rejected": -4.1228179931640625, |
|
"logps/chosen": -412.2561950683594, |
|
"logps/rejected": -311.38623046875, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02301056683063507, |
|
"rewards/margins": 0.011502142064273357, |
|
"rewards/rejected": 0.011508422903716564, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -4.249145030975342, |
|
"logits/rejected": -4.131203651428223, |
|
"logps/chosen": -407.7536926269531, |
|
"logps/rejected": -336.33172607421875, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.024292880669236183, |
|
"rewards/margins": 0.009482759051024914, |
|
"rewards/rejected": 0.01481011975556612, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -4.284695625305176, |
|
"logits/rejected": -4.161882400512695, |
|
"logps/chosen": -408.2262878417969, |
|
"logps/rejected": -316.01239013671875, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.020047323778271675, |
|
"rewards/margins": 0.009101735427975655, |
|
"rewards/rejected": 0.01094558835029602, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -4.280055522918701, |
|
"logits/rejected": -4.136964321136475, |
|
"logps/chosen": -430.8373107910156, |
|
"logps/rejected": -321.91973876953125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.024292441084980965, |
|
"rewards/margins": 0.013177357614040375, |
|
"rewards/rejected": 0.01111508347094059, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -4.265179634094238, |
|
"logits/rejected": -4.146527290344238, |
|
"logps/chosen": -391.67169189453125, |
|
"logps/rejected": -320.75372314453125, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.02059927210211754, |
|
"rewards/margins": 0.011022168211638927, |
|
"rewards/rejected": 0.009577102959156036, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -4.287339210510254, |
|
"logits/rejected": -4.139233112335205, |
|
"logps/chosen": -410.2044372558594, |
|
"logps/rejected": -304.69049072265625, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.024769442155957222, |
|
"rewards/margins": 0.013655883260071278, |
|
"rewards/rejected": 0.011113559827208519, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -4.297701835632324, |
|
"logits/rejected": -4.167489051818848, |
|
"logps/chosen": -416.37469482421875, |
|
"logps/rejected": -326.1492614746094, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.023087535053491592, |
|
"rewards/margins": 0.013127269223332405, |
|
"rewards/rejected": 0.009960266761481762, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -4.28665828704834, |
|
"logits/rejected": -4.161301612854004, |
|
"logps/chosen": -382.5966796875, |
|
"logps/rejected": -317.6145935058594, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.021074790507555008, |
|
"rewards/margins": 0.010219180956482887, |
|
"rewards/rejected": 0.010855610482394695, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -4.263566017150879, |
|
"logits/rejected": -4.153146266937256, |
|
"logps/chosen": -402.6602478027344, |
|
"logps/rejected": -314.2333068847656, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.0247223861515522, |
|
"rewards/margins": 0.01265608798712492, |
|
"rewards/rejected": 0.012066296301782131, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -4.286923885345459, |
|
"logits/rejected": -4.174257278442383, |
|
"logps/chosen": -401.7779541015625, |
|
"logps/rejected": -322.3847961425781, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5765625238418579, |
|
"rewards/chosen": 0.024542078375816345, |
|
"rewards/margins": 0.01016208902001381, |
|
"rewards/rejected": 0.014379991218447685, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -4.264492988586426, |
|
"logits/rejected": -4.1116790771484375, |
|
"logps/chosen": -414.8555603027344, |
|
"logps/rejected": -294.9403381347656, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.589062511920929, |
|
"rewards/chosen": 0.0254591666162014, |
|
"rewards/margins": 0.014859716407954693, |
|
"rewards/rejected": 0.010599448345601559, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -4.258959770202637, |
|
"logits/rejected": -4.129164695739746, |
|
"logps/chosen": -404.98980712890625, |
|
"logps/rejected": -310.5729675292969, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.021537696942687035, |
|
"rewards/margins": 0.010314391925930977, |
|
"rewards/rejected": 0.011223304085433483, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -4.262757301330566, |
|
"logits/rejected": -4.128348350524902, |
|
"logps/chosen": -396.00445556640625, |
|
"logps/rejected": -306.8481140136719, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.573437511920929, |
|
"rewards/chosen": 0.02303687483072281, |
|
"rewards/margins": 0.014214645139873028, |
|
"rewards/rejected": 0.008822232484817505, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -4.266745567321777, |
|
"logits/rejected": -4.137091159820557, |
|
"logps/chosen": -402.24993896484375, |
|
"logps/rejected": -299.17864990234375, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.023752233013510704, |
|
"rewards/margins": 0.010097989812493324, |
|
"rewards/rejected": 0.01365424133837223, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -4.23615026473999, |
|
"logits/rejected": -4.157704830169678, |
|
"logps/chosen": -375.03277587890625, |
|
"logps/rejected": -310.47540283203125, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.024319518357515335, |
|
"rewards/margins": 0.013446244411170483, |
|
"rewards/rejected": 0.010873274877667427, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -4.296034812927246, |
|
"logits/rejected": -4.151383399963379, |
|
"logps/chosen": -410.50146484375, |
|
"logps/rejected": -302.6440734863281, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02242346480488777, |
|
"rewards/margins": 0.010714459232985973, |
|
"rewards/rejected": 0.011709003709256649, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -4.266396522521973, |
|
"logits/rejected": -4.138249397277832, |
|
"logps/chosen": -404.47039794921875, |
|
"logps/rejected": -302.8034362792969, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.024602141231298447, |
|
"rewards/margins": 0.011944174766540527, |
|
"rewards/rejected": 0.012657967396080494, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -4.252145767211914, |
|
"logits/rejected": -4.149975776672363, |
|
"logps/chosen": -393.45355224609375, |
|
"logps/rejected": -314.1678771972656, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.02430255338549614, |
|
"rewards/margins": 0.01004251278936863, |
|
"rewards/rejected": 0.014260041527450085, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -4.265524864196777, |
|
"logits/rejected": -4.1408796310424805, |
|
"logps/chosen": -405.55511474609375, |
|
"logps/rejected": -310.058837890625, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.021910104900598526, |
|
"rewards/margins": 0.011513815261423588, |
|
"rewards/rejected": 0.010396288707852364, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -4.2873334884643555, |
|
"logits/rejected": -4.150042533874512, |
|
"logps/chosen": -411.6018981933594, |
|
"logps/rejected": -312.466552734375, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.023441951721906662, |
|
"rewards/margins": 0.01284896582365036, |
|
"rewards/rejected": 0.010592986829578876, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -4.27925968170166, |
|
"logits/rejected": -4.131691932678223, |
|
"logps/chosen": -414.133056640625, |
|
"logps/rejected": -314.72442626953125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.02151947282254696, |
|
"rewards/margins": 0.00846365001052618, |
|
"rewards/rejected": 0.013055823743343353, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -4.298044681549072, |
|
"logits/rejected": -4.16172981262207, |
|
"logps/chosen": -410.84063720703125, |
|
"logps/rejected": -313.7170104980469, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5015624761581421, |
|
"rewards/chosen": 0.01963016204535961, |
|
"rewards/margins": 0.0036624562926590443, |
|
"rewards/rejected": 0.015967708081007004, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -4.293381690979004, |
|
"logits/rejected": -4.154776573181152, |
|
"logps/chosen": -410.7498474121094, |
|
"logps/rejected": -313.6238708496094, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.023770933970808983, |
|
"rewards/margins": 0.014910402707755566, |
|
"rewards/rejected": 0.008860534057021141, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -4.240599632263184, |
|
"logits/rejected": -4.130780220031738, |
|
"logps/chosen": -411.70074462890625, |
|
"logps/rejected": -340.7054443359375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.023142261430621147, |
|
"rewards/margins": 0.007068459875881672, |
|
"rewards/rejected": 0.0160738043487072, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -4.274569511413574, |
|
"logits/rejected": -4.144467830657959, |
|
"logps/chosen": -390.971923828125, |
|
"logps/rejected": -306.0947265625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.02243395894765854, |
|
"rewards/margins": 0.010051446035504341, |
|
"rewards/rejected": 0.012382512912154198, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -4.260807037353516, |
|
"logits/rejected": -4.1508378982543945, |
|
"logps/chosen": -390.257080078125, |
|
"logps/rejected": -308.8485107421875, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.02236488275229931, |
|
"rewards/margins": 0.008465753868222237, |
|
"rewards/rejected": 0.013899129815399647, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -4.267168998718262, |
|
"logits/rejected": -4.142585277557373, |
|
"logps/chosen": -400.91070556640625, |
|
"logps/rejected": -305.3719177246094, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.022190194576978683, |
|
"rewards/margins": 0.009010069072246552, |
|
"rewards/rejected": 0.013180124573409557, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -4.2551798820495605, |
|
"logits/rejected": -4.179601669311523, |
|
"logps/chosen": -403.3457946777344, |
|
"logps/rejected": -337.92742919921875, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.02564335986971855, |
|
"rewards/margins": 0.012334323488175869, |
|
"rewards/rejected": 0.013309036381542683, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -4.273959159851074, |
|
"logits/rejected": -4.142548084259033, |
|
"logps/chosen": -399.456787109375, |
|
"logps/rejected": -308.5451965332031, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": 0.02145221456885338, |
|
"rewards/margins": 0.009570146910846233, |
|
"rewards/rejected": 0.01188206858932972, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -4.258917808532715, |
|
"logits/rejected": -4.150449275970459, |
|
"logps/chosen": -403.0406494140625, |
|
"logps/rejected": -322.05072021484375, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.024062659591436386, |
|
"rewards/margins": 0.0073325140401721, |
|
"rewards/rejected": 0.01673014461994171, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -4.2558393478393555, |
|
"logits/rejected": -4.121561527252197, |
|
"logps/chosen": -406.07708740234375, |
|
"logps/rejected": -318.92083740234375, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.026952465996146202, |
|
"rewards/margins": 0.0162807684391737, |
|
"rewards/rejected": 0.010671699419617653, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -4.284726142883301, |
|
"logits/rejected": -4.145880222320557, |
|
"logps/chosen": -407.32513427734375, |
|
"logps/rejected": -315.43463134765625, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": 0.025830427184700966, |
|
"rewards/margins": 0.01146793458610773, |
|
"rewards/rejected": 0.01436249352991581, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -4.281344413757324, |
|
"logits/rejected": -4.137038230895996, |
|
"logps/chosen": -425.17877197265625, |
|
"logps/rejected": -322.626953125, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.024241294711828232, |
|
"rewards/margins": 0.012439909391105175, |
|
"rewards/rejected": 0.011801382526755333, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -4.255224227905273, |
|
"logits/rejected": -4.1160664558410645, |
|
"logps/chosen": -417.343017578125, |
|
"logps/rejected": -317.6444091796875, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.020717119798064232, |
|
"rewards/margins": 0.00756122637540102, |
|
"rewards/rejected": 0.013155892491340637, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -4.262511730194092, |
|
"logits/rejected": -4.158580780029297, |
|
"logps/chosen": -414.7383728027344, |
|
"logps/rejected": -330.22784423828125, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.026073377579450607, |
|
"rewards/margins": 0.00955992005765438, |
|
"rewards/rejected": 0.016513461247086525, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -4.292693138122559, |
|
"logits/rejected": -4.152438163757324, |
|
"logps/chosen": -409.6170349121094, |
|
"logps/rejected": -313.6919860839844, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.551562488079071, |
|
"rewards/chosen": 0.027544280514121056, |
|
"rewards/margins": 0.013068397529423237, |
|
"rewards/rejected": 0.014475886709988117, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -4.245689392089844, |
|
"logits/rejected": -4.140617847442627, |
|
"logps/chosen": -390.8550109863281, |
|
"logps/rejected": -315.28570556640625, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.535937488079071, |
|
"rewards/chosen": 0.022948402911424637, |
|
"rewards/margins": 0.008875529281795025, |
|
"rewards/rejected": 0.014072870835661888, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -4.273464202880859, |
|
"logits/rejected": -4.1207990646362305, |
|
"logps/chosen": -421.85577392578125, |
|
"logps/rejected": -319.6473083496094, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.025004684925079346, |
|
"rewards/margins": 0.013815673068165779, |
|
"rewards/rejected": 0.011189011856913567, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -4.305132865905762, |
|
"logits/rejected": -4.1475958824157715, |
|
"logps/chosen": -421.42529296875, |
|
"logps/rejected": -318.45184326171875, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.024396046996116638, |
|
"rewards/margins": 0.010859435424208641, |
|
"rewards/rejected": 0.013536609709262848, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -4.286343097686768, |
|
"logits/rejected": -4.121587753295898, |
|
"logps/chosen": -385.7117614746094, |
|
"logps/rejected": -282.57080078125, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.022473538294434547, |
|
"rewards/margins": 0.011112211272120476, |
|
"rewards/rejected": 0.011361326090991497, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -4.263542652130127, |
|
"logits/rejected": -4.148170471191406, |
|
"logps/chosen": -402.5519104003906, |
|
"logps/rejected": -317.2384338378906, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.027037670835852623, |
|
"rewards/margins": 0.01205758098512888, |
|
"rewards/rejected": 0.014980090782046318, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -4.191605567932129, |
|
"eval_logits/rejected": -4.081777572631836, |
|
"eval_logps/chosen": -402.4017333984375, |
|
"eval_logps/rejected": -315.6105651855469, |
|
"eval_loss": 0.6876626014709473, |
|
"eval_rewards/accuracies": 0.5644999742507935, |
|
"eval_rewards/chosen": 0.025381002575159073, |
|
"eval_rewards/margins": 0.011920945718884468, |
|
"eval_rewards/rejected": 0.013460054062306881, |
|
"eval_runtime": 776.0859, |
|
"eval_samples_per_second": 2.577, |
|
"eval_steps_per_second": 0.644, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6907179896044994, |
|
"train_runtime": 111372.3355, |
|
"train_samples_per_second": 1.669, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|