|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1274, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007849293563579278, |
|
"grad_norm": 6.091196076983652, |
|
"learning_rate": 3.90625e-09, |
|
"logits/chosen": 5914.52099609375, |
|
"logits/rejected": 2785.021484375, |
|
"logps/chosen": -212.45889282226562, |
|
"logps/rejected": -98.59669494628906, |
|
"loss": 1.3863, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007849293563579277, |
|
"grad_norm": 6.048636541099143, |
|
"learning_rate": 3.9062499999999997e-08, |
|
"logits/chosen": 4973.81396484375, |
|
"logits/rejected": 4328.32861328125, |
|
"logps/chosen": -204.19737243652344, |
|
"logps/rejected": -179.740234375, |
|
"loss": 1.3862, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.08651990443468094, |
|
"rewards/margins": 0.12112583220005035, |
|
"rewards/rejected": -0.034605927765369415, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 6.189956928555152, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": 6084.02587890625, |
|
"logits/rejected": 4834.0732421875, |
|
"logps/chosen": -217.18612670898438, |
|
"logps/rejected": -196.73153686523438, |
|
"loss": 1.3864, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.02506137453019619, |
|
"rewards/margins": 0.04303772374987602, |
|
"rewards/rejected": -0.01797635480761528, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.023547880690737835, |
|
"grad_norm": 5.4726473359462195, |
|
"learning_rate": 1.1718749999999999e-07, |
|
"logits/chosen": 6084.0302734375, |
|
"logits/rejected": 5104.97900390625, |
|
"logps/chosen": -250.5454559326172, |
|
"logps/rejected": -209.36410522460938, |
|
"loss": 1.3861, |
|
"rewards/accuracies": 0.5583332777023315, |
|
"rewards/chosen": 0.026890581473708153, |
|
"rewards/margins": 0.09340113401412964, |
|
"rewards/rejected": -0.06651054322719574, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 5.708267831588723, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": 5311.87744140625, |
|
"logits/rejected": 4346.86328125, |
|
"logps/chosen": -212.0022430419922, |
|
"logps/rejected": -181.71847534179688, |
|
"loss": 1.386, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": 0.07700984179973602, |
|
"rewards/margins": 0.10846559703350067, |
|
"rewards/rejected": -0.031455766409635544, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03924646781789639, |
|
"grad_norm": 5.759396354993872, |
|
"learning_rate": 1.9531249999999998e-07, |
|
"logits/chosen": 6424.58251953125, |
|
"logits/rejected": 5042.18115234375, |
|
"logps/chosen": -265.2978820800781, |
|
"logps/rejected": -206.7998809814453, |
|
"loss": 1.3856, |
|
"rewards/accuracies": 0.6750000715255737, |
|
"rewards/chosen": 0.3287124037742615, |
|
"rewards/margins": 0.4289844036102295, |
|
"rewards/rejected": -0.10027195513248444, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 5.54406858970845, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": 5484.29541015625, |
|
"logits/rejected": 4559.962890625, |
|
"logps/chosen": -213.7506103515625, |
|
"logps/rejected": -209.12460327148438, |
|
"loss": 1.385, |
|
"rewards/accuracies": 0.6583333611488342, |
|
"rewards/chosen": 0.253384530544281, |
|
"rewards/margins": 0.5778969526290894, |
|
"rewards/rejected": -0.32451242208480835, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.054945054945054944, |
|
"grad_norm": 5.35185403577633, |
|
"learning_rate": 2.734375e-07, |
|
"logits/chosen": 5194.3994140625, |
|
"logits/rejected": 4918.51220703125, |
|
"logps/chosen": -178.344970703125, |
|
"logps/rejected": -177.43560791015625, |
|
"loss": 1.3842, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": 0.13593974709510803, |
|
"rewards/margins": 0.8398297429084778, |
|
"rewards/rejected": -0.7038900256156921, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 5.638870230561589, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 5774.1318359375, |
|
"logits/rejected": 5269.8134765625, |
|
"logps/chosen": -196.78341674804688, |
|
"logps/rejected": -182.97677612304688, |
|
"loss": 1.3822, |
|
"rewards/accuracies": 0.6416666507720947, |
|
"rewards/chosen": -0.21483942866325378, |
|
"rewards/margins": 1.1714082956314087, |
|
"rewards/rejected": -1.3862475156784058, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0706436420722135, |
|
"grad_norm": 6.478511073625711, |
|
"learning_rate": 3.5156249999999997e-07, |
|
"logits/chosen": 6040.28759765625, |
|
"logits/rejected": 5181.716796875, |
|
"logps/chosen": -220.1483917236328, |
|
"logps/rejected": -190.4631805419922, |
|
"loss": 1.3787, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -1.2661734819412231, |
|
"rewards/margins": 4.753196716308594, |
|
"rewards/rejected": -6.019370079040527, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 7.188974837064224, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": 5967.84326171875, |
|
"logits/rejected": 5745.97119140625, |
|
"logps/chosen": -213.9687042236328, |
|
"logps/rejected": -208.8219757080078, |
|
"loss": 1.3796, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": -3.738008975982666, |
|
"rewards/margins": 5.6422576904296875, |
|
"rewards/rejected": -9.380266189575195, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08634222919937205, |
|
"grad_norm": 6.118081410153287, |
|
"learning_rate": 4.2968749999999996e-07, |
|
"logits/chosen": 6471.71923828125, |
|
"logits/rejected": 5290.84716796875, |
|
"logps/chosen": -188.41543579101562, |
|
"logps/rejected": -190.62838745117188, |
|
"loss": 1.3749, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -5.300592422485352, |
|
"rewards/margins": 6.335596561431885, |
|
"rewards/rejected": -11.636189460754395, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 8.791461375827627, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": 6398.0341796875, |
|
"logits/rejected": 5325.00927734375, |
|
"logps/chosen": -210.2766571044922, |
|
"logps/rejected": -212.75204467773438, |
|
"loss": 1.3728, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -7.439939022064209, |
|
"rewards/margins": 7.147006988525391, |
|
"rewards/rejected": -14.586946487426758, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 6.480916055994096, |
|
"learning_rate": 4.999962424962166e-07, |
|
"logits/chosen": 6332.94677734375, |
|
"logits/rejected": 5863.13134765625, |
|
"logps/chosen": -215.77871704101562, |
|
"logps/rejected": -212.88671875, |
|
"loss": 1.3705, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -5.970229625701904, |
|
"rewards/margins": 9.271949768066406, |
|
"rewards/rejected": -15.242179870605469, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 7.916401372438219, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": 6197.4365234375, |
|
"logits/rejected": 5458.46240234375, |
|
"logps/chosen": -195.366943359375, |
|
"logps/rejected": -196.8258056640625, |
|
"loss": 1.3696, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -9.699501037597656, |
|
"rewards/margins": 9.53441047668457, |
|
"rewards/rejected": -19.23391342163086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11773940345368916, |
|
"grad_norm": 9.11816822426609, |
|
"learning_rate": 4.995454786965036e-07, |
|
"logits/chosen": 6377.1611328125, |
|
"logits/rejected": 5330.43115234375, |
|
"logps/chosen": -209.7741241455078, |
|
"logps/rejected": -192.396728515625, |
|
"loss": 1.3666, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -8.089722633361816, |
|
"rewards/margins": 12.00928783416748, |
|
"rewards/rejected": -20.099010467529297, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 7.198206798530057, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": 6321.40087890625, |
|
"logits/rejected": 5649.20849609375, |
|
"logps/chosen": -207.3892822265625, |
|
"logps/rejected": -219.2005157470703, |
|
"loss": 1.3659, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": -9.86109733581543, |
|
"rewards/margins": 12.281832695007324, |
|
"rewards/rejected": -22.14293098449707, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13343799058084774, |
|
"grad_norm": 6.321494665117691, |
|
"learning_rate": 4.983447664444096e-07, |
|
"logits/chosen": 6516.60546875, |
|
"logits/rejected": 5811.42822265625, |
|
"logps/chosen": -219.67501831054688, |
|
"logps/rejected": -216.0376434326172, |
|
"loss": 1.3671, |
|
"rewards/accuracies": 0.6750000715255737, |
|
"rewards/chosen": -10.054361343383789, |
|
"rewards/margins": 9.661711692810059, |
|
"rewards/rejected": -19.71607208251953, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 7.558390140870204, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": 5710.5439453125, |
|
"logits/rejected": 4990.15771484375, |
|
"logps/chosen": -198.03170776367188, |
|
"logps/rejected": -199.192626953125, |
|
"loss": 1.3638, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -9.503952026367188, |
|
"rewards/margins": 11.527425765991211, |
|
"rewards/rejected": -21.031375885009766, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14913657770800628, |
|
"grad_norm": 8.14145163308194, |
|
"learning_rate": 4.963977141895843e-07, |
|
"logits/chosen": 5859.50146484375, |
|
"logits/rejected": 5036.01953125, |
|
"logps/chosen": -214.22640991210938, |
|
"logps/rejected": -225.4895782470703, |
|
"loss": 1.3601, |
|
"rewards/accuracies": 0.7250000834465027, |
|
"rewards/chosen": -10.467451095581055, |
|
"rewards/margins": 23.782638549804688, |
|
"rewards/rejected": -34.25008773803711, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 8.494418405300177, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": 5772.40625, |
|
"logits/rejected": 5338.69140625, |
|
"logps/chosen": -191.8777313232422, |
|
"logps/rejected": -223.7870635986328, |
|
"loss": 1.359, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -9.333466529846191, |
|
"rewards/margins": 17.782575607299805, |
|
"rewards/rejected": -27.116046905517578, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16483516483516483, |
|
"grad_norm": 9.658476061049418, |
|
"learning_rate": 4.937101733256606e-07, |
|
"logits/chosen": 5223.62548828125, |
|
"logits/rejected": 4660.197265625, |
|
"logps/chosen": -166.54293823242188, |
|
"logps/rejected": -186.89669799804688, |
|
"loss": 1.3593, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -12.629673957824707, |
|
"rewards/margins": 17.08604621887207, |
|
"rewards/rejected": -29.715723037719727, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 12.248366456833509, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": 6362.5478515625, |
|
"logits/rejected": 5419.66650390625, |
|
"logps/chosen": -207.56906127929688, |
|
"logps/rejected": -192.8691864013672, |
|
"loss": 1.3577, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": -11.232467651367188, |
|
"rewards/margins": 17.640005111694336, |
|
"rewards/rejected": -28.872472763061523, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18053375196232338, |
|
"grad_norm": 7.234262107057838, |
|
"learning_rate": 4.902902206053098e-07, |
|
"logits/chosen": 5827.66650390625, |
|
"logits/rejected": 5263.23046875, |
|
"logps/chosen": -198.8260498046875, |
|
"logps/rejected": -209.947265625, |
|
"loss": 1.3604, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -11.487305641174316, |
|
"rewards/margins": 18.69247817993164, |
|
"rewards/rejected": -30.179784774780273, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 8.459392596172329, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": 5309.54736328125, |
|
"logits/rejected": 4243.5830078125, |
|
"logps/chosen": -175.29354858398438, |
|
"logps/rejected": -179.5849151611328, |
|
"loss": 1.3558, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -7.664523124694824, |
|
"rewards/margins": 17.230939865112305, |
|
"rewards/rejected": -24.895463943481445, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19623233908948196, |
|
"grad_norm": 9.145048905164794, |
|
"learning_rate": 4.861481338675183e-07, |
|
"logits/chosen": 6279.61474609375, |
|
"logits/rejected": 5581.43603515625, |
|
"logps/chosen": -178.78981018066406, |
|
"logps/rejected": -217.976806640625, |
|
"loss": 1.3579, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -11.645959854125977, |
|
"rewards/margins": 21.824161529541016, |
|
"rewards/rejected": -33.470123291015625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 10.037813125733608, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": 6413.66650390625, |
|
"logits/rejected": 5192.2119140625, |
|
"logps/chosen": -214.44338989257812, |
|
"logps/rejected": -199.10244750976562, |
|
"loss": 1.3693, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -14.724939346313477, |
|
"rewards/margins": 18.1535587310791, |
|
"rewards/rejected": -32.87849426269531, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2119309262166405, |
|
"grad_norm": 10.210289382921355, |
|
"learning_rate": 4.812963611500339e-07, |
|
"logits/chosen": 6258.6923828125, |
|
"logits/rejected": 6061.39453125, |
|
"logps/chosen": -207.8274383544922, |
|
"logps/rejected": -219.6881561279297, |
|
"loss": 1.3476, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -11.071606636047363, |
|
"rewards/margins": 19.98748779296875, |
|
"rewards/rejected": -31.059091567993164, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 11.667424937518986, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": 5358.77783203125, |
|
"logits/rejected": 4660.5009765625, |
|
"logps/chosen": -173.97543334960938, |
|
"logps/rejected": -208.5042266845703, |
|
"loss": 1.3537, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -10.824542045593262, |
|
"rewards/margins": 28.37823486328125, |
|
"rewards/rejected": -39.202781677246094, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22762951334379905, |
|
"grad_norm": 10.483113107420898, |
|
"learning_rate": 4.7574948327980567e-07, |
|
"logits/chosen": 7435.53759765625, |
|
"logits/rejected": 5505.32666015625, |
|
"logps/chosen": -247.2607879638672, |
|
"logps/rejected": -226.1746368408203, |
|
"loss": 1.3473, |
|
"rewards/accuracies": 0.7666667699813843, |
|
"rewards/chosen": -10.232341766357422, |
|
"rewards/margins": 33.42657470703125, |
|
"rewards/rejected": -43.65891647338867, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 8.228448413177858, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": 5422.88818359375, |
|
"logits/rejected": 5400.13525390625, |
|
"logps/chosen": -178.8369903564453, |
|
"logps/rejected": -201.50466918945312, |
|
"loss": 1.357, |
|
"rewards/accuracies": 0.6750000715255737, |
|
"rewards/chosen": -11.364561080932617, |
|
"rewards/margins": 19.953664779663086, |
|
"rewards/rejected": -31.318225860595703, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24332810047095763, |
|
"grad_norm": 7.699139270208414, |
|
"learning_rate": 4.6952417005384247e-07, |
|
"logits/chosen": 6096.75732421875, |
|
"logits/rejected": 5434.83837890625, |
|
"logps/chosen": -185.6956024169922, |
|
"logps/rejected": -198.10134887695312, |
|
"loss": 1.3619, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -8.114912033081055, |
|
"rewards/margins": 14.518139839172363, |
|
"rewards/rejected": -22.6330509185791, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 8.328180326269704, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": 6136.8212890625, |
|
"logits/rejected": 5324.23583984375, |
|
"logps/chosen": -210.75827026367188, |
|
"logps/rejected": -234.6461944580078, |
|
"loss": 1.3578, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.701288223266602, |
|
"rewards/margins": 22.927001953125, |
|
"rewards/rejected": -32.62828826904297, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25902668759811615, |
|
"grad_norm": 9.875007026467317, |
|
"learning_rate": 4.626391301421782e-07, |
|
"logits/chosen": 5934.5712890625, |
|
"logits/rejected": 5409.8681640625, |
|
"logps/chosen": -204.72036743164062, |
|
"logps/rejected": -202.51492309570312, |
|
"loss": 1.3638, |
|
"rewards/accuracies": 0.6916666626930237, |
|
"rewards/chosen": -10.267139434814453, |
|
"rewards/margins": 14.924982070922852, |
|
"rewards/rejected": -25.192119598388672, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 8.606670577696239, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": 6179.17822265625, |
|
"logits/rejected": 6319.3310546875, |
|
"logps/chosen": -199.89364624023438, |
|
"logps/rejected": -245.26809692382812, |
|
"loss": 1.356, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -6.035394191741943, |
|
"rewards/margins": 18.865169525146484, |
|
"rewards/rejected": -24.900564193725586, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"grad_norm": 22.986995482748114, |
|
"learning_rate": 4.5511505486349865e-07, |
|
"logits/chosen": 6497.4287109375, |
|
"logits/rejected": 5893.86474609375, |
|
"logps/chosen": -206.90151977539062, |
|
"logps/rejected": -249.62130737304688, |
|
"loss": 1.3533, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -12.075809478759766, |
|
"rewards/margins": 30.723468780517578, |
|
"rewards/rejected": -42.79928207397461, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 9.742030346206404, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": 5949.8857421875, |
|
"logits/rejected": 5860.00634765625, |
|
"logps/chosen": -198.9341278076172, |
|
"logps/rejected": -252.93209838867188, |
|
"loss": 1.3655, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -21.01068878173828, |
|
"rewards/margins": 32.63959503173828, |
|
"rewards/rejected": -53.6502799987793, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2904238618524333, |
|
"grad_norm": 8.763637069131867, |
|
"learning_rate": 4.4697455600239863e-07, |
|
"logits/chosen": 5399.63525390625, |
|
"logits/rejected": 5097.599609375, |
|
"logps/chosen": -195.9980010986328, |
|
"logps/rejected": -197.7607879638672, |
|
"loss": 1.3627, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -11.57593822479248, |
|
"rewards/margins": 18.113765716552734, |
|
"rewards/rejected": -29.6897029876709, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 9.722274579855199, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": 6468.1220703125, |
|
"logits/rejected": 5369.0634765625, |
|
"logps/chosen": -234.26748657226562, |
|
"logps/rejected": -212.1043243408203, |
|
"loss": 1.354, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -5.926461219787598, |
|
"rewards/margins": 23.168312072753906, |
|
"rewards/rejected": -29.094772338867188, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 15.31595541298082, |
|
"learning_rate": 4.3824209785514326e-07, |
|
"logits/chosen": 6639.2294921875, |
|
"logits/rejected": 5100.4287109375, |
|
"logps/chosen": -221.4827117919922, |
|
"logps/rejected": -218.9009552001953, |
|
"loss": 1.3476, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -5.989265441894531, |
|
"rewards/margins": 33.05856704711914, |
|
"rewards/rejected": -39.047828674316406, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 10.537639563559068, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": 5737.9208984375, |
|
"logits/rejected": 5631.57080078125, |
|
"logps/chosen": -203.96151733398438, |
|
"logps/rejected": -229.1461639404297, |
|
"loss": 1.3599, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -13.172935485839844, |
|
"rewards/margins": 27.23373794555664, |
|
"rewards/rejected": -40.406673431396484, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3218210361067504, |
|
"grad_norm": 14.959421459394797, |
|
"learning_rate": 4.2894392370815567e-07, |
|
"logits/chosen": 6207.42041015625, |
|
"logits/rejected": 5546.6611328125, |
|
"logps/chosen": -224.15078735351562, |
|
"logps/rejected": -258.1195068359375, |
|
"loss": 1.3344, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -11.663908004760742, |
|
"rewards/margins": 38.00326156616211, |
|
"rewards/rejected": -49.66717529296875, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 17.843898253212178, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": 5856.669921875, |
|
"logits/rejected": 5317.4970703125, |
|
"logps/chosen": -218.15768432617188, |
|
"logps/rejected": -230.917236328125, |
|
"loss": 1.3484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -14.127777099609375, |
|
"rewards/margins": 35.54801940917969, |
|
"rewards/rejected": -49.6757926940918, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33751962323390894, |
|
"grad_norm": 12.654228568647438, |
|
"learning_rate": 4.1910797697018017e-07, |
|
"logits/chosen": 5639.2978515625, |
|
"logits/rejected": 4720.31982421875, |
|
"logps/chosen": -193.45645141601562, |
|
"logps/rejected": -209.80795288085938, |
|
"loss": 1.3462, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -13.711461067199707, |
|
"rewards/margins": 34.86336898803711, |
|
"rewards/rejected": -48.57483673095703, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 15.182887086880035, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": 5991.6171875, |
|
"logits/rejected": 5934.1552734375, |
|
"logps/chosen": -193.38800048828125, |
|
"logps/rejected": -230.582275390625, |
|
"loss": 1.3459, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -10.994651794433594, |
|
"rewards/margins": 27.90401268005371, |
|
"rewards/rejected": -38.89866638183594, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3532182103610675, |
|
"grad_norm": 12.029246709671026, |
|
"learning_rate": 4.087638171951401e-07, |
|
"logits/chosen": 6900.34765625, |
|
"logits/rejected": 4994.3525390625, |
|
"logps/chosen": -218.0048370361328, |
|
"logps/rejected": -219.4988555908203, |
|
"loss": 1.3499, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -9.236083984375, |
|
"rewards/margins": 47.2701416015625, |
|
"rewards/rejected": -56.5062255859375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 15.803880587400545, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": 7089.22021484375, |
|
"logits/rejected": 5539.4384765625, |
|
"logps/chosen": -264.29150390625, |
|
"logps/rejected": -238.7609405517578, |
|
"loss": 1.3517, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -13.85925006866455, |
|
"rewards/margins": 34.69366455078125, |
|
"rewards/rejected": -48.552913665771484, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36891679748822603, |
|
"grad_norm": 16.115599045605588, |
|
"learning_rate": 3.979425312480629e-07, |
|
"logits/chosen": 6082.546875, |
|
"logits/rejected": 5345.21728515625, |
|
"logps/chosen": -225.55813598632812, |
|
"logps/rejected": -248.83438110351562, |
|
"loss": 1.3451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.293352127075195, |
|
"rewards/margins": 32.974754333496094, |
|
"rewards/rejected": -46.26811218261719, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 12.53417188182312, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": 6546.7509765625, |
|
"logits/rejected": 5691.3193359375, |
|
"logps/chosen": -218.65902709960938, |
|
"logps/rejected": -214.631103515625, |
|
"loss": 1.3509, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -11.263853073120117, |
|
"rewards/margins": 28.971487045288086, |
|
"rewards/rejected": -40.23533630371094, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 19.95513568811511, |
|
"learning_rate": 3.866766398810424e-07, |
|
"logits/chosen": 6155.7880859375, |
|
"logits/rejected": 5917.6748046875, |
|
"logps/chosen": -180.28146362304688, |
|
"logps/rejected": -236.327880859375, |
|
"loss": 1.3366, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -3.6729559898376465, |
|
"rewards/margins": 30.630626678466797, |
|
"rewards/rejected": -34.3035888671875, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 14.227400790753371, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": 6715.08447265625, |
|
"logits/rejected": 5196.7041015625, |
|
"logps/chosen": -247.65261840820312, |
|
"logps/rejected": -270.3143005371094, |
|
"loss": 1.3572, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -25.698944091796875, |
|
"rewards/margins": 42.09914779663086, |
|
"rewards/rejected": -67.798095703125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4003139717425432, |
|
"grad_norm": 10.674798547850948, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 5342.1806640625, |
|
"logits/rejected": 4739.6083984375, |
|
"logps/chosen": -199.51919555664062, |
|
"logps/rejected": -209.77294921875, |
|
"loss": 1.3525, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -10.036214828491211, |
|
"rewards/margins": 33.37144088745117, |
|
"rewards/rejected": -43.407649993896484, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 12.39074250082983, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": 5672.80517578125, |
|
"logits/rejected": 5283.02490234375, |
|
"logps/chosen": -211.51986694335938, |
|
"logps/rejected": -235.0128173828125, |
|
"loss": 1.3601, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -13.053865432739258, |
|
"rewards/margins": 26.170928955078125, |
|
"rewards/rejected": -39.224796295166016, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41601255886970173, |
|
"grad_norm": 11.108226426071516, |
|
"learning_rate": 3.6294770291596076e-07, |
|
"logits/chosen": 6426.45166015625, |
|
"logits/rejected": 5303.09375, |
|
"logps/chosen": -220.7977294921875, |
|
"logps/rejected": -231.1540985107422, |
|
"loss": 1.3453, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": -7.8749799728393555, |
|
"rewards/margins": 24.518779754638672, |
|
"rewards/rejected": -32.39376449584961, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 31.66087255257573, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": 6119.76708984375, |
|
"logits/rejected": 5501.98193359375, |
|
"logps/chosen": -221.72915649414062, |
|
"logps/rejected": -231.87255859375, |
|
"loss": 1.3487, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -10.911323547363281, |
|
"rewards/margins": 32.72243881225586, |
|
"rewards/rejected": -43.63376235961914, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4317111459968603, |
|
"grad_norm": 17.116916865875684, |
|
"learning_rate": 3.505559688866229e-07, |
|
"logits/chosen": 5922.16259765625, |
|
"logits/rejected": 5534.40625, |
|
"logps/chosen": -227.80270385742188, |
|
"logps/rejected": -273.3616638183594, |
|
"loss": 1.3437, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -13.062261581420898, |
|
"rewards/margins": 35.235328674316406, |
|
"rewards/rejected": -48.29759216308594, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 10.303239033366689, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": 5970.333984375, |
|
"logits/rejected": 5599.16015625, |
|
"logps/chosen": -220.08242797851562, |
|
"logps/rejected": -242.54141235351562, |
|
"loss": 1.354, |
|
"rewards/accuracies": 0.6583333611488342, |
|
"rewards/chosen": -16.8071346282959, |
|
"rewards/margins": 32.52507781982422, |
|
"rewards/rejected": -49.33221435546875, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4474097331240188, |
|
"grad_norm": 20.36092824335855, |
|
"learning_rate": 3.378620382651523e-07, |
|
"logits/chosen": 6295.93798828125, |
|
"logits/rejected": 5818.79541015625, |
|
"logps/chosen": -256.4508361816406, |
|
"logps/rejected": -272.3232727050781, |
|
"loss": 1.3442, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -13.490982055664062, |
|
"rewards/margins": 35.06177520751953, |
|
"rewards/rejected": -48.55276107788086, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 13.847806384981444, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": 6245.16455078125, |
|
"logits/rejected": 5669.74609375, |
|
"logps/chosen": -220.435546875, |
|
"logps/rejected": -257.63934326171875, |
|
"loss": 1.3525, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -8.020076751708984, |
|
"rewards/margins": 36.30790328979492, |
|
"rewards/rejected": -44.327980041503906, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4631083202511774, |
|
"grad_norm": 13.55788048465109, |
|
"learning_rate": 3.249040595833274e-07, |
|
"logits/chosen": 6800.77880859375, |
|
"logits/rejected": 5768.46728515625, |
|
"logps/chosen": -242.50244140625, |
|
"logps/rejected": -225.4458770751953, |
|
"loss": 1.3389, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -10.776572227478027, |
|
"rewards/margins": 35.810447692871094, |
|
"rewards/rejected": -46.5870246887207, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 19.567474002862465, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": 6432.34130859375, |
|
"logits/rejected": 5503.3408203125, |
|
"logps/chosen": -190.61471557617188, |
|
"logps/rejected": -236.8105926513672, |
|
"loss": 1.3495, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -13.066381454467773, |
|
"rewards/margins": 34.057960510253906, |
|
"rewards/rejected": -47.12434005737305, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.478806907378336, |
|
"grad_norm": 10.833448357785096, |
|
"learning_rate": 3.11720974905373e-07, |
|
"logits/chosen": 6166.84716796875, |
|
"logits/rejected": 5408.181640625, |
|
"logps/chosen": -217.9842987060547, |
|
"logps/rejected": -233.4291534423828, |
|
"loss": 1.3351, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.866181373596191, |
|
"rewards/margins": 36.140419006347656, |
|
"rewards/rejected": -48.0066032409668, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 17.584761280856203, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": 5255.32763671875, |
|
"logits/rejected": 4338.2158203125, |
|
"logps/chosen": -199.14022827148438, |
|
"logps/rejected": -215.39840698242188, |
|
"loss": 1.3499, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -15.381566047668457, |
|
"rewards/margins": 38.9179801940918, |
|
"rewards/rejected": -54.29954147338867, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4945054945054945, |
|
"grad_norm": 13.336131403415491, |
|
"learning_rate": 2.9835240279702513e-07, |
|
"logits/chosen": 6839.3251953125, |
|
"logits/rejected": 5872.88525390625, |
|
"logps/chosen": -251.8268280029297, |
|
"logps/rejected": -247.50167846679688, |
|
"loss": 1.3415, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -9.010820388793945, |
|
"rewards/margins": 44.24280548095703, |
|
"rewards/rejected": -53.25362014770508, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 10.500273772282682, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": 6504.427734375, |
|
"logits/rejected": 5596.26953125, |
|
"logps/chosen": -223.74313354492188, |
|
"logps/rejected": -247.1144256591797, |
|
"loss": 1.3389, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -12.981588363647461, |
|
"rewards/margins": 19.623910903930664, |
|
"rewards/rejected": -32.605499267578125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 13.241712923369416, |
|
"learning_rate": 2.848385192615339e-07, |
|
"logits/chosen": 5621.92431640625, |
|
"logits/rejected": 4618.6728515625, |
|
"logps/chosen": -207.3036651611328, |
|
"logps/rejected": -212.81039428710938, |
|
"loss": 1.3446, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -10.964346885681152, |
|
"rewards/margins": 33.11830520629883, |
|
"rewards/rejected": -44.08264923095703, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 13.137564726428407, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": 6302.98388671875, |
|
"logits/rejected": 5078.0986328125, |
|
"logps/chosen": -229.484375, |
|
"logps/rejected": -250.4006805419922, |
|
"loss": 1.3497, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -12.097832679748535, |
|
"rewards/margins": 50.35541915893555, |
|
"rewards/rejected": -62.45325469970703, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5259026687598116, |
|
"grad_norm": 12.607712109286384, |
|
"learning_rate": 2.71219937000424e-07, |
|
"logits/chosen": 6293.5849609375, |
|
"logits/rejected": 5201.06005859375, |
|
"logps/chosen": -219.1787109375, |
|
"logps/rejected": -234.1125030517578, |
|
"loss": 1.3522, |
|
"rewards/accuracies": 0.7916667461395264, |
|
"rewards/chosen": -12.419242858886719, |
|
"rewards/margins": 32.838829040527344, |
|
"rewards/rejected": -45.25807571411133, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 10.42216150578162, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": 6108.7177734375, |
|
"logits/rejected": 5131.86474609375, |
|
"logps/chosen": -223.22036743164062, |
|
"logps/rejected": -227.4945831298828, |
|
"loss": 1.3444, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -9.559103012084961, |
|
"rewards/margins": 38.708797454833984, |
|
"rewards/rejected": -48.267906188964844, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5416012558869702, |
|
"grad_norm": 17.01807369235278, |
|
"learning_rate": 2.5753758336186326e-07, |
|
"logits/chosen": 6047.66015625, |
|
"logits/rejected": 5569.13134765625, |
|
"logps/chosen": -221.77609252929688, |
|
"logps/rejected": -264.51800537109375, |
|
"loss": 1.3412, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -12.669670104980469, |
|
"rewards/margins": 35.65166473388672, |
|
"rewards/rejected": -48.32134246826172, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 16.12776261618448, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": 5761.84619140625, |
|
"logits/rejected": 5558.48583984375, |
|
"logps/chosen": -202.7579345703125, |
|
"logps/rejected": -238.9604034423828, |
|
"loss": 1.3651, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.49761962890625, |
|
"rewards/margins": 38.488807678222656, |
|
"rewards/rejected": -51.986427307128906, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5572998430141287, |
|
"grad_norm": 11.103396938840731, |
|
"learning_rate": 2.4383257734343794e-07, |
|
"logits/chosen": 5719.7939453125, |
|
"logits/rejected": 5761.4130859375, |
|
"logps/chosen": -207.0905303955078, |
|
"logps/rejected": -249.865966796875, |
|
"loss": 1.3403, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -13.179117202758789, |
|
"rewards/margins": 34.066200256347656, |
|
"rewards/rejected": -47.24531936645508, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 10.875868983762, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": 6126.095703125, |
|
"logits/rejected": 5306.52001953125, |
|
"logps/chosen": -227.9593505859375, |
|
"logps/rejected": -244.30264282226562, |
|
"loss": 1.3517, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.9561128616333, |
|
"rewards/margins": 35.58136749267578, |
|
"rewards/rejected": -44.537479400634766, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5729984301412873, |
|
"grad_norm": 12.46733582160012, |
|
"learning_rate": 2.3014610601897157e-07, |
|
"logits/chosen": 6644.74365234375, |
|
"logits/rejected": 5127.03857421875, |
|
"logps/chosen": -237.8786163330078, |
|
"logps/rejected": -223.18807983398438, |
|
"loss": 1.3406, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -12.732693672180176, |
|
"rewards/margins": 35.38166427612305, |
|
"rewards/rejected": -48.11436080932617, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 14.378248213557361, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": 5524.26220703125, |
|
"logits/rejected": 4709.974609375, |
|
"logps/chosen": -201.53176879882812, |
|
"logps/rejected": -213.3249053955078, |
|
"loss": 1.3391, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -9.107335090637207, |
|
"rewards/margins": 41.571495056152344, |
|
"rewards/rejected": -50.6788330078125, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5886970172684458, |
|
"grad_norm": 16.649702927791314, |
|
"learning_rate": 2.1651930076075723e-07, |
|
"logits/chosen": 6013.10302734375, |
|
"logits/rejected": 5475.51953125, |
|
"logps/chosen": -194.5826416015625, |
|
"logps/rejected": -208.33847045898438, |
|
"loss": 1.3492, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -14.156700134277344, |
|
"rewards/margins": 27.087514877319336, |
|
"rewards/rejected": -41.24421691894531, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 12.057829105152498, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": 6306.58837890625, |
|
"logits/rejected": 5016.3056640625, |
|
"logps/chosen": -212.6140594482422, |
|
"logps/rejected": -217.46597290039062, |
|
"loss": 1.3462, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -9.363100051879883, |
|
"rewards/margins": 33.57235336303711, |
|
"rewards/rejected": -42.935447692871094, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6043956043956044, |
|
"grad_norm": 13.607431735853279, |
|
"learning_rate": 2.0299311362918773e-07, |
|
"logits/chosen": 6517.55224609375, |
|
"logits/rejected": 5634.74755859375, |
|
"logps/chosen": -242.9558563232422, |
|
"logps/rejected": -272.95355224609375, |
|
"loss": 1.3507, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.222195625305176, |
|
"rewards/margins": 35.8712272644043, |
|
"rewards/rejected": -49.093421936035156, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 14.042243888509429, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": 6358.8125, |
|
"logits/rejected": 6069.78759765625, |
|
"logps/chosen": -226.16159057617188, |
|
"logps/rejected": -279.2201232910156, |
|
"loss": 1.3483, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -10.056262016296387, |
|
"rewards/margins": 31.77614974975586, |
|
"rewards/rejected": -41.83241653442383, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6200941915227629, |
|
"grad_norm": 13.85111247684391, |
|
"learning_rate": 1.8960819430126334e-07, |
|
"logits/chosen": 5926.2744140625, |
|
"logits/rejected": 5265.1884765625, |
|
"logps/chosen": -216.1208953857422, |
|
"logps/rejected": -251.05642700195312, |
|
"loss": 1.3464, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -17.989896774291992, |
|
"rewards/margins": 44.46880340576172, |
|
"rewards/rejected": -62.45869827270508, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 12.764962415212846, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": 5927.6357421875, |
|
"logits/rejected": 5216.50146484375, |
|
"logps/chosen": -230.69552612304688, |
|
"logps/rejected": -257.57598876953125, |
|
"loss": 1.3564, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -13.972677230834961, |
|
"rewards/margins": 30.90505027770996, |
|
"rewards/rejected": -44.87772750854492, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6357927786499215, |
|
"grad_norm": 12.348701701738166, |
|
"learning_rate": 1.7640476790784075e-07, |
|
"logits/chosen": 5474.27490234375, |
|
"logits/rejected": 4945.47509765625, |
|
"logps/chosen": -213.3369598388672, |
|
"logps/rejected": -264.7867736816406, |
|
"loss": 1.3448, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -10.881568908691406, |
|
"rewards/margins": 32.60791778564453, |
|
"rewards/rejected": -43.48948287963867, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 12.55787593683916, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": 5950.470703125, |
|
"logits/rejected": 4638.33349609375, |
|
"logps/chosen": -230.09524536132812, |
|
"logps/rejected": -206.407470703125, |
|
"loss": 1.3416, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.880694389343262, |
|
"rewards/margins": 28.838424682617188, |
|
"rewards/rejected": -37.71912384033203, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6514913657770801, |
|
"grad_norm": 17.32057277815633, |
|
"learning_rate": 1.634225141467513e-07, |
|
"logits/chosen": 5889.0400390625, |
|
"logits/rejected": 5296.57861328125, |
|
"logps/chosen": -219.9248046875, |
|
"logps/rejected": -244.50936889648438, |
|
"loss": 1.3485, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -15.738775253295898, |
|
"rewards/margins": 36.31574249267578, |
|
"rewards/rejected": -52.05452346801758, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 9.825712429431242, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": 6438.00390625, |
|
"logits/rejected": 5311.1455078125, |
|
"logps/chosen": -243.78604125976562, |
|
"logps/rejected": -258.28704833984375, |
|
"loss": 1.3441, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -16.456207275390625, |
|
"rewards/margins": 29.037649154663086, |
|
"rewards/rejected": -45.493858337402344, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6671899529042387, |
|
"grad_norm": 14.735433365070342, |
|
"learning_rate": 1.5070044803508691e-07, |
|
"logits/chosen": 5953.31298828125, |
|
"logits/rejected": 5381.14306640625, |
|
"logps/chosen": -227.7479705810547, |
|
"logps/rejected": -255.1121368408203, |
|
"loss": 1.3349, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -12.270512580871582, |
|
"rewards/margins": 42.38630294799805, |
|
"rewards/rejected": -54.65681838989258, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 14.85492459591332, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": 6416.33544921875, |
|
"logits/rejected": 5480.611328125, |
|
"logps/chosen": -260.19989013671875, |
|
"logps/rejected": -254.9077606201172, |
|
"loss": 1.3516, |
|
"rewards/accuracies": 0.7750000953674316, |
|
"rewards/chosen": -12.429244995117188, |
|
"rewards/margins": 39.79665756225586, |
|
"rewards/rejected": -52.22589874267578, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6828885400313972, |
|
"grad_norm": 13.017271488143887, |
|
"learning_rate": 1.3827680265902232e-07, |
|
"logits/chosen": 6371.8037109375, |
|
"logits/rejected": 5308.52490234375, |
|
"logps/chosen": -242.83413696289062, |
|
"logps/rejected": -247.3595733642578, |
|
"loss": 1.351, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -12.737371444702148, |
|
"rewards/margins": 33.933265686035156, |
|
"rewards/rejected": -46.67063522338867, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 15.15778095800919, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": 5609.341796875, |
|
"logits/rejected": 5382.73095703125, |
|
"logps/chosen": -221.3697967529297, |
|
"logps/rejected": -276.8291931152344, |
|
"loss": 1.3483, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -10.858831405639648, |
|
"rewards/margins": 34.38120651245117, |
|
"rewards/rejected": -45.24003982543945, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6985871271585558, |
|
"grad_norm": 13.765055358350205, |
|
"learning_rate": 1.2618891427354172e-07, |
|
"logits/chosen": 6611.1533203125, |
|
"logits/rejected": 5410.708984375, |
|
"logps/chosen": -267.79962158203125, |
|
"logps/rejected": -259.8660888671875, |
|
"loss": 1.3481, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -10.203554153442383, |
|
"rewards/margins": 37.881988525390625, |
|
"rewards/rejected": -48.085540771484375, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 12.391358583369788, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": 5603.50537109375, |
|
"logits/rejected": 5218.40185546875, |
|
"logps/chosen": -215.1715087890625, |
|
"logps/rejected": -255.24758911132812, |
|
"loss": 1.3495, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -10.822305679321289, |
|
"rewards/margins": 33.995201110839844, |
|
"rewards/rejected": -44.8175048828125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 18.87336390048285, |
|
"learning_rate": 1.1447311009737299e-07, |
|
"logits/chosen": 5508.84375, |
|
"logits/rejected": 5254.75244140625, |
|
"logps/chosen": -222.1977081298828, |
|
"logps/rejected": -262.20513916015625, |
|
"loss": 1.3453, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -13.971402168273926, |
|
"rewards/margins": 40.305274963378906, |
|
"rewards/rejected": -54.27667999267578, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 15.471482371326609, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": 6093.49951171875, |
|
"logits/rejected": 5076.36376953125, |
|
"logps/chosen": -245.2948455810547, |
|
"logps/rejected": -248.81405639648438, |
|
"loss": 1.3461, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -12.873262405395508, |
|
"rewards/margins": 37.79849624633789, |
|
"rewards/rejected": -50.6717529296875, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7299843014128728, |
|
"grad_norm": 11.479378316337622, |
|
"learning_rate": 1.0316459914033793e-07, |
|
"logits/chosen": 6001.8134765625, |
|
"logits/rejected": 4559.4609375, |
|
"logps/chosen": -252.53317260742188, |
|
"logps/rejected": -239.29428100585938, |
|
"loss": 1.3471, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -16.544239044189453, |
|
"rewards/margins": 36.82581329345703, |
|
"rewards/rejected": -53.37005615234375, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 12.94277337339525, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": 5815.173828125, |
|
"logits/rejected": 5115.169921875, |
|
"logps/chosen": -231.0220184326172, |
|
"logps/rejected": -261.7562561035156, |
|
"loss": 1.3429, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -12.728368759155273, |
|
"rewards/margins": 44.04799270629883, |
|
"rewards/rejected": -56.7763671875, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7456828885400314, |
|
"grad_norm": 16.27087945734002, |
|
"learning_rate": 9.229736639120561e-08, |
|
"logits/chosen": 5988.3154296875, |
|
"logits/rejected": 5553.0830078125, |
|
"logps/chosen": -231.2310028076172, |
|
"logps/rejected": -251.68289184570312, |
|
"loss": 1.348, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -13.42981243133545, |
|
"rewards/margins": 24.098569869995117, |
|
"rewards/rejected": -37.528377532958984, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 16.766717992055163, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": 5841.966796875, |
|
"logits/rejected": 5221.5361328125, |
|
"logps/chosen": -227.2954864501953, |
|
"logps/rejected": -253.348876953125, |
|
"loss": 1.3363, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.86706829071045, |
|
"rewards/margins": 42.00550079345703, |
|
"rewards/rejected": -50.87256622314453, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7613814756671899, |
|
"grad_norm": 17.264677009971713, |
|
"learning_rate": 8.19040706840472e-08, |
|
"logits/chosen": 5942.7607421875, |
|
"logits/rejected": 4996.2412109375, |
|
"logps/chosen": -252.40908813476562, |
|
"logps/rejected": -269.8039855957031, |
|
"loss": 1.3361, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -12.189082145690918, |
|
"rewards/margins": 50.92434310913086, |
|
"rewards/rejected": -63.113426208496094, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 12.969674705460362, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": 5647.4619140625, |
|
"logits/rejected": 4565.35107421875, |
|
"logps/chosen": -239.58450317382812, |
|
"logps/rejected": -268.37799072265625, |
|
"loss": 1.3324, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -10.767900466918945, |
|
"rewards/margins": 67.11649322509766, |
|
"rewards/rejected": -77.88438415527344, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7770800627943485, |
|
"grad_norm": 23.179398971044233, |
|
"learning_rate": 7.201594655002458e-08, |
|
"logits/chosen": 5969.14111328125, |
|
"logits/rejected": 5011.64013671875, |
|
"logps/chosen": -241.0636444091797, |
|
"logps/rejected": -262.5384216308594, |
|
"loss": 1.3365, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -15.168705940246582, |
|
"rewards/margins": 53.300010681152344, |
|
"rewards/rejected": -68.46871185302734, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 18.79279527226742, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": 5659.3037109375, |
|
"logits/rejected": 4810.89599609375, |
|
"logps/chosen": -227.1795654296875, |
|
"logps/rejected": -228.3984375, |
|
"loss": 1.3449, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -13.423723220825195, |
|
"rewards/margins": 34.62942123413086, |
|
"rewards/rejected": -48.053138732910156, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.792778649921507, |
|
"grad_norm": 15.30089044819146, |
|
"learning_rate": 6.26627103495786e-08, |
|
"logits/chosen": 5842.5341796875, |
|
"logits/rejected": 4896.11181640625, |
|
"logps/chosen": -224.3483428955078, |
|
"logps/rejected": -247.2809295654297, |
|
"loss": 1.34, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -9.639090538024902, |
|
"rewards/margins": 46.20824432373047, |
|
"rewards/rejected": -55.84733200073242, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 12.810319531592627, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": 5930.25390625, |
|
"logits/rejected": 5296.1630859375, |
|
"logps/chosen": -257.00250244140625, |
|
"logps/rejected": -280.92657470703125, |
|
"loss": 1.3463, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.82390022277832, |
|
"rewards/margins": 46.02201461791992, |
|
"rewards/rejected": -54.845909118652344, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8084772370486656, |
|
"grad_norm": 11.44579430939054, |
|
"learning_rate": 5.38724709671092e-08, |
|
"logits/chosen": 6328.5556640625, |
|
"logits/rejected": 5993.76171875, |
|
"logps/chosen": -243.43869018554688, |
|
"logps/rejected": -289.0228271484375, |
|
"loss": 1.3372, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -11.970319747924805, |
|
"rewards/margins": 43.93321990966797, |
|
"rewards/rejected": -55.903541564941406, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 14.741952244341237, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": 5626.61572265625, |
|
"logits/rejected": 5149.10791015625, |
|
"logps/chosen": -236.9131317138672, |
|
"logps/rejected": -273.8883972167969, |
|
"loss": 1.3424, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -16.811473846435547, |
|
"rewards/margins": 47.892974853515625, |
|
"rewards/rejected": -64.70445251464844, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8241758241758241, |
|
"grad_norm": 12.693662955042376, |
|
"learning_rate": 4.5671645336537416e-08, |
|
"logits/chosen": 5679.7373046875, |
|
"logits/rejected": 5195.1259765625, |
|
"logps/chosen": -251.4984130859375, |
|
"logps/rejected": -279.0545959472656, |
|
"loss": 1.3414, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -14.017779350280762, |
|
"rewards/margins": 49.24242401123047, |
|
"rewards/rejected": -63.26020431518555, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 31.47444666328788, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": 7014.08056640625, |
|
"logits/rejected": 5543.162109375, |
|
"logps/chosen": -276.7340393066406, |
|
"logps/rejected": -269.3011169433594, |
|
"loss": 1.3503, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": -15.746711730957031, |
|
"rewards/margins": 33.51522445678711, |
|
"rewards/rejected": -49.26193618774414, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8398744113029827, |
|
"grad_norm": 16.176876775515055, |
|
"learning_rate": 3.8084879051612144e-08, |
|
"logits/chosen": 5845.7783203125, |
|
"logits/rejected": 5383.59521484375, |
|
"logps/chosen": -234.75259399414062, |
|
"logps/rejected": -243.68917846679688, |
|
"loss": 1.3441, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -9.509564399719238, |
|
"rewards/margins": 41.015254974365234, |
|
"rewards/rejected": -50.524818420410156, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 14.969831250800548, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": 6403.892578125, |
|
"logits/rejected": 4980.4814453125, |
|
"logps/chosen": -274.7662658691406, |
|
"logps/rejected": -261.01898193359375, |
|
"loss": 1.3305, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -8.196954727172852, |
|
"rewards/margins": 51.842140197753906, |
|
"rewards/rejected": -60.039100646972656, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8555729984301413, |
|
"grad_norm": 12.198123465136609, |
|
"learning_rate": 3.11349722995527e-08, |
|
"logits/chosen": 6488.9091796875, |
|
"logits/rejected": 4886.4169921875, |
|
"logps/chosen": -241.4394073486328, |
|
"logps/rejected": -268.80352783203125, |
|
"loss": 1.3471, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -13.964780807495117, |
|
"rewards/margins": 41.95417785644531, |
|
"rewards/rejected": -55.9189567565918, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 18.075378598084896, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": 6258.9072265625, |
|
"logits/rejected": 5193.19384765625, |
|
"logps/chosen": -251.8688507080078, |
|
"logps/rejected": -251.71829223632812, |
|
"loss": 1.3449, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -13.332514762878418, |
|
"rewards/margins": 48.888423919677734, |
|
"rewards/rejected": -62.2209358215332, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8712715855572999, |
|
"grad_norm": 18.246911185615897, |
|
"learning_rate": 2.484281134061142e-08, |
|
"logits/chosen": 6621.4384765625, |
|
"logits/rejected": 5365.8623046875, |
|
"logps/chosen": -279.5318603515625, |
|
"logps/rejected": -282.0029296875, |
|
"loss": 1.3424, |
|
"rewards/accuracies": 0.8083332777023315, |
|
"rewards/chosen": -14.918279647827148, |
|
"rewards/margins": 44.81663131713867, |
|
"rewards/rejected": -59.73491287231445, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 22.551350441375604, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": 6477.88916015625, |
|
"logits/rejected": 5286.2412109375, |
|
"logps/chosen": -260.225341796875, |
|
"logps/rejected": -279.5767822265625, |
|
"loss": 1.3418, |
|
"rewards/accuracies": 0.7666667699813843, |
|
"rewards/chosen": -15.084878921508789, |
|
"rewards/margins": 47.721107482910156, |
|
"rewards/rejected": -62.805992126464844, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8869701726844584, |
|
"grad_norm": 20.252362802872884, |
|
"learning_rate": 1.9227305739481612e-08, |
|
"logits/chosen": 5893.1474609375, |
|
"logits/rejected": 4668.8095703125, |
|
"logps/chosen": -245.6111297607422, |
|
"logps/rejected": -238.3753662109375, |
|
"loss": 1.3376, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -10.829057693481445, |
|
"rewards/margins": 47.32074737548828, |
|
"rewards/rejected": -58.149803161621094, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 13.189894058710424, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": 6479.234375, |
|
"logits/rejected": 5293.7001953125, |
|
"logps/chosen": -269.5186462402344, |
|
"logps/rejected": -273.58905029296875, |
|
"loss": 1.3437, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -11.897893905639648, |
|
"rewards/margins": 40.87809753417969, |
|
"rewards/rejected": -52.77599334716797, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.902668759811617, |
|
"grad_norm": 12.521869991300122, |
|
"learning_rate": 1.4305331537183384e-08, |
|
"logits/chosen": 5731.880859375, |
|
"logits/rejected": 5293.7578125, |
|
"logps/chosen": -239.46334838867188, |
|
"logps/rejected": -267.51025390625, |
|
"loss": 1.3369, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -12.479973793029785, |
|
"rewards/margins": 35.54231262207031, |
|
"rewards/rejected": -48.02228927612305, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 13.463956997262862, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": 5900.7177734375, |
|
"logits/rejected": 4710.4609375, |
|
"logps/chosen": -244.7340545654297, |
|
"logps/rejected": -246.96536254882812, |
|
"loss": 1.3439, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -16.17725372314453, |
|
"rewards/margins": 34.85002899169922, |
|
"rewards/rejected": -51.027286529541016, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"grad_norm": 15.662977380913924, |
|
"learning_rate": 1.0091680534213387e-08, |
|
"logits/chosen": 6465.8505859375, |
|
"logits/rejected": 6233.8583984375, |
|
"logps/chosen": -257.33880615234375, |
|
"logps/rejected": -297.4341735839844, |
|
"loss": 1.3457, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -14.390420913696289, |
|
"rewards/margins": 36.296791076660156, |
|
"rewards/rejected": -50.68721389770508, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 18.75305375047271, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": 5696.08154296875, |
|
"logits/rejected": 5191.6025390625, |
|
"logps/chosen": -252.3257293701172, |
|
"logps/rejected": -268.64801025390625, |
|
"loss": 1.3451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.379720687866211, |
|
"rewards/margins": 39.44649887084961, |
|
"rewards/rejected": -51.82622146606445, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9340659340659341, |
|
"grad_norm": 14.426773906657814, |
|
"learning_rate": 6.599015837372907e-09, |
|
"logits/chosen": 6177.75537109375, |
|
"logits/rejected": 5415.826171875, |
|
"logps/chosen": -269.7903747558594, |
|
"logps/rejected": -276.715576171875, |
|
"loss": 1.3386, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": -20.904890060424805, |
|
"rewards/margins": 38.65822219848633, |
|
"rewards/rejected": -59.5631103515625, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 15.760226868571879, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": 6390.10302734375, |
|
"logits/rejected": 5463.6162109375, |
|
"logps/chosen": -267.0502014160156, |
|
"logps/rejected": -277.47808837890625, |
|
"loss": 1.3385, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -11.388493537902832, |
|
"rewards/margins": 47.42402267456055, |
|
"rewards/rejected": -58.81251907348633, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9497645211930926, |
|
"grad_norm": 29.730772203455786, |
|
"learning_rate": 3.837833803870177e-09, |
|
"logits/chosen": 5976.55224609375, |
|
"logits/rejected": 5252.8037109375, |
|
"logps/chosen": -253.4025115966797, |
|
"logps/rejected": -275.3264465332031, |
|
"loss": 1.3459, |
|
"rewards/accuracies": 0.7750000953674316, |
|
"rewards/chosen": -13.217000007629395, |
|
"rewards/margins": 43.908164978027344, |
|
"rewards/rejected": -57.125160217285156, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 17.84856218528166, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": 7450.5419921875, |
|
"logits/rejected": 5507.4033203125, |
|
"logps/chosen": -313.83624267578125, |
|
"logps/rejected": -304.4243469238281, |
|
"loss": 1.3486, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -17.067832946777344, |
|
"rewards/margins": 42.159278869628906, |
|
"rewards/rejected": -59.22711181640625, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9654631083202512, |
|
"grad_norm": 19.74856745242947, |
|
"learning_rate": 1.8164324970625645e-09, |
|
"logits/chosen": 6633.40478515625, |
|
"logits/rejected": 5254.0, |
|
"logps/chosen": -270.46966552734375, |
|
"logps/rejected": -267.3912048339844, |
|
"loss": 1.3434, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -9.887968063354492, |
|
"rewards/margins": 44.506534576416016, |
|
"rewards/rejected": -54.394500732421875, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 12.427120458275336, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": 5715.10546875, |
|
"logits/rejected": 5259.88232421875, |
|
"logps/chosen": -249.3816680908203, |
|
"logps/rejected": -292.0200500488281, |
|
"loss": 1.3377, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -17.769298553466797, |
|
"rewards/margins": 39.09291076660156, |
|
"rewards/rejected": -56.862205505371094, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9811616954474097, |
|
"grad_norm": 12.98993462559583, |
|
"learning_rate": 5.408867486384471e-10, |
|
"logits/chosen": 5827.32421875, |
|
"logits/rejected": 4937.1123046875, |
|
"logps/chosen": -239.4810333251953, |
|
"logps/rejected": -234.88510131835938, |
|
"loss": 1.3445, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -9.172881126403809, |
|
"rewards/margins": 36.28196716308594, |
|
"rewards/rejected": -45.4548454284668, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 16.16222617431415, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": 5506.1591796875, |
|
"logits/rejected": 5235.78662109375, |
|
"logps/chosen": -246.6016082763672, |
|
"logps/rejected": -265.4342956542969, |
|
"loss": 1.3381, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -14.441003799438477, |
|
"rewards/margins": 34.66820526123047, |
|
"rewards/rejected": -49.10921096801758, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9968602825745683, |
|
"grad_norm": 19.003423412523194, |
|
"learning_rate": 1.502990218302247e-11, |
|
"logits/chosen": 5780.91015625, |
|
"logits/rejected": 4716.0341796875, |
|
"logps/chosen": -237.00357055664062, |
|
"logps/rejected": -240.70358276367188, |
|
"loss": 1.3392, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -12.52961540222168, |
|
"rewards/margins": 41.86973571777344, |
|
"rewards/rejected": -54.39934539794922, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1274, |
|
"total_flos": 0.0, |
|
"train_loss": 1.3517364699574805, |
|
"train_runtime": 14845.1399, |
|
"train_samples_per_second": 4.118, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1274, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|