|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998751404669747, |
|
"eval_steps": 1000, |
|
"global_step": 4004, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 1.2468827930174565e-08, |
|
"logits/chosen": -2.4102063179016113, |
|
"logits/rejected": -2.672837734222412, |
|
"logps/chosen": -21.34674835205078, |
|
"logps/rejected": -42.586097717285156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.2468827930174566e-07, |
|
"logits/chosen": -2.239577293395996, |
|
"logits/rejected": -2.476416826248169, |
|
"logps/chosen": -21.881580352783203, |
|
"logps/rejected": -54.84682083129883, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.00018471028306521475, |
|
"rewards/margins": 0.00028743690927512944, |
|
"rewards/rejected": -0.00010272659710608423, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 2.493765586034913e-07, |
|
"logits/chosen": -2.163784980773926, |
|
"logits/rejected": -2.405578136444092, |
|
"logps/chosen": -21.341472625732422, |
|
"logps/rejected": -55.192710876464844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.000193244923138991, |
|
"rewards/margins": 0.0009195079328492284, |
|
"rewards/rejected": -0.0007262630388140678, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 3.7406483790523695e-07, |
|
"logits/chosen": -2.0837199687957764, |
|
"logits/rejected": -2.361438274383545, |
|
"logps/chosen": -21.834430694580078, |
|
"logps/rejected": -51.4864501953125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 6.211231811903417e-05, |
|
"rewards/margins": 0.000506018113810569, |
|
"rewards/rejected": -0.00044390588300302625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 4.987531172069826e-07, |
|
"logits/chosen": -2.088737726211548, |
|
"logits/rejected": -2.3435609340667725, |
|
"logps/chosen": -22.16689682006836, |
|
"logps/rejected": -55.5480842590332, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0003017825947608799, |
|
"rewards/margins": 0.001477475045248866, |
|
"rewards/rejected": -0.0011756925377994776, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 6.234413965087283e-07, |
|
"logits/chosen": -2.1819872856140137, |
|
"logits/rejected": -2.480788469314575, |
|
"logps/chosen": -22.51431655883789, |
|
"logps/rejected": -58.81789016723633, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0005659356247633696, |
|
"rewards/margins": 0.00293327565304935, |
|
"rewards/rejected": -0.0023673397954553366, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 7.481296758104739e-07, |
|
"logits/chosen": -2.13201642036438, |
|
"logits/rejected": -2.3695003986358643, |
|
"logps/chosen": -22.39255142211914, |
|
"logps/rejected": -57.26397705078125, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0013241939013823867, |
|
"rewards/margins": 0.0052015529945492744, |
|
"rewards/rejected": -0.0038773592095822096, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 8.728179551122195e-07, |
|
"logits/chosen": -2.2473509311676025, |
|
"logits/rejected": -2.4891819953918457, |
|
"logps/chosen": -21.353261947631836, |
|
"logps/rejected": -50.47459411621094, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0020654837135225534, |
|
"rewards/margins": 0.00875779427587986, |
|
"rewards/rejected": -0.0066923112608492374, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 9.975062344139653e-07, |
|
"logits/chosen": -2.0938005447387695, |
|
"logits/rejected": -2.3415422439575195, |
|
"logps/chosen": -21.880977630615234, |
|
"logps/rejected": -55.579505920410156, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004472785629332066, |
|
"rewards/margins": 0.016874177381396294, |
|
"rewards/rejected": -0.012401392683386803, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 1.1221945137157108e-06, |
|
"logits/chosen": -2.2169880867004395, |
|
"logits/rejected": -2.43099308013916, |
|
"logps/chosen": -21.09603500366211, |
|
"logps/rejected": -52.89866256713867, |
|
"loss": 0.681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007039895746856928, |
|
"rewards/margins": 0.02454659901559353, |
|
"rewards/rejected": -0.017506705597043037, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.2468827930174565e-06, |
|
"logits/chosen": -2.079484701156616, |
|
"logits/rejected": -2.3457648754119873, |
|
"logps/chosen": -20.769153594970703, |
|
"logps/rejected": -60.0728645324707, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010782149620354176, |
|
"rewards/margins": 0.03612237051129341, |
|
"rewards/rejected": -0.02534021995961666, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.3715710723192023e-06, |
|
"logits/chosen": -2.1157209873199463, |
|
"logits/rejected": -2.373112440109253, |
|
"logps/chosen": -20.276954650878906, |
|
"logps/rejected": -56.75362014770508, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016899898648262024, |
|
"rewards/margins": 0.055193256586790085, |
|
"rewards/rejected": -0.03829335793852806, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.4962593516209478e-06, |
|
"logits/chosen": -2.0827651023864746, |
|
"logits/rejected": -2.3430802822113037, |
|
"logps/chosen": -20.093006134033203, |
|
"logps/rejected": -55.97692108154297, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02318699099123478, |
|
"rewards/margins": 0.07196114957332611, |
|
"rewards/rejected": -0.04877415671944618, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 1.6209476309226935e-06, |
|
"logits/chosen": -2.2666783332824707, |
|
"logits/rejected": -2.533306837081909, |
|
"logps/chosen": -18.66347885131836, |
|
"logps/rejected": -60.43751907348633, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03544957563281059, |
|
"rewards/margins": 0.10495258867740631, |
|
"rewards/rejected": -0.06950302422046661, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.745635910224439e-06, |
|
"logits/chosen": -2.132418155670166, |
|
"logits/rejected": -2.3867433071136475, |
|
"logps/chosen": -16.58405876159668, |
|
"logps/rejected": -65.14090728759766, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.052351079881191254, |
|
"rewards/margins": 0.1396940052509308, |
|
"rewards/rejected": -0.08734293282032013, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.8703241895261848e-06, |
|
"logits/chosen": -2.2100329399108887, |
|
"logits/rejected": -2.446981906890869, |
|
"logps/chosen": -14.807760238647461, |
|
"logps/rejected": -61.867767333984375, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07087540626525879, |
|
"rewards/margins": 0.17729689180850983, |
|
"rewards/rejected": -0.10642149299383163, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.9950124688279305e-06, |
|
"logits/chosen": -2.2164454460144043, |
|
"logits/rejected": -2.4408745765686035, |
|
"logps/chosen": -12.931081771850586, |
|
"logps/rejected": -65.07794189453125, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09038490056991577, |
|
"rewards/margins": 0.2332114726305008, |
|
"rewards/rejected": -0.14282655715942383, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 2.119700748129676e-06, |
|
"logits/chosen": -2.3733327388763428, |
|
"logits/rejected": -2.637248992919922, |
|
"logps/chosen": -10.747810363769531, |
|
"logps/rejected": -66.96683502197266, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1092229038476944, |
|
"rewards/margins": 0.2931229770183563, |
|
"rewards/rejected": -0.18390007317066193, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 2.2443890274314216e-06, |
|
"logits/chosen": -2.059483051300049, |
|
"logits/rejected": -2.3210699558258057, |
|
"logps/chosen": -9.155550003051758, |
|
"logps/rejected": -81.08940124511719, |
|
"loss": 0.525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1273214966058731, |
|
"rewards/margins": 0.371276319026947, |
|
"rewards/rejected": -0.24395480751991272, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 2.3690773067331675e-06, |
|
"logits/chosen": -2.1012320518493652, |
|
"logits/rejected": -2.380855083465576, |
|
"logps/chosen": -8.220524787902832, |
|
"logps/rejected": -86.42562866210938, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13605494797229767, |
|
"rewards/margins": 0.4646981358528137, |
|
"rewards/rejected": -0.32864317297935486, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 2.493765586034913e-06, |
|
"logits/chosen": -2.178351879119873, |
|
"logits/rejected": -2.448537826538086, |
|
"logps/chosen": -7.208306312561035, |
|
"logps/rejected": -91.26810455322266, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15183252096176147, |
|
"rewards/margins": 0.5721064805984497, |
|
"rewards/rejected": -0.42027395963668823, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 2.6184538653366586e-06, |
|
"logits/chosen": -2.1630733013153076, |
|
"logits/rejected": -2.411649703979492, |
|
"logps/chosen": -4.617680072784424, |
|
"logps/rejected": -102.69218444824219, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1683187633752823, |
|
"rewards/margins": 0.705622136592865, |
|
"rewards/rejected": -0.5373033881187439, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 2.7431421446384045e-06, |
|
"logits/chosen": -2.1586241722106934, |
|
"logits/rejected": -2.3992838859558105, |
|
"logps/chosen": -3.4560561180114746, |
|
"logps/rejected": -116.10273742675781, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18259310722351074, |
|
"rewards/margins": 0.8416939973831177, |
|
"rewards/rejected": -0.6591008901596069, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 2.86783042394015e-06, |
|
"logits/chosen": -2.1585848331451416, |
|
"logits/rejected": -2.381598949432373, |
|
"logps/chosen": -2.7871248722076416, |
|
"logps/rejected": -130.73558044433594, |
|
"loss": 0.3171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18752917647361755, |
|
"rewards/margins": 0.9992051124572754, |
|
"rewards/rejected": -0.8116759061813354, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.9925187032418956e-06, |
|
"logits/chosen": -2.2422091960906982, |
|
"logits/rejected": -2.491379499435425, |
|
"logps/chosen": -2.960984230041504, |
|
"logps/rejected": -167.91981506347656, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1910472810268402, |
|
"rewards/margins": 1.3611987829208374, |
|
"rewards/rejected": -1.1701514720916748, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 3.117206982543641e-06, |
|
"logits/chosen": -2.1426875591278076, |
|
"logits/rejected": -2.385005474090576, |
|
"logps/chosen": -2.7859835624694824, |
|
"logps/rejected": -208.47451782226562, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1911415457725525, |
|
"rewards/margins": 1.752079963684082, |
|
"rewards/rejected": -1.5609384775161743, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 3.241895261845387e-06, |
|
"logits/chosen": -2.171957492828369, |
|
"logits/rejected": -2.405855894088745, |
|
"logps/chosen": -2.6331560611724854, |
|
"logps/rejected": -266.33441162109375, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18891991674900055, |
|
"rewards/margins": 2.3281731605529785, |
|
"rewards/rejected": -2.1392531394958496, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 3.3665835411471326e-06, |
|
"logits/chosen": -2.136756181716919, |
|
"logits/rejected": -2.3706109523773193, |
|
"logps/chosen": -2.3929905891418457, |
|
"logps/rejected": -311.50164794921875, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19246290624141693, |
|
"rewards/margins": 2.773569107055664, |
|
"rewards/rejected": -2.581106185913086, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 3.491271820448878e-06, |
|
"logits/chosen": -2.04775071144104, |
|
"logits/rejected": -2.256491184234619, |
|
"logps/chosen": -3.071178436279297, |
|
"logps/rejected": -338.50042724609375, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18945232033729553, |
|
"rewards/margins": 3.0333011150360107, |
|
"rewards/rejected": -2.8438491821289062, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.615960099750624e-06, |
|
"logits/chosen": -2.0987088680267334, |
|
"logits/rejected": -2.3304831981658936, |
|
"logps/chosen": -2.6518688201904297, |
|
"logps/rejected": -317.264892578125, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1924947053194046, |
|
"rewards/margins": 2.8529839515686035, |
|
"rewards/rejected": -2.660489082336426, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 3.7406483790523696e-06, |
|
"logits/chosen": -2.090928792953491, |
|
"logits/rejected": -2.331522226333618, |
|
"logps/chosen": -2.140141725540161, |
|
"logps/rejected": -386.000244140625, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19298282265663147, |
|
"rewards/margins": 3.519921064376831, |
|
"rewards/rejected": -3.3269379138946533, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 3.8653366583541155e-06, |
|
"logits/chosen": -2.0327889919281006, |
|
"logits/rejected": -2.2435851097106934, |
|
"logps/chosen": -2.820808172225952, |
|
"logps/rejected": -395.51666259765625, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19103531539440155, |
|
"rewards/margins": 3.636303424835205, |
|
"rewards/rejected": -3.445268154144287, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 3.990024937655861e-06, |
|
"logits/chosen": -2.0326077938079834, |
|
"logits/rejected": -2.2442269325256348, |
|
"logps/chosen": -2.771969795227051, |
|
"logps/rejected": -503.487548828125, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18934586644172668, |
|
"rewards/margins": 4.642784595489502, |
|
"rewards/rejected": -4.453438758850098, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 4.114713216957607e-06, |
|
"logits/chosen": -2.1069068908691406, |
|
"logits/rejected": -2.2923953533172607, |
|
"logps/chosen": -4.020249366760254, |
|
"logps/rejected": -392.1181335449219, |
|
"loss": 0.087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18417596817016602, |
|
"rewards/margins": 3.6148147583007812, |
|
"rewards/rejected": -3.4306392669677734, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 4.239401496259352e-06, |
|
"logits/chosen": -2.040024518966675, |
|
"logits/rejected": -2.235044240951538, |
|
"logps/chosen": -4.0639448165893555, |
|
"logps/rejected": -495.93304443359375, |
|
"loss": 0.07, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17745602130889893, |
|
"rewards/margins": 4.601096153259277, |
|
"rewards/rejected": -4.423640251159668, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 4.364089775561098e-06, |
|
"logits/chosen": -2.1371054649353027, |
|
"logits/rejected": -2.314563274383545, |
|
"logps/chosen": -5.727438449859619, |
|
"logps/rejected": -448.86077880859375, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1613588184118271, |
|
"rewards/margins": 4.152359962463379, |
|
"rewards/rejected": -3.991001605987549, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.488778054862843e-06, |
|
"logits/chosen": -2.1167104244232178, |
|
"logits/rejected": -2.327972412109375, |
|
"logps/chosen": -7.92899227142334, |
|
"logps/rejected": -551.7445068359375, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14445874094963074, |
|
"rewards/margins": 5.136443138122559, |
|
"rewards/rejected": -4.9919843673706055, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.6134663341645895e-06, |
|
"logits/chosen": -2.1294167041778564, |
|
"logits/rejected": -2.320831537246704, |
|
"logps/chosen": -14.308802604675293, |
|
"logps/rejected": -556.9048461914062, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07796537131071091, |
|
"rewards/margins": 5.144980430603027, |
|
"rewards/rejected": -5.067015171051025, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 4.738154613466335e-06, |
|
"logits/chosen": -2.059129238128662, |
|
"logits/rejected": -2.283139705657959, |
|
"logps/chosen": -30.504648208618164, |
|
"logps/rejected": -904.5690307617188, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08387070894241333, |
|
"rewards/margins": 8.345690727233887, |
|
"rewards/rejected": -8.429560661315918, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 4.862842892768081e-06, |
|
"logits/chosen": -2.1258416175842285, |
|
"logits/rejected": -2.3157804012298584, |
|
"logps/chosen": -44.502899169921875, |
|
"logps/rejected": -788.58056640625, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2255050241947174, |
|
"rewards/margins": 7.145709037780762, |
|
"rewards/rejected": -7.371213436126709, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 4.987531172069826e-06, |
|
"logits/chosen": -2.0560178756713867, |
|
"logits/rejected": -2.2473480701446533, |
|
"logps/chosen": -65.08824157714844, |
|
"logps/rejected": -957.4326171875, |
|
"loss": 0.022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4304937422275543, |
|
"rewards/margins": 8.619054794311523, |
|
"rewards/rejected": -9.04954719543457, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 4.999923022460671e-06, |
|
"logits/chosen": -2.0123705863952637, |
|
"logits/rejected": -2.2279648780822754, |
|
"logps/chosen": -74.45851135253906, |
|
"logps/rejected": -1195.67138671875, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5188314318656921, |
|
"rewards/margins": 10.849455833435059, |
|
"rewards/rejected": -11.368288040161133, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 4.999656933348981e-06, |
|
"logits/chosen": -2.234529972076416, |
|
"logits/rejected": -2.406409740447998, |
|
"logps/chosen": -88.35977935791016, |
|
"logps/rejected": -900.6641845703125, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6683001518249512, |
|
"rewards/margins": 7.860695838928223, |
|
"rewards/rejected": -8.528995513916016, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.005340576171875, |
|
"learning_rate": 4.99920080255011e-06, |
|
"logits/chosen": -2.054624080657959, |
|
"logits/rejected": -2.283973217010498, |
|
"logps/chosen": -83.51972198486328, |
|
"logps/rejected": -1245.379150390625, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6176443099975586, |
|
"rewards/margins": 11.277586936950684, |
|
"rewards/rejected": -11.895231246948242, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5, |
|
"learning_rate": 4.998554664742362e-06, |
|
"logits/chosen": -2.136657476425171, |
|
"logits/rejected": -2.320204973220825, |
|
"logps/chosen": -88.63077545166016, |
|
"logps/rejected": -1043.793701171875, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6717870831489563, |
|
"rewards/margins": 9.280545234680176, |
|
"rewards/rejected": -9.95233154296875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 4.997718569049726e-06, |
|
"logits/chosen": -2.074990749359131, |
|
"logits/rejected": -2.277477979660034, |
|
"logps/chosen": -94.67475891113281, |
|
"logps/rejected": -1131.5499267578125, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7206478714942932, |
|
"rewards/margins": 10.060680389404297, |
|
"rewards/rejected": -10.781328201293945, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 4.9966925790381404e-06, |
|
"logits/chosen": -2.1405222415924072, |
|
"logits/rejected": -2.3201403617858887, |
|
"logps/chosen": -73.81873321533203, |
|
"logps/rejected": -1013.6409912109375, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.528481125831604, |
|
"rewards/margins": 9.096994400024414, |
|
"rewards/rejected": -9.625473976135254, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 4.995476772710657e-06, |
|
"logits/chosen": -2.0950608253479004, |
|
"logits/rejected": -2.316931962966919, |
|
"logps/chosen": -103.3821792602539, |
|
"logps/rejected": -1335.799560546875, |
|
"loss": 0.021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8135234117507935, |
|
"rewards/margins": 11.993521690368652, |
|
"rewards/rejected": -12.807044982910156, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.994071242501516e-06, |
|
"logits/chosen": -2.185049057006836, |
|
"logits/rejected": -2.3854544162750244, |
|
"logps/chosen": -70.46857452392578, |
|
"logps/rejected": -1074.28271484375, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48124104738235474, |
|
"rewards/margins": 9.766096115112305, |
|
"rewards/rejected": -10.247336387634277, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.8967857360839844e-05, |
|
"learning_rate": 4.992476095269112e-06, |
|
"logits/chosen": -2.1872477531433105, |
|
"logits/rejected": -2.3788347244262695, |
|
"logps/chosen": -60.58274459838867, |
|
"logps/rejected": -1168.738037109375, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3954273462295532, |
|
"rewards/margins": 10.741006851196289, |
|
"rewards/rejected": -11.136434555053711, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.990691452287877e-06, |
|
"logits/chosen": -2.034578800201416, |
|
"logits/rejected": -2.228356122970581, |
|
"logps/chosen": -70.90155029296875, |
|
"logps/rejected": -1098.248779296875, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.49016299843788147, |
|
"rewards/margins": 9.965959548950195, |
|
"rewards/rejected": -10.456122398376465, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 4.988717449239056e-06, |
|
"logits/chosen": -2.086670398712158, |
|
"logits/rejected": -2.27720046043396, |
|
"logps/chosen": -79.89573669433594, |
|
"logps/rejected": -1124.399658203125, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5849625468254089, |
|
"rewards/margins": 10.160270690917969, |
|
"rewards/rejected": -10.745233535766602, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.0245361328125, |
|
"learning_rate": 4.98655423620039e-06, |
|
"logits/chosen": -2.119935989379883, |
|
"logits/rejected": -2.3267104625701904, |
|
"logps/chosen": -77.09886169433594, |
|
"logps/rejected": -1248.1905517578125, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5508411526679993, |
|
"rewards/margins": 11.388254165649414, |
|
"rewards/rejected": -11.939095497131348, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.01080322265625, |
|
"learning_rate": 4.984201977634711e-06, |
|
"logits/chosen": -2.213916301727295, |
|
"logits/rejected": -2.4463677406311035, |
|
"logps/chosen": -90.18511199951172, |
|
"logps/rejected": -1377.499755859375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6767342686653137, |
|
"rewards/margins": 12.541799545288086, |
|
"rewards/rejected": -13.218534469604492, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 4.9816608523774345e-06, |
|
"logits/chosen": -2.105821132659912, |
|
"logits/rejected": -2.3127095699310303, |
|
"logps/chosen": -79.32666015625, |
|
"logps/rejected": -1143.241943359375, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5732973217964172, |
|
"rewards/margins": 10.353887557983398, |
|
"rewards/rejected": -10.927184104919434, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 4.978931053622964e-06, |
|
"logits/chosen": -2.1495628356933594, |
|
"logits/rejected": -2.370626449584961, |
|
"logps/chosen": -78.36927795410156, |
|
"logps/rejected": -1290.0146484375, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5716887712478638, |
|
"rewards/margins": 11.808379173278809, |
|
"rewards/rejected": -12.380067825317383, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0003566741943359375, |
|
"learning_rate": 4.9760127889100044e-06, |
|
"logits/chosen": -2.1675076484680176, |
|
"logits/rejected": -2.3700671195983887, |
|
"logps/chosen": -62.899436950683594, |
|
"logps/rejected": -1186.429443359375, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4144817888736725, |
|
"rewards/margins": 10.914512634277344, |
|
"rewards/rejected": -11.328994750976562, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 4.972906280105781e-06, |
|
"logits/chosen": -2.0299549102783203, |
|
"logits/rejected": -2.252498149871826, |
|
"logps/chosen": -80.07968139648438, |
|
"logps/rejected": -1246.3656005859375, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5788360238075256, |
|
"rewards/margins": 11.359301567077637, |
|
"rewards/rejected": -11.938138008117676, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0030670166015625, |
|
"learning_rate": 4.969611763389175e-06, |
|
"logits/chosen": -2.19167423248291, |
|
"logits/rejected": -2.402195453643799, |
|
"logps/chosen": -83.22602844238281, |
|
"logps/rejected": -1125.298095703125, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6159827709197998, |
|
"rewards/margins": 10.143194198608398, |
|
"rewards/rejected": -10.759176254272461, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 4.966129489232762e-06, |
|
"logits/chosen": -2.1329731941223145, |
|
"logits/rejected": -2.375246286392212, |
|
"logps/chosen": -77.91215515136719, |
|
"logps/rejected": -1410.508056640625, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5520095825195312, |
|
"rewards/margins": 12.985345840454102, |
|
"rewards/rejected": -13.537355422973633, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.962459722383775e-06, |
|
"logits/chosen": -2.0712943077087402, |
|
"logits/rejected": -2.288693428039551, |
|
"logps/chosen": -73.90785217285156, |
|
"logps/rejected": -1434.8343505859375, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5121776461601257, |
|
"rewards/margins": 13.254251480102539, |
|
"rewards/rejected": -13.766427993774414, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.000926971435546875, |
|
"learning_rate": 4.958602741843975e-06, |
|
"logits/chosen": -2.0742838382720947, |
|
"logits/rejected": -2.333592176437378, |
|
"logps/chosen": -74.49140167236328, |
|
"logps/rejected": -1376.306396484375, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5260452032089233, |
|
"rewards/margins": 12.687009811401367, |
|
"rewards/rejected": -13.213055610656738, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 4.954558840848437e-06, |
|
"logits/chosen": -2.213879346847534, |
|
"logits/rejected": -2.4216055870056152, |
|
"logps/chosen": -65.52778625488281, |
|
"logps/rejected": -1092.6256103515625, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43167224526405334, |
|
"rewards/margins": 10.010717391967773, |
|
"rewards/rejected": -10.442389488220215, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 4.950328326843258e-06, |
|
"logits/chosen": -2.0717647075653076, |
|
"logits/rejected": -2.3038885593414307, |
|
"logps/chosen": -59.320228576660156, |
|
"logps/rejected": -1350.045654296875, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37677788734436035, |
|
"rewards/margins": 12.551568984985352, |
|
"rewards/rejected": -12.92834758758545, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.00848388671875, |
|
"learning_rate": 4.945911521462182e-06, |
|
"logits/chosen": -2.2182841300964355, |
|
"logits/rejected": -2.4369709491729736, |
|
"logps/chosen": -66.91700744628906, |
|
"logps/rejected": -1338.757080078125, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45198917388916016, |
|
"rewards/margins": 12.385056495666504, |
|
"rewards/rejected": -12.837045669555664, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.941308760502149e-06, |
|
"logits/chosen": -2.2334372997283936, |
|
"logits/rejected": -2.4091877937316895, |
|
"logps/chosen": -77.1009292602539, |
|
"logps/rejected": -1184.353759765625, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5503292083740234, |
|
"rewards/margins": 10.71965217590332, |
|
"rewards/rejected": -11.26998233795166, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 4.936520393897762e-06, |
|
"logits/chosen": -2.174837589263916, |
|
"logits/rejected": -2.3993821144104004, |
|
"logps/chosen": -66.07880401611328, |
|
"logps/rejected": -1318.9051513671875, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43549758195877075, |
|
"rewards/margins": 12.193208694458008, |
|
"rewards/rejected": -12.628705978393555, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0062255859375, |
|
"learning_rate": 4.931546785694684e-06, |
|
"logits/chosen": -2.2091901302337646, |
|
"logits/rejected": -2.44826078414917, |
|
"logps/chosen": -83.01612091064453, |
|
"logps/rejected": -1474.8822021484375, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6095518469810486, |
|
"rewards/margins": 13.620869636535645, |
|
"rewards/rejected": -14.230420112609863, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 4.926388314021964e-06, |
|
"logits/chosen": -2.2539894580841064, |
|
"logits/rejected": -2.4782536029815674, |
|
"logps/chosen": -97.8957748413086, |
|
"logps/rejected": -1248.599365234375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7543063759803772, |
|
"rewards/margins": 11.235261917114258, |
|
"rewards/rejected": -11.989568710327148, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.00060272216796875, |
|
"learning_rate": 4.921045371063283e-06, |
|
"logits/chosen": -2.241508960723877, |
|
"logits/rejected": -2.45992112159729, |
|
"logps/chosen": -75.34230041503906, |
|
"logps/rejected": -1410.6185302734375, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5371032953262329, |
|
"rewards/margins": 13.022886276245117, |
|
"rewards/rejected": -13.559989929199219, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 4.915518363027142e-06, |
|
"logits/chosen": -2.3091747760772705, |
|
"logits/rejected": -2.516079902648926, |
|
"logps/chosen": -77.0430679321289, |
|
"logps/rejected": -1162.167236328125, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5587199926376343, |
|
"rewards/margins": 10.586333274841309, |
|
"rewards/rejected": -11.145052909851074, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 4.909807710115977e-06, |
|
"logits/chosen": -2.06872820854187, |
|
"logits/rejected": -2.280989170074463, |
|
"logps/chosen": -57.82390213012695, |
|
"logps/rejected": -1309.2158203125, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36199483275413513, |
|
"rewards/margins": 12.196971893310547, |
|
"rewards/rejected": -12.558965682983398, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 4.903913846494211e-06, |
|
"logits/chosen": -2.057790994644165, |
|
"logits/rejected": -2.2983431816101074, |
|
"logps/chosen": -63.02252960205078, |
|
"logps/rejected": -1628.169677734375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4083371162414551, |
|
"rewards/margins": 15.24769115447998, |
|
"rewards/rejected": -15.656028747558594, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 4.897837220255251e-06, |
|
"logits/chosen": -2.101783275604248, |
|
"logits/rejected": -2.2945144176483154, |
|
"logps/chosen": -62.76520538330078, |
|
"logps/rejected": -1316.2451171875, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4040141999721527, |
|
"rewards/margins": 12.210726737976074, |
|
"rewards/rejected": -12.614742279052734, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 4.891578293387413e-06, |
|
"logits/chosen": -2.183640241622925, |
|
"logits/rejected": -2.3983137607574463, |
|
"logps/chosen": -72.8852767944336, |
|
"logps/rejected": -1332.2720947265625, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5061370134353638, |
|
"rewards/margins": 12.295533180236816, |
|
"rewards/rejected": -12.801671028137207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 4.885137541738808e-06, |
|
"logits/chosen": -2.1432900428771973, |
|
"logits/rejected": -2.3399500846862793, |
|
"logps/chosen": -52.877479553222656, |
|
"logps/rejected": -1188.441650390625, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3169303238391876, |
|
"rewards/margins": 11.048254013061523, |
|
"rewards/rejected": -11.365182876586914, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.086162567138672e-05, |
|
"learning_rate": 4.878515454981153e-06, |
|
"logits/chosen": -2.013054370880127, |
|
"logits/rejected": -2.238393783569336, |
|
"logps/chosen": -60.31416702270508, |
|
"logps/rejected": -1504.335693359375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38582319021224976, |
|
"rewards/margins": 14.089462280273438, |
|
"rewards/rejected": -14.475286483764648, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 4.8717125365725545e-06, |
|
"logits/chosen": -2.2411911487579346, |
|
"logits/rejected": -2.4217007160186768, |
|
"logps/chosen": -71.92973327636719, |
|
"logps/rejected": -1072.138427734375, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4977429509162903, |
|
"rewards/margins": 9.747591018676758, |
|
"rewards/rejected": -10.245333671569824, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 4.864729303719221e-06, |
|
"logits/chosen": -2.183976650238037, |
|
"logits/rejected": -2.4096364974975586, |
|
"logps/chosen": -75.01698303222656, |
|
"logps/rejected": -1566.1385498046875, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5285240411758423, |
|
"rewards/margins": 14.545486450195312, |
|
"rewards/rejected": -15.074010848999023, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 4.857566287336152e-06, |
|
"logits/chosen": -2.1151528358459473, |
|
"logits/rejected": -2.352687358856201, |
|
"logps/chosen": -81.51287841796875, |
|
"logps/rejected": -1503.718505859375, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5906480550765991, |
|
"rewards/margins": 13.887521743774414, |
|
"rewards/rejected": -14.478169441223145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.000972747802734375, |
|
"learning_rate": 4.850224032006765e-06, |
|
"logits/chosen": -2.2330470085144043, |
|
"logits/rejected": -2.4612553119659424, |
|
"logps/chosen": -86.96638488769531, |
|
"logps/rejected": -1362.5970458984375, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6454036831855774, |
|
"rewards/margins": 12.467041969299316, |
|
"rewards/rejected": -13.112444877624512, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 4.8427030959414984e-06, |
|
"logits/chosen": -2.0308804512023926, |
|
"logits/rejected": -2.2706708908081055, |
|
"logps/chosen": -80.2787857055664, |
|
"logps/rejected": -1434.3277587890625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5847833752632141, |
|
"rewards/margins": 13.225725173950195, |
|
"rewards/rejected": -13.810508728027344, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 4.835004050935369e-06, |
|
"logits/chosen": -2.134955644607544, |
|
"logits/rejected": -2.338745594024658, |
|
"logps/chosen": -71.83667755126953, |
|
"logps/rejected": -1364.427978515625, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.49762091040611267, |
|
"rewards/margins": 12.594762802124023, |
|
"rewards/rejected": -13.092384338378906, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 4.8271274823245e-06, |
|
"logits/chosen": -2.1413967609405518, |
|
"logits/rejected": -2.343967914581299, |
|
"logps/chosen": -51.91362762451172, |
|
"logps/rejected": -1269.490478515625, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2962699830532074, |
|
"rewards/margins": 11.855816841125488, |
|
"rewards/rejected": -12.152085304260254, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 4.8190739889416264e-06, |
|
"logits/chosen": -2.1291534900665283, |
|
"logits/rejected": -2.3538265228271484, |
|
"logps/chosen": -51.05685043334961, |
|
"logps/rejected": -1429.0635986328125, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28625136613845825, |
|
"rewards/margins": 13.460894584655762, |
|
"rewards/rejected": -13.74714469909668, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 4.810844183070553e-06, |
|
"logits/chosen": -2.2312417030334473, |
|
"logits/rejected": -2.45286226272583, |
|
"logps/chosen": -65.88993072509766, |
|
"logps/rejected": -1232.466064453125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4434642195701599, |
|
"rewards/margins": 11.373895645141602, |
|
"rewards/rejected": -11.817359924316406, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 4.802438690399622e-06, |
|
"logits/chosen": -2.1778035163879395, |
|
"logits/rejected": -2.4104442596435547, |
|
"logps/chosen": -61.74702835083008, |
|
"logps/rejected": -1364.88525390625, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4024318754673004, |
|
"rewards/margins": 12.702482223510742, |
|
"rewards/rejected": -13.104913711547852, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.00174713134765625, |
|
"learning_rate": 4.793858149974129e-06, |
|
"logits/chosen": -2.142401933670044, |
|
"logits/rejected": -2.3973865509033203, |
|
"logps/chosen": -64.52376556396484, |
|
"logps/rejected": -1546.420654296875, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4257756769657135, |
|
"rewards/margins": 14.488656997680664, |
|
"rewards/rejected": -14.91443157196045, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 4.785103214147747e-06, |
|
"logits/chosen": -2.2586052417755127, |
|
"logits/rejected": -2.4925296306610107, |
|
"logps/chosen": -58.837852478027344, |
|
"logps/rejected": -1360.659423828125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3716233968734741, |
|
"rewards/margins": 12.72169017791748, |
|
"rewards/rejected": -13.093313217163086, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.0027008056640625, |
|
"learning_rate": 4.776174548532926e-06, |
|
"logits/chosen": -2.158493757247925, |
|
"logits/rejected": -2.3726634979248047, |
|
"logps/chosen": -60.35230255126953, |
|
"logps/rejected": -1398.0426025390625, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3876059055328369, |
|
"rewards/margins": 13.057897567749023, |
|
"rewards/rejected": -13.445503234863281, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 4.767072831950288e-06, |
|
"logits/chosen": -2.205594539642334, |
|
"logits/rejected": -2.447887420654297, |
|
"logps/chosen": -58.41968536376953, |
|
"logps/rejected": -1474.5992431640625, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36307448148727417, |
|
"rewards/margins": 13.841937065124512, |
|
"rewards/rejected": -14.205012321472168, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 4.7577987563770226e-06, |
|
"logits/chosen": -2.0987536907196045, |
|
"logits/rejected": -2.3415169715881348, |
|
"logps/chosen": -63.24462890625, |
|
"logps/rejected": -1456.5894775390625, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40772971510887146, |
|
"rewards/margins": 13.594934463500977, |
|
"rewards/rejected": -14.002664566040039, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.001953125, |
|
"learning_rate": 4.748353026894273e-06, |
|
"logits/chosen": -2.176764965057373, |
|
"logits/rejected": -2.3934457302093506, |
|
"logps/chosen": -77.48558044433594, |
|
"logps/rejected": -1403.7001953125, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5516784191131592, |
|
"rewards/margins": 12.959416389465332, |
|
"rewards/rejected": -13.51109504699707, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 4.738736361633532e-06, |
|
"logits/chosen": -2.2761058807373047, |
|
"logits/rejected": -2.475376605987549, |
|
"logps/chosen": -69.98649597167969, |
|
"logps/rejected": -1317.3599853515625, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4822467267513275, |
|
"rewards/margins": 12.17310619354248, |
|
"rewards/rejected": -12.655353546142578, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.00396728515625, |
|
"learning_rate": 4.728949491722046e-06, |
|
"logits/chosen": -2.3034911155700684, |
|
"logits/rejected": -2.5063111782073975, |
|
"logps/chosen": -82.03058624267578, |
|
"logps/rejected": -1236.0631103515625, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6094164848327637, |
|
"rewards/margins": 11.258821487426758, |
|
"rewards/rejected": -11.868239402770996, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 4.718993161227231e-06, |
|
"logits/chosen": -2.156198740005493, |
|
"logits/rejected": -2.4342427253723145, |
|
"logps/chosen": -52.259849548339844, |
|
"logps/rejected": -1551.4473876953125, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2965083420276642, |
|
"rewards/margins": 14.673095703125, |
|
"rewards/rejected": -14.9696044921875, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.0185546875, |
|
"learning_rate": 4.708868127100098e-06, |
|
"logits/chosen": -2.225891351699829, |
|
"logits/rejected": -2.446601629257202, |
|
"logps/chosen": -45.30867385864258, |
|
"logps/rejected": -1178.07958984375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24066181480884552, |
|
"rewards/margins": 11.030438423156738, |
|
"rewards/rejected": -11.271100044250488, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.000530242919921875, |
|
"learning_rate": 4.6985751591177075e-06, |
|
"logits/chosen": -2.071913242340088, |
|
"logits/rejected": -2.3076140880584717, |
|
"logps/chosen": -40.99647521972656, |
|
"logps/rejected": -1356.589599609375, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1879800409078598, |
|
"rewards/margins": 12.83641529083252, |
|
"rewards/rejected": -13.024395942687988, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.0279541015625, |
|
"learning_rate": 4.688115039824648e-06, |
|
"logits/chosen": -2.138272523880005, |
|
"logits/rejected": -2.3490092754364014, |
|
"logps/chosen": -39.265869140625, |
|
"logps/rejected": -1271.980712890625, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17672276496887207, |
|
"rewards/margins": 12.01789665222168, |
|
"rewards/rejected": -12.194620132446289, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.96453857421875e-05, |
|
"learning_rate": 4.677488564473535e-06, |
|
"logits/chosen": -2.0846240520477295, |
|
"logits/rejected": -2.3261351585388184, |
|
"logps/chosen": -54.3425178527832, |
|
"logps/rejected": -1450.612548828125, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32301291823387146, |
|
"rewards/margins": 13.639193534851074, |
|
"rewards/rejected": -13.96220588684082, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 4.666696540964556e-06, |
|
"logits/chosen": -2.2266921997070312, |
|
"logits/rejected": -2.44096040725708, |
|
"logps/chosen": -60.16071701049805, |
|
"logps/rejected": -1275.765380859375, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3776375949382782, |
|
"rewards/margins": 11.887288093566895, |
|
"rewards/rejected": -12.264925003051758, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/chosen": -2.585369348526001, |
|
"eval_logits/rejected": -2.6955134868621826, |
|
"eval_logps/chosen": -101.94501495361328, |
|
"eval_logps/rejected": -625.497314453125, |
|
"eval_loss": 0.01159477885812521, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.7603151202201843, |
|
"eval_rewards/margins": 5.045938014984131, |
|
"eval_rewards/rejected": -5.806252956390381, |
|
"eval_runtime": 0.6566, |
|
"eval_samples_per_second": 7.615, |
|
"eval_steps_per_second": 4.569, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.000972747802734375, |
|
"learning_rate": 4.6557397897840454e-06, |
|
"logits/chosen": -2.227430820465088, |
|
"logits/rejected": -2.466034412384033, |
|
"logps/chosen": -49.18635940551758, |
|
"logps/rejected": -1351.74853515625, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27514129877090454, |
|
"rewards/margins": 12.695302963256836, |
|
"rewards/rejected": -12.970443725585938, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0011138916015625, |
|
"learning_rate": 4.644619143942108e-06, |
|
"logits/chosen": -2.2175045013427734, |
|
"logits/rejected": -2.4644241333007812, |
|
"logps/chosen": -38.532127380371094, |
|
"logps/rejected": -1415.847412109375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16876588761806488, |
|
"rewards/margins": 13.413320541381836, |
|
"rewards/rejected": -13.582087516784668, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 4.633335448909284e-06, |
|
"logits/chosen": -2.0612175464630127, |
|
"logits/rejected": -2.274484157562256, |
|
"logps/chosen": -39.870052337646484, |
|
"logps/rejected": -1330.5323486328125, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17578956484794617, |
|
"rewards/margins": 12.581718444824219, |
|
"rewards/rejected": -12.75750732421875, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 4.621889562552272e-06, |
|
"logits/chosen": -2.163442850112915, |
|
"logits/rejected": -2.4233555793762207, |
|
"logps/chosen": -62.47473907470703, |
|
"logps/rejected": -1504.832763671875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40125980973243713, |
|
"rewards/margins": 14.0916748046875, |
|
"rewards/rejected": -14.492935180664062, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.004669189453125, |
|
"learning_rate": 4.610282355068707e-06, |
|
"logits/chosen": -2.2863821983337402, |
|
"logits/rejected": -2.5355706214904785, |
|
"logps/chosen": -59.4514274597168, |
|
"logps/rejected": -1562.39013671875, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36575835943222046, |
|
"rewards/margins": 14.687586784362793, |
|
"rewards/rejected": -15.053342819213867, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 4.598514708921006e-06, |
|
"logits/chosen": -2.262545108795166, |
|
"logits/rejected": -2.510559558868408, |
|
"logps/chosen": -55.71985626220703, |
|
"logps/rejected": -1498.1640625, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3440173268318176, |
|
"rewards/margins": 14.074090957641602, |
|
"rewards/rejected": -14.418106079101562, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.00142669677734375, |
|
"learning_rate": 4.5865875187692695e-06, |
|
"logits/chosen": -2.2046749591827393, |
|
"logits/rejected": -2.423334836959839, |
|
"logps/chosen": -48.60809326171875, |
|
"logps/rejected": -1244.1680908203125, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2774764597415924, |
|
"rewards/margins": 11.670097351074219, |
|
"rewards/rejected": -11.9475736618042, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 4.57450169140327e-06, |
|
"logits/chosen": -2.0672097206115723, |
|
"logits/rejected": -2.3301241397857666, |
|
"logps/chosen": -47.35566711425781, |
|
"logps/rejected": -1535.633544921875, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25116461515426636, |
|
"rewards/margins": 14.5623779296875, |
|
"rewards/rejected": -14.813543319702148, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 4.562258145673507e-06, |
|
"logits/chosen": -2.2260966300964355, |
|
"logits/rejected": -2.4950501918792725, |
|
"logps/chosen": -40.86091613769531, |
|
"logps/rejected": -1499.596435546875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18965426087379456, |
|
"rewards/margins": 14.258898735046387, |
|
"rewards/rejected": -14.448553085327148, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.01068115234375, |
|
"learning_rate": 4.549857812421353e-06, |
|
"logits/chosen": -2.14607572555542, |
|
"logits/rejected": -2.3866307735443115, |
|
"logps/chosen": -44.410030364990234, |
|
"logps/rejected": -1332.9017333984375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23083043098449707, |
|
"rewards/margins": 12.585546493530273, |
|
"rewards/rejected": -12.816377639770508, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 4.537301634408281e-06, |
|
"logits/chosen": -2.169417142868042, |
|
"logits/rejected": -2.4057748317718506, |
|
"logps/chosen": -44.095577239990234, |
|
"logps/rejected": -1315.9925537109375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2211724817752838, |
|
"rewards/margins": 12.42273235321045, |
|
"rewards/rejected": -12.643904685974121, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 4.52459056624419e-06, |
|
"logits/chosen": -2.217676877975464, |
|
"logits/rejected": -2.4193835258483887, |
|
"logps/chosen": -46.805503845214844, |
|
"logps/rejected": -1376.5738525390625, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2504929006099701, |
|
"rewards/margins": 12.966726303100586, |
|
"rewards/rejected": -13.217218399047852, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.12451171875, |
|
"learning_rate": 4.51172557431483e-06, |
|
"logits/chosen": -2.1065962314605713, |
|
"logits/rejected": -2.3267951011657715, |
|
"logps/chosen": -61.67560958862305, |
|
"logps/rejected": -1427.5928955078125, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39770394563674927, |
|
"rewards/margins": 13.320897102355957, |
|
"rewards/rejected": -13.718599319458008, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 4.49870763670833e-06, |
|
"logits/chosen": -2.1609268188476562, |
|
"logits/rejected": -2.4237403869628906, |
|
"logps/chosen": -55.2051887512207, |
|
"logps/rejected": -1529.240966796875, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3337005376815796, |
|
"rewards/margins": 14.417539596557617, |
|
"rewards/rejected": -14.751240730285645, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 4.4855377431408335e-06, |
|
"logits/chosen": -2.152674436569214, |
|
"logits/rejected": -2.3682188987731934, |
|
"logps/chosen": -57.719703674316406, |
|
"logps/rejected": -1428.2706298828125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3568641245365143, |
|
"rewards/margins": 13.377996444702148, |
|
"rewards/rejected": -13.73486042022705, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 4.472216894881261e-06, |
|
"logits/chosen": -2.146556854248047, |
|
"logits/rejected": -2.361703872680664, |
|
"logps/chosen": -56.385284423828125, |
|
"logps/rejected": -1308.488525390625, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34260934591293335, |
|
"rewards/margins": 12.229646682739258, |
|
"rewards/rejected": -12.572256088256836, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 4.4587461046751815e-06, |
|
"logits/chosen": -2.1846487522125244, |
|
"logits/rejected": -2.4170939922332764, |
|
"logps/chosen": -47.7278938293457, |
|
"logps/rejected": -1271.083740234375, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2519899308681488, |
|
"rewards/margins": 11.94546890258789, |
|
"rewards/rejected": -12.197460174560547, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0247802734375, |
|
"learning_rate": 4.44512639666781e-06, |
|
"logits/chosen": -2.1769089698791504, |
|
"logits/rejected": -2.394580602645874, |
|
"logps/chosen": -61.13446044921875, |
|
"logps/rejected": -1223.753662109375, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3933146893978119, |
|
"rewards/margins": 11.356694221496582, |
|
"rewards/rejected": -11.75001049041748, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 4.431358806326158e-06, |
|
"logits/chosen": -2.1201298236846924, |
|
"logits/rejected": -2.3456811904907227, |
|
"logps/chosen": -81.98688507080078, |
|
"logps/rejected": -1611.583984375, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6020825505256653, |
|
"rewards/margins": 14.962076187133789, |
|
"rewards/rejected": -15.56415843963623, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 4.4174443803603e-06, |
|
"logits/chosen": -2.204873561859131, |
|
"logits/rejected": -2.4108097553253174, |
|
"logps/chosen": -82.76813507080078, |
|
"logps/rejected": -1430.592041015625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6107802391052246, |
|
"rewards/margins": 13.188611030578613, |
|
"rewards/rejected": -13.79939079284668, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 4.4033841766438e-06, |
|
"logits/chosen": -2.178987503051758, |
|
"logits/rejected": -2.39570689201355, |
|
"logps/chosen": -57.776702880859375, |
|
"logps/rejected": -1284.997802734375, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3557528853416443, |
|
"rewards/margins": 11.97689151763916, |
|
"rewards/rejected": -12.33264446258545, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.004180908203125, |
|
"learning_rate": 4.389179264133281e-06, |
|
"logits/chosen": -2.260874032974243, |
|
"logits/rejected": -2.495485305786133, |
|
"logps/chosen": -35.43501663208008, |
|
"logps/rejected": -1262.712890625, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1415407359600067, |
|
"rewards/margins": 11.985517501831055, |
|
"rewards/rejected": -12.127059936523438, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.374830722787159e-06, |
|
"logits/chosen": -2.265794277191162, |
|
"logits/rejected": -2.539062976837158, |
|
"logps/chosen": -40.56992721557617, |
|
"logps/rejected": -1323.746826171875, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.184851735830307, |
|
"rewards/margins": 12.533957481384277, |
|
"rewards/rejected": -12.718809127807617, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 4.360339643483533e-06, |
|
"logits/chosen": -2.2265820503234863, |
|
"logits/rejected": -2.4537243843078613, |
|
"logps/chosen": -40.92462921142578, |
|
"logps/rejected": -1421.2384033203125, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19266225397586823, |
|
"rewards/margins": 13.46106243133545, |
|
"rewards/rejected": -13.653724670410156, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.00179290771484375, |
|
"learning_rate": 4.345707127937253e-06, |
|
"logits/chosen": -2.136321544647217, |
|
"logits/rejected": -2.4158737659454346, |
|
"logps/chosen": -47.67406463623047, |
|
"logps/rejected": -1579.249267578125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2534434497356415, |
|
"rewards/margins": 15.00439453125, |
|
"rewards/rejected": -15.257838249206543, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 4.330934288616154e-06, |
|
"logits/chosen": -2.168765068054199, |
|
"logits/rejected": -2.4067187309265137, |
|
"logps/chosen": -62.91276931762695, |
|
"logps/rejected": -1362.3446044921875, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40555182099342346, |
|
"rewards/margins": 12.703886032104492, |
|
"rewards/rejected": -13.10943603515625, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 4.316022248656485e-06, |
|
"logits/chosen": -2.1002354621887207, |
|
"logits/rejected": -2.365851402282715, |
|
"logps/chosen": -53.953285217285156, |
|
"logps/rejected": -1585.8782958984375, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32181739807128906, |
|
"rewards/margins": 14.933857917785645, |
|
"rewards/rejected": -15.255674362182617, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 4.3009721417775166e-06, |
|
"logits/chosen": -2.1251707077026367, |
|
"logits/rejected": -2.363041639328003, |
|
"logps/chosen": -58.41363525390625, |
|
"logps/rejected": -1543.182861328125, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36327052116394043, |
|
"rewards/margins": 14.513801574707031, |
|
"rewards/rejected": -14.87707233428955, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 4.285785112195346e-06, |
|
"logits/chosen": -2.1945090293884277, |
|
"logits/rejected": -2.4488844871520996, |
|
"logps/chosen": -69.85707092285156, |
|
"logps/rejected": -1662.5419921875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47537103295326233, |
|
"rewards/margins": 15.588732719421387, |
|
"rewards/rejected": -16.064102172851562, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 4.27046231453591e-06, |
|
"logits/chosen": -2.1391608715057373, |
|
"logits/rejected": -2.379563808441162, |
|
"logps/chosen": -63.22686004638672, |
|
"logps/rejected": -1555.8231201171875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41675299406051636, |
|
"rewards/margins": 14.574475288391113, |
|
"rewards/rejected": -14.991228103637695, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.682209014892578e-06, |
|
"learning_rate": 4.255004913747196e-06, |
|
"logits/chosen": -2.1814258098602295, |
|
"logits/rejected": -2.415797710418701, |
|
"logps/chosen": -57.22446823120117, |
|
"logps/rejected": -1578.1937255859375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35579347610473633, |
|
"rewards/margins": 14.885470390319824, |
|
"rewards/rejected": -15.241262435913086, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.0059814453125, |
|
"learning_rate": 4.2394140850106825e-06, |
|
"logits/chosen": -2.1057560443878174, |
|
"logits/rejected": -2.3444247245788574, |
|
"logps/chosen": -60.09722900390625, |
|
"logps/rejected": -1566.6488037109375, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37801143527030945, |
|
"rewards/margins": 14.739140510559082, |
|
"rewards/rejected": -15.117152214050293, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.00049591064453125, |
|
"learning_rate": 4.223691013651986e-06, |
|
"logits/chosen": -2.145397424697876, |
|
"logits/rejected": -2.3859896659851074, |
|
"logps/chosen": -50.876380920410156, |
|
"logps/rejected": -1696.628173828125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28237268328666687, |
|
"rewards/margins": 16.069416046142578, |
|
"rewards/rejected": -16.351787567138672, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 4.207836895050748e-06, |
|
"logits/chosen": -2.290546178817749, |
|
"logits/rejected": -2.601999282836914, |
|
"logps/chosen": -49.566925048828125, |
|
"logps/rejected": -1810.896240234375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27409371733665466, |
|
"rewards/margins": 17.24148178100586, |
|
"rewards/rejected": -17.515573501586914, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 4.1918529345497525e-06, |
|
"logits/chosen": -2.2135214805603027, |
|
"logits/rejected": -2.4138569831848145, |
|
"logps/chosen": -51.47322463989258, |
|
"logps/rejected": -1197.149169921875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2979530692100525, |
|
"rewards/margins": 11.184531211853027, |
|
"rewards/rejected": -11.482483863830566, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 4.175740347363289e-06, |
|
"logits/chosen": -2.2823052406311035, |
|
"logits/rejected": -2.500483989715576, |
|
"logps/chosen": -50.924964904785156, |
|
"logps/rejected": -1341.169189453125, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2940273880958557, |
|
"rewards/margins": 12.595098495483398, |
|
"rewards/rejected": -12.889126777648926, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.001129150390625, |
|
"learning_rate": 4.159500358484759e-06, |
|
"logits/chosen": -2.1221683025360107, |
|
"logits/rejected": -2.388002872467041, |
|
"logps/chosen": -52.10107421875, |
|
"logps/rejected": -1701.734375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2977118492126465, |
|
"rewards/margins": 16.128461837768555, |
|
"rewards/rejected": -16.42617416381836, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 4.143134202593549e-06, |
|
"logits/chosen": -2.1562037467956543, |
|
"logits/rejected": -2.3721659183502197, |
|
"logps/chosen": -50.73106002807617, |
|
"logps/rejected": -1416.411376953125, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28152209520339966, |
|
"rewards/margins": 13.32117748260498, |
|
"rewards/rejected": -13.602702140808105, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.00372314453125, |
|
"learning_rate": 4.126643123961158e-06, |
|
"logits/chosen": -2.2438769340515137, |
|
"logits/rejected": -2.4929661750793457, |
|
"logps/chosen": -71.16793060302734, |
|
"logps/rejected": -1686.4351806640625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4894431233406067, |
|
"rewards/margins": 15.813896179199219, |
|
"rewards/rejected": -16.303340911865234, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.03466796875, |
|
"learning_rate": 4.110028376356599e-06, |
|
"logits/chosen": -2.222071647644043, |
|
"logits/rejected": -2.447359323501587, |
|
"logps/chosen": -70.91515350341797, |
|
"logps/rejected": -1337.4664306640625, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4935643672943115, |
|
"rewards/margins": 12.358075141906738, |
|
"rewards/rejected": -12.851638793945312, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 4.093291222951079e-06, |
|
"logits/chosen": -2.1609065532684326, |
|
"logits/rejected": -2.4100985527038574, |
|
"logps/chosen": -71.06592559814453, |
|
"logps/rejected": -1599.0948486328125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.49402037262916565, |
|
"rewards/margins": 14.944366455078125, |
|
"rewards/rejected": -15.438386917114258, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 4.076432936221965e-06, |
|
"logits/chosen": -2.1633338928222656, |
|
"logits/rejected": -2.3718645572662354, |
|
"logps/chosen": -76.24402618408203, |
|
"logps/rejected": -1331.0867919921875, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5511754751205444, |
|
"rewards/margins": 12.27659797668457, |
|
"rewards/rejected": -12.827774047851562, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 4.059454797856039e-06, |
|
"logits/chosen": -2.200438976287842, |
|
"logits/rejected": -2.4105000495910645, |
|
"logps/chosen": -72.47054290771484, |
|
"logps/rejected": -1285.55029296875, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5113335251808167, |
|
"rewards/margins": 11.851224899291992, |
|
"rewards/rejected": -12.362558364868164, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.0015869140625, |
|
"learning_rate": 4.042358098652057e-06, |
|
"logits/chosen": -2.257859468460083, |
|
"logits/rejected": -2.485215187072754, |
|
"logps/chosen": -52.50494384765625, |
|
"logps/rejected": -1284.864990234375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30147919058799744, |
|
"rewards/margins": 12.056472778320312, |
|
"rewards/rejected": -12.357951164245605, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.00494384765625, |
|
"learning_rate": 4.025144138422615e-06, |
|
"logits/chosen": -2.1999363899230957, |
|
"logits/rejected": -2.436066150665283, |
|
"logps/chosen": -60.535972595214844, |
|
"logps/rejected": -1517.8021240234375, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3815072178840637, |
|
"rewards/margins": 14.274576187133789, |
|
"rewards/rejected": -14.656084060668945, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 4.007814225895321e-06, |
|
"logits/chosen": -2.1949074268341064, |
|
"logits/rejected": -2.453916549682617, |
|
"logps/chosen": -40.10565948486328, |
|
"logps/rejected": -1380.6016845703125, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.184524804353714, |
|
"rewards/margins": 13.0862398147583, |
|
"rewards/rejected": -13.270764350891113, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 3.990369678613303e-06, |
|
"logits/chosen": -2.1046247482299805, |
|
"logits/rejected": -2.339478015899658, |
|
"logps/chosen": -32.17253875732422, |
|
"logps/rejected": -1487.5965576171875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1069754809141159, |
|
"rewards/margins": 14.181404113769531, |
|
"rewards/rejected": -14.288378715515137, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 3.97281182283504e-06, |
|
"logits/chosen": -2.168814182281494, |
|
"logits/rejected": -2.4204602241516113, |
|
"logps/chosen": -33.689884185791016, |
|
"logps/rejected": -1507.740966796875, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11984201520681381, |
|
"rewards/margins": 14.405647277832031, |
|
"rewards/rejected": -14.525489807128906, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 3.955141993433526e-06, |
|
"logits/chosen": -2.2266287803649902, |
|
"logits/rejected": -2.45817494392395, |
|
"logps/chosen": -52.63502883911133, |
|
"logps/rejected": -1366.678955078125, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3042375445365906, |
|
"rewards/margins": 12.845235824584961, |
|
"rewards/rejected": -13.14947509765625, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 3.937361533794784e-06, |
|
"logits/chosen": -2.156094551086426, |
|
"logits/rejected": -2.3926451206207275, |
|
"logps/chosen": -44.07966232299805, |
|
"logps/rejected": -1358.091064453125, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22154085338115692, |
|
"rewards/margins": 12.819009780883789, |
|
"rewards/rejected": -13.040552139282227, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 3.919471795715738e-06, |
|
"logits/chosen": -2.212313652038574, |
|
"logits/rejected": -2.4430899620056152, |
|
"logps/chosen": -40.03847122192383, |
|
"logps/rejected": -1265.60009765625, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1857212483882904, |
|
"rewards/margins": 11.979241371154785, |
|
"rewards/rejected": -12.164961814880371, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.901474139301433e-06, |
|
"logits/chosen": -2.100083112716675, |
|
"logits/rejected": -2.327531337738037, |
|
"logps/chosen": -47.98102569580078, |
|
"logps/rejected": -1396.822021484375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2628587484359741, |
|
"rewards/margins": 13.183023452758789, |
|
"rewards/rejected": -13.445881843566895, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 3.883369932861634e-06, |
|
"logits/chosen": -2.2499475479125977, |
|
"logits/rejected": -2.4626846313476562, |
|
"logps/chosen": -53.71254348754883, |
|
"logps/rejected": -1261.4793701171875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31738823652267456, |
|
"rewards/margins": 11.826452255249023, |
|
"rewards/rejected": -12.143839836120605, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.000232696533203125, |
|
"learning_rate": 3.865160552806796e-06, |
|
"logits/chosen": -2.293903350830078, |
|
"logits/rejected": -2.5309927463531494, |
|
"logps/chosen": -59.31644821166992, |
|
"logps/rejected": -1348.3590087890625, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37211018800735474, |
|
"rewards/margins": 12.622639656066895, |
|
"rewards/rejected": -12.994749069213867, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0002765655517578125, |
|
"learning_rate": 3.84684738354342e-06, |
|
"logits/chosen": -2.301741361618042, |
|
"logits/rejected": -2.5269277095794678, |
|
"logps/chosen": -35.07439422607422, |
|
"logps/rejected": -1298.1329345703125, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1266619861125946, |
|
"rewards/margins": 12.352213859558105, |
|
"rewards/rejected": -12.478874206542969, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 3.828431817368798e-06, |
|
"logits/chosen": -2.15970778465271, |
|
"logits/rejected": -2.3912577629089355, |
|
"logps/chosen": -23.495868682861328, |
|
"logps/rejected": -1347.333740234375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.02085093781352043, |
|
"rewards/margins": 12.912274360656738, |
|
"rewards/rejected": -12.933123588562012, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 3.8099152543651684e-06, |
|
"logits/chosen": -2.3851158618927, |
|
"logits/rejected": -2.659996509552002, |
|
"logps/chosen": -34.04401397705078, |
|
"logps/rejected": -1443.980712890625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12582853436470032, |
|
"rewards/margins": 13.780733108520508, |
|
"rewards/rejected": -13.906559944152832, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.791299102293261e-06, |
|
"logits/chosen": -2.125797748565674, |
|
"logits/rejected": -2.3718996047973633, |
|
"logps/chosen": -31.654226303100586, |
|
"logps/rejected": -1515.550048828125, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0940675288438797, |
|
"rewards/margins": 14.493083000183105, |
|
"rewards/rejected": -14.587150573730469, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 3.7725847764852774e-06, |
|
"logits/chosen": -2.117516040802002, |
|
"logits/rejected": -2.376412868499756, |
|
"logps/chosen": -33.58929443359375, |
|
"logps/rejected": -1522.32470703125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11228612810373306, |
|
"rewards/margins": 14.516647338867188, |
|
"rewards/rejected": -14.628933906555176, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 3.7537736997372833e-06, |
|
"logits/chosen": -2.183899402618408, |
|
"logits/rejected": -2.4056055545806885, |
|
"logps/chosen": -38.9683723449707, |
|
"logps/rejected": -1303.519287109375, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17212000489234924, |
|
"rewards/margins": 12.313672065734863, |
|
"rewards/rejected": -12.485791206359863, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.00689697265625, |
|
"learning_rate": 3.734867302201038e-06, |
|
"logits/chosen": -2.2842166423797607, |
|
"logits/rejected": -2.4898123741149902, |
|
"logps/chosen": -38.427486419677734, |
|
"logps/rejected": -1249.1448974609375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17140671610832214, |
|
"rewards/margins": 11.829398155212402, |
|
"rewards/rejected": -12.000804901123047, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.7158670212752666e-06, |
|
"logits/chosen": -2.1761648654937744, |
|
"logits/rejected": -2.4285309314727783, |
|
"logps/chosen": -43.9667854309082, |
|
"logps/rejected": -1409.6014404296875, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21925847232341766, |
|
"rewards/margins": 13.351308822631836, |
|
"rewards/rejected": -13.570569038391113, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.028076171875, |
|
"learning_rate": 3.696774301496376e-06, |
|
"logits/chosen": -2.253307342529297, |
|
"logits/rejected": -2.4998929500579834, |
|
"logps/chosen": -39.94139862060547, |
|
"logps/rejected": -1315.309814453125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1798352301120758, |
|
"rewards/margins": 12.484976768493652, |
|
"rewards/rejected": -12.664812088012695, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 3.677590594428629e-06, |
|
"logits/chosen": -2.187530517578125, |
|
"logits/rejected": -2.411306142807007, |
|
"logps/chosen": -46.19135284423828, |
|
"logps/rejected": -1337.900390625, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2449018955230713, |
|
"rewards/margins": 12.619891166687012, |
|
"rewards/rejected": -12.86479377746582, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0035400390625, |
|
"learning_rate": 3.658317358553794e-06, |
|
"logits/chosen": -2.1583094596862793, |
|
"logits/rejected": -2.399893045425415, |
|
"logps/chosen": -42.413978576660156, |
|
"logps/rejected": -1464.1385498046875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20776596665382385, |
|
"rewards/margins": 13.914667129516602, |
|
"rewards/rejected": -14.122431755065918, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.638956059160252e-06, |
|
"logits/chosen": -2.2085630893707275, |
|
"logits/rejected": -2.465798854827881, |
|
"logps/chosen": -51.00899887084961, |
|
"logps/rejected": -1475.9312744140625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28529030084609985, |
|
"rewards/margins": 13.978610038757324, |
|
"rewards/rejected": -14.26390266418457, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 3.6195081682315972e-06, |
|
"logits/chosen": -2.2395682334899902, |
|
"logits/rejected": -2.461138963699341, |
|
"logps/chosen": -52.74529266357422, |
|
"logps/rejected": -1418.346923828125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3086877167224884, |
|
"rewards/margins": 13.38988208770752, |
|
"rewards/rejected": -13.698568344116211, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 3.5999751643347342e-06, |
|
"logits/chosen": -2.16579008102417, |
|
"logits/rejected": -2.4046080112457275, |
|
"logps/chosen": -46.71515655517578, |
|
"logps/rejected": -1608.938232421875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24464385211467743, |
|
"rewards/margins": 15.266571044921875, |
|
"rewards/rejected": -15.51121711730957, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 3.5803585325074536e-06, |
|
"logits/chosen": -2.1881327629089355, |
|
"logits/rejected": -2.427145481109619, |
|
"logps/chosen": -37.16319274902344, |
|
"logps/rejected": -1421.040771484375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15972432494163513, |
|
"rewards/margins": 13.539385795593262, |
|
"rewards/rejected": -13.69911003112793, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 3.5606597641455387e-06, |
|
"logits/chosen": -2.219900369644165, |
|
"logits/rejected": -2.4398694038391113, |
|
"logps/chosen": -32.802005767822266, |
|
"logps/rejected": -1393.4263916015625, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11474724858999252, |
|
"rewards/margins": 13.298954963684082, |
|
"rewards/rejected": -13.413702011108398, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 3.540880356889376e-06, |
|
"logits/chosen": -2.23069429397583, |
|
"logits/rejected": -2.4424965381622314, |
|
"logps/chosen": -42.188209533691406, |
|
"logps/rejected": -1353.078857421875, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2047566920518875, |
|
"rewards/margins": 12.800407409667969, |
|
"rewards/rejected": -13.005162239074707, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 3.5210218145100934e-06, |
|
"logits/chosen": -2.1350436210632324, |
|
"logits/rejected": -2.3985211849212646, |
|
"logps/chosen": -51.05349349975586, |
|
"logps/rejected": -1367.61767578125, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29429805278778076, |
|
"rewards/margins": 12.873303413391113, |
|
"rewards/rejected": -13.167600631713867, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 3.5010856467952335e-06, |
|
"logits/chosen": -2.1528429985046387, |
|
"logits/rejected": -2.3955628871917725, |
|
"logps/chosen": -42.9320068359375, |
|
"logps/rejected": -1482.957763671875, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20944638550281525, |
|
"rewards/margins": 14.0554780960083, |
|
"rewards/rejected": -14.264923095703125, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 3.4810733694339687e-06, |
|
"logits/chosen": -2.2495784759521484, |
|
"logits/rejected": -2.512760639190674, |
|
"logps/chosen": -57.50274658203125, |
|
"logps/rejected": -1577.8023681640625, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3561248183250427, |
|
"rewards/margins": 14.898828506469727, |
|
"rewards/rejected": -15.25495433807373, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.4609865039018676e-06, |
|
"logits/chosen": -2.2507643699645996, |
|
"logits/rejected": -2.475839614868164, |
|
"logps/chosen": -41.08405685424805, |
|
"logps/rejected": -1401.9703369140625, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20163026452064514, |
|
"rewards/margins": 13.3068265914917, |
|
"rewards/rejected": -13.508456230163574, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 3.4408265773452226e-06, |
|
"logits/chosen": -2.1668903827667236, |
|
"logits/rejected": -2.4009640216827393, |
|
"logps/chosen": -43.23725891113281, |
|
"logps/rejected": -1427.717041015625, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21243014931678772, |
|
"rewards/margins": 13.541119575500488, |
|
"rewards/rejected": -13.753549575805664, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0021514892578125, |
|
"learning_rate": 3.420595122464942e-06, |
|
"logits/chosen": -2.2544631958007812, |
|
"logits/rejected": -2.4994306564331055, |
|
"logps/chosen": -50.723716735839844, |
|
"logps/rejected": -1400.42236328125, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2909182906150818, |
|
"rewards/margins": 13.212321281433105, |
|
"rewards/rejected": -13.503240585327148, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 3.4002936774000284e-06, |
|
"logits/chosen": -2.1552722454071045, |
|
"logits/rejected": -2.4494900703430176, |
|
"logps/chosen": -53.8035888671875, |
|
"logps/rejected": -1743.7855224609375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31691521406173706, |
|
"rewards/margins": 16.568302154541016, |
|
"rewards/rejected": -16.885215759277344, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 3.3799237856106348e-06, |
|
"logits/chosen": -2.1529643535614014, |
|
"logits/rejected": -2.4126904010772705, |
|
"logps/chosen": -55.90287399291992, |
|
"logps/rejected": -1550.77783203125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34558922052383423, |
|
"rewards/margins": 14.627325057983398, |
|
"rewards/rejected": -14.97291374206543, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.004913330078125, |
|
"learning_rate": 3.35948699576072e-06, |
|
"logits/chosen": -2.108168363571167, |
|
"logits/rejected": -2.371859550476074, |
|
"logps/chosen": -63.180198669433594, |
|
"logps/rejected": -1683.2808837890625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40645408630371094, |
|
"rewards/margins": 15.862528800964355, |
|
"rewards/rejected": -16.268983840942383, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 3.3389848616003085e-06, |
|
"logits/chosen": -2.202070951461792, |
|
"logits/rejected": -2.4270646572113037, |
|
"logps/chosen": -47.17142105102539, |
|
"logps/rejected": -1418.73046875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25016140937805176, |
|
"rewards/margins": 13.404383659362793, |
|
"rewards/rejected": -13.654545783996582, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 3.3184189418473674e-06, |
|
"logits/chosen": -2.0919992923736572, |
|
"logits/rejected": -2.3279192447662354, |
|
"logps/chosen": -37.22324752807617, |
|
"logps/rejected": -1371.5806884765625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15740033984184265, |
|
"rewards/margins": 13.067277908325195, |
|
"rewards/rejected": -13.224676132202148, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 3.2977908000692925e-06, |
|
"logits/chosen": -2.1699509620666504, |
|
"logits/rejected": -2.4078266620635986, |
|
"logps/chosen": -46.939552307128906, |
|
"logps/rejected": -1496.64501953125, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24631306529045105, |
|
"rewards/margins": 14.200462341308594, |
|
"rewards/rejected": -14.44677448272705, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 3.2771020045640435e-06, |
|
"logits/chosen": -2.314471960067749, |
|
"logits/rejected": -2.533036708831787, |
|
"logps/chosen": -49.747779846191406, |
|
"logps/rejected": -1293.560302734375, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27524739503860474, |
|
"rewards/margins": 12.140924453735352, |
|
"rewards/rejected": -12.416172981262207, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 3.256354128240907e-06, |
|
"logits/chosen": -2.101799488067627, |
|
"logits/rejected": -2.320006847381592, |
|
"logps/chosen": -58.1518669128418, |
|
"logps/rejected": -1474.82666015625, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.361042320728302, |
|
"rewards/margins": 13.842974662780762, |
|
"rewards/rejected": -14.204015731811523, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.551823854446411e-07, |
|
"learning_rate": 3.235548748500914e-06, |
|
"logits/chosen": -2.3442602157592773, |
|
"logits/rejected": -2.5813608169555664, |
|
"logps/chosen": -64.3367691040039, |
|
"logps/rejected": -1516.5281982421875, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42836618423461914, |
|
"rewards/margins": 14.244784355163574, |
|
"rewards/rejected": -14.673149108886719, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 3.214687447116913e-06, |
|
"logits/chosen": -2.129812717437744, |
|
"logits/rejected": -2.35500168800354, |
|
"logps/chosen": -60.48137664794922, |
|
"logps/rejected": -1468.074462890625, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3904629647731781, |
|
"rewards/margins": 13.763618469238281, |
|
"rewards/rejected": -14.154080390930176, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.005218505859375, |
|
"learning_rate": 3.193771810113313e-06, |
|
"logits/chosen": -2.1812546253204346, |
|
"logits/rejected": -2.450334072113037, |
|
"logps/chosen": -57.156097412109375, |
|
"logps/rejected": -1621.7850341796875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34869837760925293, |
|
"rewards/margins": 15.336013793945312, |
|
"rewards/rejected": -15.684713363647461, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 3.1728034276455032e-06, |
|
"logits/chosen": -2.1772501468658447, |
|
"logits/rejected": -2.4167187213897705, |
|
"logps/chosen": -47.676063537597656, |
|
"logps/rejected": -1501.980224609375, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2580206096172333, |
|
"rewards/margins": 14.23988151550293, |
|
"rewards/rejected": -14.497901916503906, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.00274658203125, |
|
"learning_rate": 3.1517838938789597e-06, |
|
"logits/chosen": -2.1416432857513428, |
|
"logits/rejected": -2.3887360095977783, |
|
"logps/chosen": -31.932031631469727, |
|
"logps/rejected": -1682.0501708984375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10166473686695099, |
|
"rewards/margins": 16.101634979248047, |
|
"rewards/rejected": -16.203296661376953, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 3.130714806868041e-06, |
|
"logits/chosen": -2.132199764251709, |
|
"logits/rejected": -2.3675732612609863, |
|
"logps/chosen": -38.96401596069336, |
|
"logps/rejected": -1434.172607421875, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1679878979921341, |
|
"rewards/margins": 13.631708145141602, |
|
"rewards/rejected": -13.799695014953613, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.1095977684344976e-06, |
|
"logits/chosen": -2.221590042114258, |
|
"logits/rejected": -2.477220296859741, |
|
"logps/chosen": -42.42957305908203, |
|
"logps/rejected": -1500.699462890625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2044091671705246, |
|
"rewards/margins": 14.278982162475586, |
|
"rewards/rejected": -14.483392715454102, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0040283203125, |
|
"learning_rate": 3.0884343840456874e-06, |
|
"logits/chosen": -2.280695915222168, |
|
"logits/rejected": -2.5356380939483643, |
|
"logps/chosen": -51.98859405517578, |
|
"logps/rejected": -1650.245361328125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3021391034126282, |
|
"rewards/margins": 15.651565551757812, |
|
"rewards/rejected": -15.953704833984375, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0002536773681640625, |
|
"learning_rate": 3.0672262626925174e-06, |
|
"logits/chosen": -2.1820268630981445, |
|
"logits/rejected": -2.439319133758545, |
|
"logps/chosen": -47.39429473876953, |
|
"logps/rejected": -1611.6102294921875, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24379031360149384, |
|
"rewards/margins": 15.318346977233887, |
|
"rewards/rejected": -15.56213665008545, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.910064697265625e-05, |
|
"learning_rate": 3.0459750167671147e-06, |
|
"logits/chosen": -2.1863160133361816, |
|
"logits/rejected": -2.450911283493042, |
|
"logps/chosen": -57.97031784057617, |
|
"logps/rejected": -1733.2484130859375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35936951637268066, |
|
"rewards/margins": 16.4171199798584, |
|
"rewards/rejected": -16.776485443115234, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.375, |
|
"learning_rate": 3.024682261940247e-06, |
|
"logits/chosen": -2.1711161136627197, |
|
"logits/rejected": -2.381054401397705, |
|
"logps/chosen": -63.16656494140625, |
|
"logps/rejected": -1473.282958984375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40881744027137756, |
|
"rewards/margins": 13.772600173950195, |
|
"rewards/rejected": -14.181416511535645, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.000568389892578125, |
|
"learning_rate": 3.0033496170384803e-06, |
|
"logits/chosen": -2.232100009918213, |
|
"logits/rejected": -2.4612276554107666, |
|
"logps/chosen": -56.055152893066406, |
|
"logps/rejected": -1356.71484375, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3471956253051758, |
|
"rewards/margins": 12.729546546936035, |
|
"rewards/rejected": -13.076742172241211, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 2.9819787039211068e-06, |
|
"logits/chosen": -2.1615240573883057, |
|
"logits/rejected": -2.393810510635376, |
|
"logps/chosen": -35.02969741821289, |
|
"logps/rejected": -1524.7955322265625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13164165616035461, |
|
"rewards/margins": 14.575594902038574, |
|
"rewards/rejected": -14.707235336303711, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0054931640625, |
|
"learning_rate": 2.960571147356845e-06, |
|
"logits/chosen": -2.256544828414917, |
|
"logits/rejected": -2.5309910774230957, |
|
"logps/chosen": -49.80757522583008, |
|
"logps/rejected": -1592.794677734375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2703229784965515, |
|
"rewards/margins": 15.133091926574707, |
|
"rewards/rejected": -15.403416633605957, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0023040771484375, |
|
"learning_rate": 2.9391285749003046e-06, |
|
"logits/chosen": -2.15415620803833, |
|
"logits/rejected": -2.405571460723877, |
|
"logps/chosen": -40.737998962402344, |
|
"logps/rejected": -1701.052734375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18067236244678497, |
|
"rewards/margins": 16.24726104736328, |
|
"rewards/rejected": -16.427934646606445, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -2.6136603355407715, |
|
"eval_logits/rejected": -2.7333316802978516, |
|
"eval_logps/chosen": -48.08984375, |
|
"eval_logps/rejected": -693.2846069335938, |
|
"eval_loss": 0.0037064917851239443, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.22176341712474823, |
|
"eval_rewards/margins": 6.262362003326416, |
|
"eval_rewards/rejected": -6.48412561416626, |
|
"eval_runtime": 0.6544, |
|
"eval_samples_per_second": 7.641, |
|
"eval_steps_per_second": 4.585, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 2.9176526167682543e-06, |
|
"logits/chosen": -2.1183362007141113, |
|
"logits/rejected": -2.351123571395874, |
|
"logps/chosen": -37.299964904785156, |
|
"logps/rejected": -1437.7230224609375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15952737629413605, |
|
"rewards/margins": 13.696490287780762, |
|
"rewards/rejected": -13.85601806640625, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 2.8961449057156775e-06, |
|
"logits/chosen": -2.200801372528076, |
|
"logits/rejected": -2.4389915466308594, |
|
"logps/chosen": -42.25465774536133, |
|
"logps/rejected": -1569.040771484375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20262226462364197, |
|
"rewards/margins": 14.958514213562012, |
|
"rewards/rejected": -15.16113567352295, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0037689208984375, |
|
"learning_rate": 2.874607076911642e-06, |
|
"logits/chosen": -2.212007999420166, |
|
"logits/rejected": -2.4628169536590576, |
|
"logps/chosen": -54.49187088012695, |
|
"logps/rejected": -1452.176513671875, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31496500968933105, |
|
"rewards/margins": 13.68268871307373, |
|
"rewards/rejected": -13.997654914855957, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0026092529296875, |
|
"learning_rate": 2.8530407678149806e-06, |
|
"logits/chosen": -2.1855294704437256, |
|
"logits/rejected": -2.428863525390625, |
|
"logps/chosen": -61.762428283691406, |
|
"logps/rejected": -1588.792236328125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4000323414802551, |
|
"rewards/margins": 14.938389778137207, |
|
"rewards/rejected": -15.338421821594238, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.001739501953125, |
|
"learning_rate": 2.8314476180498003e-06, |
|
"logits/chosen": -2.0332534313201904, |
|
"logits/rejected": -2.267488718032837, |
|
"logps/chosen": -41.453369140625, |
|
"logps/rejected": -1475.7647705078125, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1973056197166443, |
|
"rewards/margins": 14.028945922851562, |
|
"rewards/rejected": -14.226251602172852, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 2.8098292692808253e-06, |
|
"logits/chosen": -2.2281060218811035, |
|
"logits/rejected": -2.422762632369995, |
|
"logps/chosen": -41.2132453918457, |
|
"logps/rejected": -1153.19775390625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1940850019454956, |
|
"rewards/margins": 10.885441780090332, |
|
"rewards/rejected": -11.079526901245117, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 2.7881873650885904e-06, |
|
"logits/chosen": -2.227834463119507, |
|
"logits/rejected": -2.4453253746032715, |
|
"logps/chosen": -50.43096160888672, |
|
"logps/rejected": -1375.1741943359375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2844979166984558, |
|
"rewards/margins": 12.969167709350586, |
|
"rewards/rejected": -13.25366497039795, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 2.7665235508444772e-06, |
|
"logits/chosen": -2.1580593585968018, |
|
"logits/rejected": -2.404978036880493, |
|
"logps/chosen": -47.8787841796875, |
|
"logps/rejected": -1663.496826171875, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2667320966720581, |
|
"rewards/margins": 15.83378791809082, |
|
"rewards/rejected": -16.10051727294922, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.000820159912109375, |
|
"learning_rate": 2.7448394735856275e-06, |
|
"logits/chosen": -2.1202292442321777, |
|
"logits/rejected": -2.387399196624756, |
|
"logps/chosen": -29.072830200195312, |
|
"logps/rejected": -1652.7601318359375, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07453112304210663, |
|
"rewards/margins": 15.884991645812988, |
|
"rewards/rejected": -15.959524154663086, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 2.723136781889722e-06, |
|
"logits/chosen": -2.248565912246704, |
|
"logits/rejected": -2.483459949493408, |
|
"logps/chosen": -49.5106201171875, |
|
"logps/rejected": -1374.344482421875, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26804444193840027, |
|
"rewards/margins": 12.9815092086792, |
|
"rewards/rejected": -13.249552726745605, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 2.7014171257496414e-06, |
|
"logits/chosen": -2.2338385581970215, |
|
"logits/rejected": -2.4451489448547363, |
|
"logps/chosen": -47.859092712402344, |
|
"logps/rejected": -1475.451416015625, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.259570837020874, |
|
"rewards/margins": 13.942883491516113, |
|
"rewards/rejected": -14.20245361328125, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 2.6796821564480237e-06, |
|
"logits/chosen": -2.1667749881744385, |
|
"logits/rejected": -2.3811049461364746, |
|
"logps/chosen": -51.062232971191406, |
|
"logps/rejected": -1320.582763671875, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2894384562969208, |
|
"rewards/margins": 12.419047355651855, |
|
"rewards/rejected": -12.708486557006836, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.00604248046875, |
|
"learning_rate": 2.6579335264317253e-06, |
|
"logits/chosen": -2.3176040649414062, |
|
"logits/rejected": -2.558061361312866, |
|
"logps/chosen": -36.845001220703125, |
|
"logps/rejected": -1507.6351318359375, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15489184856414795, |
|
"rewards/margins": 14.385536193847656, |
|
"rewards/rejected": -14.540430068969727, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.00830078125, |
|
"learning_rate": 2.6361728891861843e-06, |
|
"logits/chosen": -2.067624568939209, |
|
"logits/rejected": -2.2963385581970215, |
|
"logps/chosen": -45.42739486694336, |
|
"logps/rejected": -1546.36083984375, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23402588069438934, |
|
"rewards/margins": 14.646380424499512, |
|
"rewards/rejected": -14.880406379699707, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0032806396484375, |
|
"learning_rate": 2.614401899109716e-06, |
|
"logits/chosen": -2.247525930404663, |
|
"logits/rejected": -2.4837863445281982, |
|
"logps/chosen": -48.529815673828125, |
|
"logps/rejected": -1460.3306884765625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2696106433868408, |
|
"rewards/margins": 13.825261116027832, |
|
"rewards/rejected": -14.094873428344727, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 2.5926222113877282e-06, |
|
"logits/chosen": -2.243438482284546, |
|
"logits/rejected": -2.4923970699310303, |
|
"logps/chosen": -43.63848114013672, |
|
"logps/rejected": -1591.591064453125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2216021567583084, |
|
"rewards/margins": 15.112271308898926, |
|
"rewards/rejected": -15.333871841430664, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 2.570835481866889e-06, |
|
"logits/chosen": -2.144465923309326, |
|
"logits/rejected": -2.3723580837249756, |
|
"logps/chosen": -45.58980178833008, |
|
"logps/rejected": -1466.3011474609375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2433461844921112, |
|
"rewards/margins": 13.90100383758545, |
|
"rewards/rejected": -14.1443510055542, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 2.5490433669292337e-06, |
|
"logits/chosen": -2.0634944438934326, |
|
"logits/rejected": -2.311782121658325, |
|
"logps/chosen": -37.41926193237305, |
|
"logps/rejected": -1625.005126953125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1547044962644577, |
|
"rewards/margins": 15.56786060333252, |
|
"rewards/rejected": -15.722564697265625, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.00244140625, |
|
"learning_rate": 2.527247523366232e-06, |
|
"logits/chosen": -2.2304885387420654, |
|
"logits/rejected": -2.4748549461364746, |
|
"logps/chosen": -54.11591339111328, |
|
"logps/rejected": -1548.42578125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31723862886428833, |
|
"rewards/margins": 14.627525329589844, |
|
"rewards/rejected": -14.9447660446167, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.01214599609375, |
|
"learning_rate": 2.5054496082528336e-06, |
|
"logits/chosen": -2.2945401668548584, |
|
"logits/rejected": -2.553946018218994, |
|
"logps/chosen": -50.36088180541992, |
|
"logps/rejected": -1503.6251220703125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2821517586708069, |
|
"rewards/margins": 14.262479782104492, |
|
"rewards/rejected": -14.544631958007812, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 2.483651278821481e-06, |
|
"logits/chosen": -2.240737199783325, |
|
"logits/rejected": -2.468348264694214, |
|
"logps/chosen": -38.926151275634766, |
|
"logps/rejected": -1415.637939453125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17295430600643158, |
|
"rewards/margins": 13.47205638885498, |
|
"rewards/rejected": -13.645009994506836, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.111328125, |
|
"learning_rate": 2.4618541923361166e-06, |
|
"logits/chosen": -2.4229185581207275, |
|
"logits/rejected": -2.6283278465270996, |
|
"logps/chosen": -44.134647369384766, |
|
"logps/rejected": -1301.507080078125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2265511453151703, |
|
"rewards/margins": 12.291707038879395, |
|
"rewards/rejected": -12.518258094787598, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.001983642578125, |
|
"learning_rate": 2.4400600059661836e-06, |
|
"logits/chosen": -2.0719246864318848, |
|
"logits/rejected": -2.375192165374756, |
|
"logps/chosen": -46.18827819824219, |
|
"logps/rejected": -1760.173095703125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24284347891807556, |
|
"rewards/margins": 16.816072463989258, |
|
"rewards/rejected": -17.058916091918945, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 2.41827037666064e-06, |
|
"logits/chosen": -2.2636351585388184, |
|
"logits/rejected": -2.4840915203094482, |
|
"logps/chosen": -47.29922103881836, |
|
"logps/rejected": -1315.336181640625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25474974513053894, |
|
"rewards/margins": 12.407878875732422, |
|
"rewards/rejected": -12.662630081176758, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.01141357421875, |
|
"learning_rate": 2.396486961021983e-06, |
|
"logits/chosen": -2.1793510913848877, |
|
"logits/rejected": -2.4308459758758545, |
|
"logps/chosen": -41.266380310058594, |
|
"logps/rejected": -1442.6708984375, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19039396941661835, |
|
"rewards/margins": 13.722501754760742, |
|
"rewards/rejected": -13.912895202636719, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.010498046875, |
|
"learning_rate": 2.3747114151802993e-06, |
|
"logits/chosen": -2.3280482292175293, |
|
"logits/rejected": -2.5701987743377686, |
|
"logps/chosen": -47.68052673339844, |
|
"logps/rejected": -1394.991455078125, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2576830983161926, |
|
"rewards/margins": 13.206995010375977, |
|
"rewards/rejected": -13.464675903320312, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 2.352945394667363e-06, |
|
"logits/chosen": -2.0980782508850098, |
|
"logits/rejected": -2.364197254180908, |
|
"logps/chosen": -47.831058502197266, |
|
"logps/rejected": -1665.154296875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25840622186660767, |
|
"rewards/margins": 15.833898544311523, |
|
"rewards/rejected": -16.092304229736328, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 2.3311905542907627e-06, |
|
"logits/chosen": -2.256291389465332, |
|
"logits/rejected": -2.486441135406494, |
|
"logps/chosen": -42.5937614440918, |
|
"logps/rejected": -1361.2073974609375, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2082025557756424, |
|
"rewards/margins": 12.911099433898926, |
|
"rewards/rejected": -13.119302749633789, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 2.30944854800809e-06, |
|
"logits/chosen": -2.2147023677825928, |
|
"logits/rejected": -2.4364144802093506, |
|
"logps/chosen": -40.498531341552734, |
|
"logps/rejected": -1479.181396484375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18588443100452423, |
|
"rewards/margins": 14.101339340209961, |
|
"rewards/rejected": -14.287226676940918, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.004364013671875, |
|
"learning_rate": 2.287721028801204e-06, |
|
"logits/chosen": -2.175849676132202, |
|
"logits/rejected": -2.4008584022521973, |
|
"logps/chosen": -43.332298278808594, |
|
"logps/rejected": -1385.960205078125, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21884088218212128, |
|
"rewards/margins": 13.133634567260742, |
|
"rewards/rejected": -13.352476119995117, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.057861328125, |
|
"learning_rate": 2.26600964855055e-06, |
|
"logits/chosen": -2.2437031269073486, |
|
"logits/rejected": -2.4617691040039062, |
|
"logps/chosen": -43.779388427734375, |
|
"logps/rejected": -1358.084716796875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22216272354125977, |
|
"rewards/margins": 12.88685417175293, |
|
"rewards/rejected": -13.109016418457031, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 2.244316057909573e-06, |
|
"logits/chosen": -2.205610752105713, |
|
"logits/rejected": -2.4241251945495605, |
|
"logps/chosen": -37.175682067871094, |
|
"logps/rejected": -1405.6822509765625, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15614905953407288, |
|
"rewards/margins": 13.411378860473633, |
|
"rewards/rejected": -13.567527770996094, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 2.2226419061792282e-06, |
|
"logits/chosen": -2.284442901611328, |
|
"logits/rejected": -2.527775526046753, |
|
"logps/chosen": -48.442630767822266, |
|
"logps/rejected": -1552.012939453125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2705609202384949, |
|
"rewards/margins": 14.728759765625, |
|
"rewards/rejected": -14.999322891235352, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0081787109375, |
|
"learning_rate": 2.200988841182589e-06, |
|
"logits/chosen": -2.219576358795166, |
|
"logits/rejected": -2.4669265747070312, |
|
"logps/chosen": -44.78432083129883, |
|
"logps/rejected": -1667.3538818359375, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2340162992477417, |
|
"rewards/margins": 15.922113418579102, |
|
"rewards/rejected": -16.156129837036133, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.01287841796875, |
|
"learning_rate": 2.179358509139559e-06, |
|
"logits/chosen": -2.171391010284424, |
|
"logits/rejected": -2.4009640216827393, |
|
"logps/chosen": -61.962059020996094, |
|
"logps/rejected": -1350.2457275390625, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3955707848072052, |
|
"rewards/margins": 12.617085456848145, |
|
"rewards/rejected": -13.012655258178711, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 2.1577525545417254e-06, |
|
"logits/chosen": -2.1860475540161133, |
|
"logits/rejected": -2.418872594833374, |
|
"logps/chosen": -58.040443420410156, |
|
"logps/rejected": -1468.6392822265625, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3626258969306946, |
|
"rewards/margins": 13.818751335144043, |
|
"rewards/rejected": -14.181378364562988, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0186767578125, |
|
"learning_rate": 2.1361726200273293e-06, |
|
"logits/chosen": -2.2700607776641846, |
|
"logits/rejected": -2.521707057952881, |
|
"logps/chosen": -48.01632308959961, |
|
"logps/rejected": -1544.2449951171875, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25974512100219727, |
|
"rewards/margins": 14.68183708190918, |
|
"rewards/rejected": -14.941583633422852, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 2.1146203462563773e-06, |
|
"logits/chosen": -2.335644483566284, |
|
"logits/rejected": -2.5736241340637207, |
|
"logps/chosen": -39.67052459716797, |
|
"logps/rejected": -1430.16796875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18928556144237518, |
|
"rewards/margins": 13.622029304504395, |
|
"rewards/rejected": -13.811314582824707, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 2.0930973717859117e-06, |
|
"logits/chosen": -2.352358341217041, |
|
"logits/rejected": -2.598140239715576, |
|
"logps/chosen": -44.863258361816406, |
|
"logps/rejected": -1464.2244873046875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23050542175769806, |
|
"rewards/margins": 13.890615463256836, |
|
"rewards/rejected": -14.121121406555176, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.00011587142944335938, |
|
"learning_rate": 2.0716053329454337e-06, |
|
"logits/chosen": -2.07816481590271, |
|
"logits/rejected": -2.320413112640381, |
|
"logps/chosen": -42.12782287597656, |
|
"logps/rejected": -1603.09716796875, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20372910797595978, |
|
"rewards/margins": 15.295297622680664, |
|
"rewards/rejected": -15.49902629852295, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 2.0501458637124963e-06, |
|
"logits/chosen": -2.2174525260925293, |
|
"logits/rejected": -2.5070488452911377, |
|
"logps/chosen": -49.52367401123047, |
|
"logps/rejected": -1763.182861328125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2707751393318176, |
|
"rewards/margins": 16.840404510498047, |
|
"rewards/rejected": -17.11117935180664, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 2.0287205955884812e-06, |
|
"logits/chosen": -2.2282018661499023, |
|
"logits/rejected": -2.47560453414917, |
|
"logps/chosen": -39.33561706542969, |
|
"logps/rejected": -1586.3204345703125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17531004548072815, |
|
"rewards/margins": 15.11853313446045, |
|
"rewards/rejected": -15.293844223022461, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 2.0073311574745583e-06, |
|
"logits/chosen": -2.1908931732177734, |
|
"logits/rejected": -2.4531962871551514, |
|
"logps/chosen": -46.56280517578125, |
|
"logps/rejected": -1645.0823974609375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24412044882774353, |
|
"rewards/margins": 15.665544509887695, |
|
"rewards/rejected": -15.909663200378418, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.01141357421875, |
|
"learning_rate": 1.9859791755478453e-06, |
|
"logits/chosen": -2.2081665992736816, |
|
"logits/rejected": -2.4285478591918945, |
|
"logps/chosen": -36.21527099609375, |
|
"logps/rejected": -1296.198486328125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14344918727874756, |
|
"rewards/margins": 12.341351509094238, |
|
"rewards/rejected": -12.484800338745117, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 1.9646662731377737e-06, |
|
"logits/chosen": -2.157654285430908, |
|
"logits/rejected": -2.3908090591430664, |
|
"logps/chosen": -45.09668731689453, |
|
"logps/rejected": -1404.2435302734375, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23492522537708282, |
|
"rewards/margins": 13.305872917175293, |
|
"rewards/rejected": -13.540797233581543, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.01287841796875, |
|
"learning_rate": 1.9433940706026743e-06, |
|
"logits/chosen": -2.1844208240509033, |
|
"logits/rejected": -2.438828468322754, |
|
"logps/chosen": -47.74811935424805, |
|
"logps/rejected": -1656.3568115234375, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2575169503688812, |
|
"rewards/margins": 15.76098346710205, |
|
"rewards/rejected": -16.01849937438965, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.000972747802734375, |
|
"learning_rate": 1.9221641852065807e-06, |
|
"logits/chosen": -2.18261456489563, |
|
"logits/rejected": -2.4000496864318848, |
|
"logps/chosen": -44.85232925415039, |
|
"logps/rejected": -1401.519775390625, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22903266549110413, |
|
"rewards/margins": 13.285311698913574, |
|
"rewards/rejected": -13.51434326171875, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 1.9009782309962805e-06, |
|
"logits/chosen": -2.281862497329712, |
|
"logits/rejected": -2.5180306434631348, |
|
"logps/chosen": -35.415897369384766, |
|
"logps/rejected": -1375.5728759765625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13269564509391785, |
|
"rewards/margins": 13.11363697052002, |
|
"rewards/rejected": -13.246332168579102, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 1.8798378186785979e-06, |
|
"logits/chosen": -2.2361299991607666, |
|
"logits/rejected": -2.4721415042877197, |
|
"logps/chosen": -30.1846923828125, |
|
"logps/rejected": -1444.7518310546875, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0831189677119255, |
|
"rewards/margins": 13.8583402633667, |
|
"rewards/rejected": -13.941459655761719, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 1.8587445554979404e-06, |
|
"logits/chosen": -2.073253870010376, |
|
"logits/rejected": -2.3244481086730957, |
|
"logps/chosen": -36.15102005004883, |
|
"logps/rejected": -1567.7164306640625, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1387818604707718, |
|
"rewards/margins": 14.997014045715332, |
|
"rewards/rejected": -15.135795593261719, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0006103515625, |
|
"learning_rate": 1.8377000451141013e-06, |
|
"logits/chosen": -2.120227336883545, |
|
"logits/rejected": -2.379242420196533, |
|
"logps/chosen": -42.131507873535156, |
|
"logps/rejected": -1565.419189453125, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.207178995013237, |
|
"rewards/margins": 14.909128189086914, |
|
"rewards/rejected": -15.116305351257324, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 1.8167058874803405e-06, |
|
"logits/chosen": -2.234502077102661, |
|
"logits/rejected": -2.4847466945648193, |
|
"logps/chosen": -42.673118591308594, |
|
"logps/rejected": -1594.222900390625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2050865888595581, |
|
"rewards/margins": 15.173876762390137, |
|
"rewards/rejected": -15.3789644241333, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 1.7957636787217451e-06, |
|
"logits/chosen": -2.1729538440704346, |
|
"logits/rejected": -2.4276270866394043, |
|
"logps/chosen": -26.112987518310547, |
|
"logps/rejected": -1523.0291748046875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.044597070664167404, |
|
"rewards/margins": 14.675150871276855, |
|
"rewards/rejected": -14.719749450683594, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 1.7748750110138768e-06, |
|
"logits/chosen": -2.106745481491089, |
|
"logits/rejected": -2.3529787063598633, |
|
"logps/chosen": -38.04988098144531, |
|
"logps/rejected": -1700.769287109375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15463842451572418, |
|
"rewards/margins": 16.276233673095703, |
|
"rewards/rejected": -16.430871963500977, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 1.7540414724617282e-06, |
|
"logits/chosen": -2.070836067199707, |
|
"logits/rejected": -2.3102221488952637, |
|
"logps/chosen": -41.751487731933594, |
|
"logps/rejected": -1488.04931640625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19269555807113647, |
|
"rewards/margins": 14.147076606750488, |
|
"rewards/rejected": -14.33977222442627, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.00946044921875, |
|
"learning_rate": 1.7332646469789827e-06, |
|
"logits/chosen": -2.2572789192199707, |
|
"logits/rejected": -2.481287956237793, |
|
"logps/chosen": -29.41888427734375, |
|
"logps/rejected": -1229.242431640625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07652698457241058, |
|
"rewards/margins": 11.74826717376709, |
|
"rewards/rejected": -11.824793815612793, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 1.7125461141675881e-06, |
|
"logits/chosen": -2.1423022747039795, |
|
"logits/rejected": -2.3926641941070557, |
|
"logps/chosen": -30.7061710357666, |
|
"logps/rejected": -1465.9014892578125, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09345726668834686, |
|
"rewards/margins": 14.04102897644043, |
|
"rewards/rejected": -14.134485244750977, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.00014209747314453125, |
|
"learning_rate": 1.6918874491976744e-06, |
|
"logits/chosen": -2.290851354598999, |
|
"logits/rejected": -2.5240445137023926, |
|
"logps/chosen": -36.6445198059082, |
|
"logps/rejected": -1480.7818603515625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.149479478597641, |
|
"rewards/margins": 14.131547927856445, |
|
"rewards/rejected": -14.281025886535645, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.002655029296875, |
|
"learning_rate": 1.6712902226877917e-06, |
|
"logits/chosen": -2.1575067043304443, |
|
"logits/rejected": -2.402039051055908, |
|
"logps/chosen": -45.549842834472656, |
|
"logps/rejected": -1545.114990234375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23408398032188416, |
|
"rewards/margins": 14.681310653686523, |
|
"rewards/rejected": -14.91539478302002, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0028839111328125, |
|
"learning_rate": 1.6507560005854977e-06, |
|
"logits/chosen": -2.066991090774536, |
|
"logits/rejected": -2.3206119537353516, |
|
"logps/chosen": -47.11815643310547, |
|
"logps/rejected": -1413.30126953125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24164950847625732, |
|
"rewards/margins": 13.29053020477295, |
|
"rewards/rejected": -13.532180786132812, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 1.6302863440483121e-06, |
|
"logits/chosen": -2.1091551780700684, |
|
"logits/rejected": -2.394484043121338, |
|
"logps/chosen": -54.07494354248047, |
|
"logps/rejected": -1674.350341796875, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3039209842681885, |
|
"rewards/margins": 15.897099494934082, |
|
"rewards/rejected": -16.201021194458008, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 1.6098828093250203e-06, |
|
"logits/chosen": -2.0393662452697754, |
|
"logits/rejected": -2.2912774085998535, |
|
"logps/chosen": -43.22583770751953, |
|
"logps/rejected": -1745.9964599609375, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20912513136863708, |
|
"rewards/margins": 16.64576530456543, |
|
"rewards/rejected": -16.85489273071289, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 1.5895469476373545e-06, |
|
"logits/chosen": -2.12833833694458, |
|
"logits/rejected": -2.353044033050537, |
|
"logps/chosen": -51.28118133544922, |
|
"logps/rejected": -1477.751708984375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2927466034889221, |
|
"rewards/margins": 13.93517017364502, |
|
"rewards/rejected": -14.227917671203613, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.002105712890625, |
|
"learning_rate": 1.5692803050620642e-06, |
|
"logits/chosen": -2.146883726119995, |
|
"logits/rejected": -2.3877830505371094, |
|
"logps/chosen": -42.891048431396484, |
|
"logps/rejected": -1572.1334228515625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20707789063453674, |
|
"rewards/margins": 14.972661018371582, |
|
"rewards/rejected": -15.17973804473877, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 1.5490844224133717e-06, |
|
"logits/chosen": -2.2065834999084473, |
|
"logits/rejected": -2.4583041667938232, |
|
"logps/chosen": -58.87604522705078, |
|
"logps/rejected": -1606.8402099609375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3703632652759552, |
|
"rewards/margins": 15.166918754577637, |
|
"rewards/rejected": -15.53728199005127, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 1.528960835125822e-06, |
|
"logits/chosen": -2.3619742393493652, |
|
"logits/rejected": -2.5886929035186768, |
|
"logps/chosen": -47.88628005981445, |
|
"logps/rejected": -1394.3492431640625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2605898976325989, |
|
"rewards/margins": 13.201173782348633, |
|
"rewards/rejected": -13.461764335632324, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.5089110731375568e-06, |
|
"logits/chosen": -2.1769912242889404, |
|
"logits/rejected": -2.4125704765319824, |
|
"logps/chosen": -54.75007247924805, |
|
"logps/rejected": -1521.18310546875, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32575544714927673, |
|
"rewards/margins": 14.370445251464844, |
|
"rewards/rejected": -14.696202278137207, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 1.4889366607739925e-06, |
|
"logits/chosen": -2.322796583175659, |
|
"logits/rejected": -2.5181009769439697, |
|
"logps/chosen": -45.69524383544922, |
|
"logps/rejected": -1201.08251953125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24491195380687714, |
|
"rewards/margins": 11.296446800231934, |
|
"rewards/rejected": -11.541359901428223, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03076171875, |
|
"learning_rate": 1.4690391166319307e-06, |
|
"logits/chosen": -2.1181106567382812, |
|
"logits/rejected": -2.3545029163360596, |
|
"logps/chosen": -43.6742057800293, |
|
"logps/rejected": -1542.8385009765625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21549615263938904, |
|
"rewards/margins": 14.664543151855469, |
|
"rewards/rejected": -14.880040168762207, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.4492199534641055e-06, |
|
"logits/chosen": -2.21667218208313, |
|
"logits/rejected": -2.4625155925750732, |
|
"logps/chosen": -47.34065628051758, |
|
"logps/rejected": -1472.347900390625, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25726571679115295, |
|
"rewards/margins": 13.980672836303711, |
|
"rewards/rejected": -14.237937927246094, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0111083984375, |
|
"learning_rate": 1.429480678064174e-06, |
|
"logits/chosen": -2.2022199630737305, |
|
"logits/rejected": -2.4795124530792236, |
|
"logps/chosen": -51.5767822265625, |
|
"logps/rejected": -1817.763671875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2908291518688202, |
|
"rewards/margins": 17.308137893676758, |
|
"rewards/rejected": -17.598966598510742, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.4098227911521523e-06, |
|
"logits/chosen": -2.219804286956787, |
|
"logits/rejected": -2.462226390838623, |
|
"logps/chosen": -46.08092498779297, |
|
"logps/rejected": -1526.5423583984375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23279635608196259, |
|
"rewards/margins": 14.499191284179688, |
|
"rewards/rejected": -14.731986999511719, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 1.3902477872603295e-06, |
|
"logits/chosen": -2.319612503051758, |
|
"logits/rejected": -2.517526149749756, |
|
"logps/chosen": -40.09135055541992, |
|
"logps/rejected": -1286.315185546875, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18802298605442047, |
|
"rewards/margins": 12.167932510375977, |
|
"rewards/rejected": -12.355955123901367, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 1.370757154619638e-06, |
|
"logits/chosen": -2.2395832538604736, |
|
"logits/rejected": -2.470933198928833, |
|
"logps/chosen": -56.124351501464844, |
|
"logps/rejected": -1607.262939453125, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33287256956100464, |
|
"rewards/margins": 15.186471939086914, |
|
"rewards/rejected": -15.519342422485352, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 8.761882781982422e-06, |
|
"learning_rate": 1.3513523750465049e-06, |
|
"logits/chosen": -2.2328319549560547, |
|
"logits/rejected": -2.4625821113586426, |
|
"logps/chosen": -39.597564697265625, |
|
"logps/rejected": -1417.1002197265625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17806950211524963, |
|
"rewards/margins": 13.474327087402344, |
|
"rewards/rejected": -13.652397155761719, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0030517578125, |
|
"learning_rate": 1.332034923830199e-06, |
|
"logits/chosen": -2.136444568634033, |
|
"logits/rejected": -2.3981611728668213, |
|
"logps/chosen": -44.04825210571289, |
|
"logps/rejected": -1527.0830078125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22466015815734863, |
|
"rewards/margins": 14.532748222351074, |
|
"rewards/rejected": -14.757411003112793, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 1.31280626962067e-06, |
|
"logits/chosen": -2.2737619876861572, |
|
"logits/rejected": -2.488204002380371, |
|
"logps/chosen": -49.427528381347656, |
|
"logps/rejected": -1363.083740234375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27385297417640686, |
|
"rewards/margins": 12.863523483276367, |
|
"rewards/rejected": -13.137374877929688, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 1.2936678743168813e-06, |
|
"logits/chosen": -2.2063140869140625, |
|
"logits/rejected": -2.450613498687744, |
|
"logps/chosen": -47.45142364501953, |
|
"logps/rejected": -1494.2115478515625, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2554013133049011, |
|
"rewards/margins": 14.182083129882812, |
|
"rewards/rejected": -14.437482833862305, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 1.2746211929556777e-06, |
|
"logits/chosen": -2.171708583831787, |
|
"logits/rejected": -2.4892578125, |
|
"logps/chosen": -47.59801483154297, |
|
"logps/rejected": -1927.90234375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25736138224601746, |
|
"rewards/margins": 18.46487808227539, |
|
"rewards/rejected": -18.722238540649414, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 5.900859832763672e-06, |
|
"learning_rate": 1.2556676736011558e-06, |
|
"logits/chosen": -2.200247287750244, |
|
"logits/rejected": -2.433065891265869, |
|
"logps/chosen": -47.600555419921875, |
|
"logps/rejected": -1647.335693359375, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25239020586013794, |
|
"rewards/margins": 15.647438049316406, |
|
"rewards/rejected": -15.89982795715332, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.0279541015625, |
|
"learning_rate": 1.2368087572345772e-06, |
|
"logits/chosen": -2.235849380493164, |
|
"logits/rejected": -2.4413654804229736, |
|
"logps/chosen": -48.26280975341797, |
|
"logps/rejected": -1258.86572265625, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26613086462020874, |
|
"rewards/margins": 11.844823837280273, |
|
"rewards/rejected": -12.110954284667969, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.2180458776448067e-06, |
|
"logits/chosen": -2.187344551086426, |
|
"logits/rejected": -2.4354748725891113, |
|
"logps/chosen": -40.0956916809082, |
|
"logps/rejected": -1655.0335693359375, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18102389574050903, |
|
"rewards/margins": 15.813700675964355, |
|
"rewards/rejected": -15.994723320007324, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0234375, |
|
"learning_rate": 1.1993804613193158e-06, |
|
"logits/chosen": -2.18884539604187, |
|
"logits/rejected": -2.4379589557647705, |
|
"logps/chosen": -64.91096496582031, |
|
"logps/rejected": -1493.458740234375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4194706082344055, |
|
"rewards/margins": 13.966886520385742, |
|
"rewards/rejected": -14.386357307434082, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.3649463653564453e-05, |
|
"learning_rate": 1.1808139273357232e-06, |
|
"logits/chosen": -2.1439809799194336, |
|
"logits/rejected": -2.3814640045166016, |
|
"logps/chosen": -47.53407669067383, |
|
"logps/rejected": -1624.342529296875, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25394895672798157, |
|
"rewards/margins": 15.44206714630127, |
|
"rewards/rejected": -15.696017265319824, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0003833770751953125, |
|
"learning_rate": 1.1623476872539108e-06, |
|
"logits/chosen": -2.1601688861846924, |
|
"logits/rejected": -2.4301838874816895, |
|
"logps/chosen": -46.782203674316406, |
|
"logps/rejected": -1727.318115234375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24715037643909454, |
|
"rewards/margins": 16.47604751586914, |
|
"rewards/rejected": -16.72319984436035, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 1.1439831450087032e-06, |
|
"logits/chosen": -2.204617738723755, |
|
"logits/rejected": -2.4746241569519043, |
|
"logps/chosen": -70.88643646240234, |
|
"logps/rejected": -1754.577392578125, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48623570799827576, |
|
"rewards/margins": 16.506263732910156, |
|
"rewards/rejected": -16.992502212524414, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.1257216968031357e-06, |
|
"logits/chosen": -2.172727108001709, |
|
"logits/rejected": -2.4189679622650146, |
|
"logps/chosen": -53.47440719604492, |
|
"logps/rejected": -1510.17333984375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3195067048072815, |
|
"rewards/margins": 14.29955768585205, |
|
"rewards/rejected": -14.619064331054688, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0003604888916015625, |
|
"learning_rate": 1.1075647310022974e-06, |
|
"logits/chosen": -2.324207305908203, |
|
"logits/rejected": -2.5505588054656982, |
|
"logps/chosen": -48.77964401245117, |
|
"logps/rejected": -1276.3153076171875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2692165970802307, |
|
"rewards/margins": 12.017995834350586, |
|
"rewards/rejected": -12.287213325500488, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.00640869140625, |
|
"learning_rate": 1.0895136280277863e-06, |
|
"logits/chosen": -2.1405930519104004, |
|
"logits/rejected": -2.389354705810547, |
|
"logps/chosen": -52.074989318847656, |
|
"logps/rejected": -1792.354248046875, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2989426851272583, |
|
"rewards/margins": 17.034626007080078, |
|
"rewards/rejected": -17.33357048034668, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 1.0715697602527542e-06, |
|
"logits/chosen": -2.0093884468078613, |
|
"logits/rejected": -2.2803351879119873, |
|
"logps/chosen": -60.875953674316406, |
|
"logps/rejected": -1733.0355224609375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3902908265590668, |
|
"rewards/margins": 16.359115600585938, |
|
"rewards/rejected": -16.749406814575195, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.10546875, |
|
"learning_rate": 1.0537344918975708e-06, |
|
"logits/chosen": -2.2281734943389893, |
|
"logits/rejected": -2.414677381515503, |
|
"logps/chosen": -56.65943145751953, |
|
"logps/rejected": -1399.209228515625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3390200734138489, |
|
"rewards/margins": 13.123127937316895, |
|
"rewards/rejected": -13.46214771270752, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.910064697265625e-05, |
|
"learning_rate": 1.036009178926107e-06, |
|
"logits/chosen": -2.1897904872894287, |
|
"logits/rejected": -2.426058530807495, |
|
"logps/chosen": -48.45244216918945, |
|
"logps/rejected": -1510.665771484375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26533347368240356, |
|
"rewards/margins": 14.326631546020508, |
|
"rewards/rejected": -14.59196662902832, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 1.0183951689426438e-06, |
|
"logits/chosen": -2.1068902015686035, |
|
"logits/rejected": -2.3621068000793457, |
|
"logps/chosen": -49.524208068847656, |
|
"logps/rejected": -1805.392333984375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2760918438434601, |
|
"rewards/margins": 17.226123809814453, |
|
"rewards/rejected": -17.502214431762695, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 1.0008938010894156e-06, |
|
"logits/chosen": -2.0732312202453613, |
|
"logits/rejected": -2.359827756881714, |
|
"logps/chosen": -49.63148880004883, |
|
"logps/rejected": -1718.0699462890625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2772377133369446, |
|
"rewards/margins": 16.361858367919922, |
|
"rewards/rejected": -16.639095306396484, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 9.83506405944804e-07, |
|
"logits/chosen": -2.0447497367858887, |
|
"logits/rejected": -2.2771499156951904, |
|
"logps/chosen": -39.81549072265625, |
|
"logps/rejected": -1609.721923828125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17990897595882416, |
|
"rewards/margins": 15.340120315551758, |
|
"rewards/rejected": -15.520029067993164, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 9.894371032714844e-06, |
|
"learning_rate": 9.662343054221743e-07, |
|
"logits/chosen": -2.053480625152588, |
|
"logits/rejected": -2.3034961223602295, |
|
"logps/chosen": -49.807559967041016, |
|
"logps/rejected": -1805.352294921875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2724885642528534, |
|
"rewards/margins": 17.18954849243164, |
|
"rewards/rejected": -17.462038040161133, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 9.490788126693754e-07, |
|
"logits/chosen": -2.081925868988037, |
|
"logits/rejected": -2.337897777557373, |
|
"logps/chosen": -39.89293670654297, |
|
"logps/rejected": -1609.5567626953125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18545950949192047, |
|
"rewards/margins": 15.35168170928955, |
|
"rewards/rejected": -15.537139892578125, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.007781982421875, |
|
"learning_rate": 9.32041231968904e-07, |
|
"logits/chosen": -2.1510047912597656, |
|
"logits/rejected": -2.4004898071289062, |
|
"logps/chosen": -42.21772766113281, |
|
"logps/rejected": -1647.2308349609375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20343096554279327, |
|
"rewards/margins": 15.7313232421875, |
|
"rewards/rejected": -15.93475341796875, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.030029296875, |
|
"learning_rate": 9.151228586387464e-07, |
|
"logits/chosen": -2.2137999534606934, |
|
"logits/rejected": -2.4432384967803955, |
|
"logps/chosen": -46.389976501464844, |
|
"logps/rejected": -1448.36328125, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24593684077262878, |
|
"rewards/margins": 13.737565994262695, |
|
"rewards/rejected": -13.983503341674805, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 8.983249789338941e-07, |
|
"logits/chosen": -2.1793341636657715, |
|
"logits/rejected": -2.4067111015319824, |
|
"logps/chosen": -53.83050537109375, |
|
"logps/rejected": -1414.4827880859375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3225783705711365, |
|
"rewards/margins": 13.334991455078125, |
|
"rewards/rejected": -13.657569885253906, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -2.61519455909729, |
|
"eval_logits/rejected": -2.734154462814331, |
|
"eval_logps/chosen": -62.61328125, |
|
"eval_logps/rejected": -755.4296264648438, |
|
"eval_loss": 0.002784780925139785, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.36699774861335754, |
|
"eval_rewards/margins": 6.738577365875244, |
|
"eval_rewards/rejected": -7.1055755615234375, |
|
"eval_runtime": 0.6552, |
|
"eval_samples_per_second": 7.632, |
|
"eval_steps_per_second": 4.579, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 8.816488699485593e-07, |
|
"logits/chosen": -2.2049620151519775, |
|
"logits/rejected": -2.431889057159424, |
|
"logps/chosen": -43.32733917236328, |
|
"logps/rejected": -1466.1322021484375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21807484328746796, |
|
"rewards/margins": 13.928044319152832, |
|
"rewards/rejected": -14.146120071411133, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.00012302398681640625, |
|
"learning_rate": 8.650957995190784e-07, |
|
"logits/chosen": -2.168497085571289, |
|
"logits/rejected": -2.439462661743164, |
|
"logps/chosen": -43.25156784057617, |
|
"logps/rejected": -1766.5902099609375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20799896121025085, |
|
"rewards/margins": 16.896240234375, |
|
"rewards/rejected": -17.104238510131836, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 8.486670261275193e-07, |
|
"logits/chosen": -2.28559947013855, |
|
"logits/rejected": -2.535123348236084, |
|
"logps/chosen": -46.22868347167969, |
|
"logps/rejected": -1491.29638671875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24624836444854736, |
|
"rewards/margins": 14.186907768249512, |
|
"rewards/rejected": -14.43315601348877, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.01068115234375, |
|
"learning_rate": 8.32363798806011e-07, |
|
"logits/chosen": -2.2580156326293945, |
|
"logits/rejected": -2.499662160873413, |
|
"logps/chosen": -43.39426803588867, |
|
"logps/rejected": -1570.83154296875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2131904661655426, |
|
"rewards/margins": 14.96166706085205, |
|
"rewards/rejected": -15.174858093261719, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 8.161873570417742e-07, |
|
"logits/chosen": -2.205913543701172, |
|
"logits/rejected": -2.461812973022461, |
|
"logps/chosen": -51.799095153808594, |
|
"logps/rejected": -1628.696044921875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2980988919734955, |
|
"rewards/margins": 15.454202651977539, |
|
"rewards/rejected": -15.752302169799805, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 8.001389306828897e-07, |
|
"logits/chosen": -2.1009681224823, |
|
"logits/rejected": -2.3735690116882324, |
|
"logps/chosen": -57.14350128173828, |
|
"logps/rejected": -1912.2584228515625, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3501451313495636, |
|
"rewards/margins": 18.137147903442383, |
|
"rewards/rejected": -18.487293243408203, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.002716064453125e-05, |
|
"learning_rate": 7.842197398447993e-07, |
|
"logits/chosen": -2.145404100418091, |
|
"logits/rejected": -2.3879191875457764, |
|
"logps/chosen": -46.874935150146484, |
|
"logps/rejected": -1601.4229736328125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25245028734207153, |
|
"rewards/margins": 15.218485832214355, |
|
"rewards/rejected": -15.470934867858887, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.0014801025390625, |
|
"learning_rate": 7.684309948175414e-07, |
|
"logits/chosen": -2.1167359352111816, |
|
"logits/rejected": -2.340625047683716, |
|
"logps/chosen": -41.6113395690918, |
|
"logps/rejected": -1543.5413818359375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19553497433662415, |
|
"rewards/margins": 14.705103874206543, |
|
"rewards/rejected": -14.900639533996582, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.0126953125, |
|
"learning_rate": 7.527738959737371e-07, |
|
"logits/chosen": -2.1751418113708496, |
|
"logits/rejected": -2.421253204345703, |
|
"logps/chosen": -55.10563278198242, |
|
"logps/rejected": -1535.4105224609375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33046358823776245, |
|
"rewards/margins": 14.514165878295898, |
|
"rewards/rejected": -14.844629287719727, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 7.372496336773269e-07, |
|
"logits/chosen": -2.143078565597534, |
|
"logits/rejected": -2.3641624450683594, |
|
"logps/chosen": -44.76749038696289, |
|
"logps/rejected": -1385.771728515625, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23040492832660675, |
|
"rewards/margins": 13.126627922058105, |
|
"rewards/rejected": -13.357030868530273, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 7.218593881930744e-07, |
|
"logits/chosen": -2.237316370010376, |
|
"logits/rejected": -2.46457839012146, |
|
"logps/chosen": -43.913902282714844, |
|
"logps/rejected": -1421.6866455078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21920108795166016, |
|
"rewards/margins": 13.50879955291748, |
|
"rewards/rejected": -13.728001594543457, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 7.066043295968342e-07, |
|
"logits/chosen": -2.2042956352233887, |
|
"logits/rejected": -2.437238931655884, |
|
"logps/chosen": -38.968666076660156, |
|
"logps/rejected": -1539.2254638671875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1654917150735855, |
|
"rewards/margins": 14.6710786819458, |
|
"rewards/rejected": -14.836568832397461, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.00147247314453125, |
|
"learning_rate": 6.914856176865891e-07, |
|
"logits/chosen": -2.2930877208709717, |
|
"logits/rejected": -2.530980110168457, |
|
"logps/chosen": -39.30299377441406, |
|
"logps/rejected": -1486.0648193359375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18379421532154083, |
|
"rewards/margins": 14.16865348815918, |
|
"rewards/rejected": -14.352447509765625, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0001888275146484375, |
|
"learning_rate": 6.765044018942804e-07, |
|
"logits/chosen": -2.2794032096862793, |
|
"logits/rejected": -2.5219268798828125, |
|
"logps/chosen": -37.822265625, |
|
"logps/rejected": -1375.9476318359375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16687843203544617, |
|
"rewards/margins": 13.096723556518555, |
|
"rewards/rejected": -13.263601303100586, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 6.616618211984169e-07, |
|
"logits/chosen": -2.189056873321533, |
|
"logits/rejected": -2.428335666656494, |
|
"logps/chosen": -45.38810348510742, |
|
"logps/rejected": -1504.1986083984375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24011921882629395, |
|
"rewards/margins": 14.305384635925293, |
|
"rewards/rejected": -14.545504570007324, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0289306640625, |
|
"learning_rate": 6.469590040374799e-07, |
|
"logits/chosen": -2.135713815689087, |
|
"logits/rejected": -2.3790910243988037, |
|
"logps/chosen": -32.32978057861328, |
|
"logps/rejected": -1641.7115478515625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10296233743429184, |
|
"rewards/margins": 15.768649101257324, |
|
"rewards/rejected": -15.871612548828125, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 6.32397068224136e-07, |
|
"logits/chosen": -2.248927593231201, |
|
"logits/rejected": -2.501868963241577, |
|
"logps/chosen": -40.18678283691406, |
|
"logps/rejected": -1579.708251953125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18390187621116638, |
|
"rewards/margins": 15.054614067077637, |
|
"rewards/rejected": -15.238515853881836, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 6.17977120860249e-07, |
|
"logits/chosen": -2.2377326488494873, |
|
"logits/rejected": -2.4842400550842285, |
|
"logps/chosen": -68.27392578125, |
|
"logps/rejected": -1504.4427490234375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4537542462348938, |
|
"rewards/margins": 14.08979606628418, |
|
"rewards/rejected": -14.543548583984375, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 6.037002582527121e-07, |
|
"logits/chosen": -2.17307710647583, |
|
"logits/rejected": -2.4036478996276855, |
|
"logps/chosen": -38.560646057128906, |
|
"logps/rejected": -1505.03662109375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.169607013463974, |
|
"rewards/margins": 14.335187911987305, |
|
"rewards/rejected": -14.504794120788574, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.07666015625, |
|
"learning_rate": 5.895675658300981e-07, |
|
"logits/chosen": -2.3447728157043457, |
|
"logits/rejected": -2.5695431232452393, |
|
"logps/chosen": -52.10234451293945, |
|
"logps/rejected": -1275.713134765625, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3004547953605652, |
|
"rewards/margins": 11.987954139709473, |
|
"rewards/rejected": -12.288411140441895, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 5.755801180601381e-07, |
|
"logits/chosen": -2.2320406436920166, |
|
"logits/rejected": -2.4947474002838135, |
|
"logps/chosen": -46.99077224731445, |
|
"logps/rejected": -1553.0062255859375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25020334124565125, |
|
"rewards/margins": 14.758695602416992, |
|
"rewards/rejected": -15.008898735046387, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 5.617389783680307e-07, |
|
"logits/chosen": -2.0936381816864014, |
|
"logits/rejected": -2.3752357959747314, |
|
"logps/chosen": -44.832740783691406, |
|
"logps/rejected": -1831.029296875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22681677341461182, |
|
"rewards/margins": 17.518943786621094, |
|
"rewards/rejected": -17.74576187133789, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 5.48045199055596e-07, |
|
"logits/chosen": -2.19124174118042, |
|
"logits/rejected": -2.438732624053955, |
|
"logps/chosen": -44.39277648925781, |
|
"logps/rejected": -1470.6522216796875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22872138023376465, |
|
"rewards/margins": 13.976783752441406, |
|
"rewards/rejected": -14.205507278442383, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 7.264316082000732e-07, |
|
"learning_rate": 5.344998212212704e-07, |
|
"logits/chosen": -2.103717565536499, |
|
"logits/rejected": -2.3787877559661865, |
|
"logps/chosen": -46.40209197998047, |
|
"logps/rejected": -1813.8245849609375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23586265742778778, |
|
"rewards/margins": 17.30778694152832, |
|
"rewards/rejected": -17.543649673461914, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.01300048828125, |
|
"learning_rate": 5.211038746809551e-07, |
|
"logits/chosen": -2.2235634326934814, |
|
"logits/rejected": -2.4578189849853516, |
|
"logps/chosen": -50.56513595581055, |
|
"logps/rejected": -1465.8817138671875, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29284316301345825, |
|
"rewards/margins": 13.869397163391113, |
|
"rewards/rejected": -14.162240982055664, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 5.078583778897216e-07, |
|
"logits/chosen": -2.2172188758850098, |
|
"logits/rejected": -2.4327051639556885, |
|
"logps/chosen": -58.36212158203125, |
|
"logps/rejected": -1398.1490478515625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36190560460090637, |
|
"rewards/margins": 13.11742877960205, |
|
"rewards/rejected": -13.479333877563477, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.11962890625, |
|
"learning_rate": 4.94764337864384e-07, |
|
"logits/chosen": -2.304565668106079, |
|
"logits/rejected": -2.5306572914123535, |
|
"logps/chosen": -43.99140548706055, |
|
"logps/rejected": -1435.986572265625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2204219549894333, |
|
"rewards/margins": 13.629618644714355, |
|
"rewards/rejected": -13.8500394821167, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 4.818227501069328e-07, |
|
"logits/chosen": -2.259232521057129, |
|
"logits/rejected": -2.5605130195617676, |
|
"logps/chosen": -62.873863220214844, |
|
"logps/rejected": -1876.69140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39689213037490845, |
|
"rewards/margins": 17.804046630859375, |
|
"rewards/rejected": -18.200939178466797, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 4.690345985288572e-07, |
|
"logits/chosen": -2.158508777618408, |
|
"logits/rejected": -2.399550437927246, |
|
"logps/chosen": -40.623416900634766, |
|
"logps/rejected": -1611.9981689453125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18651778995990753, |
|
"rewards/margins": 15.386564254760742, |
|
"rewards/rejected": -15.573080062866211, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.5273690223693848e-06, |
|
"learning_rate": 4.5640085537633633e-07, |
|
"logits/chosen": -2.185797691345215, |
|
"logits/rejected": -2.462428331375122, |
|
"logps/chosen": -64.81330871582031, |
|
"logps/rejected": -1722.494384765625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42417916655540466, |
|
"rewards/margins": 16.25531578063965, |
|
"rewards/rejected": -16.679494857788086, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 5.936622619628906e-05, |
|
"learning_rate": 4.439224811563211e-07, |
|
"logits/chosen": -2.0893611907958984, |
|
"logits/rejected": -2.3352718353271484, |
|
"logps/chosen": -42.231239318847656, |
|
"logps/rejected": -1722.9840087890625, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20374104380607605, |
|
"rewards/margins": 16.47307586669922, |
|
"rewards/rejected": -16.67681884765625, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.000370025634765625, |
|
"learning_rate": 4.316004245635158e-07, |
|
"logits/chosen": -2.1728897094726562, |
|
"logits/rejected": -2.4218878746032715, |
|
"logps/chosen": -47.56116485595703, |
|
"logps/rejected": -1730.8961181640625, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25542110204696655, |
|
"rewards/margins": 16.49363899230957, |
|
"rewards/rejected": -16.74905776977539, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.1552734375e-05, |
|
"learning_rate": 4.194356224082455e-07, |
|
"logits/chosen": -2.095263957977295, |
|
"logits/rejected": -2.3779187202453613, |
|
"logps/chosen": -44.129798889160156, |
|
"logps/rejected": -1774.712158203125, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22583921253681183, |
|
"rewards/margins": 16.942506790161133, |
|
"rewards/rejected": -17.168346405029297, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.00445556640625, |
|
"learning_rate": 4.074289995452338e-07, |
|
"logits/chosen": -2.1644439697265625, |
|
"logits/rejected": -2.4039626121520996, |
|
"logps/chosen": -55.25883102416992, |
|
"logps/rejected": -1481.6717529296875, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3257646858692169, |
|
"rewards/margins": 13.985359191894531, |
|
"rewards/rejected": -14.311124801635742, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.00010395050048828125, |
|
"learning_rate": 3.9558146880329246e-07, |
|
"logits/chosen": -2.1858904361724854, |
|
"logits/rejected": -2.422576427459717, |
|
"logps/chosen": -38.67001724243164, |
|
"logps/rejected": -1623.956298828125, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1640951931476593, |
|
"rewards/margins": 15.4973783493042, |
|
"rewards/rejected": -15.661474227905273, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 3.838939309159187e-07, |
|
"logits/chosen": -2.179760694503784, |
|
"logits/rejected": -2.4091451168060303, |
|
"logps/chosen": -44.970279693603516, |
|
"logps/rejected": -1523.0604248046875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23116175830364227, |
|
"rewards/margins": 14.479741096496582, |
|
"rewards/rejected": -14.710905075073242, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.005523681640625, |
|
"learning_rate": 3.723672744528162e-07, |
|
"logits/chosen": -2.256727695465088, |
|
"logits/rejected": -2.5047221183776855, |
|
"logps/chosen": -41.15488815307617, |
|
"logps/rejected": -1610.76220703125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19345328211784363, |
|
"rewards/margins": 15.386802673339844, |
|
"rewards/rejected": -15.580256462097168, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.00286865234375, |
|
"learning_rate": 3.6100237575233647e-07, |
|
"logits/chosen": -2.3228962421417236, |
|
"logits/rejected": -2.5387914180755615, |
|
"logps/chosen": -51.057395935058594, |
|
"logps/rejected": -1306.785400390625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2881939113140106, |
|
"rewards/margins": 12.320541381835938, |
|
"rewards/rejected": -12.608736038208008, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 3.4980009885486054e-07, |
|
"logits/chosen": -2.25309157371521, |
|
"logits/rejected": -2.4575724601745605, |
|
"logps/chosen": -41.59147644042969, |
|
"logps/rejected": -1270.520751953125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20340308547019958, |
|
"rewards/margins": 12.038886070251465, |
|
"rewards/rejected": -12.242289543151855, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 3.3876129543710197e-07, |
|
"logits/chosen": -2.2136871814727783, |
|
"logits/rejected": -2.4533486366271973, |
|
"logps/chosen": -39.60608673095703, |
|
"logps/rejected": -1685.295654296875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1774289309978485, |
|
"rewards/margins": 16.114295959472656, |
|
"rewards/rejected": -16.291725158691406, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.002960205078125, |
|
"learning_rate": 3.2788680474735687e-07, |
|
"logits/chosen": -2.194180488586426, |
|
"logits/rejected": -2.439089298248291, |
|
"logps/chosen": -38.77867889404297, |
|
"logps/rejected": -1473.8935546875, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.174292653799057, |
|
"rewards/margins": 14.067087173461914, |
|
"rewards/rejected": -14.24138069152832, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.01177978515625, |
|
"learning_rate": 3.1717745354170214e-07, |
|
"logits/chosen": -2.0905921459198, |
|
"logits/rejected": -2.367194175720215, |
|
"logps/chosen": -52.29819869995117, |
|
"logps/rejected": -1684.010986328125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3022548258304596, |
|
"rewards/margins": 16.003009796142578, |
|
"rewards/rejected": -16.30526351928711, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.002471923828125, |
|
"learning_rate": 3.0663405602113727e-07, |
|
"logits/chosen": -2.258749485015869, |
|
"logits/rejected": -2.5248327255249023, |
|
"logps/chosen": -48.49363327026367, |
|
"logps/rejected": -1557.198486328125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2676192820072174, |
|
"rewards/margins": 14.78343391418457, |
|
"rewards/rejected": -15.051053047180176, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.021240234375, |
|
"learning_rate": 2.9625741376968107e-07, |
|
"logits/chosen": -2.0779199600219727, |
|
"logits/rejected": -2.3387115001678467, |
|
"logps/chosen": -61.849632263183594, |
|
"logps/rejected": -1759.1298828125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3940272629261017, |
|
"rewards/margins": 16.581546783447266, |
|
"rewards/rejected": -16.975570678710938, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 2.8604831569343324e-07, |
|
"logits/chosen": -2.3138976097106934, |
|
"logits/rejected": -2.529716968536377, |
|
"logps/chosen": -50.220458984375, |
|
"logps/rejected": -1407.71826171875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2772979736328125, |
|
"rewards/margins": 13.286694526672363, |
|
"rewards/rejected": -13.563992500305176, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 2.760075379605942e-07, |
|
"logits/chosen": -2.144134759902954, |
|
"logits/rejected": -2.3687326908111572, |
|
"logps/chosen": -49.850528717041016, |
|
"logps/rejected": -1546.928955078125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2831845283508301, |
|
"rewards/margins": 14.664377212524414, |
|
"rewards/rejected": -14.947561264038086, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.661358439424552e-07, |
|
"logits/chosen": -2.209009885787964, |
|
"logits/rejected": -2.4345531463623047, |
|
"logps/chosen": -46.4788818359375, |
|
"logps/rejected": -1371.7965087890625, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24716854095458984, |
|
"rewards/margins": 12.960786819458008, |
|
"rewards/rejected": -13.207954406738281, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.005462646484375, |
|
"learning_rate": 2.564339841553615e-07, |
|
"logits/chosen": -2.200819969177246, |
|
"logits/rejected": -2.416544198989868, |
|
"logps/chosen": -43.68715286254883, |
|
"logps/rejected": -1402.603759765625, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22630712389945984, |
|
"rewards/margins": 13.287277221679688, |
|
"rewards/rejected": -13.513586044311523, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 2.469026962036539e-07, |
|
"logits/chosen": -2.1682403087615967, |
|
"logits/rejected": -2.384089946746826, |
|
"logps/chosen": -43.91130447387695, |
|
"logps/rejected": -1500.0106201171875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22072705626487732, |
|
"rewards/margins": 14.21442699432373, |
|
"rewards/rejected": -14.435153007507324, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 2.3754270472358786e-07, |
|
"logits/chosen": -2.17598032951355, |
|
"logits/rejected": -2.39375638961792, |
|
"logps/chosen": -40.356300354003906, |
|
"logps/rejected": -1466.735595703125, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18523935973644257, |
|
"rewards/margins": 13.940629959106445, |
|
"rewards/rejected": -14.125869750976562, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 2.283547213282458e-07, |
|
"logits/chosen": -2.2732253074645996, |
|
"logits/rejected": -2.502781629562378, |
|
"logps/chosen": -46.22926330566406, |
|
"logps/rejected": -1531.5400390625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24311251938343048, |
|
"rewards/margins": 14.52368450164795, |
|
"rewards/rejected": -14.76679515838623, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 2.1933944455343166e-07, |
|
"logits/chosen": -2.0053231716156006, |
|
"logits/rejected": -2.297400951385498, |
|
"logps/chosen": -57.17345428466797, |
|
"logps/rejected": -1729.294921875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35318198800086975, |
|
"rewards/margins": 16.38203239440918, |
|
"rewards/rejected": -16.735218048095703, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.00653076171875, |
|
"learning_rate": 2.104975598045647e-07, |
|
"logits/chosen": -2.1619279384613037, |
|
"logits/rejected": -2.387904167175293, |
|
"logps/chosen": -37.9974250793457, |
|
"logps/rejected": -1388.173583984375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16347357630729675, |
|
"rewards/margins": 13.225934982299805, |
|
"rewards/rejected": -13.38940715789795, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 2.018297393045701e-07, |
|
"logits/chosen": -2.201099395751953, |
|
"logits/rejected": -2.4077112674713135, |
|
"logps/chosen": -43.07371139526367, |
|
"logps/rejected": -1454.396728515625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21469160914421082, |
|
"rewards/margins": 13.828866958618164, |
|
"rewards/rejected": -14.043559074401855, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.05859375, |
|
"learning_rate": 1.9333664204277236e-07, |
|
"logits/chosen": -2.0957770347595215, |
|
"logits/rejected": -2.3300156593322754, |
|
"logps/chosen": -40.987003326416016, |
|
"logps/rejected": -1783.749755859375, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1888490617275238, |
|
"rewards/margins": 17.056163787841797, |
|
"rewards/rejected": -17.245014190673828, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0001068115234375, |
|
"learning_rate": 1.8501891372479124e-07, |
|
"logits/chosen": -2.1852810382843018, |
|
"logits/rejected": -2.4316954612731934, |
|
"logps/chosen": -44.98752975463867, |
|
"logps/rejected": -1581.531982421875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22910073399543762, |
|
"rewards/margins": 15.049044609069824, |
|
"rewards/rejected": -15.278146743774414, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.115234375, |
|
"learning_rate": 1.7687718672345533e-07, |
|
"logits/chosen": -2.1352264881134033, |
|
"logits/rejected": -2.3711695671081543, |
|
"logps/chosen": -51.448890686035156, |
|
"logps/rejected": -1699.1624755859375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29201555252075195, |
|
"rewards/margins": 16.14071273803711, |
|
"rewards/rejected": -16.432727813720703, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.002471923828125, |
|
"learning_rate": 1.689120800307212e-07, |
|
"logits/chosen": -2.0329132080078125, |
|
"logits/rejected": -2.287954092025757, |
|
"logps/chosen": -46.30390167236328, |
|
"logps/rejected": -1923.584716796875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24167513847351074, |
|
"rewards/margins": 18.367229461669922, |
|
"rewards/rejected": -18.608905792236328, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 1.6112419921061357e-07, |
|
"logits/chosen": -2.1787500381469727, |
|
"logits/rejected": -2.4122672080993652, |
|
"logps/chosen": -50.2169075012207, |
|
"logps/rejected": -1459.5537109375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2793883681297302, |
|
"rewards/margins": 13.814018249511719, |
|
"rewards/rejected": -14.093404769897461, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 1.5351413635318807e-07, |
|
"logits/chosen": -2.2764883041381836, |
|
"logits/rejected": -2.5183794498443604, |
|
"logps/chosen": -47.43731689453125, |
|
"logps/rejected": -1477.555419921875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2550382614135742, |
|
"rewards/margins": 13.986851692199707, |
|
"rewards/rejected": -14.241891860961914, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.460824700295138e-07, |
|
"logits/chosen": -2.268395185470581, |
|
"logits/rejected": -2.5008223056793213, |
|
"logps/chosen": -55.70990753173828, |
|
"logps/rejected": -1575.289794921875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3271048367023468, |
|
"rewards/margins": 14.906885147094727, |
|
"rewards/rejected": -15.233988761901855, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 1.3882976524768694e-07, |
|
"logits/chosen": -2.2560763359069824, |
|
"logits/rejected": -2.474360942840576, |
|
"logps/chosen": -48.290626525878906, |
|
"logps/rejected": -1297.683349609375, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25945621728897095, |
|
"rewards/margins": 12.217463493347168, |
|
"rewards/rejected": -12.476920127868652, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0004482269287109375, |
|
"learning_rate": 1.3175657340987664e-07, |
|
"logits/chosen": -2.1752967834472656, |
|
"logits/rejected": -2.405945301055908, |
|
"logps/chosen": -39.73582077026367, |
|
"logps/rejected": -1538.538818359375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1809137761592865, |
|
"rewards/margins": 14.694430351257324, |
|
"rewards/rejected": -14.875345230102539, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 1.2486343227040122e-07, |
|
"logits/chosen": -2.286973476409912, |
|
"logits/rejected": -2.5369372367858887, |
|
"logps/chosen": -47.38744354248047, |
|
"logps/rejected": -1557.68798828125, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24152731895446777, |
|
"rewards/margins": 14.780197143554688, |
|
"rewards/rejected": -15.021723747253418, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.181508658948452e-07, |
|
"logits/chosen": -2.2179925441741943, |
|
"logits/rejected": -2.4420266151428223, |
|
"logps/chosen": -38.083351135253906, |
|
"logps/rejected": -1484.4072265625, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16277261078357697, |
|
"rewards/margins": 14.176862716674805, |
|
"rewards/rejected": -14.339635848999023, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 1.1161938462021627e-07, |
|
"logits/chosen": -2.1011550426483154, |
|
"logits/rejected": -2.328584671020508, |
|
"logps/chosen": -42.358421325683594, |
|
"logps/rejected": -1466.247802734375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19884276390075684, |
|
"rewards/margins": 13.929216384887695, |
|
"rewards/rejected": -14.128057479858398, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.0526948501614536e-07, |
|
"logits/chosen": -2.129077434539795, |
|
"logits/rejected": -2.3908514976501465, |
|
"logps/chosen": -53.09492874145508, |
|
"logps/rejected": -1710.9476318359375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3066931366920471, |
|
"rewards/margins": 16.256519317626953, |
|
"rewards/rejected": -16.563209533691406, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 9.910164984713477e-08, |
|
"logits/chosen": -2.135789394378662, |
|
"logits/rejected": -2.3950417041778564, |
|
"logps/chosen": -43.19747543334961, |
|
"logps/rejected": -1662.0009765625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2104502171278, |
|
"rewards/margins": 15.8755464553833, |
|
"rewards/rejected": -16.085996627807617, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 9.311634803585323e-08, |
|
"logits/chosen": -2.1814115047454834, |
|
"logits/rejected": -2.445276975631714, |
|
"logps/chosen": -53.1719970703125, |
|
"logps/rejected": -1662.022705078125, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3151262402534485, |
|
"rewards/margins": 15.78093433380127, |
|
"rewards/rejected": -16.0960636138916, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.00125885009765625, |
|
"learning_rate": 8.7314034627487e-08, |
|
"logits/chosen": -2.230149507522583, |
|
"logits/rejected": -2.479522705078125, |
|
"logps/chosen": -36.06591033935547, |
|
"logps/rejected": -1619.81884765625, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14542549848556519, |
|
"rewards/margins": 15.533602714538574, |
|
"rewards/rejected": -15.679028511047363, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.023681640625, |
|
"learning_rate": 8.16951507551439e-08, |
|
"logits/chosen": -2.2386298179626465, |
|
"logits/rejected": -2.456665515899658, |
|
"logps/chosen": -45.547454833984375, |
|
"logps/rejected": -1492.232666015625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2374843806028366, |
|
"rewards/margins": 14.150115966796875, |
|
"rewards/rejected": -14.387600898742676, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.00019931793212890625, |
|
"learning_rate": 7.626012360631291e-08, |
|
"logits/chosen": -2.266707420349121, |
|
"logits/rejected": -2.5029749870300293, |
|
"logps/chosen": -49.74803924560547, |
|
"logps/rejected": -1482.993896484375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2803342640399933, |
|
"rewards/margins": 14.048635482788086, |
|
"rewards/rejected": -14.328969955444336, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 7.100936639038936e-08, |
|
"logits/chosen": -2.040673017501831, |
|
"logits/rejected": -2.3331856727600098, |
|
"logps/chosen": -43.85186004638672, |
|
"logps/rejected": -1894.5054931640625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2176673710346222, |
|
"rewards/margins": 18.155574798583984, |
|
"rewards/rejected": -18.373241424560547, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0609626770019531e-05, |
|
"learning_rate": 6.594327830725916e-08, |
|
"logits/chosen": -2.190392017364502, |
|
"logits/rejected": -2.451129913330078, |
|
"logps/chosen": -54.63507080078125, |
|
"logps/rejected": -1576.1376953125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32724398374557495, |
|
"rewards/margins": 14.939753532409668, |
|
"rewards/rejected": -15.266998291015625, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 6.106224451694592e-08, |
|
"logits/chosen": -2.2175679206848145, |
|
"logits/rejected": -2.4564685821533203, |
|
"logps/chosen": -46.39513397216797, |
|
"logps/rejected": -1616.2586669921875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24283328652381897, |
|
"rewards/margins": 15.379629135131836, |
|
"rewards/rejected": -15.622464179992676, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 5.636663611033266e-08, |
|
"logits/chosen": -2.0778698921203613, |
|
"logits/rejected": -2.3482048511505127, |
|
"logps/chosen": -44.864990234375, |
|
"logps/rejected": -1604.7249755859375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23550191521644592, |
|
"rewards/margins": 15.299467086791992, |
|
"rewards/rejected": -15.534968376159668, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 5.185681008094579e-08, |
|
"logits/chosen": -2.284482479095459, |
|
"logits/rejected": -2.5175766944885254, |
|
"logps/chosen": -47.569602966308594, |
|
"logps/rejected": -1563.1016845703125, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25453075766563416, |
|
"rewards/margins": 14.83684253692627, |
|
"rewards/rejected": -15.0913724899292, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.00015735626220703125, |
|
"learning_rate": 4.753310929781513e-08, |
|
"logits/chosen": -2.2356629371643066, |
|
"logits/rejected": -2.451608657836914, |
|
"logps/chosen": -53.69340133666992, |
|
"logps/rejected": -1443.2952880859375, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3177313208580017, |
|
"rewards/margins": 13.615710258483887, |
|
"rewards/rejected": -13.933443069458008, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 6.246566772460938e-05, |
|
"learning_rate": 4.3395862479405914e-08, |
|
"logits/chosen": -2.156893253326416, |
|
"logits/rejected": -2.395084857940674, |
|
"logps/chosen": -46.39254379272461, |
|
"logps/rejected": -1630.5118408203125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24043354392051697, |
|
"rewards/margins": 15.48585319519043, |
|
"rewards/rejected": -15.726287841796875, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 3.9445384168628474e-08, |
|
"logits/chosen": -2.328781843185425, |
|
"logits/rejected": -2.580176591873169, |
|
"logps/chosen": -52.544822692871094, |
|
"logps/rejected": -1474.49365234375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3075386881828308, |
|
"rewards/margins": 13.93040657043457, |
|
"rewards/rejected": -14.237945556640625, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 3.5681974708923484e-08, |
|
"logits/chosen": -2.120448589324951, |
|
"logits/rejected": -2.3445982933044434, |
|
"logps/chosen": -37.53495788574219, |
|
"logps/rejected": -1442.9287109375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1521245837211609, |
|
"rewards/margins": 13.743891716003418, |
|
"rewards/rejected": -13.896017074584961, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.01068115234375, |
|
"learning_rate": 3.210592022142717e-08, |
|
"logits/chosen": -2.1601128578186035, |
|
"logits/rejected": -2.3705685138702393, |
|
"logps/chosen": -52.31241989135742, |
|
"logps/rejected": -1557.874755859375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3095288872718811, |
|
"rewards/margins": 14.724299430847168, |
|
"rewards/rejected": -15.033828735351562, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 2.8717492583220095e-08, |
|
"logits/chosen": -2.2527565956115723, |
|
"logits/rejected": -2.4976978302001953, |
|
"logps/chosen": -45.60851287841797, |
|
"logps/rejected": -1558.29931640625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24002361297607422, |
|
"rewards/margins": 14.838605880737305, |
|
"rewards/rejected": -15.078630447387695, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 2.551694940665539e-08, |
|
"logits/chosen": -2.191880464553833, |
|
"logits/rejected": -2.41998291015625, |
|
"logps/chosen": -51.693626403808594, |
|
"logps/rejected": -1440.926513671875, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29828059673309326, |
|
"rewards/margins": 13.601608276367188, |
|
"rewards/rejected": -13.899889945983887, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0006561279296875, |
|
"learning_rate": 2.2504534019774092e-08, |
|
"logits/chosen": -2.347978353500366, |
|
"logits/rejected": -2.5543174743652344, |
|
"logps/chosen": -42.8640251159668, |
|
"logps/rejected": -1363.5374755859375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2091234177350998, |
|
"rewards/margins": 12.935150146484375, |
|
"rewards/rejected": -13.14427375793457, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 1.9680475447805826e-08, |
|
"logits/chosen": -2.231818437576294, |
|
"logits/rejected": -2.4574391841888428, |
|
"logps/chosen": -58.65113067626953, |
|
"logps/rejected": -1416.2275390625, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3713977038860321, |
|
"rewards/margins": 13.290824890136719, |
|
"rewards/rejected": -13.662221908569336, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.00102996826171875, |
|
"learning_rate": 1.70449883957563e-08, |
|
"logits/chosen": -2.254411220550537, |
|
"logits/rejected": -2.486921787261963, |
|
"logps/chosen": -50.42361831665039, |
|
"logps/rejected": -1524.43603515625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28153225779533386, |
|
"rewards/margins": 14.412869453430176, |
|
"rewards/rejected": -14.694402694702148, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.07080078125, |
|
"learning_rate": 1.4598273232083182e-08, |
|
"logits/chosen": -2.2290568351745605, |
|
"logits/rejected": -2.4446380138397217, |
|
"logps/chosen": -40.07840347290039, |
|
"logps/rejected": -1425.9326171875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18418380618095398, |
|
"rewards/margins": 13.563642501831055, |
|
"rewards/rejected": -13.747825622558594, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.000240325927734375, |
|
"learning_rate": 1.2340515973464917e-08, |
|
"logits/chosen": -2.1592583656311035, |
|
"logits/rejected": -2.4244697093963623, |
|
"logps/chosen": -54.51990509033203, |
|
"logps/rejected": -1623.231689453125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32529979944229126, |
|
"rewards/margins": 15.373883247375488, |
|
"rewards/rejected": -15.699182510375977, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.007537841796875, |
|
"learning_rate": 1.0271888270655118e-08, |
|
"logits/chosen": -2.0656638145446777, |
|
"logits/rejected": -2.2895803451538086, |
|
"logps/chosen": -38.120445251464844, |
|
"logps/rejected": -1580.198486328125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16423656046390533, |
|
"rewards/margins": 15.07524299621582, |
|
"rewards/rejected": -15.239479064941406, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.107421875, |
|
"learning_rate": 8.392547395435769e-09, |
|
"logits/chosen": -2.413311243057251, |
|
"logits/rejected": -2.6234774589538574, |
|
"logps/chosen": -62.75360107421875, |
|
"logps/rejected": -1339.970458984375, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40989524126052856, |
|
"rewards/margins": 12.507749557495117, |
|
"rewards/rejected": -12.917645454406738, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.00030517578125, |
|
"learning_rate": 6.702636228657911e-09, |
|
"logits/chosen": -2.292491912841797, |
|
"logits/rejected": -2.5207715034484863, |
|
"logps/chosen": -43.633766174316406, |
|
"logps/rejected": -1431.236083984375, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2170587033033371, |
|
"rewards/margins": 13.580774307250977, |
|
"rewards/rejected": -13.797833442687988, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 5.2022832493800465e-09, |
|
"logits/chosen": -2.363647937774658, |
|
"logits/rejected": -2.5729119777679443, |
|
"logps/chosen": -53.60352325439453, |
|
"logps/rejected": -1316.2359619140625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3134381175041199, |
|
"rewards/margins": 12.360208511352539, |
|
"rewards/rejected": -12.673646926879883, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.0001983642578125, |
|
"learning_rate": 3.891602525100124e-09, |
|
"logits/chosen": -2.2279720306396484, |
|
"logits/rejected": -2.4751038551330566, |
|
"logps/chosen": -46.06736373901367, |
|
"logps/rejected": -1572.96337890625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24313127994537354, |
|
"rewards/margins": 14.95093059539795, |
|
"rewards/rejected": -15.194061279296875, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.01434326171875, |
|
"learning_rate": 2.7706937030827495e-09, |
|
"logits/chosen": -2.2832016944885254, |
|
"logits/rejected": -2.5126912593841553, |
|
"logps/chosen": -51.094390869140625, |
|
"logps/rejected": -1317.524169921875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2949807047843933, |
|
"rewards/margins": 12.386547088623047, |
|
"rewards/rejected": -12.681528091430664, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.028564453125, |
|
"learning_rate": 1.839642002783859e-09, |
|
"logits/chosen": -2.2053470611572266, |
|
"logits/rejected": -2.42755389213562, |
|
"logps/chosen": -39.788963317871094, |
|
"logps/rejected": -1376.3099365234375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18181900680065155, |
|
"rewards/margins": 13.076495170593262, |
|
"rewards/rejected": -13.25831413269043, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 1.0985182093714574e-09, |
|
"logits/chosen": -2.253420352935791, |
|
"logits/rejected": -2.4623093605041504, |
|
"logps/chosen": -59.57801055908203, |
|
"logps/rejected": -1399.8045654296875, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3837854266166687, |
|
"rewards/margins": 13.101987838745117, |
|
"rewards/rejected": -13.485774040222168, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 5.473786683440896e-10, |
|
"logits/chosen": -2.1431727409362793, |
|
"logits/rejected": -2.3964176177978516, |
|
"logps/chosen": -56.668495178222656, |
|
"logps/rejected": -1672.459716796875, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34237998723983765, |
|
"rewards/margins": 15.848222732543945, |
|
"rewards/rejected": -16.190601348876953, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 1.862652812467669e-10, |
|
"logits/chosen": -2.1814169883728027, |
|
"logits/rejected": -2.422477960586548, |
|
"logps/chosen": -39.490379333496094, |
|
"logps/rejected": -1714.8070068359375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17177413403987885, |
|
"rewards/margins": 16.374120712280273, |
|
"rewards/rejected": -16.545894622802734, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0001277923583984375, |
|
"learning_rate": 1.5205502486292932e-11, |
|
"logits/chosen": -2.172867774963379, |
|
"logits/rejected": -2.422581911087036, |
|
"logps/chosen": -44.31547164916992, |
|
"logps/rejected": -1577.330078125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23060818016529083, |
|
"rewards/margins": 15.038507461547852, |
|
"rewards/rejected": -15.269113540649414, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.614182472229004, |
|
"eval_logits/rejected": -2.732987642288208, |
|
"eval_logps/chosen": -60.27886962890625, |
|
"eval_logps/rejected": -752.9370727539062, |
|
"eval_loss": 0.0027744148392230272, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.34365367889404297, |
|
"eval_rewards/margins": 6.736997127532959, |
|
"eval_rewards/rejected": -7.080650329589844, |
|
"eval_runtime": 0.6551, |
|
"eval_samples_per_second": 7.633, |
|
"eval_steps_per_second": 4.58, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4004, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04242442899794605, |
|
"train_runtime": 8772.4234, |
|
"train_samples_per_second": 1.826, |
|
"train_steps_per_second": 0.456 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4004, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|