|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6543432030099787, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"logits/chosen": -2.586496591567993, |
|
"logits/rejected": -2.58866286277771, |
|
"logps/chosen": -140.71868896484375, |
|
"logps/rejected": -141.9235382080078, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.005378031637519598, |
|
"rewards/margins": -0.0017773156287148595, |
|
"rewards/rejected": -0.0036007165908813477, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -2.7250607013702393, |
|
"logits/rejected": -2.737359046936035, |
|
"logps/chosen": -239.8214111328125, |
|
"logps/rejected": -245.04498291015625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0041117193177342415, |
|
"rewards/margins": 0.006983352825045586, |
|
"rewards/rejected": -0.002871633041650057, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -2.407137393951416, |
|
"logits/rejected": -2.3473005294799805, |
|
"logps/chosen": -184.03599548339844, |
|
"logps/rejected": -155.21461486816406, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.003687595948576927, |
|
"rewards/margins": 0.009552741423249245, |
|
"rewards/rejected": -0.0058651454746723175, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.7127842903137207, |
|
"logits/rejected": -2.777282953262329, |
|
"logps/chosen": -171.6352081298828, |
|
"logps/rejected": -191.40785217285156, |
|
"loss": 0.7021, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.004523229319602251, |
|
"rewards/margins": -0.01729598268866539, |
|
"rewards/rejected": 0.012772750109434128, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.4465034008026123, |
|
"logits/rejected": -2.466231107711792, |
|
"logps/chosen": -254.792724609375, |
|
"logps/rejected": -245.39108276367188, |
|
"loss": 0.6988, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.004072976298630238, |
|
"rewards/margins": -0.010413647629320621, |
|
"rewards/rejected": 0.006340669468045235, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -2.2196829319000244, |
|
"logits/rejected": -2.2641539573669434, |
|
"logps/chosen": -179.39495849609375, |
|
"logps/rejected": -220.39300537109375, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.01630725897848606, |
|
"rewards/margins": 0.008536052890121937, |
|
"rewards/rejected": 0.00777120515704155, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"logits/chosen": -2.3294830322265625, |
|
"logits/rejected": -2.380467653274536, |
|
"logps/chosen": -194.7043914794922, |
|
"logps/rejected": -199.31422424316406, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005030441097915173, |
|
"rewards/margins": 0.0030234563164412975, |
|
"rewards/rejected": 0.0020069843158125877, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.590843677520752, |
|
"logits/rejected": -2.5107312202453613, |
|
"logps/chosen": -215.23556518554688, |
|
"logps/rejected": -189.38424682617188, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0025288108736276627, |
|
"rewards/margins": -0.012505888007581234, |
|
"rewards/rejected": 0.009977078065276146, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-06, |
|
"logits/chosen": -2.659663200378418, |
|
"logits/rejected": -2.6844048500061035, |
|
"logps/chosen": -229.0231170654297, |
|
"logps/rejected": -205.54054260253906, |
|
"loss": 0.7032, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.014649391174316406, |
|
"rewards/margins": -0.019562961533665657, |
|
"rewards/rejected": 0.0049135684967041016, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.6927154064178467, |
|
"logits/rejected": -2.6597423553466797, |
|
"logps/chosen": -207.30467224121094, |
|
"logps/rejected": -197.54656982421875, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0019803522154688835, |
|
"rewards/margins": 0.002554560313001275, |
|
"rewards/rejected": -0.004534911829978228, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.500000000000001e-06, |
|
"logits/chosen": -2.6474952697753906, |
|
"logits/rejected": -2.6725170612335205, |
|
"logps/chosen": -231.35108947753906, |
|
"logps/rejected": -274.9834289550781, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0055945878848433495, |
|
"rewards/margins": 0.0016048436518758535, |
|
"rewards/rejected": 0.003989744000136852, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": -2.2198939323425293, |
|
"logits/rejected": -2.2628731727600098, |
|
"logps/chosen": -198.29022216796875, |
|
"logps/rejected": -170.82257080078125, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0031775482930243015, |
|
"rewards/margins": 0.010572671890258789, |
|
"rewards/rejected": -0.013750219717621803, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"logits/chosen": -2.7613415718078613, |
|
"logits/rejected": -2.8206562995910645, |
|
"logps/chosen": -239.75253295898438, |
|
"logps/rejected": -238.97320556640625, |
|
"loss": 0.6999, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0029205563478171825, |
|
"rewards/margins": -0.013042164966464043, |
|
"rewards/rejected": 0.010121609084308147, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.000000000000001e-06, |
|
"logits/chosen": -2.0942578315734863, |
|
"logits/rejected": -2.017871856689453, |
|
"logps/chosen": -184.2721710205078, |
|
"logps/rejected": -175.73641967773438, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.004038786515593529, |
|
"rewards/margins": 0.01193075068295002, |
|
"rewards/rejected": -0.015969539061188698, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-06, |
|
"logits/chosen": -2.526791572570801, |
|
"logits/rejected": -2.5704824924468994, |
|
"logps/chosen": -200.25115966796875, |
|
"logps/rejected": -223.4153289794922, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.00700113782659173, |
|
"rewards/margins": 0.003126000752672553, |
|
"rewards/rejected": 0.003875136375427246, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -2.5145041942596436, |
|
"logits/rejected": -2.478262424468994, |
|
"logps/chosen": -169.4720001220703, |
|
"logps/rejected": -179.3622283935547, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00018756365170702338, |
|
"rewards/margins": 0.012972594238817692, |
|
"rewards/rejected": -0.013160157017409801, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.500000000000002e-06, |
|
"logits/chosen": -2.1660895347595215, |
|
"logits/rejected": -2.1799678802490234, |
|
"logps/chosen": -158.1959686279297, |
|
"logps/rejected": -225.02365112304688, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01399233564734459, |
|
"rewards/margins": 0.01266777515411377, |
|
"rewards/rejected": 0.001324558281339705, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9e-06, |
|
"logits/chosen": -2.417539596557617, |
|
"logits/rejected": -2.4626646041870117, |
|
"logps/chosen": -170.4958038330078, |
|
"logps/rejected": -178.34300231933594, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0049651628360152245, |
|
"rewards/margins": 0.011217641644179821, |
|
"rewards/rejected": -0.006252479739487171, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.5e-06, |
|
"logits/chosen": -2.6470260620117188, |
|
"logits/rejected": -2.7213802337646484, |
|
"logps/chosen": -179.53497314453125, |
|
"logps/rejected": -200.94956970214844, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0027004480361938477, |
|
"rewards/margins": 0.006518864538520575, |
|
"rewards/rejected": -0.00921931304037571, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -2.4962029457092285, |
|
"logits/rejected": -2.5313973426818848, |
|
"logps/chosen": -208.752685546875, |
|
"logps/rejected": -244.1136474609375, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.009996438398957253, |
|
"rewards/margins": 0.020868420600891113, |
|
"rewards/rejected": -0.01087198406457901, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.05e-05, |
|
"logits/chosen": -2.531207799911499, |
|
"logits/rejected": -2.5359435081481934, |
|
"logps/chosen": -194.47694396972656, |
|
"logps/rejected": -185.80142211914062, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.002445125486701727, |
|
"rewards/margins": 0.0077151767909526825, |
|
"rewards/rejected": -0.005270051304250956, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"logits/chosen": -2.430760622024536, |
|
"logits/rejected": -2.395613431930542, |
|
"logps/chosen": -144.2230987548828, |
|
"logps/rejected": -148.8902587890625, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00968785211443901, |
|
"rewards/margins": -0.001990032149478793, |
|
"rewards/rejected": -0.007697821129113436, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"logits/chosen": -2.4417223930358887, |
|
"logits/rejected": -2.441425323486328, |
|
"logps/chosen": -165.82875061035156, |
|
"logps/rejected": -178.82815551757812, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0060547590255737305, |
|
"rewards/margins": 0.009464193135499954, |
|
"rewards/rejected": -0.0034094336442649364, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": -2.3494763374328613, |
|
"logits/rejected": -2.40555477142334, |
|
"logps/chosen": -133.24130249023438, |
|
"logps/rejected": -151.85446166992188, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.011995697394013405, |
|
"rewards/margins": -0.010638857260346413, |
|
"rewards/rejected": -0.0013568403664976358, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-05, |
|
"logits/chosen": -2.3450560569763184, |
|
"logits/rejected": -2.5124807357788086, |
|
"logps/chosen": -242.19821166992188, |
|
"logps/rejected": -259.1798095703125, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.013580609112977982, |
|
"rewards/margins": 0.004313135519623756, |
|
"rewards/rejected": -0.017893744632601738, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"logits/chosen": -2.5465288162231445, |
|
"logits/rejected": -2.5980730056762695, |
|
"logps/chosen": -202.898193359375, |
|
"logps/rejected": -248.03968811035156, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0018544672057032585, |
|
"rewards/margins": 0.016238166019320488, |
|
"rewards/rejected": -0.01438369695097208, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"logits/chosen": -2.733663320541382, |
|
"logits/rejected": -2.712355375289917, |
|
"logps/chosen": -205.80429077148438, |
|
"logps/rejected": -174.95797729492188, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -8.790497668087482e-05, |
|
"rewards/margins": 2.1266518160700798e-05, |
|
"rewards/rejected": -0.0001091718440875411, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"logits/chosen": -2.550997257232666, |
|
"logits/rejected": -2.4390013217926025, |
|
"logps/chosen": -171.59786987304688, |
|
"logps/rejected": -176.4530029296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02987699769437313, |
|
"rewards/margins": 0.0004779808223247528, |
|
"rewards/rejected": -0.030354974791407585, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.45e-05, |
|
"logits/chosen": -2.4879837036132812, |
|
"logits/rejected": -2.480923652648926, |
|
"logps/chosen": -149.10079956054688, |
|
"logps/rejected": -162.999267578125, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.005112767685204744, |
|
"rewards/margins": 0.01993861421942711, |
|
"rewards/rejected": -0.025051379576325417, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -2.3427562713623047, |
|
"logits/rejected": -2.5123889446258545, |
|
"logps/chosen": -160.62950134277344, |
|
"logps/rejected": -201.14747619628906, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.015586448833346367, |
|
"rewards/margins": -0.003298282390460372, |
|
"rewards/rejected": -0.012288165278732777, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.55e-05, |
|
"logits/chosen": -2.2548415660858154, |
|
"logits/rejected": -2.3827996253967285, |
|
"logps/chosen": -126.6205825805664, |
|
"logps/rejected": -145.7274627685547, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02384941652417183, |
|
"rewards/margins": 0.010512137785553932, |
|
"rewards/rejected": -0.03436155617237091, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -2.5090818405151367, |
|
"logits/rejected": -2.560624122619629, |
|
"logps/chosen": -182.53944396972656, |
|
"logps/rejected": -208.08763122558594, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0014843230601400137, |
|
"rewards/margins": 0.011496281251311302, |
|
"rewards/rejected": -0.01298060454428196, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.65e-05, |
|
"logits/chosen": -2.800107479095459, |
|
"logits/rejected": -2.7355594635009766, |
|
"logps/chosen": -207.99392700195312, |
|
"logps/rejected": -208.31610107421875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.014949416741728783, |
|
"rewards/margins": 0.0012244456447660923, |
|
"rewards/rejected": -0.016173863783478737, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"logits/chosen": -2.6325690746307373, |
|
"logits/rejected": -2.630256175994873, |
|
"logps/chosen": -196.8790283203125, |
|
"logps/rejected": -202.53794860839844, |
|
"loss": 0.7116, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.041500113904476166, |
|
"rewards/margins": -0.03608906269073486, |
|
"rewards/rejected": -0.005411052145063877, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.75e-05, |
|
"logits/chosen": -2.476334571838379, |
|
"logits/rejected": -2.473987579345703, |
|
"logps/chosen": -256.1812438964844, |
|
"logps/rejected": -250.76165771484375, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.01104288175702095, |
|
"rewards/margins": 0.010350894182920456, |
|
"rewards/rejected": -0.021393775939941406, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8e-05, |
|
"logits/chosen": -2.342329502105713, |
|
"logits/rejected": -2.3805429935455322, |
|
"logps/chosen": -164.03692626953125, |
|
"logps/rejected": -217.39849853515625, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.009070659056305885, |
|
"rewards/margins": 0.010929775424301624, |
|
"rewards/rejected": -0.020000435411930084, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.85e-05, |
|
"logits/chosen": -2.4253318309783936, |
|
"logits/rejected": -2.4550042152404785, |
|
"logps/chosen": -146.70420837402344, |
|
"logps/rejected": -129.80674743652344, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.031095195561647415, |
|
"rewards/margins": 0.017679547891020775, |
|
"rewards/rejected": -0.04877474159002304, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9e-05, |
|
"logits/chosen": -2.4936635494232178, |
|
"logits/rejected": -2.5476386547088623, |
|
"logps/chosen": -164.6866455078125, |
|
"logps/rejected": -198.52139282226562, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.025130080059170723, |
|
"rewards/margins": 0.007643342949450016, |
|
"rewards/rejected": -0.03277342766523361, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"logits/chosen": -2.4614553451538086, |
|
"logits/rejected": -2.4693663120269775, |
|
"logps/chosen": -172.24966430664062, |
|
"logps/rejected": -166.52056884765625, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.027715325355529785, |
|
"rewards/margins": -0.00841212272644043, |
|
"rewards/rejected": -0.019303202629089355, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -2.3024024963378906, |
|
"logits/rejected": -2.2906301021575928, |
|
"logps/chosen": -243.98683166503906, |
|
"logps/rejected": -199.12408447265625, |
|
"loss": 0.6979, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.028235863894224167, |
|
"rewards/margins": -0.00847182422876358, |
|
"rewards/rejected": -0.019764041528105736, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.05e-05, |
|
"logits/chosen": -2.660562753677368, |
|
"logits/rejected": -2.6754074096679688, |
|
"logps/chosen": -156.67514038085938, |
|
"logps/rejected": -147.5068359375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03220677375793457, |
|
"rewards/margins": 0.009226083755493164, |
|
"rewards/rejected": -0.041432857513427734, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.1e-05, |
|
"logits/chosen": -2.482787847518921, |
|
"logits/rejected": -2.4663233757019043, |
|
"logps/chosen": -207.6859588623047, |
|
"logps/rejected": -182.35931396484375, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.014715791679918766, |
|
"rewards/margins": 0.012153576128184795, |
|
"rewards/rejected": -0.02686937153339386, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.15e-05, |
|
"logits/chosen": -2.6544148921966553, |
|
"logits/rejected": -2.717411756515503, |
|
"logps/chosen": -197.96400451660156, |
|
"logps/rejected": -223.0219268798828, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024796580895781517, |
|
"rewards/margins": 0.005184031091630459, |
|
"rewards/rejected": -0.0299806110560894, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"logits/chosen": -2.561187744140625, |
|
"logits/rejected": -2.6224913597106934, |
|
"logps/chosen": -194.60935974121094, |
|
"logps/rejected": -194.01577758789062, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.020246006548404694, |
|
"rewards/margins": -0.0076716188341379166, |
|
"rewards/rejected": -0.012574386782944202, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.25e-05, |
|
"logits/chosen": -2.5130224227905273, |
|
"logits/rejected": -2.672990083694458, |
|
"logps/chosen": -154.0092010498047, |
|
"logps/rejected": -190.70748901367188, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.018519926816225052, |
|
"rewards/margins": 0.023535681888461113, |
|
"rewards/rejected": -0.042055610567331314, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"logits/chosen": -2.738673686981201, |
|
"logits/rejected": -2.82279896736145, |
|
"logps/chosen": -174.6641387939453, |
|
"logps/rejected": -163.0157928466797, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.014652942307293415, |
|
"rewards/margins": -0.006221937946975231, |
|
"rewards/rejected": -0.008431006222963333, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.35e-05, |
|
"logits/chosen": -2.581393241882324, |
|
"logits/rejected": -2.5734691619873047, |
|
"logps/chosen": -137.8702392578125, |
|
"logps/rejected": -154.74896240234375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007426738273352385, |
|
"rewards/margins": 0.009324884042143822, |
|
"rewards/rejected": -0.016751624643802643, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.4e-05, |
|
"logits/chosen": -2.544344425201416, |
|
"logits/rejected": -2.690079689025879, |
|
"logps/chosen": -185.8907012939453, |
|
"logps/rejected": -195.9602508544922, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02690129354596138, |
|
"rewards/margins": 0.007246016524732113, |
|
"rewards/rejected": -0.03414731100201607, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.45e-05, |
|
"logits/chosen": -2.583885669708252, |
|
"logits/rejected": -2.609614849090576, |
|
"logps/chosen": -215.93264770507812, |
|
"logps/rejected": -234.49636840820312, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.022774625569581985, |
|
"rewards/margins": 0.015723586082458496, |
|
"rewards/rejected": -0.03849821165204048, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -2.6080195903778076, |
|
"logits/rejected": -2.655003070831299, |
|
"logps/chosen": -229.5977020263672, |
|
"logps/rejected": -211.47779846191406, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.026349734514951706, |
|
"rewards/margins": 0.0026414887979626656, |
|
"rewards/rejected": -0.028991222381591797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"logits/chosen": -2.180418014526367, |
|
"logits/rejected": -2.210296869277954, |
|
"logps/chosen": -221.1846466064453, |
|
"logps/rejected": -185.85655212402344, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.013596129603683949, |
|
"rewards/margins": 0.009130668826401234, |
|
"rewards/rejected": -0.022726796567440033, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -2.647798538208008, |
|
"logits/rejected": -2.715623617172241, |
|
"logps/chosen": -186.01138305664062, |
|
"logps/rejected": -180.89259338378906, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.019299650564789772, |
|
"rewards/margins": 0.030588869005441666, |
|
"rewards/rejected": -0.011289214715361595, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"logits/chosen": -2.570209503173828, |
|
"logits/rejected": -2.5728917121887207, |
|
"logps/chosen": -209.2962188720703, |
|
"logps/rejected": -216.60145568847656, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.005204535089433193, |
|
"rewards/margins": 0.021862652152776718, |
|
"rewards/rejected": -0.027067184448242188, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"logits/chosen": -2.672147512435913, |
|
"logits/rejected": -2.678018808364868, |
|
"logps/chosen": -167.4713134765625, |
|
"logps/rejected": -170.08120727539062, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.013661455363035202, |
|
"rewards/margins": 0.004836535546928644, |
|
"rewards/rejected": -0.018497992306947708, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"logits/chosen": -2.4589505195617676, |
|
"logits/rejected": -2.462191343307495, |
|
"logps/chosen": -200.73976135253906, |
|
"logps/rejected": -195.30276489257812, |
|
"loss": 0.704, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03748317062854767, |
|
"rewards/margins": -0.020142268389463425, |
|
"rewards/rejected": -0.017340898513793945, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"logits/chosen": -2.230989933013916, |
|
"logits/rejected": -2.2140636444091797, |
|
"logps/chosen": -143.33631896972656, |
|
"logps/rejected": -136.0796661376953, |
|
"loss": 0.6989, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.020144915208220482, |
|
"rewards/margins": -0.01022801361978054, |
|
"rewards/rejected": -0.009916901588439941, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"logits/chosen": -2.5017194747924805, |
|
"logits/rejected": -2.58662486076355, |
|
"logps/chosen": -167.50990295410156, |
|
"logps/rejected": -208.83607482910156, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0027861359994858503, |
|
"rewards/margins": 0.02604994922876358, |
|
"rewards/rejected": -0.02883608266711235, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9e-05, |
|
"logits/chosen": -2.53066349029541, |
|
"logits/rejected": -2.608471393585205, |
|
"logps/chosen": -241.85113525390625, |
|
"logps/rejected": -260.0408630371094, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.024016117677092552, |
|
"rewards/margins": -0.0005100721027702093, |
|
"rewards/rejected": -0.023506049066781998, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.95e-05, |
|
"logits/chosen": -2.2705140113830566, |
|
"logits/rejected": -2.4387779235839844, |
|
"logps/chosen": -179.22540283203125, |
|
"logps/rejected": -182.5544891357422, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00418963422998786, |
|
"rewards/margins": 0.033549048006534576, |
|
"rewards/rejected": -0.02935941144824028, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -2.46372652053833, |
|
"logits/rejected": -2.513279438018799, |
|
"logps/chosen": -230.43133544921875, |
|
"logps/rejected": -227.0118865966797, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.010919665917754173, |
|
"rewards/margins": 0.03570995107293129, |
|
"rewards/rejected": -0.04662961885333061, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.05e-05, |
|
"logits/chosen": -2.530576467514038, |
|
"logits/rejected": -2.5415897369384766, |
|
"logps/chosen": -200.46568298339844, |
|
"logps/rejected": -220.82989501953125, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0384901762008667, |
|
"rewards/margins": 0.03156058490276337, |
|
"rewards/rejected": -0.07005076855421066, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.1e-05, |
|
"logits/chosen": -2.5888161659240723, |
|
"logits/rejected": -2.639291286468506, |
|
"logps/chosen": -132.55116271972656, |
|
"logps/rejected": -167.082275390625, |
|
"loss": 0.7042, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.03156990930438042, |
|
"rewards/margins": -0.018804360181093216, |
|
"rewards/rejected": -0.0127655528485775, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.15e-05, |
|
"logits/chosen": -2.46744704246521, |
|
"logits/rejected": -2.494837522506714, |
|
"logps/chosen": -210.9451141357422, |
|
"logps/rejected": -211.72433471679688, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0017502065747976303, |
|
"rewards/margins": 0.027423406019806862, |
|
"rewards/rejected": -0.02567320130765438, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"logits/chosen": -2.4783239364624023, |
|
"logits/rejected": -2.490576982498169, |
|
"logps/chosen": -197.71629333496094, |
|
"logps/rejected": -207.23048400878906, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01412363164126873, |
|
"rewards/margins": 0.01561451144516468, |
|
"rewards/rejected": -0.029738139361143112, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"logits/chosen": -2.3918569087982178, |
|
"logits/rejected": -2.4426636695861816, |
|
"logps/chosen": -164.30029296875, |
|
"logps/rejected": -201.85992431640625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.025302361696958542, |
|
"rewards/margins": 0.009253643453121185, |
|
"rewards/rejected": -0.034556008875370026, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.3e-05, |
|
"logits/chosen": -2.4934239387512207, |
|
"logits/rejected": -2.5188210010528564, |
|
"logps/chosen": -178.12030029296875, |
|
"logps/rejected": -211.23947143554688, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05651288107037544, |
|
"rewards/margins": 0.0065690516494214535, |
|
"rewards/rejected": -0.06308193504810333, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.35e-05, |
|
"logits/chosen": -2.415985345840454, |
|
"logits/rejected": -2.3687984943389893, |
|
"logps/chosen": -211.91453552246094, |
|
"logps/rejected": -216.65591430664062, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.045519113540649414, |
|
"rewards/margins": 0.028419354930520058, |
|
"rewards/rejected": -0.07393846660852432, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"logits/chosen": -2.37481427192688, |
|
"logits/rejected": -2.4693892002105713, |
|
"logps/chosen": -172.92459106445312, |
|
"logps/rejected": -209.3857421875, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03707296401262283, |
|
"rewards/margins": 0.027949143201112747, |
|
"rewards/rejected": -0.06502211093902588, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.45e-05, |
|
"logits/chosen": -2.3615283966064453, |
|
"logits/rejected": -2.3259239196777344, |
|
"logps/chosen": -154.25360107421875, |
|
"logps/rejected": -161.130126953125, |
|
"loss": 0.7022, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.04533124342560768, |
|
"rewards/margins": -0.016323519870638847, |
|
"rewards/rejected": -0.029007721692323685, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.5e-05, |
|
"logits/chosen": -2.64551043510437, |
|
"logits/rejected": -2.7289645671844482, |
|
"logps/chosen": -200.1268768310547, |
|
"logps/rejected": -225.6541748046875, |
|
"loss": 0.6993, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0578455924987793, |
|
"rewards/margins": -0.008899472653865814, |
|
"rewards/rejected": -0.04894612357020378, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.55e-05, |
|
"logits/chosen": -2.703721046447754, |
|
"logits/rejected": -2.669755220413208, |
|
"logps/chosen": -208.99668884277344, |
|
"logps/rejected": -164.8292694091797, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.022734597325325012, |
|
"rewards/margins": 0.03882308304309845, |
|
"rewards/rejected": -0.06155767664313316, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.6e-05, |
|
"logits/chosen": -2.6480765342712402, |
|
"logits/rejected": -2.696770191192627, |
|
"logps/chosen": -172.99888610839844, |
|
"logps/rejected": -201.09979248046875, |
|
"loss": 0.7063, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.053855180740356445, |
|
"rewards/margins": -0.024987507611513138, |
|
"rewards/rejected": -0.028867674991488457, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.65e-05, |
|
"logits/chosen": -2.6311445236206055, |
|
"logits/rejected": -2.6575632095336914, |
|
"logps/chosen": -251.72789001464844, |
|
"logps/rejected": -271.0470886230469, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03289387375116348, |
|
"rewards/margins": 0.014702942222356796, |
|
"rewards/rejected": -0.04759681224822998, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7e-05, |
|
"logits/chosen": -2.3990683555603027, |
|
"logits/rejected": -2.502671480178833, |
|
"logps/chosen": -197.92665100097656, |
|
"logps/rejected": -211.00967407226562, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.014272330328822136, |
|
"rewards/margins": 0.020740672945976257, |
|
"rewards/rejected": -0.03501300886273384, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"logits/chosen": -2.575502872467041, |
|
"logits/rejected": -2.527902364730835, |
|
"logps/chosen": -214.5215301513672, |
|
"logps/rejected": -201.05194091796875, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.006246686447411776, |
|
"rewards/margins": 0.059338975697755814, |
|
"rewards/rejected": -0.065585657954216, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.8e-05, |
|
"logits/chosen": -2.217294692993164, |
|
"logits/rejected": -2.223574638366699, |
|
"logps/chosen": -174.7819366455078, |
|
"logps/rejected": -178.09852600097656, |
|
"loss": 0.7013, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09115451574325562, |
|
"rewards/margins": -0.012327454052865505, |
|
"rewards/rejected": -0.07882705330848694, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.85e-05, |
|
"logits/chosen": -2.518965721130371, |
|
"logits/rejected": -2.443446159362793, |
|
"logps/chosen": -201.1251220703125, |
|
"logps/rejected": -170.15533447265625, |
|
"loss": 0.7029, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04162323474884033, |
|
"rewards/margins": -0.015459035523235798, |
|
"rewards/rejected": -0.02616419643163681, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"logits/chosen": -2.521008253097534, |
|
"logits/rejected": -2.6866934299468994, |
|
"logps/chosen": -269.85284423828125, |
|
"logps/rejected": -240.82626342773438, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.05473289266228676, |
|
"rewards/margins": 0.00423135980963707, |
|
"rewards/rejected": -0.05896425247192383, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"logits/chosen": -2.3015501499176025, |
|
"logits/rejected": -2.351773500442505, |
|
"logps/chosen": -225.17691040039062, |
|
"logps/rejected": -249.8846435546875, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.06839534640312195, |
|
"rewards/margins": 0.014748764224350452, |
|
"rewards/rejected": -0.08314411342144012, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -2.6201720237731934, |
|
"logits/rejected": -2.680964708328247, |
|
"logps/chosen": -225.08645629882812, |
|
"logps/rejected": -232.00096130371094, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06486918777227402, |
|
"rewards/margins": 0.04111311584711075, |
|
"rewards/rejected": -0.10598230361938477, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.05e-05, |
|
"logits/chosen": -2.5782110691070557, |
|
"logits/rejected": -2.5646634101867676, |
|
"logps/chosen": -249.90745544433594, |
|
"logps/rejected": -257.51275634765625, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08720846474170685, |
|
"rewards/margins": -0.0017095585353672504, |
|
"rewards/rejected": -0.08549890667200089, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.1e-05, |
|
"logits/chosen": -2.5777688026428223, |
|
"logits/rejected": -2.665797472000122, |
|
"logps/chosen": -171.82347106933594, |
|
"logps/rejected": -211.63311767578125, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08461908996105194, |
|
"rewards/margins": 0.013853237964212894, |
|
"rewards/rejected": -0.09847234189510345, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.15e-05, |
|
"logits/chosen": -2.3364946842193604, |
|
"logits/rejected": -2.3084681034088135, |
|
"logps/chosen": -193.4939727783203, |
|
"logps/rejected": -184.15200805664062, |
|
"loss": 0.717, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0981694683432579, |
|
"rewards/margins": -0.04480106756091118, |
|
"rewards/rejected": -0.053368404507637024, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2e-05, |
|
"logits/chosen": -2.445204257965088, |
|
"logits/rejected": -2.5341625213623047, |
|
"logps/chosen": -174.59225463867188, |
|
"logps/rejected": -210.94137573242188, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.09675168246030807, |
|
"rewards/margins": -0.00985276885330677, |
|
"rewards/rejected": -0.08689891546964645, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.25e-05, |
|
"logits/chosen": -2.4798362255096436, |
|
"logits/rejected": -2.4760632514953613, |
|
"logps/chosen": -148.0768585205078, |
|
"logps/rejected": -176.31700134277344, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.07728591561317444, |
|
"rewards/margins": 0.0020556673407554626, |
|
"rewards/rejected": -0.0793415829539299, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.3e-05, |
|
"logits/chosen": -2.5911381244659424, |
|
"logits/rejected": -2.630659341812134, |
|
"logps/chosen": -203.1630859375, |
|
"logps/rejected": -183.4739227294922, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04599933326244354, |
|
"rewards/margins": 0.08558819442987442, |
|
"rewards/rejected": -0.13158753514289856, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.35e-05, |
|
"logits/chosen": -2.600797414779663, |
|
"logits/rejected": -2.6318492889404297, |
|
"logps/chosen": -156.8225860595703, |
|
"logps/rejected": -161.6145477294922, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07066044956445694, |
|
"rewards/margins": 0.05500438064336777, |
|
"rewards/rejected": -0.1256648302078247, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"logits/chosen": -2.2124781608581543, |
|
"logits/rejected": -2.259019374847412, |
|
"logps/chosen": -142.0679473876953, |
|
"logps/rejected": -177.9369659423828, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13633377850055695, |
|
"rewards/margins": 0.000554969534277916, |
|
"rewards/rejected": -0.1368887573480606, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"logits/chosen": -2.396183729171753, |
|
"logits/rejected": -2.533904552459717, |
|
"logps/chosen": -171.38790893554688, |
|
"logps/rejected": -202.30979919433594, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09015192836523056, |
|
"rewards/margins": 0.08280421048402786, |
|
"rewards/rejected": -0.17295613884925842, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5e-05, |
|
"logits/chosen": -2.640425443649292, |
|
"logits/rejected": -2.674539804458618, |
|
"logps/chosen": -175.3508758544922, |
|
"logps/rejected": -180.97915649414062, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1077599823474884, |
|
"rewards/margins": 0.03759467601776123, |
|
"rewards/rejected": -0.14535464346408844, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.55e-05, |
|
"logits/chosen": -2.5857696533203125, |
|
"logits/rejected": -2.5822975635528564, |
|
"logps/chosen": -176.55982971191406, |
|
"logps/rejected": -196.5354461669922, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.09103889763355255, |
|
"rewards/margins": -0.009319041855633259, |
|
"rewards/rejected": -0.08171986043453217, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.600000000000001e-05, |
|
"logits/chosen": -2.660163402557373, |
|
"logits/rejected": -2.5935940742492676, |
|
"logps/chosen": -233.04049682617188, |
|
"logps/rejected": -212.1655731201172, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.118477463722229, |
|
"rewards/margins": 0.017601586878299713, |
|
"rewards/rejected": -0.13607905805110931, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"logits/chosen": -2.5211241245269775, |
|
"logits/rejected": -2.6064364910125732, |
|
"logps/chosen": -176.89476013183594, |
|
"logps/rejected": -187.39271545410156, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07700353115797043, |
|
"rewards/margins": 0.04530329257249832, |
|
"rewards/rejected": -0.12230683863162994, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7e-05, |
|
"logits/chosen": -2.6894078254699707, |
|
"logits/rejected": -2.6806745529174805, |
|
"logps/chosen": -244.7545623779297, |
|
"logps/rejected": -212.83453369140625, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10891007632017136, |
|
"rewards/margins": 0.10942523181438446, |
|
"rewards/rejected": -0.2183353304862976, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.75e-05, |
|
"logits/chosen": -2.4758429527282715, |
|
"logits/rejected": -2.5564448833465576, |
|
"logps/chosen": -161.29554748535156, |
|
"logps/rejected": -185.48199462890625, |
|
"loss": 0.6991, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.152861088514328, |
|
"rewards/margins": -0.0035788798704743385, |
|
"rewards/rejected": -0.14928221702575684, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8e-05, |
|
"logits/chosen": -2.47318959236145, |
|
"logits/rejected": -2.439746856689453, |
|
"logps/chosen": -241.09283447265625, |
|
"logps/rejected": -218.68580627441406, |
|
"loss": 0.7017, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1952984780073166, |
|
"rewards/margins": -0.01051153801381588, |
|
"rewards/rejected": -0.18478693068027496, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.85e-05, |
|
"logits/chosen": -2.541466474533081, |
|
"logits/rejected": -2.56956148147583, |
|
"logps/chosen": -180.84725952148438, |
|
"logps/rejected": -211.02352905273438, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.15894030034542084, |
|
"rewards/margins": 0.06507530808448792, |
|
"rewards/rejected": -0.22401559352874756, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9e-05, |
|
"logits/chosen": -2.226611852645874, |
|
"logits/rejected": -2.359558343887329, |
|
"logps/chosen": -145.781494140625, |
|
"logps/rejected": -181.21531677246094, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.177232027053833, |
|
"rewards/margins": 0.058251187205314636, |
|
"rewards/rejected": -0.23548321425914764, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"logits/chosen": -2.5605039596557617, |
|
"logits/rejected": -2.641690492630005, |
|
"logps/chosen": -156.43446350097656, |
|
"logps/rejected": -171.6468505859375, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.23164159059524536, |
|
"rewards/margins": 0.03432049974799156, |
|
"rewards/rejected": -0.2659620940685272, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"logits/chosen": -2.7840309143066406, |
|
"logits/rejected": -2.8028616905212402, |
|
"logps/chosen": -221.6836395263672, |
|
"logps/rejected": -247.433349609375, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.14767608046531677, |
|
"rewards/margins": 0.1068597286939621, |
|
"rewards/rejected": -0.25453582406044006, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.99999978299634e-05, |
|
"logits/chosen": -2.1325531005859375, |
|
"logits/rejected": -2.1438934803009033, |
|
"logps/chosen": -171.369140625, |
|
"logps/rejected": -184.96827697753906, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18850459158420563, |
|
"rewards/margins": 0.12018898129463196, |
|
"rewards/rejected": -0.3086935877799988, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.999999131985394e-05, |
|
"logits/chosen": -2.6781508922576904, |
|
"logits/rejected": -2.5359549522399902, |
|
"logps/chosen": -232.26742553710938, |
|
"logps/rejected": -214.5188446044922, |
|
"loss": 0.7372, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.32363808155059814, |
|
"rewards/margins": -0.07766950130462646, |
|
"rewards/rejected": -0.2459685504436493, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.999998046967279e-05, |
|
"logits/chosen": -2.54486346244812, |
|
"logits/rejected": -2.6447086334228516, |
|
"logps/chosen": -179.6219482421875, |
|
"logps/rejected": -222.37010192871094, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23128211498260498, |
|
"rewards/margins": 0.08162947744131088, |
|
"rewards/rejected": -0.31291159987449646, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9999965279421804e-05, |
|
"logits/chosen": -2.4281625747680664, |
|
"logits/rejected": -2.5068893432617188, |
|
"logps/chosen": -163.3339385986328, |
|
"logps/rejected": -180.06222534179688, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22076301276683807, |
|
"rewards/margins": 0.05238480493426323, |
|
"rewards/rejected": -0.2731478214263916, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999994574910364e-05, |
|
"logits/chosen": -2.5497522354125977, |
|
"logits/rejected": -2.558523178100586, |
|
"logps/chosen": -214.64537048339844, |
|
"logps/rejected": -210.29678344726562, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19515390694141388, |
|
"rewards/margins": 0.06168302148580551, |
|
"rewards/rejected": -0.2568369209766388, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999992187872167e-05, |
|
"logits/chosen": -2.6358773708343506, |
|
"logits/rejected": -2.672025680541992, |
|
"logps/chosen": -235.18109130859375, |
|
"logps/rejected": -233.78045654296875, |
|
"loss": 0.7203, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.25453630089759827, |
|
"rewards/margins": -0.03941688686609268, |
|
"rewards/rejected": -0.215119406580925, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9999893668280043e-05, |
|
"logits/chosen": -2.305128812789917, |
|
"logits/rejected": -2.384552478790283, |
|
"logps/chosen": -168.1253662109375, |
|
"logps/rejected": -210.5357666015625, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.30615508556365967, |
|
"rewards/margins": 0.10809167474508286, |
|
"rewards/rejected": -0.4142467677593231, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999986111778367e-05, |
|
"logits/chosen": -2.4207632541656494, |
|
"logits/rejected": -2.4426565170288086, |
|
"logps/chosen": -187.8740234375, |
|
"logps/rejected": -188.6339569091797, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2796909809112549, |
|
"rewards/margins": 0.13547499477863312, |
|
"rewards/rejected": -0.4151660203933716, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999982422723818e-05, |
|
"logits/chosen": -2.505187749862671, |
|
"logits/rejected": -2.4738471508026123, |
|
"logps/chosen": -187.51710510253906, |
|
"logps/rejected": -227.13916015625, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5267550945281982, |
|
"rewards/margins": -0.03421187028288841, |
|
"rewards/rejected": -0.4925432801246643, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9999782996649994e-05, |
|
"logits/chosen": -2.4367451667785645, |
|
"logits/rejected": -2.4629135131835938, |
|
"logps/chosen": -221.5149688720703, |
|
"logps/rejected": -255.66635131835938, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4186938405036926, |
|
"rewards/margins": 0.03650692105293274, |
|
"rewards/rejected": -0.45520079135894775, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999973742602626e-05, |
|
"logits/chosen": -2.6166908740997314, |
|
"logits/rejected": -2.638493061065674, |
|
"logps/chosen": -212.32894897460938, |
|
"logps/rejected": -216.49720764160156, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.47340863943099976, |
|
"rewards/margins": 0.059217117726802826, |
|
"rewards/rejected": -0.5326257944107056, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999968751537489e-05, |
|
"logits/chosen": -2.549008846282959, |
|
"logits/rejected": -2.6095943450927734, |
|
"logps/chosen": -205.5695343017578, |
|
"logps/rejected": -196.99383544921875, |
|
"loss": 0.7338, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.47608208656311035, |
|
"rewards/margins": -0.05748309940099716, |
|
"rewards/rejected": -0.4185989201068878, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9999633264704564e-05, |
|
"logits/chosen": -2.2947440147399902, |
|
"logits/rejected": -2.240459680557251, |
|
"logps/chosen": -203.31182861328125, |
|
"logps/rejected": -175.49354553222656, |
|
"loss": 0.7139, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.39874717593193054, |
|
"rewards/margins": -0.014322709292173386, |
|
"rewards/rejected": -0.38442447781562805, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999957467402468e-05, |
|
"logits/chosen": -2.118520736694336, |
|
"logits/rejected": -2.0125582218170166, |
|
"logps/chosen": -159.09979248046875, |
|
"logps/rejected": -151.80679321289062, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43373599648475647, |
|
"rewards/margins": 0.0064653148874640465, |
|
"rewards/rejected": -0.4402012825012207, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9999511743345426e-05, |
|
"logits/chosen": -2.251662254333496, |
|
"logits/rejected": -2.343283176422119, |
|
"logps/chosen": -166.46377563476562, |
|
"logps/rejected": -213.02813720703125, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.44466906785964966, |
|
"rewards/margins": 0.18120086193084717, |
|
"rewards/rejected": -0.625869870185852, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999944447267771e-05, |
|
"logits/chosen": -2.347130298614502, |
|
"logits/rejected": -2.342409610748291, |
|
"logps/chosen": -216.30650329589844, |
|
"logps/rejected": -210.64273071289062, |
|
"loss": 0.7966, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6098219752311707, |
|
"rewards/margins": -0.12615808844566345, |
|
"rewards/rejected": -0.4836638867855072, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999937286203322e-05, |
|
"logits/chosen": -2.3265388011932373, |
|
"logits/rejected": -2.3534939289093018, |
|
"logps/chosen": -204.47706604003906, |
|
"logps/rejected": -186.42286682128906, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.291274756193161, |
|
"rewards/margins": 0.0668107196688652, |
|
"rewards/rejected": -0.3580854535102844, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999929691142439e-05, |
|
"logits/chosen": -2.062469720840454, |
|
"logits/rejected": -2.2004528045654297, |
|
"logps/chosen": -144.79664611816406, |
|
"logps/rejected": -149.7763671875, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.46431243419647217, |
|
"rewards/margins": 0.1997339278459549, |
|
"rewards/rejected": -0.6640464067459106, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.99992166208644e-05, |
|
"logits/chosen": -2.2899487018585205, |
|
"logits/rejected": -2.2257750034332275, |
|
"logps/chosen": -208.61517333984375, |
|
"logps/rejected": -200.343505859375, |
|
"loss": 0.7233, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5118069052696228, |
|
"rewards/margins": -0.018164699897170067, |
|
"rewards/rejected": -0.4936422109603882, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.999913199036719e-05, |
|
"logits/chosen": -2.520472288131714, |
|
"logits/rejected": -2.612544298171997, |
|
"logps/chosen": -160.17779541015625, |
|
"logps/rejected": -191.498291015625, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4993933439254761, |
|
"rewards/margins": 0.15375012159347534, |
|
"rewards/rejected": -0.6531434059143066, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9999043019947454e-05, |
|
"logits/chosen": -2.5590789318084717, |
|
"logits/rejected": -2.594666004180908, |
|
"logps/chosen": -219.09494018554688, |
|
"logps/rejected": -271.0511474609375, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.37969574332237244, |
|
"rewards/margins": 0.06747917830944061, |
|
"rewards/rejected": -0.44717487692832947, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9998949709620636e-05, |
|
"logits/chosen": -2.292649269104004, |
|
"logits/rejected": -2.4197192192077637, |
|
"logps/chosen": -156.77658081054688, |
|
"logps/rejected": -181.56752014160156, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.24466285109519958, |
|
"rewards/margins": 0.17808005213737488, |
|
"rewards/rejected": -0.4227428734302521, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.999885205940293e-05, |
|
"logits/chosen": -2.526571750640869, |
|
"logits/rejected": -2.6078643798828125, |
|
"logps/chosen": -188.48431396484375, |
|
"logps/rejected": -209.88192749023438, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.38001808524131775, |
|
"rewards/margins": 0.21444422006607056, |
|
"rewards/rejected": -0.5944622755050659, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9998750069311306e-05, |
|
"logits/chosen": -2.477735757827759, |
|
"logits/rejected": -2.524329900741577, |
|
"logps/chosen": -170.30780029296875, |
|
"logps/rejected": -167.05538940429688, |
|
"loss": 0.7105, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.45474332571029663, |
|
"rewards/margins": -0.0021874159574508667, |
|
"rewards/rejected": -0.45255595445632935, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.999864373936345e-05, |
|
"logits/chosen": -2.486284017562866, |
|
"logits/rejected": -2.511571168899536, |
|
"logps/chosen": -213.28150939941406, |
|
"logps/rejected": -220.744140625, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5180980563163757, |
|
"rewards/margins": -0.013586281798779964, |
|
"rewards/rejected": -0.5045117139816284, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.999853306957783e-05, |
|
"logits/chosen": -2.149216651916504, |
|
"logits/rejected": -2.331148862838745, |
|
"logps/chosen": -212.50167846679688, |
|
"logps/rejected": -229.76211547851562, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.45535096526145935, |
|
"rewards/margins": 0.13961246609687805, |
|
"rewards/rejected": -0.5949634909629822, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9998418059973654e-05, |
|
"logits/chosen": -2.3633456230163574, |
|
"logits/rejected": -2.3958113193511963, |
|
"logps/chosen": -260.77191162109375, |
|
"logps/rejected": -269.7747497558594, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3385522663593292, |
|
"rewards/margins": 0.154096320271492, |
|
"rewards/rejected": -0.4926486015319824, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.99982987105709e-05, |
|
"logits/chosen": -2.4027421474456787, |
|
"logits/rejected": -2.47273588180542, |
|
"logps/chosen": -190.19549560546875, |
|
"logps/rejected": -199.79869079589844, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.45771288871765137, |
|
"rewards/margins": 0.16400352120399475, |
|
"rewards/rejected": -0.6217163801193237, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.999817502139027e-05, |
|
"logits/chosen": -2.459967851638794, |
|
"logits/rejected": -2.5030205249786377, |
|
"logps/chosen": -152.8588104248047, |
|
"logps/rejected": -151.28167724609375, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4902128279209137, |
|
"rewards/margins": 0.07620880752801895, |
|
"rewards/rejected": -0.566421627998352, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.999804699245325e-05, |
|
"logits/chosen": -2.613358497619629, |
|
"logits/rejected": -2.6325273513793945, |
|
"logps/chosen": -240.51898193359375, |
|
"logps/rejected": -209.2674560546875, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5984245538711548, |
|
"rewards/margins": 0.08296503126621246, |
|
"rewards/rejected": -0.6813895106315613, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.999791462378206e-05, |
|
"logits/chosen": -2.523002862930298, |
|
"logits/rejected": -2.544940948486328, |
|
"logps/chosen": -209.53164672851562, |
|
"logps/rejected": -211.6482391357422, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4731237292289734, |
|
"rewards/margins": 0.139340341091156, |
|
"rewards/rejected": -0.6124641299247742, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.999777791539968e-05, |
|
"logits/chosen": -2.3467066287994385, |
|
"logits/rejected": -2.4002304077148438, |
|
"logps/chosen": -229.88079833984375, |
|
"logps/rejected": -236.3416290283203, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5287673473358154, |
|
"rewards/margins": 0.11849495768547058, |
|
"rewards/rejected": -0.6472623348236084, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9997636867329844e-05, |
|
"logits/chosen": -2.3951451778411865, |
|
"logits/rejected": -2.3899013996124268, |
|
"logps/chosen": -230.42471313476562, |
|
"logps/rejected": -204.70712280273438, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7095022797584534, |
|
"rewards/margins": 0.06169421225786209, |
|
"rewards/rejected": -0.7711963653564453, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.999749147959703e-05, |
|
"logits/chosen": -2.4759864807128906, |
|
"logits/rejected": -2.461402177810669, |
|
"logps/chosen": -208.62255859375, |
|
"logps/rejected": -215.98049926757812, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.47989076375961304, |
|
"rewards/margins": 0.18254442512989044, |
|
"rewards/rejected": -0.6624351143836975, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.99973417522265e-05, |
|
"logits/chosen": -2.6637802124023438, |
|
"logits/rejected": -2.6086812019348145, |
|
"logps/chosen": -201.55992126464844, |
|
"logps/rejected": -185.3202667236328, |
|
"loss": 0.7158, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5636375546455383, |
|
"rewards/margins": 0.09092222899198532, |
|
"rewards/rejected": -0.6545597910881042, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9997187685244234e-05, |
|
"logits/chosen": -2.386868953704834, |
|
"logits/rejected": -2.438833475112915, |
|
"logps/chosen": -149.93809509277344, |
|
"logps/rejected": -164.56199645996094, |
|
"loss": 0.7271, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.43254008889198303, |
|
"rewards/margins": -0.0416962131857872, |
|
"rewards/rejected": -0.39084386825561523, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.999702927867698e-05, |
|
"logits/chosen": -2.382352113723755, |
|
"logits/rejected": -2.447472095489502, |
|
"logps/chosen": -195.46499633789062, |
|
"logps/rejected": -191.3668212890625, |
|
"loss": 0.7562, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5873669385910034, |
|
"rewards/margins": -0.07318583130836487, |
|
"rewards/rejected": -0.5141811370849609, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.999686653255222e-05, |
|
"logits/chosen": -2.6838254928588867, |
|
"logits/rejected": -2.735358476638794, |
|
"logps/chosen": -232.84722900390625, |
|
"logps/rejected": -253.7335205078125, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5806477069854736, |
|
"rewards/margins": 0.33124226331710815, |
|
"rewards/rejected": -0.9118900299072266, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9996699446898235e-05, |
|
"logits/chosen": -2.3523752689361572, |
|
"logits/rejected": -2.455883026123047, |
|
"logps/chosen": -158.80831909179688, |
|
"logps/rejected": -215.85203552246094, |
|
"loss": 0.7447, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5600742697715759, |
|
"rewards/margins": 0.03419441357254982, |
|
"rewards/rejected": -0.5942687392234802, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.999652802174402e-05, |
|
"logits/chosen": -2.5024056434631348, |
|
"logits/rejected": -2.4992480278015137, |
|
"logps/chosen": -233.17120361328125, |
|
"logps/rejected": -249.13568115234375, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7229613065719604, |
|
"rewards/margins": 0.06977183371782303, |
|
"rewards/rejected": -0.7927330732345581, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.999635225711933e-05, |
|
"logits/chosen": -2.3752949237823486, |
|
"logits/rejected": -2.499027729034424, |
|
"logps/chosen": -165.4285888671875, |
|
"logps/rejected": -213.1377716064453, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4794481098651886, |
|
"rewards/margins": 0.1226089596748352, |
|
"rewards/rejected": -0.602057158946991, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999617215305468e-05, |
|
"logits/chosen": -2.578481674194336, |
|
"logits/rejected": -2.5905745029449463, |
|
"logps/chosen": -160.28704833984375, |
|
"logps/rejected": -186.60206604003906, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.379621684551239, |
|
"rewards/margins": 0.28185874223709106, |
|
"rewards/rejected": -0.6614804863929749, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999598770958134e-05, |
|
"logits/chosen": -2.1814939975738525, |
|
"logits/rejected": -2.273460865020752, |
|
"logps/chosen": -178.5402069091797, |
|
"logps/rejected": -246.2064208984375, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.47223299741744995, |
|
"rewards/margins": 0.11296035349369049, |
|
"rewards/rejected": -0.5851933360099792, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999579892673133e-05, |
|
"logits/chosen": -2.6444742679595947, |
|
"logits/rejected": -2.651982545852661, |
|
"logps/chosen": -191.74351501464844, |
|
"logps/rejected": -191.7657928466797, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6643162965774536, |
|
"rewards/margins": 0.027530232444405556, |
|
"rewards/rejected": -0.691846489906311, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9995605804537426e-05, |
|
"logits/chosen": -2.4908974170684814, |
|
"logits/rejected": -2.550518274307251, |
|
"logps/chosen": -188.7380828857422, |
|
"logps/rejected": -211.574951171875, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3170633912086487, |
|
"rewards/margins": 0.3337726294994354, |
|
"rewards/rejected": -0.6508359313011169, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999540834303315e-05, |
|
"logits/chosen": -2.403130054473877, |
|
"logits/rejected": -2.5044126510620117, |
|
"logps/chosen": -216.05221557617188, |
|
"logps/rejected": -224.658203125, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5969533920288086, |
|
"rewards/margins": 0.4197196662425995, |
|
"rewards/rejected": -1.0166730880737305, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999520654225278e-05, |
|
"logits/chosen": -2.5251312255859375, |
|
"logits/rejected": -2.6076760292053223, |
|
"logps/chosen": -171.32049560546875, |
|
"logps/rejected": -189.77108764648438, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5219372510910034, |
|
"rewards/margins": 0.3483983874320984, |
|
"rewards/rejected": -0.8703356385231018, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9995000402231354e-05, |
|
"logits/chosen": -2.564474105834961, |
|
"logits/rejected": -2.510350227355957, |
|
"logps/chosen": -219.81732177734375, |
|
"logps/rejected": -202.83493041992188, |
|
"loss": 0.7793, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7294387817382812, |
|
"rewards/margins": -0.11600115895271301, |
|
"rewards/rejected": -0.6134375333786011, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.999478992300466e-05, |
|
"logits/chosen": -2.5275182723999023, |
|
"logits/rejected": -2.6380527019500732, |
|
"logps/chosen": -165.94358825683594, |
|
"logps/rejected": -212.28086853027344, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4685615599155426, |
|
"rewards/margins": 0.44902655482292175, |
|
"rewards/rejected": -0.9175881147384644, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999457510460923e-05, |
|
"logits/chosen": -2.407663106918335, |
|
"logits/rejected": -2.5699005126953125, |
|
"logps/chosen": -167.9168701171875, |
|
"logps/rejected": -214.4752197265625, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48059117794036865, |
|
"rewards/margins": 0.3546959161758423, |
|
"rewards/rejected": -0.8352870345115662, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999435594708236e-05, |
|
"logits/chosen": -2.3764097690582275, |
|
"logits/rejected": -2.3675248622894287, |
|
"logps/chosen": -201.25814819335938, |
|
"logps/rejected": -229.6868133544922, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6616516709327698, |
|
"rewards/margins": 0.10846880823373795, |
|
"rewards/rejected": -0.7701205611228943, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999413245046211e-05, |
|
"logits/chosen": -2.5667271614074707, |
|
"logits/rejected": -2.5309338569641113, |
|
"logps/chosen": -210.30267333984375, |
|
"logps/rejected": -206.71014404296875, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.572422444820404, |
|
"rewards/margins": 0.30542662739753723, |
|
"rewards/rejected": -0.8778490424156189, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9993904614787254e-05, |
|
"logits/chosen": -2.463524341583252, |
|
"logits/rejected": -2.530627965927124, |
|
"logps/chosen": -179.8057861328125, |
|
"logps/rejected": -200.56394958496094, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6925724148750305, |
|
"rewards/margins": 0.21750670671463013, |
|
"rewards/rejected": -0.9100791215896606, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999367244009736e-05, |
|
"logits/chosen": -2.4720635414123535, |
|
"logits/rejected": -2.5370302200317383, |
|
"logps/chosen": -194.29440307617188, |
|
"logps/rejected": -196.02944946289062, |
|
"loss": 0.8265, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0657517910003662, |
|
"rewards/margins": -0.16590073704719543, |
|
"rewards/rejected": -0.8998512029647827, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999343592643274e-05, |
|
"logits/chosen": -2.3978195190429688, |
|
"logits/rejected": -2.397984266281128, |
|
"logps/chosen": -229.4518585205078, |
|
"logps/rejected": -217.56869506835938, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6397801637649536, |
|
"rewards/margins": 0.10495337843894958, |
|
"rewards/rejected": -0.7447335720062256, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999319507383444e-05, |
|
"logits/chosen": -2.3354411125183105, |
|
"logits/rejected": -2.4454195499420166, |
|
"logps/chosen": -149.3455810546875, |
|
"logps/rejected": -187.3743438720703, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7036041617393494, |
|
"rewards/margins": 0.3835078775882721, |
|
"rewards/rejected": -1.0871120691299438, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999294988234428e-05, |
|
"logits/chosen": -2.491671323776245, |
|
"logits/rejected": -2.550612449645996, |
|
"logps/chosen": -180.36758422851562, |
|
"logps/rejected": -251.5354461669922, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7723766565322876, |
|
"rewards/margins": 0.2752906382083893, |
|
"rewards/rejected": -1.047667384147644, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999270035200483e-05, |
|
"logits/chosen": -2.492790937423706, |
|
"logits/rejected": -2.365570306777954, |
|
"logps/chosen": -195.1597900390625, |
|
"logps/rejected": -185.96673583984375, |
|
"loss": 0.7169, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2230509519577026, |
|
"rewards/margins": 0.14210942387580872, |
|
"rewards/rejected": -1.3651604652404785, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.99924464828594e-05, |
|
"logits/chosen": -2.412907838821411, |
|
"logits/rejected": -2.508164882659912, |
|
"logps/chosen": -196.51846313476562, |
|
"logps/rejected": -218.0734405517578, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.813502848148346, |
|
"rewards/margins": 0.3059760332107544, |
|
"rewards/rejected": -1.1194789409637451, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9992188274952064e-05, |
|
"logits/chosen": -2.4737277030944824, |
|
"logits/rejected": -2.5720887184143066, |
|
"logps/chosen": -155.16171264648438, |
|
"logps/rejected": -195.86997985839844, |
|
"loss": 0.7379, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8073078989982605, |
|
"rewards/margins": 0.11351939290761948, |
|
"rewards/rejected": -0.920827329158783, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999192572832765e-05, |
|
"logits/chosen": -2.5303239822387695, |
|
"logits/rejected": -2.5628015995025635, |
|
"logps/chosen": -281.7694396972656, |
|
"logps/rejected": -303.6089782714844, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6562893986701965, |
|
"rewards/margins": 0.12363787740468979, |
|
"rewards/rejected": -0.7799273133277893, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999165884303174e-05, |
|
"logits/chosen": -2.481058359146118, |
|
"logits/rejected": -2.4526753425598145, |
|
"logps/chosen": -182.2716064453125, |
|
"logps/rejected": -242.58828735351562, |
|
"loss": 0.7317, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7687569260597229, |
|
"rewards/margins": -0.007903970777988434, |
|
"rewards/rejected": -0.7608529329299927, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999138761911066e-05, |
|
"logits/chosen": -2.459987163543701, |
|
"logits/rejected": -2.5180883407592773, |
|
"logps/chosen": -209.83102416992188, |
|
"logps/rejected": -216.1147918701172, |
|
"loss": 0.7265, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0292317867279053, |
|
"rewards/margins": 0.04909829795360565, |
|
"rewards/rejected": -1.0783300399780273, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.99911120566115e-05, |
|
"logits/chosen": -2.5340957641601562, |
|
"logits/rejected": -2.654463052749634, |
|
"logps/chosen": -210.88929748535156, |
|
"logps/rejected": -198.64614868164062, |
|
"loss": 0.7475, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2450902462005615, |
|
"rewards/margins": 0.012418277561664581, |
|
"rewards/rejected": -1.2575085163116455, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.99908321555821e-05, |
|
"logits/chosen": -2.4791364669799805, |
|
"logits/rejected": -2.5890302658081055, |
|
"logps/chosen": -192.5817108154297, |
|
"logps/rejected": -224.9603271484375, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9010199308395386, |
|
"rewards/margins": 0.4015497863292694, |
|
"rewards/rejected": -1.3025696277618408, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.999054791607105e-05, |
|
"logits/chosen": -2.428842067718506, |
|
"logits/rejected": -2.372598171234131, |
|
"logps/chosen": -221.89833068847656, |
|
"logps/rejected": -212.29595947265625, |
|
"loss": 0.7305, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0515369176864624, |
|
"rewards/margins": 0.017772257328033447, |
|
"rewards/rejected": -1.0693092346191406, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.999025933812769e-05, |
|
"logits/chosen": -2.50471830368042, |
|
"logits/rejected": -2.5308945178985596, |
|
"logps/chosen": -198.7537384033203, |
|
"logps/rejected": -236.5478515625, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8205583095550537, |
|
"rewards/margins": 0.18831993639469147, |
|
"rewards/rejected": -1.008878231048584, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9989966421802114e-05, |
|
"logits/chosen": -2.410905122756958, |
|
"logits/rejected": -2.5401611328125, |
|
"logps/chosen": -202.42991638183594, |
|
"logps/rejected": -219.6848602294922, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9180124402046204, |
|
"rewards/margins": 0.24881067872047424, |
|
"rewards/rejected": -1.166823148727417, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.998966916714519e-05, |
|
"logits/chosen": -2.4587323665618896, |
|
"logits/rejected": -2.5724194049835205, |
|
"logps/chosen": -154.88470458984375, |
|
"logps/rejected": -172.47268676757812, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7627564072608948, |
|
"rewards/margins": 0.294120192527771, |
|
"rewards/rejected": -1.0568766593933105, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.998936757420851e-05, |
|
"logits/chosen": -2.284227132797241, |
|
"logits/rejected": -2.380892515182495, |
|
"logps/chosen": -138.282958984375, |
|
"logps/rejected": -180.36441040039062, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7448742389678955, |
|
"rewards/margins": 0.4253446161746979, |
|
"rewards/rejected": -1.1702189445495605, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9989061643044434e-05, |
|
"logits/chosen": -2.5783276557922363, |
|
"logits/rejected": -2.5471560955047607, |
|
"logps/chosen": -186.851318359375, |
|
"logps/rejected": -193.73837280273438, |
|
"loss": 0.7108, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0091472864151, |
|
"rewards/margins": 0.033400412648916245, |
|
"rewards/rejected": -1.0425477027893066, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9988751373706075e-05, |
|
"logits/chosen": -2.398940086364746, |
|
"logits/rejected": -2.3498027324676514, |
|
"logps/chosen": -171.69107055664062, |
|
"logps/rejected": -209.19424438476562, |
|
"loss": 0.7045, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9662235975265503, |
|
"rewards/margins": 0.137020543217659, |
|
"rewards/rejected": -1.1032441854476929, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9988436766247284e-05, |
|
"logits/chosen": -2.415847063064575, |
|
"logits/rejected": -2.3855319023132324, |
|
"logps/chosen": -228.1761932373047, |
|
"logps/rejected": -195.01991271972656, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9869641661643982, |
|
"rewards/margins": 0.11401443928480148, |
|
"rewards/rejected": -1.1009787321090698, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9988117820722704e-05, |
|
"logits/chosen": -2.638780117034912, |
|
"logits/rejected": -2.607104539871216, |
|
"logps/chosen": -234.91183471679688, |
|
"logps/rejected": -203.67996215820312, |
|
"loss": 0.9279, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.2375922203063965, |
|
"rewards/margins": -0.281325101852417, |
|
"rewards/rejected": -0.956267237663269, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.998779453718768e-05, |
|
"logits/chosen": -2.254124879837036, |
|
"logits/rejected": -2.429471969604492, |
|
"logps/chosen": -235.76698303222656, |
|
"logps/rejected": -262.7959899902344, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0454847812652588, |
|
"rewards/margins": 0.3030804395675659, |
|
"rewards/rejected": -1.3485652208328247, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9987466915698346e-05, |
|
"logits/chosen": -2.276986598968506, |
|
"logits/rejected": -2.337780475616455, |
|
"logps/chosen": -138.3777313232422, |
|
"logps/rejected": -166.30181884765625, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5320202708244324, |
|
"rewards/margins": 0.315082311630249, |
|
"rewards/rejected": -0.8471025228500366, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.998713495631156e-05, |
|
"logits/chosen": -2.420923948287964, |
|
"logits/rejected": -2.565662384033203, |
|
"logps/chosen": -180.71446228027344, |
|
"logps/rejected": -192.2035369873047, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8985862731933594, |
|
"rewards/margins": 0.2061445415019989, |
|
"rewards/rejected": -1.1047309637069702, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.998679865908499e-05, |
|
"logits/chosen": -2.5881500244140625, |
|
"logits/rejected": -2.6503522396087646, |
|
"logps/chosen": -183.07540893554688, |
|
"logps/rejected": -233.943603515625, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1374621391296387, |
|
"rewards/margins": 0.19141921401023865, |
|
"rewards/rejected": -1.3288813829421997, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9986458024076984e-05, |
|
"logits/chosen": -2.4952712059020996, |
|
"logits/rejected": -2.50299072265625, |
|
"logps/chosen": -192.31314086914062, |
|
"logps/rejected": -197.241455078125, |
|
"loss": 0.7729, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.13700532913208, |
|
"rewards/margins": -0.10435198992490768, |
|
"rewards/rejected": -1.0326533317565918, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.998611305134669e-05, |
|
"logits/chosen": -2.5952248573303223, |
|
"logits/rejected": -2.600719928741455, |
|
"logps/chosen": -191.4530487060547, |
|
"logps/rejected": -212.82955932617188, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9059873819351196, |
|
"rewards/margins": 0.1599172055721283, |
|
"rewards/rejected": -1.0659046173095703, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9985763740954e-05, |
|
"logits/chosen": -2.4821391105651855, |
|
"logits/rejected": -2.4574878215789795, |
|
"logps/chosen": -264.071533203125, |
|
"logps/rejected": -256.51611328125, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.0796548128128052, |
|
"rewards/margins": 0.06612614542245865, |
|
"rewards/rejected": -1.1457810401916504, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9985410092959553e-05, |
|
"logits/chosen": -2.4601786136627197, |
|
"logits/rejected": -2.519011974334717, |
|
"logps/chosen": -158.57687377929688, |
|
"logps/rejected": -173.29666137695312, |
|
"loss": 0.7693, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.7817510366439819, |
|
"rewards/margins": -0.05254516005516052, |
|
"rewards/rejected": -0.729205846786499, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.998505210742472e-05, |
|
"logits/chosen": -2.6099700927734375, |
|
"logits/rejected": -2.58197021484375, |
|
"logps/chosen": -195.2617950439453, |
|
"logps/rejected": -188.54165649414062, |
|
"loss": 0.9197, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -1.1366631984710693, |
|
"rewards/margins": -0.32979804277420044, |
|
"rewards/rejected": -0.8068650960922241, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9984689784411686e-05, |
|
"logits/chosen": -2.679287910461426, |
|
"logits/rejected": -2.5679209232330322, |
|
"logps/chosen": -211.75323486328125, |
|
"logps/rejected": -245.02603149414062, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9691303372383118, |
|
"rewards/margins": 0.17891569435596466, |
|
"rewards/rejected": -1.1480460166931152, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9984323123983334e-05, |
|
"logits/chosen": -2.537733316421509, |
|
"logits/rejected": -2.6055424213409424, |
|
"logps/chosen": -151.74288940429688, |
|
"logps/rejected": -250.8179168701172, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.928146243095398, |
|
"rewards/margins": 0.24633805453777313, |
|
"rewards/rejected": -1.174484372138977, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.998395212620332e-05, |
|
"logits/chosen": -2.4368271827697754, |
|
"logits/rejected": -2.529536247253418, |
|
"logps/chosen": -207.87677001953125, |
|
"logps/rejected": -229.0767822265625, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7506311535835266, |
|
"rewards/margins": 0.24917976558208466, |
|
"rewards/rejected": -0.9998109340667725, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.998357679113603e-05, |
|
"logits/chosen": -2.5217807292938232, |
|
"logits/rejected": -2.6954245567321777, |
|
"logps/chosen": -192.20407104492188, |
|
"logps/rejected": -200.1387176513672, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6829215288162231, |
|
"rewards/margins": 0.2588457763195038, |
|
"rewards/rejected": -0.9417673349380493, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9983197118846655e-05, |
|
"logits/chosen": -2.4711546897888184, |
|
"logits/rejected": -2.4839439392089844, |
|
"logps/chosen": -160.9691925048828, |
|
"logps/rejected": -167.21469116210938, |
|
"loss": 0.7301, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7170782685279846, |
|
"rewards/margins": 0.04331670328974724, |
|
"rewards/rejected": -0.7603949904441833, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9982813109401096e-05, |
|
"logits/chosen": -2.3429412841796875, |
|
"logits/rejected": -2.3843770027160645, |
|
"logps/chosen": -221.22274780273438, |
|
"logps/rejected": -246.99005126953125, |
|
"loss": 0.7711, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.114179015159607, |
|
"rewards/margins": -0.07230883836746216, |
|
"rewards/rejected": -1.0418701171875, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998242476286601e-05, |
|
"logits/chosen": -2.7864720821380615, |
|
"logits/rejected": -2.7745487689971924, |
|
"logps/chosen": -194.08201599121094, |
|
"logps/rejected": -226.28591918945312, |
|
"loss": 0.71, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8893502354621887, |
|
"rewards/margins": 0.06053268164396286, |
|
"rewards/rejected": -0.949882984161377, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998203207930882e-05, |
|
"logits/chosen": -2.6679065227508545, |
|
"logits/rejected": -2.711076498031616, |
|
"logps/chosen": -190.53013610839844, |
|
"logps/rejected": -200.187744140625, |
|
"loss": 0.8201, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0141409635543823, |
|
"rewards/margins": -0.17862781882286072, |
|
"rewards/rejected": -0.8355131149291992, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998163505879769e-05, |
|
"logits/chosen": -2.7404122352600098, |
|
"logits/rejected": -2.732025146484375, |
|
"logps/chosen": -178.48135375976562, |
|
"logps/rejected": -183.08074951171875, |
|
"loss": 0.7078, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8886659145355225, |
|
"rewards/margins": 0.043551601469516754, |
|
"rewards/rejected": -0.9322174787521362, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998123370140156e-05, |
|
"logits/chosen": -2.6989643573760986, |
|
"logits/rejected": -2.7124342918395996, |
|
"logps/chosen": -223.03314208984375, |
|
"logps/rejected": -221.12120056152344, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.764815628528595, |
|
"rewards/margins": 0.1835068166255951, |
|
"rewards/rejected": -0.9483224153518677, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.99808280071901e-05, |
|
"logits/chosen": -2.618727684020996, |
|
"logits/rejected": -2.6078970432281494, |
|
"logps/chosen": -176.1873779296875, |
|
"logps/rejected": -199.52859497070312, |
|
"loss": 0.7406, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8613646626472473, |
|
"rewards/margins": -0.05548467859625816, |
|
"rewards/rejected": -0.805880069732666, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9980417976233735e-05, |
|
"logits/chosen": -2.6192498207092285, |
|
"logits/rejected": -2.732285261154175, |
|
"logps/chosen": -211.3184814453125, |
|
"logps/rejected": -195.73056030273438, |
|
"loss": 0.7921, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9030998945236206, |
|
"rewards/margins": -0.1349114179611206, |
|
"rewards/rejected": -0.7681884765625, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9980003608603656e-05, |
|
"logits/chosen": -2.5657694339752197, |
|
"logits/rejected": -2.5230712890625, |
|
"logps/chosen": -203.0819091796875, |
|
"logps/rejected": -226.5617218017578, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.609666109085083, |
|
"rewards/margins": 0.08627453446388245, |
|
"rewards/rejected": -0.6959406137466431, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.997958490437178e-05, |
|
"logits/chosen": -2.641559362411499, |
|
"logits/rejected": -2.76076602935791, |
|
"logps/chosen": -169.1241455078125, |
|
"logps/rejected": -183.4690399169922, |
|
"loss": 0.7612, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7050959467887878, |
|
"rewards/margins": -0.03217136114835739, |
|
"rewards/rejected": -0.6729245781898499, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9979161863610816e-05, |
|
"logits/chosen": -2.441647529602051, |
|
"logits/rejected": -2.467510223388672, |
|
"logps/chosen": -143.13748168945312, |
|
"logps/rejected": -172.0884246826172, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5210601091384888, |
|
"rewards/margins": 0.14004816114902496, |
|
"rewards/rejected": -0.6611082553863525, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.99787344863942e-05, |
|
"logits/chosen": -2.627023458480835, |
|
"logits/rejected": -2.5984585285186768, |
|
"logps/chosen": -203.31980895996094, |
|
"logps/rejected": -218.84060668945312, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5382375717163086, |
|
"rewards/margins": 0.20881405472755432, |
|
"rewards/rejected": -0.7470515966415405, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.997830277279612e-05, |
|
"logits/chosen": -2.595003604888916, |
|
"logits/rejected": -2.722808361053467, |
|
"logps/chosen": -223.01495361328125, |
|
"logps/rejected": -240.90130615234375, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7480457425117493, |
|
"rewards/margins": -0.029575012624263763, |
|
"rewards/rejected": -0.7184706926345825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.997786672289152e-05, |
|
"logits/chosen": -2.6220602989196777, |
|
"logits/rejected": -2.6606202125549316, |
|
"logps/chosen": -204.98797607421875, |
|
"logps/rejected": -206.25839233398438, |
|
"loss": 0.7368, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6979295611381531, |
|
"rewards/margins": -0.07002773880958557, |
|
"rewards/rejected": -0.6279018521308899, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.997742633675612e-05, |
|
"logits/chosen": -2.547811508178711, |
|
"logits/rejected": -2.551816463470459, |
|
"logps/chosen": -196.38748168945312, |
|
"logps/rejected": -222.4247589111328, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5409201383590698, |
|
"rewards/margins": 0.19667872786521912, |
|
"rewards/rejected": -0.7375988364219666, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9976981614466344e-05, |
|
"logits/chosen": -2.2461767196655273, |
|
"logits/rejected": -2.2968175411224365, |
|
"logps/chosen": -185.43954467773438, |
|
"logps/rejected": -196.11392211914062, |
|
"loss": 0.7743, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5852402448654175, |
|
"rewards/margins": -0.08380623161792755, |
|
"rewards/rejected": -0.5014340281486511, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997653255609942e-05, |
|
"logits/chosen": -2.4160966873168945, |
|
"logits/rejected": -2.389375925064087, |
|
"logps/chosen": -189.16510009765625, |
|
"logps/rejected": -202.54649353027344, |
|
"loss": 0.7306, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.729293167591095, |
|
"rewards/margins": -0.006243497133255005, |
|
"rewards/rejected": -0.7230496406555176, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997607916173329e-05, |
|
"logits/chosen": -2.6787378787994385, |
|
"logits/rejected": -2.6836445331573486, |
|
"logps/chosen": -177.60980224609375, |
|
"logps/rejected": -177.10487365722656, |
|
"loss": 0.765, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5183064341545105, |
|
"rewards/margins": -0.11038734763860703, |
|
"rewards/rejected": -0.4079190790653229, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997562143144668e-05, |
|
"logits/chosen": -2.559563398361206, |
|
"logits/rejected": -2.5205612182617188, |
|
"logps/chosen": -208.58892822265625, |
|
"logps/rejected": -248.25648498535156, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5725008845329285, |
|
"rewards/margins": 0.11152593046426773, |
|
"rewards/rejected": -0.6840267777442932, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997515936531903e-05, |
|
"logits/chosen": -2.6034438610076904, |
|
"logits/rejected": -2.528724193572998, |
|
"logps/chosen": -159.13522338867188, |
|
"logps/rejected": -135.77056884765625, |
|
"loss": 0.7413, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5185827016830444, |
|
"rewards/margins": -0.04235066473484039, |
|
"rewards/rejected": -0.47623202204704285, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9974692963430595e-05, |
|
"logits/chosen": -2.3184618949890137, |
|
"logits/rejected": -2.3163304328918457, |
|
"logps/chosen": -130.70643615722656, |
|
"logps/rejected": -142.0390625, |
|
"loss": 0.7362, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6057229042053223, |
|
"rewards/margins": -0.014669769443571568, |
|
"rewards/rejected": -0.5910531282424927, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.99742222258623e-05, |
|
"logits/chosen": -2.660839796066284, |
|
"logits/rejected": -2.6470634937286377, |
|
"logps/chosen": -221.08424377441406, |
|
"logps/rejected": -213.25003051757812, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5536531209945679, |
|
"rewards/margins": 0.022671688348054886, |
|
"rewards/rejected": -0.5763247609138489, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997374715269589e-05, |
|
"logits/chosen": -2.2223334312438965, |
|
"logits/rejected": -2.2852203845977783, |
|
"logps/chosen": -172.71421813964844, |
|
"logps/rejected": -206.3236541748047, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5476804375648499, |
|
"rewards/margins": 0.10758630931377411, |
|
"rewards/rejected": -0.6552667617797852, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.997326774401383e-05, |
|
"logits/chosen": -2.262892961502075, |
|
"logits/rejected": -2.343332052230835, |
|
"logps/chosen": -152.0110626220703, |
|
"logps/rejected": -178.68345642089844, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.48357081413269043, |
|
"rewards/margins": 0.07582694292068481, |
|
"rewards/rejected": -0.55939781665802, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9972783999899366e-05, |
|
"logits/chosen": -2.2378625869750977, |
|
"logits/rejected": -2.2833354473114014, |
|
"logps/chosen": -154.71405029296875, |
|
"logps/rejected": -177.74502563476562, |
|
"loss": 0.709, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5034060478210449, |
|
"rewards/margins": -0.0023919548839330673, |
|
"rewards/rejected": -0.5010141134262085, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.997229592043645e-05, |
|
"logits/chosen": -2.4182140827178955, |
|
"logits/rejected": -2.477410078048706, |
|
"logps/chosen": -217.6938018798828, |
|
"logps/rejected": -236.60140991210938, |
|
"loss": 0.7474, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.42065930366516113, |
|
"rewards/margins": -0.08166539669036865, |
|
"rewards/rejected": -0.33899393677711487, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.997180350570984e-05, |
|
"logits/chosen": -2.6971006393432617, |
|
"logits/rejected": -2.709580659866333, |
|
"logps/chosen": -231.80714416503906, |
|
"logps/rejected": -234.41290283203125, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4388115406036377, |
|
"rewards/margins": 0.1023733913898468, |
|
"rewards/rejected": -0.5411849617958069, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9971306755804995e-05, |
|
"logits/chosen": -2.600799083709717, |
|
"logits/rejected": -2.5639290809631348, |
|
"logps/chosen": -211.75051879882812, |
|
"logps/rejected": -185.4350128173828, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3958936929702759, |
|
"rewards/margins": 0.08459535241127014, |
|
"rewards/rejected": -0.4804890751838684, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.997080567080817e-05, |
|
"logits/chosen": -2.2965052127838135, |
|
"logits/rejected": -2.3450510501861572, |
|
"logps/chosen": -170.9476318359375, |
|
"logps/rejected": -174.3068389892578, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5262787938117981, |
|
"rewards/margins": 0.09660908579826355, |
|
"rewards/rejected": -0.6228878498077393, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9970300250806346e-05, |
|
"logits/chosen": -2.5405097007751465, |
|
"logits/rejected": -2.5496726036071777, |
|
"logps/chosen": -266.0992736816406, |
|
"logps/rejected": -260.38275146484375, |
|
"loss": 0.7048, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5492247939109802, |
|
"rewards/margins": 0.042401984333992004, |
|
"rewards/rejected": -0.591626763343811, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.996979049588727e-05, |
|
"logits/chosen": -2.502474308013916, |
|
"logits/rejected": -2.439131736755371, |
|
"logps/chosen": -227.49476623535156, |
|
"logps/rejected": -206.38299560546875, |
|
"loss": 0.711, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5280985236167908, |
|
"rewards/margins": 0.010388553142547607, |
|
"rewards/rejected": -0.5384870767593384, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.996927640613944e-05, |
|
"logits/chosen": -2.3517448902130127, |
|
"logits/rejected": -2.5404696464538574, |
|
"logps/chosen": -162.60267639160156, |
|
"logps/rejected": -216.92562866210938, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2759331464767456, |
|
"rewards/margins": 0.19823111593723297, |
|
"rewards/rejected": -0.474164217710495, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.99687579816521e-05, |
|
"logits/chosen": -2.411978006362915, |
|
"logits/rejected": -2.5019097328186035, |
|
"logps/chosen": -201.91445922851562, |
|
"logps/rejected": -211.31448364257812, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4115257263183594, |
|
"rewards/margins": 0.07280793786048889, |
|
"rewards/rejected": -0.48433366417884827, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9968235222515246e-05, |
|
"logits/chosen": -2.4315428733825684, |
|
"logits/rejected": -2.4550862312316895, |
|
"logps/chosen": -162.66555786132812, |
|
"logps/rejected": -156.19947814941406, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.36304140090942383, |
|
"rewards/margins": 0.12375527620315552, |
|
"rewards/rejected": -0.48679661750793457, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.996770812881964e-05, |
|
"logits/chosen": -2.5614113807678223, |
|
"logits/rejected": -2.5391292572021484, |
|
"logps/chosen": -214.3634033203125, |
|
"logps/rejected": -209.9477996826172, |
|
"loss": 0.7018, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.319223552942276, |
|
"rewards/margins": 0.018056731671094894, |
|
"rewards/rejected": -0.3372803032398224, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9967176700656776e-05, |
|
"logits/chosen": -2.438467264175415, |
|
"logits/rejected": -2.4638776779174805, |
|
"logps/chosen": -227.1153564453125, |
|
"logps/rejected": -219.0038299560547, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.45765751600265503, |
|
"rewards/margins": 0.19506625831127167, |
|
"rewards/rejected": -0.6527237892150879, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.996664093811892e-05, |
|
"logits/chosen": -2.5968563556671143, |
|
"logits/rejected": -2.5226142406463623, |
|
"logps/chosen": -184.9003143310547, |
|
"logps/rejected": -202.8119354248047, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.36458614468574524, |
|
"rewards/margins": 0.22884023189544678, |
|
"rewards/rejected": -0.5934264063835144, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.996610084129908e-05, |
|
"logits/chosen": -2.356076955795288, |
|
"logits/rejected": -2.464617967605591, |
|
"logps/chosen": -233.56741333007812, |
|
"logps/rejected": -252.82614135742188, |
|
"loss": 0.7174, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.475138783454895, |
|
"rewards/margins": 0.015569500625133514, |
|
"rewards/rejected": -0.49070829153060913, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996555641029101e-05, |
|
"logits/chosen": -2.4807751178741455, |
|
"logits/rejected": -2.5052688121795654, |
|
"logps/chosen": -218.75250244140625, |
|
"logps/rejected": -202.75149536132812, |
|
"loss": 0.7541, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5363876819610596, |
|
"rewards/margins": -0.06761842221021652, |
|
"rewards/rejected": -0.4687691926956177, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996500764518923e-05, |
|
"logits/chosen": -2.5997631549835205, |
|
"logits/rejected": -2.6119892597198486, |
|
"logps/chosen": -212.090087890625, |
|
"logps/rejected": -220.1112823486328, |
|
"loss": 0.7185, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5663864016532898, |
|
"rewards/margins": -0.016858436167240143, |
|
"rewards/rejected": -0.5495280027389526, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9964454546089026e-05, |
|
"logits/chosen": -2.25441837310791, |
|
"logits/rejected": -2.251708507537842, |
|
"logps/chosen": -216.14996337890625, |
|
"logps/rejected": -218.1949005126953, |
|
"loss": 0.7617, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.46758705377578735, |
|
"rewards/margins": -0.09628176689147949, |
|
"rewards/rejected": -0.37130531668663025, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996389711308639e-05, |
|
"logits/chosen": -2.475829601287842, |
|
"logits/rejected": -2.4804282188415527, |
|
"logps/chosen": -197.95965576171875, |
|
"logps/rejected": -227.29605102539062, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2889898717403412, |
|
"rewards/margins": 0.18446122109889984, |
|
"rewards/rejected": -0.47345107793807983, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996333534627809e-05, |
|
"logits/chosen": -2.4503121376037598, |
|
"logits/rejected": -2.40040922164917, |
|
"logps/chosen": -190.1997833251953, |
|
"logps/rejected": -170.68679809570312, |
|
"loss": 0.7168, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4417100250720978, |
|
"rewards/margins": -0.0062090009450912476, |
|
"rewards/rejected": -0.4355010688304901, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996276924576169e-05, |
|
"logits/chosen": -2.5104312896728516, |
|
"logits/rejected": -2.5510809421539307, |
|
"logps/chosen": -134.1800079345703, |
|
"logps/rejected": -153.13365173339844, |
|
"loss": 0.7468, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.42858752608299255, |
|
"rewards/margins": -0.08554935455322266, |
|
"rewards/rejected": -0.3430381715297699, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996219881163543e-05, |
|
"logits/chosen": -2.456216335296631, |
|
"logits/rejected": -2.4285402297973633, |
|
"logps/chosen": -209.37985229492188, |
|
"logps/rejected": -195.06568908691406, |
|
"loss": 0.7159, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.46927523612976074, |
|
"rewards/margins": -0.0166518222540617, |
|
"rewards/rejected": -0.4526233971118927, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996162404399835e-05, |
|
"logits/chosen": -2.450845956802368, |
|
"logits/rejected": -2.4209864139556885, |
|
"logps/chosen": -198.9377899169922, |
|
"logps/rejected": -203.7089385986328, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4800047278404236, |
|
"rewards/margins": 0.09784451127052307, |
|
"rewards/rejected": -0.5778492093086243, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.996104494295024e-05, |
|
"logits/chosen": -2.5744130611419678, |
|
"logits/rejected": -2.641494035720825, |
|
"logps/chosen": -182.59878540039062, |
|
"logps/rejected": -207.31797790527344, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4756154417991638, |
|
"rewards/margins": 0.08592241257429123, |
|
"rewards/rejected": -0.5615378618240356, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.996046150859161e-05, |
|
"logits/chosen": -2.0956687927246094, |
|
"logits/rejected": -2.1018872261047363, |
|
"logps/chosen": -200.86215209960938, |
|
"logps/rejected": -221.92138671875, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.488430380821228, |
|
"rewards/margins": 0.1436658799648285, |
|
"rewards/rejected": -0.6320962905883789, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9959873741023774e-05, |
|
"logits/chosen": -2.3680927753448486, |
|
"logits/rejected": -2.415205240249634, |
|
"logps/chosen": -163.829833984375, |
|
"logps/rejected": -182.74481201171875, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.35495901107788086, |
|
"rewards/margins": 0.03828255832195282, |
|
"rewards/rejected": -0.3932415544986725, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.995928164034876e-05, |
|
"logits/chosen": -2.5983476638793945, |
|
"logits/rejected": -2.7171671390533447, |
|
"logps/chosen": -189.3108673095703, |
|
"logps/rejected": -195.94888305664062, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4241011142730713, |
|
"rewards/margins": 0.0877021998167038, |
|
"rewards/rejected": -0.5118032693862915, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.995868520666936e-05, |
|
"logits/chosen": -2.510660171508789, |
|
"logits/rejected": -2.5224432945251465, |
|
"logps/chosen": -227.51687622070312, |
|
"logps/rejected": -231.05897521972656, |
|
"loss": 0.7269, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.40695255994796753, |
|
"rewards/margins": -0.046068765223026276, |
|
"rewards/rejected": -0.36088380217552185, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9958084440089095e-05, |
|
"logits/chosen": -2.256051540374756, |
|
"logits/rejected": -2.2608485221862793, |
|
"logps/chosen": -209.19252014160156, |
|
"logps/rejected": -206.2628631591797, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3002999424934387, |
|
"rewards/margins": 0.18500341475009918, |
|
"rewards/rejected": -0.4853033423423767, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.995747934071229e-05, |
|
"logits/chosen": -2.8373260498046875, |
|
"logits/rejected": -2.8713366985321045, |
|
"logps/chosen": -186.55723571777344, |
|
"logps/rejected": -221.16917419433594, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3408931493759155, |
|
"rewards/margins": 0.18887601792812347, |
|
"rewards/rejected": -0.5297691822052002, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995686990864398e-05, |
|
"logits/chosen": -2.328822135925293, |
|
"logits/rejected": -2.4501209259033203, |
|
"logps/chosen": -164.64404296875, |
|
"logps/rejected": -164.53396606445312, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5059252977371216, |
|
"rewards/margins": 0.046310462057590485, |
|
"rewards/rejected": -0.5522357225418091, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995625614398996e-05, |
|
"logits/chosen": -2.254148006439209, |
|
"logits/rejected": -2.45564603805542, |
|
"logps/chosen": -215.61080932617188, |
|
"logps/rejected": -236.83526611328125, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3230094313621521, |
|
"rewards/margins": 0.2154739797115326, |
|
"rewards/rejected": -0.5384833812713623, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995563804685678e-05, |
|
"logits/chosen": -2.4298973083496094, |
|
"logits/rejected": -2.6152379512786865, |
|
"logps/chosen": -171.45925903320312, |
|
"logps/rejected": -215.27342224121094, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5956869721412659, |
|
"rewards/margins": 0.1696903109550476, |
|
"rewards/rejected": -0.7653772830963135, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995501561735176e-05, |
|
"logits/chosen": -2.5064899921417236, |
|
"logits/rejected": -2.667478561401367, |
|
"logps/chosen": -201.28167724609375, |
|
"logps/rejected": -200.6697998046875, |
|
"loss": 0.7252, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.50804603099823, |
|
"rewards/margins": -0.021207518875598907, |
|
"rewards/rejected": -0.48683851957321167, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995438885558294e-05, |
|
"logits/chosen": -2.575934886932373, |
|
"logits/rejected": -2.5915310382843018, |
|
"logps/chosen": -241.2596893310547, |
|
"logps/rejected": -236.16864013671875, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.41618824005126953, |
|
"rewards/margins": 0.21841469407081604, |
|
"rewards/rejected": -0.6346028447151184, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995375776165913e-05, |
|
"logits/chosen": -2.6145882606506348, |
|
"logits/rejected": -2.6391665935516357, |
|
"logps/chosen": -230.53524780273438, |
|
"logps/rejected": -228.5028076171875, |
|
"loss": 0.7431, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5892909169197083, |
|
"rewards/margins": -0.044729601591825485, |
|
"rewards/rejected": -0.5445613265037537, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.995312233568989e-05, |
|
"logits/chosen": -2.279015302658081, |
|
"logits/rejected": -2.4627904891967773, |
|
"logps/chosen": -205.08226013183594, |
|
"logps/rejected": -245.180908203125, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.37771591544151306, |
|
"rewards/margins": 0.17377406358718872, |
|
"rewards/rejected": -0.5514900088310242, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9952482577785545e-05, |
|
"logits/chosen": -2.4535770416259766, |
|
"logits/rejected": -2.5108795166015625, |
|
"logps/chosen": -186.94288635253906, |
|
"logps/rejected": -191.04786682128906, |
|
"loss": 0.7335, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6674529314041138, |
|
"rewards/margins": -0.03079444169998169, |
|
"rewards/rejected": -0.6366585493087769, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9951838488057134e-05, |
|
"logits/chosen": -2.2177467346191406, |
|
"logits/rejected": -2.3985226154327393, |
|
"logps/chosen": -151.24977111816406, |
|
"logps/rejected": -185.8676300048828, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3900182843208313, |
|
"rewards/margins": 0.13504652678966522, |
|
"rewards/rejected": -0.5250648260116577, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9951190066616495e-05, |
|
"logits/chosen": -2.42427396774292, |
|
"logits/rejected": -2.4583323001861572, |
|
"logps/chosen": -255.53150939941406, |
|
"logps/rejected": -261.932373046875, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3976048231124878, |
|
"rewards/margins": 0.12972235679626465, |
|
"rewards/rejected": -0.5273271799087524, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.995053731357618e-05, |
|
"logits/chosen": -2.462376832962036, |
|
"logits/rejected": -2.568171262741089, |
|
"logps/chosen": -197.1337432861328, |
|
"logps/rejected": -202.1964569091797, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6016325354576111, |
|
"rewards/margins": 0.15742458403110504, |
|
"rewards/rejected": -0.7590572237968445, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9949880229049526e-05, |
|
"logits/chosen": -2.4696521759033203, |
|
"logits/rejected": -2.4089298248291016, |
|
"logps/chosen": -187.97976684570312, |
|
"logps/rejected": -178.5655059814453, |
|
"loss": 0.7341, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43453776836395264, |
|
"rewards/margins": -0.025840837508440018, |
|
"rewards/rejected": -0.4086969494819641, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.994921881315059e-05, |
|
"logits/chosen": -2.292558431625366, |
|
"logits/rejected": -2.199901580810547, |
|
"logps/chosen": -192.6279754638672, |
|
"logps/rejected": -203.34487915039062, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48495134711265564, |
|
"rewards/margins": 0.07142490148544312, |
|
"rewards/rejected": -0.5563762784004211, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9948553065994197e-05, |
|
"logits/chosen": -2.231633424758911, |
|
"logits/rejected": -2.2734503746032715, |
|
"logps/chosen": -184.4136962890625, |
|
"logps/rejected": -194.4268341064453, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.44051364064216614, |
|
"rewards/margins": 0.19739174842834473, |
|
"rewards/rejected": -0.6379053592681885, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.994788298769593e-05, |
|
"logits/chosen": -2.5150344371795654, |
|
"logits/rejected": -2.4598844051361084, |
|
"logps/chosen": -238.438232421875, |
|
"logps/rejected": -217.13897705078125, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.43908870220184326, |
|
"rewards/margins": 0.03722059726715088, |
|
"rewards/rejected": -0.47630926966667175, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.994720857837211e-05, |
|
"logits/chosen": -2.2310400009155273, |
|
"logits/rejected": -2.1882810592651367, |
|
"logps/chosen": -184.02981567382812, |
|
"logps/rejected": -208.0157470703125, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5550325512886047, |
|
"rewards/margins": 0.08377734571695328, |
|
"rewards/rejected": -0.638809859752655, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.994652983813982e-05, |
|
"logits/chosen": -2.4377846717834473, |
|
"logits/rejected": -2.5074000358581543, |
|
"logps/chosen": -187.0503387451172, |
|
"logps/rejected": -195.54049682617188, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5530596971511841, |
|
"rewards/margins": 0.2584923803806305, |
|
"rewards/rejected": -0.811552107334137, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.994584676711689e-05, |
|
"logits/chosen": -2.464853286743164, |
|
"logits/rejected": -2.504178047180176, |
|
"logps/chosen": -186.5401611328125, |
|
"logps/rejected": -227.3590087890625, |
|
"loss": 0.7102, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5265690088272095, |
|
"rewards/margins": 0.009033482521772385, |
|
"rewards/rejected": -0.5356025695800781, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.994515936542191e-05, |
|
"logits/chosen": -2.2838244438171387, |
|
"logits/rejected": -2.3256828784942627, |
|
"logps/chosen": -173.54270935058594, |
|
"logps/rejected": -166.37547302246094, |
|
"loss": 0.7262, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4617432951927185, |
|
"rewards/margins": -0.03625304624438286, |
|
"rewards/rejected": -0.42549026012420654, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.99444676331742e-05, |
|
"logits/chosen": -2.407790422439575, |
|
"logits/rejected": -2.3871612548828125, |
|
"logps/chosen": -248.3179931640625, |
|
"logps/rejected": -235.79884338378906, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.458143413066864, |
|
"rewards/margins": 0.10666979104280472, |
|
"rewards/rejected": -0.5648132562637329, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9943771570493856e-05, |
|
"logits/chosen": -2.560073137283325, |
|
"logits/rejected": -2.51522159576416, |
|
"logps/chosen": -191.05734252929688, |
|
"logps/rejected": -195.287841796875, |
|
"loss": 0.7288, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6246404051780701, |
|
"rewards/margins": -0.01798657327890396, |
|
"rewards/rejected": -0.6066538095474243, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9943071177501724e-05, |
|
"logits/chosen": -2.364337682723999, |
|
"logits/rejected": -2.39058780670166, |
|
"logps/chosen": -200.97915649414062, |
|
"logps/rejected": -183.07151794433594, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5552209615707397, |
|
"rewards/margins": 0.09874990582466125, |
|
"rewards/rejected": -0.6539708375930786, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.994236645431938e-05, |
|
"logits/chosen": -2.6555843353271484, |
|
"logits/rejected": -2.7670748233795166, |
|
"logps/chosen": -244.5951690673828, |
|
"logps/rejected": -246.04124450683594, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.45607852935791016, |
|
"rewards/margins": 0.08103760331869125, |
|
"rewards/rejected": -0.5371161103248596, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.994165740106918e-05, |
|
"logits/chosen": -2.527970314025879, |
|
"logits/rejected": -2.6416518688201904, |
|
"logps/chosen": -231.80966186523438, |
|
"logps/rejected": -246.3621826171875, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5444769263267517, |
|
"rewards/margins": 0.21878674626350403, |
|
"rewards/rejected": -0.7632635831832886, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.99409440178742e-05, |
|
"logits/chosen": -2.584487199783325, |
|
"logits/rejected": -2.5451977252960205, |
|
"logps/chosen": -184.02330017089844, |
|
"logps/rejected": -187.47161865234375, |
|
"loss": 0.7151, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6932749152183533, |
|
"rewards/margins": -0.005857221782207489, |
|
"rewards/rejected": -0.6874176263809204, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9940226304858296e-05, |
|
"logits/chosen": -2.556795597076416, |
|
"logits/rejected": -2.5516929626464844, |
|
"logps/chosen": -187.25588989257812, |
|
"logps/rejected": -206.25067138671875, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.563089907169342, |
|
"rewards/margins": 0.009414192289113998, |
|
"rewards/rejected": -0.5725040435791016, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.993950426214606e-05, |
|
"logits/chosen": -2.3932271003723145, |
|
"logits/rejected": -2.393653154373169, |
|
"logps/chosen": -218.82334899902344, |
|
"logps/rejected": -226.3861541748047, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6887062788009644, |
|
"rewards/margins": 0.031964417546987534, |
|
"rewards/rejected": -0.720670759677887, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.993877788986285e-05, |
|
"logits/chosen": -2.3766613006591797, |
|
"logits/rejected": -2.507246971130371, |
|
"logps/chosen": -165.4411163330078, |
|
"logps/rejected": -199.56741333007812, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.49406641721725464, |
|
"rewards/margins": 0.3596181869506836, |
|
"rewards/rejected": -0.853684663772583, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9938047188134776e-05, |
|
"logits/chosen": -2.4775924682617188, |
|
"logits/rejected": -2.4857354164123535, |
|
"logps/chosen": -195.7847442626953, |
|
"logps/rejected": -216.16220092773438, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5620791912078857, |
|
"rewards/margins": 0.07040975242853165, |
|
"rewards/rejected": -0.632489025592804, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.993731215708866e-05, |
|
"logits/chosen": -2.637573719024658, |
|
"logits/rejected": -2.700226306915283, |
|
"logps/chosen": -200.697509765625, |
|
"logps/rejected": -214.4364471435547, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5127323269844055, |
|
"rewards/margins": 0.24725483357906342, |
|
"rewards/rejected": -0.7599871754646301, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.993657279685212e-05, |
|
"logits/chosen": -2.4737141132354736, |
|
"logits/rejected": -2.576711654663086, |
|
"logps/chosen": -198.31393432617188, |
|
"logps/rejected": -219.9713592529297, |
|
"loss": 0.7162, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7182321548461914, |
|
"rewards/margins": 0.04057084769010544, |
|
"rewards/rejected": -0.7588030099868774, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9935829107553516e-05, |
|
"logits/chosen": -2.3163719177246094, |
|
"logits/rejected": -2.3124592304229736, |
|
"logps/chosen": -198.31076049804688, |
|
"logps/rejected": -210.8530731201172, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5568541288375854, |
|
"rewards/margins": 0.1273648738861084, |
|
"rewards/rejected": -0.6842190623283386, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993508108932195e-05, |
|
"logits/chosen": -2.4512782096862793, |
|
"logits/rejected": -2.508169651031494, |
|
"logps/chosen": -202.17613220214844, |
|
"logps/rejected": -203.27789306640625, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7900542616844177, |
|
"rewards/margins": 0.12749117612838745, |
|
"rewards/rejected": -0.91754549741745, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9934328742287285e-05, |
|
"logits/chosen": -2.391414165496826, |
|
"logits/rejected": -2.3899242877960205, |
|
"logps/chosen": -231.99913024902344, |
|
"logps/rejected": -232.77243041992188, |
|
"loss": 0.7863, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6857910752296448, |
|
"rewards/margins": -0.05944066494703293, |
|
"rewards/rejected": -0.626350462436676, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993357206658011e-05, |
|
"logits/chosen": -2.5768861770629883, |
|
"logits/rejected": -2.6738839149475098, |
|
"logps/chosen": -175.33010864257812, |
|
"logps/rejected": -185.26443481445312, |
|
"loss": 0.7157, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6001064777374268, |
|
"rewards/margins": 0.052334412932395935, |
|
"rewards/rejected": -0.6524409651756287, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993281106233182e-05, |
|
"logits/chosen": -2.3842387199401855, |
|
"logits/rejected": -2.425611972808838, |
|
"logps/chosen": -165.09873962402344, |
|
"logps/rejected": -185.34361267089844, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4550285339355469, |
|
"rewards/margins": 0.2833191156387329, |
|
"rewards/rejected": -0.738347589969635, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9932045729674505e-05, |
|
"logits/chosen": -2.370333671569824, |
|
"logits/rejected": -2.343536376953125, |
|
"logps/chosen": -211.58517456054688, |
|
"logps/rejected": -191.9432373046875, |
|
"loss": 0.7727, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7500544786453247, |
|
"rewards/margins": -0.05222831293940544, |
|
"rewards/rejected": -0.697826087474823, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993127606874104e-05, |
|
"logits/chosen": -2.3698928356170654, |
|
"logits/rejected": -2.459376573562622, |
|
"logps/chosen": -241.92343139648438, |
|
"logps/rejected": -265.6181945800781, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6877275705337524, |
|
"rewards/margins": 0.16285517811775208, |
|
"rewards/rejected": -0.8505828380584717, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9930502079665025e-05, |
|
"logits/chosen": -2.3997507095336914, |
|
"logits/rejected": -2.4078052043914795, |
|
"logps/chosen": -228.6896514892578, |
|
"logps/rejected": -221.53338623046875, |
|
"loss": 0.7984, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6329948306083679, |
|
"rewards/margins": -0.1108212098479271, |
|
"rewards/rejected": -0.5221735835075378, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9929723762580835e-05, |
|
"logits/chosen": -2.498244285583496, |
|
"logits/rejected": -2.571398973464966, |
|
"logps/chosen": -192.15635681152344, |
|
"logps/rejected": -176.19012451171875, |
|
"loss": 0.7497, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5479970574378967, |
|
"rewards/margins": -0.0442587286233902, |
|
"rewards/rejected": -0.5037383437156677, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9928941117623604e-05, |
|
"logits/chosen": -2.6329376697540283, |
|
"logits/rejected": -2.635847806930542, |
|
"logps/chosen": -207.86293029785156, |
|
"logps/rejected": -224.18490600585938, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5313466787338257, |
|
"rewards/margins": 0.07021217793226242, |
|
"rewards/rejected": -0.6015588045120239, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9928154144929175e-05, |
|
"logits/chosen": -2.5966103076934814, |
|
"logits/rejected": -2.571523666381836, |
|
"logps/chosen": -192.47384643554688, |
|
"logps/rejected": -180.02134704589844, |
|
"loss": 0.8799, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8716793656349182, |
|
"rewards/margins": -0.2290811538696289, |
|
"rewards/rejected": -0.6425981521606445, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9927362844634186e-05, |
|
"logits/chosen": -2.6513888835906982, |
|
"logits/rejected": -2.606253147125244, |
|
"logps/chosen": -208.56338500976562, |
|
"logps/rejected": -200.686279296875, |
|
"loss": 0.7609, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5643545389175415, |
|
"rewards/margins": -0.08666092902421951, |
|
"rewards/rejected": -0.4776935577392578, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9926567216876e-05, |
|
"logits/chosen": -2.5372819900512695, |
|
"logits/rejected": -2.5824756622314453, |
|
"logps/chosen": -187.18386840820312, |
|
"logps/rejected": -211.16427612304688, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5520645380020142, |
|
"rewards/margins": 0.10136236250400543, |
|
"rewards/rejected": -0.6534268856048584, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.992576726179274e-05, |
|
"logits/chosen": -2.5787792205810547, |
|
"logits/rejected": -2.5688636302948, |
|
"logps/chosen": -199.59490966796875, |
|
"logps/rejected": -221.02023315429688, |
|
"loss": 0.7033, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5855346918106079, |
|
"rewards/margins": 0.010282933712005615, |
|
"rewards/rejected": -0.5958175659179688, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9924962979523296e-05, |
|
"logits/chosen": -2.588792085647583, |
|
"logits/rejected": -2.5663416385650635, |
|
"logps/chosen": -197.04592895507812, |
|
"logps/rejected": -184.49290466308594, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.46529656648635864, |
|
"rewards/margins": 0.1208263412117958, |
|
"rewards/rejected": -0.5861228704452515, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.992415437020727e-05, |
|
"logits/chosen": -2.6553092002868652, |
|
"logits/rejected": -2.763751268386841, |
|
"logps/chosen": -217.5886993408203, |
|
"logps/rejected": -233.33489990234375, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4947764575481415, |
|
"rewards/margins": 0.2827147841453552, |
|
"rewards/rejected": -0.7774912118911743, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.992334143398506e-05, |
|
"logits/chosen": -2.5335421562194824, |
|
"logits/rejected": -2.600069999694824, |
|
"logps/chosen": -204.98912048339844, |
|
"logps/rejected": -212.0712890625, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5413306355476379, |
|
"rewards/margins": 0.22219431400299072, |
|
"rewards/rejected": -0.7635249495506287, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.992252417099778e-05, |
|
"logits/chosen": -2.638511896133423, |
|
"logits/rejected": -2.600456476211548, |
|
"logps/chosen": -172.59390258789062, |
|
"logps/rejected": -192.1689910888672, |
|
"loss": 0.7194, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5202851891517639, |
|
"rewards/margins": 0.017805464565753937, |
|
"rewards/rejected": -0.5380906462669373, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.992170258138732e-05, |
|
"logits/chosen": -2.5361521244049072, |
|
"logits/rejected": -2.4191856384277344, |
|
"logps/chosen": -177.23558044433594, |
|
"logps/rejected": -169.40321350097656, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20310896635055542, |
|
"rewards/margins": 0.2397383451461792, |
|
"rewards/rejected": -0.4428473711013794, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.99208766652963e-05, |
|
"logits/chosen": -2.727092742919922, |
|
"logits/rejected": -2.613562822341919, |
|
"logps/chosen": -228.9248504638672, |
|
"logps/rejected": -222.22886657714844, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5637404322624207, |
|
"rewards/margins": 0.21417354047298431, |
|
"rewards/rejected": -0.7779139280319214, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.99200464228681e-05, |
|
"logits/chosen": -2.530290126800537, |
|
"logits/rejected": -2.49137282371521, |
|
"logps/chosen": -218.17156982421875, |
|
"logps/rejected": -225.41952514648438, |
|
"loss": 0.7465, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.567755937576294, |
|
"rewards/margins": -0.07816261053085327, |
|
"rewards/rejected": -0.4895933270454407, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9919211854246874e-05, |
|
"logits/chosen": -2.683086633682251, |
|
"logits/rejected": -2.588696002960205, |
|
"logps/chosen": -268.5683898925781, |
|
"logps/rejected": -255.58445739746094, |
|
"loss": 0.7069, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5820844769477844, |
|
"rewards/margins": 0.0005566291511058807, |
|
"rewards/rejected": -0.582641065120697, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9918372959577486e-05, |
|
"logits/chosen": -2.7265396118164062, |
|
"logits/rejected": -2.7635843753814697, |
|
"logps/chosen": -206.09573364257812, |
|
"logps/rejected": -214.75784301757812, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6429769396781921, |
|
"rewards/margins": 0.1274566352367401, |
|
"rewards/rejected": -0.7704335451126099, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.9917529739005574e-05, |
|
"logits/chosen": -2.507643938064575, |
|
"logits/rejected": -2.4392926692962646, |
|
"logps/chosen": -219.877197265625, |
|
"logps/rejected": -217.41537475585938, |
|
"loss": 0.729, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5904738306999207, |
|
"rewards/margins": -0.04813346266746521, |
|
"rewards/rejected": -0.5423403382301331, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.991668219267752e-05, |
|
"logits/chosen": -2.613016128540039, |
|
"logits/rejected": -2.7239303588867188, |
|
"logps/chosen": -235.21534729003906, |
|
"logps/rejected": -266.6222839355469, |
|
"loss": 0.7173, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6593362092971802, |
|
"rewards/margins": 0.04644089192152023, |
|
"rewards/rejected": -0.7057771682739258, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.991583032074047e-05, |
|
"logits/chosen": -2.774909257888794, |
|
"logits/rejected": -2.7659873962402344, |
|
"logps/chosen": -250.04261779785156, |
|
"logps/rejected": -242.6505584716797, |
|
"loss": 0.7062, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6024019718170166, |
|
"rewards/margins": 0.010418819263577461, |
|
"rewards/rejected": -0.6128207445144653, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.99149741233423e-05, |
|
"logits/chosen": -2.447873592376709, |
|
"logits/rejected": -2.468458414077759, |
|
"logps/chosen": -214.55784606933594, |
|
"logps/rejected": -189.22238159179688, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6992676854133606, |
|
"rewards/margins": 0.09424494951963425, |
|
"rewards/rejected": -0.7935126423835754, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.9914113600631665e-05, |
|
"logits/chosen": -2.5641584396362305, |
|
"logits/rejected": -2.602008104324341, |
|
"logps/chosen": -310.73077392578125, |
|
"logps/rejected": -290.17352294921875, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5222407579421997, |
|
"rewards/margins": 0.06505537033081055, |
|
"rewards/rejected": -0.587296187877655, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.991324875275794e-05, |
|
"logits/chosen": -2.5343639850616455, |
|
"logits/rejected": -2.610719919204712, |
|
"logps/chosen": -199.84573364257812, |
|
"logps/rejected": -203.96478271484375, |
|
"loss": 0.718, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5667111873626709, |
|
"rewards/margins": -0.001658361405134201, |
|
"rewards/rejected": -0.5650528073310852, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.991237957987127e-05, |
|
"logits/chosen": -2.379605293273926, |
|
"logits/rejected": -2.3678669929504395, |
|
"logps/chosen": -211.86761474609375, |
|
"logps/rejected": -213.41464233398438, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46346691250801086, |
|
"rewards/margins": 0.20269399881362915, |
|
"rewards/rejected": -0.6661609411239624, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.991150608212254e-05, |
|
"logits/chosen": -2.6828835010528564, |
|
"logits/rejected": -2.7126364707946777, |
|
"logps/chosen": -196.61216735839844, |
|
"logps/rejected": -223.1612548828125, |
|
"loss": 0.7503, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5514590740203857, |
|
"rewards/margins": -0.08642277121543884, |
|
"rewards/rejected": -0.4650362730026245, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9910628259663404e-05, |
|
"logits/chosen": -2.4960732460021973, |
|
"logits/rejected": -2.608139991760254, |
|
"logps/chosen": -197.00277709960938, |
|
"logps/rejected": -196.33804321289062, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.35482868552207947, |
|
"rewards/margins": 0.38187745213508606, |
|
"rewards/rejected": -0.7367061376571655, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.990974611264625e-05, |
|
"logits/chosen": -2.3627309799194336, |
|
"logits/rejected": -2.3418447971343994, |
|
"logps/chosen": -232.52740478515625, |
|
"logps/rejected": -227.15753173828125, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6158957481384277, |
|
"rewards/margins": 0.18052013218402863, |
|
"rewards/rejected": -0.7964158654212952, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.990885964122421e-05, |
|
"logits/chosen": -2.594547748565674, |
|
"logits/rejected": -2.6074790954589844, |
|
"logps/chosen": -170.09300231933594, |
|
"logps/rejected": -190.0100860595703, |
|
"loss": 0.7467, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5137202739715576, |
|
"rewards/margins": -0.07406175136566162, |
|
"rewards/rejected": -0.439658522605896, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.990796884555119e-05, |
|
"logits/chosen": -2.632927417755127, |
|
"logits/rejected": -2.6694023609161377, |
|
"logps/chosen": -215.45489501953125, |
|
"logps/rejected": -203.8738555908203, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4411565661430359, |
|
"rewards/margins": 0.1269129067659378, |
|
"rewards/rejected": -0.5680694580078125, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9907073725781836e-05, |
|
"logits/chosen": -2.4353699684143066, |
|
"logits/rejected": -2.537163734436035, |
|
"logps/chosen": -279.5519714355469, |
|
"logps/rejected": -290.08135986328125, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.32637274265289307, |
|
"rewards/margins": 0.053382109850645065, |
|
"rewards/rejected": -0.379754900932312, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9906174282071535e-05, |
|
"logits/chosen": -2.586729049682617, |
|
"logits/rejected": -2.583314895629883, |
|
"logps/chosen": -197.04298400878906, |
|
"logps/rejected": -195.334228515625, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.515608549118042, |
|
"rewards/margins": 0.06315431743860245, |
|
"rewards/rejected": -0.5787628293037415, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.990527051457644e-05, |
|
"logits/chosen": -2.45725154876709, |
|
"logits/rejected": -2.445600748062134, |
|
"logps/chosen": -204.52867126464844, |
|
"logps/rejected": -290.3397216796875, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6991719007492065, |
|
"rewards/margins": 0.08316923677921295, |
|
"rewards/rejected": -0.7823411226272583, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9904362423453446e-05, |
|
"logits/chosen": -2.604722738265991, |
|
"logits/rejected": -2.6120543479919434, |
|
"logps/chosen": -193.1224822998047, |
|
"logps/rejected": -189.89694213867188, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3250601589679718, |
|
"rewards/margins": 0.19382469356060028, |
|
"rewards/rejected": -0.5188848376274109, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.990345000886019e-05, |
|
"logits/chosen": -2.6838831901550293, |
|
"logits/rejected": -2.6718058586120605, |
|
"logps/chosen": -177.02090454101562, |
|
"logps/rejected": -173.667724609375, |
|
"loss": 0.6993, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5827906131744385, |
|
"rewards/margins": 0.027324199676513672, |
|
"rewards/rejected": -0.6101148128509521, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.990253327095509e-05, |
|
"logits/chosen": -2.60197377204895, |
|
"logits/rejected": -2.522291660308838, |
|
"logps/chosen": -173.03811645507812, |
|
"logps/rejected": -149.1508026123047, |
|
"loss": 0.7939, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.4563007950782776, |
|
"rewards/margins": -0.13545696437358856, |
|
"rewards/rejected": -0.32084381580352783, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9901612209897275e-05, |
|
"logits/chosen": -2.643686532974243, |
|
"logits/rejected": -2.6443119049072266, |
|
"logps/chosen": -187.93931579589844, |
|
"logps/rejected": -208.4308624267578, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6419711709022522, |
|
"rewards/margins": 0.10658866912126541, |
|
"rewards/rejected": -0.748559832572937, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.990068682584666e-05, |
|
"logits/chosen": -2.469681739807129, |
|
"logits/rejected": -2.4439382553100586, |
|
"logps/chosen": -186.7505340576172, |
|
"logps/rejected": -204.6378936767578, |
|
"loss": 0.7857, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5965012311935425, |
|
"rewards/margins": -0.12374071031808853, |
|
"rewards/rejected": -0.47276046872138977, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.989975711896388e-05, |
|
"logits/chosen": -2.3346047401428223, |
|
"logits/rejected": -2.323143720626831, |
|
"logps/chosen": -223.70867919921875, |
|
"logps/rejected": -238.58311462402344, |
|
"loss": 0.7061, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5982979536056519, |
|
"rewards/margins": -0.005420597270131111, |
|
"rewards/rejected": -0.5928773283958435, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.989882308941034e-05, |
|
"logits/chosen": -2.759549379348755, |
|
"logits/rejected": -2.7803878784179688, |
|
"logps/chosen": -239.48947143554688, |
|
"logps/rejected": -181.70718383789062, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46423962712287903, |
|
"rewards/margins": 0.11677178740501404, |
|
"rewards/rejected": -0.5810113549232483, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9897884737348196e-05, |
|
"logits/chosen": -2.485264778137207, |
|
"logits/rejected": -2.5983753204345703, |
|
"logps/chosen": -175.43971252441406, |
|
"logps/rejected": -219.08517456054688, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5487624406814575, |
|
"rewards/margins": 0.0002348199486732483, |
|
"rewards/rejected": -0.5489972829818726, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989694206294035e-05, |
|
"logits/chosen": -2.6732518672943115, |
|
"logits/rejected": -2.6291115283966064, |
|
"logps/chosen": -193.59197998046875, |
|
"logps/rejected": -207.9779052734375, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4739716649055481, |
|
"rewards/margins": 0.12173515558242798, |
|
"rewards/rejected": -0.5957068204879761, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989599506635044e-05, |
|
"logits/chosen": -2.4193854331970215, |
|
"logits/rejected": -2.565293073654175, |
|
"logps/chosen": -263.6156311035156, |
|
"logps/rejected": -251.44192504882812, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.47510460019111633, |
|
"rewards/margins": 0.25979098677635193, |
|
"rewards/rejected": -0.734895646572113, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989504374774288e-05, |
|
"logits/chosen": -2.3681142330169678, |
|
"logits/rejected": -2.4691953659057617, |
|
"logps/chosen": -163.5777587890625, |
|
"logps/rejected": -194.8612060546875, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4559204876422882, |
|
"rewards/margins": 0.19214320182800293, |
|
"rewards/rejected": -0.648063600063324, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989408810728281e-05, |
|
"logits/chosen": -2.477172613143921, |
|
"logits/rejected": -2.5207676887512207, |
|
"logps/chosen": -141.12867736816406, |
|
"logps/rejected": -146.79791259765625, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.22462037205696106, |
|
"rewards/margins": 0.09705987572669983, |
|
"rewards/rejected": -0.3216802775859833, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989312814513614e-05, |
|
"logits/chosen": -2.534242630004883, |
|
"logits/rejected": -2.47320294380188, |
|
"logps/chosen": -223.2980499267578, |
|
"logps/rejected": -224.9616241455078, |
|
"loss": 0.7372, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5472955107688904, |
|
"rewards/margins": -0.037168219685554504, |
|
"rewards/rejected": -0.5101273059844971, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.989216386146953e-05, |
|
"logits/chosen": -2.3438358306884766, |
|
"logits/rejected": -2.623857021331787, |
|
"logps/chosen": -190.59585571289062, |
|
"logps/rejected": -234.1730194091797, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3276049494743347, |
|
"rewards/margins": 0.06552110612392426, |
|
"rewards/rejected": -0.39312607049942017, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.9891195256450366e-05, |
|
"logits/chosen": -2.210477828979492, |
|
"logits/rejected": -2.195955991744995, |
|
"logps/chosen": -185.43902587890625, |
|
"logps/rejected": -224.94488525390625, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.38528165221214294, |
|
"rewards/margins": 0.09291733056306839, |
|
"rewards/rejected": -0.47819897532463074, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.989022233024681e-05, |
|
"logits/chosen": -2.7818360328674316, |
|
"logits/rejected": -2.764118194580078, |
|
"logps/chosen": -224.84347534179688, |
|
"logps/rejected": -205.856201171875, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3358863592147827, |
|
"rewards/margins": 0.23018960654735565, |
|
"rewards/rejected": -0.5660759806632996, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9889245083027755e-05, |
|
"logits/chosen": -2.3800809383392334, |
|
"logits/rejected": -2.3793954849243164, |
|
"logps/chosen": -168.0996551513672, |
|
"logps/rejected": -197.4810791015625, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.36885035037994385, |
|
"rewards/margins": 0.015535619109869003, |
|
"rewards/rejected": -0.3843860328197479, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.988826351496287e-05, |
|
"logits/chosen": -2.5323662757873535, |
|
"logits/rejected": -2.5999341011047363, |
|
"logps/chosen": -215.6177978515625, |
|
"logps/rejected": -255.65231323242188, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6092446446418762, |
|
"rewards/margins": 0.09709976613521576, |
|
"rewards/rejected": -0.7063443660736084, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.988727762622255e-05, |
|
"logits/chosen": -2.7337067127227783, |
|
"logits/rejected": -2.742793083190918, |
|
"logps/chosen": -214.25506591796875, |
|
"logps/rejected": -221.33468627929688, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6302060484886169, |
|
"rewards/margins": 0.12371520698070526, |
|
"rewards/rejected": -0.7539212107658386, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9886287416977936e-05, |
|
"logits/chosen": -2.510671377182007, |
|
"logits/rejected": -2.5760793685913086, |
|
"logps/chosen": -169.99656677246094, |
|
"logps/rejected": -181.1496124267578, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4545098841190338, |
|
"rewards/margins": 0.014530147425830364, |
|
"rewards/rejected": -0.46904003620147705, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.988529288740096e-05, |
|
"logits/chosen": -2.6438777446746826, |
|
"logits/rejected": -2.6174240112304688, |
|
"logps/chosen": -187.69232177734375, |
|
"logps/rejected": -198.16409301757812, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.37409377098083496, |
|
"rewards/margins": 0.1910334676504135, |
|
"rewards/rejected": -0.5651272535324097, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9884294037664245e-05, |
|
"logits/chosen": -2.456332206726074, |
|
"logits/rejected": -2.4327914714813232, |
|
"logps/chosen": -221.3645782470703, |
|
"logps/rejected": -220.4224853515625, |
|
"loss": 0.726, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9011685848236084, |
|
"rewards/margins": 0.07650195062160492, |
|
"rewards/rejected": -0.9776705503463745, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.988329086794122e-05, |
|
"logits/chosen": -2.463491439819336, |
|
"logits/rejected": -2.49444842338562, |
|
"logps/chosen": -197.96144104003906, |
|
"logps/rejected": -214.404296875, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.42488348484039307, |
|
"rewards/margins": 0.11507010459899902, |
|
"rewards/rejected": -0.5399536490440369, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9882283378406015e-05, |
|
"logits/chosen": -2.5106682777404785, |
|
"logits/rejected": -2.5157594680786133, |
|
"logps/chosen": -167.09947204589844, |
|
"logps/rejected": -174.6180419921875, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4473687410354614, |
|
"rewards/margins": 0.22422267496585846, |
|
"rewards/rejected": -0.6715914011001587, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.988127156923355e-05, |
|
"logits/chosen": -2.4497499465942383, |
|
"logits/rejected": -2.491909980773926, |
|
"logps/chosen": -167.1006622314453, |
|
"logps/rejected": -165.177978515625, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5522390604019165, |
|
"rewards/margins": 0.06803999096155167, |
|
"rewards/rejected": -0.6202789545059204, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9880255440599476e-05, |
|
"logits/chosen": -2.445422649383545, |
|
"logits/rejected": -2.4025139808654785, |
|
"logps/chosen": -199.83444213867188, |
|
"logps/rejected": -203.16355895996094, |
|
"loss": 0.7404, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.658203125, |
|
"rewards/margins": -0.06064491346478462, |
|
"rewards/rejected": -0.5975580811500549, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.987923499268018e-05, |
|
"logits/chosen": -2.540513753890991, |
|
"logits/rejected": -2.6426820755004883, |
|
"logps/chosen": -181.93406677246094, |
|
"logps/rejected": -203.6474609375, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.45222824811935425, |
|
"rewards/margins": 0.17588049173355103, |
|
"rewards/rejected": -0.6281087398529053, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.987821022565284e-05, |
|
"logits/chosen": -2.608978509902954, |
|
"logits/rejected": -2.632282257080078, |
|
"logps/chosen": -195.6852569580078, |
|
"logps/rejected": -216.0919952392578, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4888995289802551, |
|
"rewards/margins": 0.3496631681919098, |
|
"rewards/rejected": -0.8385627269744873, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.987718113969534e-05, |
|
"logits/chosen": -2.637908458709717, |
|
"logits/rejected": -2.6849193572998047, |
|
"logps/chosen": -307.9834289550781, |
|
"logps/rejected": -321.95867919921875, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4099658131599426, |
|
"rewards/margins": 0.3652358055114746, |
|
"rewards/rejected": -0.7752016186714172, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9876147734986335e-05, |
|
"logits/chosen": -2.6805038452148438, |
|
"logits/rejected": -2.659087657928467, |
|
"logps/chosen": -235.20855712890625, |
|
"logps/rejected": -222.38107299804688, |
|
"loss": 0.7633, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7779167294502258, |
|
"rewards/margins": -0.11177889257669449, |
|
"rewards/rejected": -0.6661379337310791, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.987511001170523e-05, |
|
"logits/chosen": -2.4979588985443115, |
|
"logits/rejected": -2.509103775024414, |
|
"logps/chosen": -205.0665283203125, |
|
"logps/rejected": -197.5661163330078, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.603629469871521, |
|
"rewards/margins": 0.3201472759246826, |
|
"rewards/rejected": -0.9237766861915588, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.987406797003218e-05, |
|
"logits/chosen": -2.6034300327301025, |
|
"logits/rejected": -2.606010913848877, |
|
"logps/chosen": -212.59959411621094, |
|
"logps/rejected": -266.6811218261719, |
|
"loss": 0.7099, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7883586287498474, |
|
"rewards/margins": 0.047954261302948, |
|
"rewards/rejected": -0.8363128900527954, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.987302161014808e-05, |
|
"logits/chosen": -2.26737380027771, |
|
"logits/rejected": -2.2702267169952393, |
|
"logps/chosen": -149.69259643554688, |
|
"logps/rejected": -207.59146118164062, |
|
"loss": 0.718, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6731799244880676, |
|
"rewards/margins": 0.02036801353096962, |
|
"rewards/rejected": -0.6935478448867798, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9871970932234586e-05, |
|
"logits/chosen": -2.3482635021209717, |
|
"logits/rejected": -2.363096237182617, |
|
"logps/chosen": -196.5614013671875, |
|
"logps/rejected": -206.21414184570312, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5754539966583252, |
|
"rewards/margins": 0.2127307951450348, |
|
"rewards/rejected": -0.7881847620010376, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9870915936474095e-05, |
|
"logits/chosen": -2.7459514141082764, |
|
"logits/rejected": -2.7396438121795654, |
|
"logps/chosen": -206.6090087890625, |
|
"logps/rejected": -211.6893768310547, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7424849271774292, |
|
"rewards/margins": 0.08226263523101807, |
|
"rewards/rejected": -0.8247475624084473, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.986985662304976e-05, |
|
"logits/chosen": -2.769526243209839, |
|
"logits/rejected": -2.807554244995117, |
|
"logps/chosen": -184.0075225830078, |
|
"logps/rejected": -225.2920379638672, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7790610194206238, |
|
"rewards/margins": 0.24687500298023224, |
|
"rewards/rejected": -1.0259360074996948, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9868792992145484e-05, |
|
"logits/chosen": -2.6607320308685303, |
|
"logits/rejected": -2.6989316940307617, |
|
"logps/chosen": -218.40530395507812, |
|
"logps/rejected": -227.24240112304688, |
|
"loss": 0.7374, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6406981945037842, |
|
"rewards/margins": 0.009773064404726028, |
|
"rewards/rejected": -0.6504712104797363, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9867725043945904e-05, |
|
"logits/chosen": -2.5469489097595215, |
|
"logits/rejected": -2.539520740509033, |
|
"logps/chosen": -222.92864990234375, |
|
"logps/rejected": -236.92977905273438, |
|
"loss": 0.6992, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.640974223613739, |
|
"rewards/margins": 0.09771876782178879, |
|
"rewards/rejected": -0.7386929988861084, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9866652778636436e-05, |
|
"logits/chosen": -2.545572280883789, |
|
"logits/rejected": -2.541029453277588, |
|
"logps/chosen": -182.341552734375, |
|
"logps/rejected": -197.535888671875, |
|
"loss": 0.7149, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7396100163459778, |
|
"rewards/margins": 0.06020259112119675, |
|
"rewards/rejected": -0.7998126149177551, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.986557619640322e-05, |
|
"logits/chosen": -2.479189157485962, |
|
"logits/rejected": -2.504956007003784, |
|
"logps/chosen": -184.10472106933594, |
|
"logps/rejected": -184.99070739746094, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6914107799530029, |
|
"rewards/margins": 0.33003267645835876, |
|
"rewards/rejected": -1.021443486213684, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.986449529743314e-05, |
|
"logits/chosen": -2.7267708778381348, |
|
"logits/rejected": -2.711848735809326, |
|
"logps/chosen": -225.98228454589844, |
|
"logps/rejected": -204.31724548339844, |
|
"loss": 0.7177, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8359745740890503, |
|
"rewards/margins": 0.011185385286808014, |
|
"rewards/rejected": -0.8471599221229553, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9863410081913875e-05, |
|
"logits/chosen": -2.691619396209717, |
|
"logits/rejected": -2.669311285018921, |
|
"logps/chosen": -157.73721313476562, |
|
"logps/rejected": -161.97061157226562, |
|
"loss": 0.7285, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5839475393295288, |
|
"rewards/margins": -0.027979355305433273, |
|
"rewards/rejected": -0.555968165397644, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.98623205500338e-05, |
|
"logits/chosen": -2.6664159297943115, |
|
"logits/rejected": -2.653743267059326, |
|
"logps/chosen": -155.03836059570312, |
|
"logps/rejected": -141.4946746826172, |
|
"loss": 0.7569, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5298622250556946, |
|
"rewards/margins": -0.08286149799823761, |
|
"rewards/rejected": -0.44700077176094055, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.986122670198205e-05, |
|
"logits/chosen": -2.588876485824585, |
|
"logits/rejected": -2.706108808517456, |
|
"logps/chosen": -170.23751831054688, |
|
"logps/rejected": -172.2996826171875, |
|
"loss": 0.8284, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9628427624702454, |
|
"rewards/margins": -0.18341386318206787, |
|
"rewards/rejected": -0.779429018497467, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9860128537948555e-05, |
|
"logits/chosen": -2.8683338165283203, |
|
"logits/rejected": -2.814868211746216, |
|
"logps/chosen": -194.464599609375, |
|
"logps/rejected": -197.70077514648438, |
|
"loss": 0.7405, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6296394467353821, |
|
"rewards/margins": -0.0025831498205661774, |
|
"rewards/rejected": -0.6270563006401062, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9859026058123925e-05, |
|
"logits/chosen": -2.978065013885498, |
|
"logits/rejected": -2.9368367195129395, |
|
"logps/chosen": -217.73974609375, |
|
"logps/rejected": -220.19183349609375, |
|
"loss": 0.7014, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6989356279373169, |
|
"rewards/margins": 0.007979679852724075, |
|
"rewards/rejected": -0.7069153189659119, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985791926269958e-05, |
|
"logits/chosen": -2.666344165802002, |
|
"logits/rejected": -2.8283472061157227, |
|
"logps/chosen": -163.43722534179688, |
|
"logps/rejected": -190.4180908203125, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5998314023017883, |
|
"rewards/margins": 0.11102181673049927, |
|
"rewards/rejected": -0.7108532190322876, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985680815186764e-05, |
|
"logits/chosen": -2.6322436332702637, |
|
"logits/rejected": -2.6792352199554443, |
|
"logps/chosen": -225.80972290039062, |
|
"logps/rejected": -221.8551025390625, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6963925957679749, |
|
"rewards/margins": 0.1484571099281311, |
|
"rewards/rejected": -0.844849705696106, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985569272582101e-05, |
|
"logits/chosen": -2.829981803894043, |
|
"logits/rejected": -2.8077688217163086, |
|
"logps/chosen": -245.94029235839844, |
|
"logps/rejected": -279.1697082519531, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6325417757034302, |
|
"rewards/margins": 0.1735651046037674, |
|
"rewards/rejected": -0.8061069250106812, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.9854572984753334e-05, |
|
"logits/chosen": -2.8024842739105225, |
|
"logits/rejected": -2.750588893890381, |
|
"logps/chosen": -161.65240478515625, |
|
"logps/rejected": -201.25054931640625, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46800869703292847, |
|
"rewards/margins": 0.12990553677082062, |
|
"rewards/rejected": -0.5979142189025879, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985344892885899e-05, |
|
"logits/chosen": -2.347231149673462, |
|
"logits/rejected": -2.4064972400665283, |
|
"logps/chosen": -158.72801208496094, |
|
"logps/rejected": -167.15805053710938, |
|
"loss": 0.7264, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7969176173210144, |
|
"rewards/margins": 0.016100093722343445, |
|
"rewards/rejected": -0.8130176663398743, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985232055833313e-05, |
|
"logits/chosen": -2.603712320327759, |
|
"logits/rejected": -2.6721928119659424, |
|
"logps/chosen": -211.3256378173828, |
|
"logps/rejected": -189.5877685546875, |
|
"loss": 0.7658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6097235679626465, |
|
"rewards/margins": -0.10059280693531036, |
|
"rewards/rejected": -0.5091307163238525, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.985118787337164e-05, |
|
"logits/chosen": -2.6673433780670166, |
|
"logits/rejected": -2.766993284225464, |
|
"logps/chosen": -162.41415405273438, |
|
"logps/rejected": -218.15077209472656, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5241696238517761, |
|
"rewards/margins": 0.28995370864868164, |
|
"rewards/rejected": -0.8141233921051025, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.985005087417115e-05, |
|
"logits/chosen": -2.4291069507598877, |
|
"logits/rejected": -2.382821798324585, |
|
"logps/chosen": -221.48736572265625, |
|
"logps/rejected": -205.37940979003906, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4502621591091156, |
|
"rewards/margins": 0.2195020467042923, |
|
"rewards/rejected": -0.6697642207145691, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.984890956092905e-05, |
|
"logits/chosen": -2.5559186935424805, |
|
"logits/rejected": -2.496387481689453, |
|
"logps/chosen": -253.63047790527344, |
|
"logps/rejected": -244.56698608398438, |
|
"loss": 0.9484, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9435632228851318, |
|
"rewards/margins": -0.268410325050354, |
|
"rewards/rejected": -0.6751528978347778, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.984776393384348e-05, |
|
"logits/chosen": -2.407367706298828, |
|
"logits/rejected": -2.3134310245513916, |
|
"logps/chosen": -173.61795043945312, |
|
"logps/rejected": -157.41207885742188, |
|
"loss": 0.7949, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5948637127876282, |
|
"rewards/margins": -0.14962351322174072, |
|
"rewards/rejected": -0.44524019956588745, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.984661399311332e-05, |
|
"logits/chosen": -2.6404154300689697, |
|
"logits/rejected": -2.7036595344543457, |
|
"logps/chosen": -155.3403778076172, |
|
"logps/rejected": -141.01382446289062, |
|
"loss": 0.7475, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4795231521129608, |
|
"rewards/margins": -0.06228278949856758, |
|
"rewards/rejected": -0.4172403812408447, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9845459738938204e-05, |
|
"logits/chosen": -2.5403473377227783, |
|
"logits/rejected": -2.5674545764923096, |
|
"logps/chosen": -154.85910034179688, |
|
"logps/rejected": -143.22906494140625, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4790140390396118, |
|
"rewards/margins": 0.1586410105228424, |
|
"rewards/rejected": -0.6376550197601318, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9844301171518516e-05, |
|
"logits/chosen": -2.6081302165985107, |
|
"logits/rejected": -2.641979217529297, |
|
"logps/chosen": -185.13006591796875, |
|
"logps/rejected": -198.2803955078125, |
|
"loss": 0.6726, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3776107430458069, |
|
"rewards/margins": 0.08190791308879852, |
|
"rewards/rejected": -0.4595187306404114, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.984313829105538e-05, |
|
"logits/chosen": -2.6662757396698, |
|
"logits/rejected": -2.7398993968963623, |
|
"logps/chosen": -201.8738555908203, |
|
"logps/rejected": -195.76390075683594, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.38797736167907715, |
|
"rewards/margins": 0.2769092321395874, |
|
"rewards/rejected": -0.6648865342140198, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.984197109775068e-05, |
|
"logits/chosen": -2.4933226108551025, |
|
"logits/rejected": -2.484591245651245, |
|
"logps/chosen": -202.7592315673828, |
|
"logps/rejected": -199.02105712890625, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.35886555910110474, |
|
"rewards/margins": 0.1531214416027069, |
|
"rewards/rejected": -0.511987030506134, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.984079959180705e-05, |
|
"logits/chosen": -2.3560287952423096, |
|
"logits/rejected": -2.385936975479126, |
|
"logps/chosen": -198.06044006347656, |
|
"logps/rejected": -203.5518798828125, |
|
"loss": 0.7203, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.35552191734313965, |
|
"rewards/margins": 0.03736201673746109, |
|
"rewards/rejected": -0.39288395643234253, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.983962377342786e-05, |
|
"logits/chosen": -2.456765651702881, |
|
"logits/rejected": -2.522660970687866, |
|
"logps/chosen": -236.66744995117188, |
|
"logps/rejected": -241.58963012695312, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.551971435546875, |
|
"rewards/margins": 0.30021384358406067, |
|
"rewards/rejected": -0.8521853089332581, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.983844364281723e-05, |
|
"logits/chosen": -2.5745279788970947, |
|
"logits/rejected": -2.5961902141571045, |
|
"logps/chosen": -186.73411560058594, |
|
"logps/rejected": -194.74974060058594, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.312610000371933, |
|
"rewards/margins": 0.14659735560417175, |
|
"rewards/rejected": -0.45920735597610474, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.983725920018004e-05, |
|
"logits/chosen": -2.3747611045837402, |
|
"logits/rejected": -2.4170022010803223, |
|
"logps/chosen": -179.91162109375, |
|
"logps/rejected": -170.89920043945312, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.43839430809020996, |
|
"rewards/margins": 0.16247621178627014, |
|
"rewards/rejected": -0.6008704900741577, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.9836070445721924e-05, |
|
"logits/chosen": -2.573714017868042, |
|
"logits/rejected": -2.567589282989502, |
|
"logps/chosen": -204.4001922607422, |
|
"logps/rejected": -197.39869689941406, |
|
"loss": 0.751, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.350676029920578, |
|
"rewards/margins": -0.06606191396713257, |
|
"rewards/rejected": -0.28461411595344543, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.983487737964924e-05, |
|
"logits/chosen": -2.398193597793579, |
|
"logits/rejected": -2.4593253135681152, |
|
"logps/chosen": -199.0366973876953, |
|
"logps/rejected": -199.51275634765625, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.397927463054657, |
|
"rewards/margins": 0.08031069487333298, |
|
"rewards/rejected": -0.47823822498321533, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.9833680002169105e-05, |
|
"logits/chosen": -2.5120034217834473, |
|
"logits/rejected": -2.553926706314087, |
|
"logps/chosen": -226.15040588378906, |
|
"logps/rejected": -249.55255126953125, |
|
"loss": 0.6996, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3695937395095825, |
|
"rewards/margins": 0.0394027940928936, |
|
"rewards/rejected": -0.4089965522289276, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.983247831348939e-05, |
|
"logits/chosen": -2.289033889770508, |
|
"logits/rejected": -2.2501816749572754, |
|
"logps/chosen": -201.75392150878906, |
|
"logps/rejected": -217.8618621826172, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28367459774017334, |
|
"rewards/margins": 0.1500779092311859, |
|
"rewards/rejected": -0.43375247716903687, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.9831272313818716e-05, |
|
"logits/chosen": -2.4449117183685303, |
|
"logits/rejected": -2.522752046585083, |
|
"logps/chosen": -201.63587951660156, |
|
"logps/rejected": -215.33270263671875, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23286518454551697, |
|
"rewards/margins": 0.127617746591568, |
|
"rewards/rejected": -0.36048293113708496, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.983006200336645e-05, |
|
"logits/chosen": -2.4317235946655273, |
|
"logits/rejected": -2.3696205615997314, |
|
"logps/chosen": -243.03509521484375, |
|
"logps/rejected": -186.61427307128906, |
|
"loss": 0.7762, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5067302584648132, |
|
"rewards/margins": -0.11989758908748627, |
|
"rewards/rejected": -0.3868326246738434, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.98288473823427e-05, |
|
"logits/chosen": -2.4131155014038086, |
|
"logits/rejected": -2.498098611831665, |
|
"logps/chosen": -190.73397827148438, |
|
"logps/rejected": -213.9364471435547, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.24977169930934906, |
|
"rewards/margins": 0.1747249811887741, |
|
"rewards/rejected": -0.42449668049812317, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.982762845095833e-05, |
|
"logits/chosen": -2.5675809383392334, |
|
"logits/rejected": -2.6289899349212646, |
|
"logps/chosen": -233.9481658935547, |
|
"logps/rejected": -245.3311767578125, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3384949564933777, |
|
"rewards/margins": 0.07099074125289917, |
|
"rewards/rejected": -0.40948566794395447, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.982640520942494e-05, |
|
"logits/chosen": -2.3582189083099365, |
|
"logits/rejected": -2.405932903289795, |
|
"logps/chosen": -157.98828125, |
|
"logps/rejected": -179.5477294921875, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.30075955390930176, |
|
"rewards/margins": 0.2314756065607071, |
|
"rewards/rejected": -0.5322352051734924, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.9825177657954914e-05, |
|
"logits/chosen": -2.6346352100372314, |
|
"logits/rejected": -2.7042407989501953, |
|
"logps/chosen": -174.1151123046875, |
|
"logps/rejected": -203.845458984375, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3591979742050171, |
|
"rewards/margins": 0.10004588216543198, |
|
"rewards/rejected": -0.45924389362335205, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.982394579676133e-05, |
|
"logits/chosen": -2.3420321941375732, |
|
"logits/rejected": -2.3010876178741455, |
|
"logps/chosen": -186.43467712402344, |
|
"logps/rejected": -183.117431640625, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3466501235961914, |
|
"rewards/margins": 0.06071464717388153, |
|
"rewards/rejected": -0.40736478567123413, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.982270962605806e-05, |
|
"logits/chosen": -2.4120569229125977, |
|
"logits/rejected": -2.449502944946289, |
|
"logps/chosen": -168.2112579345703, |
|
"logps/rejected": -227.885986328125, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22820252180099487, |
|
"rewards/margins": 0.35979852080345154, |
|
"rewards/rejected": -0.588001012802124, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9821469146059704e-05, |
|
"logits/chosen": -2.3874173164367676, |
|
"logits/rejected": -2.4257519245147705, |
|
"logps/chosen": -158.00424194335938, |
|
"logps/rejected": -176.4921417236328, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28017669916152954, |
|
"rewards/margins": 0.168703094124794, |
|
"rewards/rejected": -0.44887974858283997, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.982022435698161e-05, |
|
"logits/chosen": -2.444685697555542, |
|
"logits/rejected": -2.653350591659546, |
|
"logps/chosen": -207.88034057617188, |
|
"logps/rejected": -231.86582946777344, |
|
"loss": 0.7048, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5877898931503296, |
|
"rewards/margins": 0.10215066373348236, |
|
"rewards/rejected": -0.6899405121803284, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.981897525903988e-05, |
|
"logits/chosen": -2.5300025939941406, |
|
"logits/rejected": -2.4865880012512207, |
|
"logps/chosen": -155.8424835205078, |
|
"logps/rejected": -152.6171875, |
|
"loss": 0.8172, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5343464612960815, |
|
"rewards/margins": -0.17394664883613586, |
|
"rewards/rejected": -0.3603998124599457, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.981772185245135e-05, |
|
"logits/chosen": -2.61796236038208, |
|
"logits/rejected": -2.662163734436035, |
|
"logps/chosen": -198.32403564453125, |
|
"logps/rejected": -198.2169647216797, |
|
"loss": 0.751, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4620332419872284, |
|
"rewards/margins": -0.09868823736906052, |
|
"rewards/rejected": -0.3633449673652649, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.981646413743363e-05, |
|
"logits/chosen": -2.386425733566284, |
|
"logits/rejected": -2.452526092529297, |
|
"logps/chosen": -187.49838256835938, |
|
"logps/rejected": -181.8330535888672, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27474457025527954, |
|
"rewards/margins": 0.2754724621772766, |
|
"rewards/rejected": -0.5502170324325562, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.981520211420506e-05, |
|
"logits/chosen": -2.4169254302978516, |
|
"logits/rejected": -2.4883086681365967, |
|
"logps/chosen": -175.3878173828125, |
|
"logps/rejected": -210.00119018554688, |
|
"loss": 0.7642, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.532840371131897, |
|
"rewards/margins": -0.09793217480182648, |
|
"rewards/rejected": -0.4349081814289093, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9813935782984724e-05, |
|
"logits/chosen": -2.528134346008301, |
|
"logits/rejected": -2.491497039794922, |
|
"logps/chosen": -239.28878784179688, |
|
"logps/rejected": -241.08883666992188, |
|
"loss": 0.7798, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6152268648147583, |
|
"rewards/margins": -0.10364706814289093, |
|
"rewards/rejected": -0.5115798711776733, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9812665143992466e-05, |
|
"logits/chosen": -2.5107626914978027, |
|
"logits/rejected": -2.591826915740967, |
|
"logps/chosen": -146.5967559814453, |
|
"logps/rejected": -154.36585998535156, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3082922697067261, |
|
"rewards/margins": 0.11212008446455002, |
|
"rewards/rejected": -0.4204123616218567, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.981139019744887e-05, |
|
"logits/chosen": -2.5607235431671143, |
|
"logits/rejected": -2.562152862548828, |
|
"logps/chosen": -185.95343017578125, |
|
"logps/rejected": -194.59483337402344, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2080746442079544, |
|
"rewards/margins": 0.12636929750442505, |
|
"rewards/rejected": -0.33444392681121826, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.981011094357527e-05, |
|
"logits/chosen": -2.4996774196624756, |
|
"logits/rejected": -2.5331335067749023, |
|
"logps/chosen": -215.7071075439453, |
|
"logps/rejected": -231.0633087158203, |
|
"loss": 0.7228, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4276806116104126, |
|
"rewards/margins": 0.03171852231025696, |
|
"rewards/rejected": -0.45939913392066956, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.980882738259376e-05, |
|
"logits/chosen": -2.3114399909973145, |
|
"logits/rejected": -2.3782169818878174, |
|
"logps/chosen": -169.04246520996094, |
|
"logps/rejected": -188.5185089111328, |
|
"loss": 0.7661, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.31136631965637207, |
|
"rewards/margins": -0.09910126030445099, |
|
"rewards/rejected": -0.21226505935192108, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.980753951472715e-05, |
|
"logits/chosen": -2.466881275177002, |
|
"logits/rejected": -2.5022642612457275, |
|
"logps/chosen": -168.63385009765625, |
|
"logps/rejected": -192.87521362304688, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3558157682418823, |
|
"rewards/margins": 0.13948221504688263, |
|
"rewards/rejected": -0.49529796838760376, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.980624734019903e-05, |
|
"logits/chosen": -2.5671474933624268, |
|
"logits/rejected": -2.6410720348358154, |
|
"logps/chosen": -183.6251220703125, |
|
"logps/rejected": -214.81272888183594, |
|
"loss": 0.7078, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4311671555042267, |
|
"rewards/margins": 0.0398234948515892, |
|
"rewards/rejected": -0.4709906280040741, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.980495085923372e-05, |
|
"logits/chosen": -2.602818012237549, |
|
"logits/rejected": -2.5820987224578857, |
|
"logps/chosen": -167.52012634277344, |
|
"logps/rejected": -180.94418334960938, |
|
"loss": 0.7179, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.41106754541397095, |
|
"rewards/margins": 0.013390736654400826, |
|
"rewards/rejected": -0.42445826530456543, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.980365007205631e-05, |
|
"logits/chosen": -2.4487011432647705, |
|
"logits/rejected": -2.5343518257141113, |
|
"logps/chosen": -201.9692840576172, |
|
"logps/rejected": -244.95530700683594, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3986441195011139, |
|
"rewards/margins": 0.3018190264701843, |
|
"rewards/rejected": -0.700463056564331, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.980234497889259e-05, |
|
"logits/chosen": -1.9015774726867676, |
|
"logits/rejected": -1.9911203384399414, |
|
"logps/chosen": -165.83837890625, |
|
"logps/rejected": -178.608154296875, |
|
"loss": 0.7531, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.47146371006965637, |
|
"rewards/margins": -0.03502827137708664, |
|
"rewards/rejected": -0.43643543124198914, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.980103557996915e-05, |
|
"logits/chosen": -2.609720468521118, |
|
"logits/rejected": -2.631443738937378, |
|
"logps/chosen": -235.0199737548828, |
|
"logps/rejected": -234.86245727539062, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2306605726480484, |
|
"rewards/margins": 0.16340221464633942, |
|
"rewards/rejected": -0.3940627872943878, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.9799721875513306e-05, |
|
"logits/chosen": -2.283406972885132, |
|
"logits/rejected": -2.386524200439453, |
|
"logps/chosen": -226.02059936523438, |
|
"logps/rejected": -220.51963806152344, |
|
"loss": 0.7208, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5439196228981018, |
|
"rewards/margins": 0.06111856549978256, |
|
"rewards/rejected": -0.6050382256507874, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.979840386575311e-05, |
|
"logits/chosen": -2.2238452434539795, |
|
"logits/rejected": -2.3374826908111572, |
|
"logps/chosen": -169.82667541503906, |
|
"logps/rejected": -197.92726135253906, |
|
"loss": 0.7292, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.46263885498046875, |
|
"rewards/margins": 0.007239609956741333, |
|
"rewards/rejected": -0.46987849473953247, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.979708155091737e-05, |
|
"logits/chosen": -2.381389856338501, |
|
"logits/rejected": -2.358883857727051, |
|
"logps/chosen": -153.38771057128906, |
|
"logps/rejected": -147.0846405029297, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2459157109260559, |
|
"rewards/margins": 0.1274775117635727, |
|
"rewards/rejected": -0.3733932375907898, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.979575493123566e-05, |
|
"logits/chosen": -2.4942612648010254, |
|
"logits/rejected": -2.4526309967041016, |
|
"logps/chosen": -222.35780334472656, |
|
"logps/rejected": -205.19630432128906, |
|
"loss": 0.7278, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4296140670776367, |
|
"rewards/margins": -0.018014922738075256, |
|
"rewards/rejected": -0.41159915924072266, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.979442400693827e-05, |
|
"logits/chosen": -2.4821577072143555, |
|
"logits/rejected": -2.4502267837524414, |
|
"logps/chosen": -157.32427978515625, |
|
"logps/rejected": -167.9297332763672, |
|
"loss": 0.7119, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4199235439300537, |
|
"rewards/margins": 0.024679476395249367, |
|
"rewards/rejected": -0.4446030259132385, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.979308877825626e-05, |
|
"logits/chosen": -2.1102793216705322, |
|
"logits/rejected": -2.100888729095459, |
|
"logps/chosen": -235.02105712890625, |
|
"logps/rejected": -232.0230712890625, |
|
"loss": 0.747, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4274646043777466, |
|
"rewards/margins": 0.0070232609286904335, |
|
"rewards/rejected": -0.43448784947395325, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9791749245421434e-05, |
|
"logits/chosen": -2.4029147624969482, |
|
"logits/rejected": -2.436694622039795, |
|
"logps/chosen": -160.94296264648438, |
|
"logps/rejected": -219.91183471679688, |
|
"loss": 0.7659, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6857771873474121, |
|
"rewards/margins": -0.07571852207183838, |
|
"rewards/rejected": -0.6100587248802185, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.979040540866632e-05, |
|
"logits/chosen": -2.423585891723633, |
|
"logits/rejected": -2.583218574523926, |
|
"logps/chosen": -152.8751220703125, |
|
"logps/rejected": -195.98130798339844, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.29784753918647766, |
|
"rewards/margins": 0.13703800737857819, |
|
"rewards/rejected": -0.43488556146621704, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9789057268224234e-05, |
|
"logits/chosen": -2.0688211917877197, |
|
"logits/rejected": -2.1250336170196533, |
|
"logps/chosen": -192.48826599121094, |
|
"logps/rejected": -219.7627716064453, |
|
"loss": 0.7074, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.16141924262046814, |
|
"rewards/margins": 0.035948604345321655, |
|
"rewards/rejected": -0.197367861866951, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.978770482432921e-05, |
|
"logits/chosen": -2.4140584468841553, |
|
"logits/rejected": -2.394195079803467, |
|
"logps/chosen": -177.2361297607422, |
|
"logps/rejected": -201.13475036621094, |
|
"loss": 0.7109, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.41423746943473816, |
|
"rewards/margins": 0.01436915248632431, |
|
"rewards/rejected": -0.42860662937164307, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9786348077216024e-05, |
|
"logits/chosen": -2.399442672729492, |
|
"logits/rejected": -2.4223692417144775, |
|
"logps/chosen": -184.38839721679688, |
|
"logps/rejected": -176.9226531982422, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2739775776863098, |
|
"rewards/margins": 0.14736850559711456, |
|
"rewards/rejected": -0.4213460683822632, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9784987027120236e-05, |
|
"logits/chosen": -2.331202745437622, |
|
"logits/rejected": -2.3324198722839355, |
|
"logps/chosen": -199.08740234375, |
|
"logps/rejected": -171.50718688964844, |
|
"loss": 0.7564, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4433014988899231, |
|
"rewards/margins": -0.06983453035354614, |
|
"rewards/rejected": -0.37346696853637695, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9783621674278104e-05, |
|
"logits/chosen": -2.402174949645996, |
|
"logits/rejected": -2.5383033752441406, |
|
"logps/chosen": -174.37954711914062, |
|
"logps/rejected": -165.9345703125, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5206565856933594, |
|
"rewards/margins": 0.13473454117774963, |
|
"rewards/rejected": -0.6553912162780762, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.978225201892667e-05, |
|
"logits/chosen": -2.4111886024475098, |
|
"logits/rejected": -2.3553833961486816, |
|
"logps/chosen": -220.5755615234375, |
|
"logps/rejected": -195.2405242919922, |
|
"loss": 0.7531, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.4778462052345276, |
|
"rewards/margins": -0.041694507002830505, |
|
"rewards/rejected": -0.4361516833305359, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.97808780613037e-05, |
|
"logits/chosen": -2.307465076446533, |
|
"logits/rejected": -2.3099443912506104, |
|
"logps/chosen": -171.25372314453125, |
|
"logps/rejected": -200.85305786132812, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.445933997631073, |
|
"rewards/margins": 0.2173961102962494, |
|
"rewards/rejected": -0.663330078125, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.977949980164773e-05, |
|
"logits/chosen": -2.3666648864746094, |
|
"logits/rejected": -2.433722496032715, |
|
"logps/chosen": -217.6551513671875, |
|
"logps/rejected": -234.6415557861328, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3358496427536011, |
|
"rewards/margins": 0.309315949678421, |
|
"rewards/rejected": -0.6451655626296997, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.977811724019802e-05, |
|
"logits/chosen": -2.336599111557007, |
|
"logits/rejected": -2.409275770187378, |
|
"logps/chosen": -191.2588653564453, |
|
"logps/rejected": -208.126953125, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4646199941635132, |
|
"rewards/margins": 0.13836784660816193, |
|
"rewards/rejected": -0.6029877662658691, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.9776730377194596e-05, |
|
"logits/chosen": -2.1639509201049805, |
|
"logits/rejected": -2.2069835662841797, |
|
"logps/chosen": -203.13632202148438, |
|
"logps/rejected": -169.3188018798828, |
|
"loss": 0.7455, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23145976662635803, |
|
"rewards/margins": 0.013234168291091919, |
|
"rewards/rejected": -0.24469394981861115, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.9775339212878215e-05, |
|
"logits/chosen": -2.1670339107513428, |
|
"logits/rejected": -2.1488430500030518, |
|
"logps/chosen": -166.50318908691406, |
|
"logps/rejected": -169.4437713623047, |
|
"loss": 0.698, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2539255619049072, |
|
"rewards/margins": 0.030521919950842857, |
|
"rewards/rejected": -0.28444746136665344, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.977394374749039e-05, |
|
"logits/chosen": -2.336040735244751, |
|
"logits/rejected": -2.3590757846832275, |
|
"logps/chosen": -150.48399353027344, |
|
"logps/rejected": -154.54652404785156, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.30479297041893005, |
|
"rewards/margins": 0.10228873789310455, |
|
"rewards/rejected": -0.4070816934108734, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.9772543981273374e-05, |
|
"logits/chosen": -2.25268292427063, |
|
"logits/rejected": -2.267082691192627, |
|
"logps/chosen": -246.07342529296875, |
|
"logps/rejected": -230.28424072265625, |
|
"loss": 0.7204, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.42524397373199463, |
|
"rewards/margins": -0.008991291746497154, |
|
"rewards/rejected": -0.4162527322769165, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.977113991447017e-05, |
|
"logits/chosen": -2.35402250289917, |
|
"logits/rejected": -2.3599345684051514, |
|
"logps/chosen": -163.17568969726562, |
|
"logps/rejected": -175.94029235839844, |
|
"loss": 0.7346, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.4037836790084839, |
|
"rewards/margins": 0.02548382803797722, |
|
"rewards/rejected": -0.4292675256729126, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.976973154732454e-05, |
|
"logits/chosen": -2.3611769676208496, |
|
"logits/rejected": -2.3299450874328613, |
|
"logps/chosen": -205.6555633544922, |
|
"logps/rejected": -213.8665771484375, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.22228708863258362, |
|
"rewards/margins": 0.031078480184078217, |
|
"rewards/rejected": -0.25336551666259766, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.976831888008096e-05, |
|
"logits/chosen": -2.316866636276245, |
|
"logits/rejected": -2.299907684326172, |
|
"logps/chosen": -168.08087158203125, |
|
"logps/rejected": -160.0249786376953, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13129538297653198, |
|
"rewards/margins": 0.04759233817458153, |
|
"rewards/rejected": -0.17888770997524261, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.976690191298469e-05, |
|
"logits/chosen": -2.1294949054718018, |
|
"logits/rejected": -2.1100010871887207, |
|
"logps/chosen": -176.1876678466797, |
|
"logps/rejected": -175.78619384765625, |
|
"loss": 0.7597, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.16827335953712463, |
|
"rewards/margins": -0.110628642141819, |
|
"rewards/rejected": -0.05764475464820862, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.9765480646281716e-05, |
|
"logits/chosen": -2.120499849319458, |
|
"logits/rejected": -2.2186028957366943, |
|
"logps/chosen": -178.4534912109375, |
|
"logps/rejected": -200.6439971923828, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11943019181489944, |
|
"rewards/margins": 0.28324708342552185, |
|
"rewards/rejected": -0.4026772677898407, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.976405508021877e-05, |
|
"logits/chosen": -2.430680274963379, |
|
"logits/rejected": -2.5126051902770996, |
|
"logps/chosen": -201.0321502685547, |
|
"logps/rejected": -231.42811584472656, |
|
"loss": 0.7632, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.47548148036003113, |
|
"rewards/margins": -0.046219632029533386, |
|
"rewards/rejected": -0.42926183342933655, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.9762625215043334e-05, |
|
"logits/chosen": -2.396533966064453, |
|
"logits/rejected": -2.4125595092773438, |
|
"logps/chosen": -236.99049377441406, |
|
"logps/rejected": -253.93577575683594, |
|
"loss": 0.7165, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4821574091911316, |
|
"rewards/margins": -0.007029315456748009, |
|
"rewards/rejected": -0.4751281142234802, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9761191051003644e-05, |
|
"logits/chosen": -2.1031394004821777, |
|
"logits/rejected": -2.143960475921631, |
|
"logps/chosen": -168.68551635742188, |
|
"logps/rejected": -209.39537048339844, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1691710501909256, |
|
"rewards/margins": 0.2633129060268402, |
|
"rewards/rejected": -0.4324839413166046, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.975975258834867e-05, |
|
"logits/chosen": -2.451735496520996, |
|
"logits/rejected": -2.410836696624756, |
|
"logps/chosen": -219.5807342529297, |
|
"logps/rejected": -238.2914276123047, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.26019036769866943, |
|
"rewards/margins": 0.09737718105316162, |
|
"rewards/rejected": -0.35756757855415344, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9758309827328134e-05, |
|
"logits/chosen": -2.3229422569274902, |
|
"logits/rejected": -2.3514108657836914, |
|
"logps/chosen": -181.05836486816406, |
|
"logps/rejected": -211.49786376953125, |
|
"loss": 0.7734, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2722405195236206, |
|
"rewards/margins": -0.07979361712932587, |
|
"rewards/rejected": -0.19244688749313354, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9756862768192504e-05, |
|
"logits/chosen": -2.3667490482330322, |
|
"logits/rejected": -2.328806161880493, |
|
"logps/chosen": -201.50511169433594, |
|
"logps/rejected": -179.98643493652344, |
|
"loss": 0.7353, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3051126301288605, |
|
"rewards/margins": -0.007734470069408417, |
|
"rewards/rejected": -0.29737818241119385, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9755411411192996e-05, |
|
"logits/chosen": -2.4068377017974854, |
|
"logits/rejected": -2.532482862472534, |
|
"logps/chosen": -186.65342712402344, |
|
"logps/rejected": -205.83154296875, |
|
"loss": 0.7144, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.17129601538181305, |
|
"rewards/margins": -0.002290443517267704, |
|
"rewards/rejected": -0.1690055876970291, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.975395575658156e-05, |
|
"logits/chosen": -2.382935047149658, |
|
"logits/rejected": -2.4527859687805176, |
|
"logps/chosen": -200.10215759277344, |
|
"logps/rejected": -207.49302673339844, |
|
"loss": 0.7113, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.33162015676498413, |
|
"rewards/margins": 0.029873518273234367, |
|
"rewards/rejected": -0.36149367690086365, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9752495804610916e-05, |
|
"logits/chosen": -2.2459821701049805, |
|
"logits/rejected": -2.2536497116088867, |
|
"logps/chosen": -249.44638061523438, |
|
"logps/rejected": -216.59483337402344, |
|
"loss": 0.8425, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3279973268508911, |
|
"rewards/margins": -0.22765566408634186, |
|
"rewards/rejected": -0.10034167766571045, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9751031555534504e-05, |
|
"logits/chosen": -2.416114330291748, |
|
"logits/rejected": -2.5028560161590576, |
|
"logps/chosen": -186.53961181640625, |
|
"logps/rejected": -160.81617736816406, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1654774248600006, |
|
"rewards/margins": 0.07480952888727188, |
|
"rewards/rejected": -0.2402869611978531, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.9749563009606534e-05, |
|
"logits/chosen": -2.4489054679870605, |
|
"logits/rejected": -2.527320623397827, |
|
"logps/chosen": -202.46507263183594, |
|
"logps/rejected": -185.72853088378906, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22387953102588654, |
|
"rewards/margins": 0.19476865231990814, |
|
"rewards/rejected": -0.4186481833457947, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.9748090167081936e-05, |
|
"logits/chosen": -2.147719144821167, |
|
"logits/rejected": -2.249868154525757, |
|
"logps/chosen": -152.69786071777344, |
|
"logps/rejected": -190.48863220214844, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.29554668068885803, |
|
"rewards/margins": 0.11645788699388504, |
|
"rewards/rejected": -0.4120045602321625, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.974661302821641e-05, |
|
"logits/chosen": -2.142338275909424, |
|
"logits/rejected": -2.2330148220062256, |
|
"logps/chosen": -157.3348388671875, |
|
"logps/rejected": -194.91302490234375, |
|
"loss": 0.7378, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.13629794120788574, |
|
"rewards/margins": -0.030040191486477852, |
|
"rewards/rejected": -0.10625775158405304, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.974513159326638e-05, |
|
"logits/chosen": -2.551227569580078, |
|
"logits/rejected": -2.5135083198547363, |
|
"logps/chosen": -201.37612915039062, |
|
"logps/rejected": -167.43287658691406, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25598540902137756, |
|
"rewards/margins": 0.18629388511180878, |
|
"rewards/rejected": -0.4422793388366699, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.974364586248904e-05, |
|
"logits/chosen": -2.174506425857544, |
|
"logits/rejected": -2.1649577617645264, |
|
"logps/chosen": -181.4037628173828, |
|
"logps/rejected": -171.9210968017578, |
|
"loss": 0.8572, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3619764745235443, |
|
"rewards/margins": -0.19866357743740082, |
|
"rewards/rejected": -0.1633128821849823, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.974215583614232e-05, |
|
"logits/chosen": -2.3266444206237793, |
|
"logits/rejected": -2.418710470199585, |
|
"logps/chosen": -207.11448669433594, |
|
"logps/rejected": -287.95574951171875, |
|
"loss": 0.7347, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.41178515553474426, |
|
"rewards/margins": -0.002230718731880188, |
|
"rewards/rejected": -0.40955445170402527, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.974066151448488e-05, |
|
"logits/chosen": -2.5598104000091553, |
|
"logits/rejected": -2.559662342071533, |
|
"logps/chosen": -231.59133911132812, |
|
"logps/rejected": -239.7825469970703, |
|
"loss": 0.7079, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.42168766260147095, |
|
"rewards/margins": 0.02927880734205246, |
|
"rewards/rejected": -0.4509664475917816, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.9739162897776146e-05, |
|
"logits/chosen": -2.3569555282592773, |
|
"logits/rejected": -2.3967788219451904, |
|
"logps/chosen": -185.6054229736328, |
|
"logps/rejected": -192.92169189453125, |
|
"loss": 0.7437, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4934594929218292, |
|
"rewards/margins": -0.05449381470680237, |
|
"rewards/rejected": -0.43896567821502686, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.973765998627628e-05, |
|
"logits/chosen": -2.5715792179107666, |
|
"logits/rejected": -2.5525949001312256, |
|
"logps/chosen": -252.23037719726562, |
|
"logps/rejected": -268.3929443359375, |
|
"loss": 0.7203, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3784821331501007, |
|
"rewards/margins": -0.01087496429681778, |
|
"rewards/rejected": -0.3676071763038635, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.973615278024619e-05, |
|
"logits/chosen": -2.441962718963623, |
|
"logits/rejected": -2.481163501739502, |
|
"logps/chosen": -161.66839599609375, |
|
"logps/rejected": -192.90371704101562, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3044736981391907, |
|
"rewards/margins": 0.08197857439517975, |
|
"rewards/rejected": -0.38645222783088684, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9734641279947535e-05, |
|
"logits/chosen": -2.500675916671753, |
|
"logits/rejected": -2.487545967102051, |
|
"logps/chosen": -215.66336059570312, |
|
"logps/rejected": -232.06967163085938, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4935031235218048, |
|
"rewards/margins": 0.12285958230495453, |
|
"rewards/rejected": -0.6163626909255981, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.973312548564272e-05, |
|
"logits/chosen": -2.4314112663269043, |
|
"logits/rejected": -2.4294910430908203, |
|
"logps/chosen": -170.31471252441406, |
|
"logps/rejected": -159.71585083007812, |
|
"loss": 0.7036, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.32274457812309265, |
|
"rewards/margins": 0.005533523857593536, |
|
"rewards/rejected": -0.328278124332428, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9731605397594884e-05, |
|
"logits/chosen": -2.4075756072998047, |
|
"logits/rejected": -2.32165789604187, |
|
"logps/chosen": -186.2813720703125, |
|
"logps/rejected": -199.41989135742188, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3598267138004303, |
|
"rewards/margins": 0.11941798776388168, |
|
"rewards/rejected": -0.4792447090148926, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.973008101606792e-05, |
|
"logits/chosen": -2.422717571258545, |
|
"logits/rejected": -2.5152597427368164, |
|
"logps/chosen": -173.51361083984375, |
|
"logps/rejected": -203.63848876953125, |
|
"loss": 0.7204, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25253090262413025, |
|
"rewards/margins": 0.029171746224164963, |
|
"rewards/rejected": -0.2817026376724243, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.972855234132646e-05, |
|
"logits/chosen": -2.517770767211914, |
|
"logits/rejected": -2.501011848449707, |
|
"logps/chosen": -202.05300903320312, |
|
"logps/rejected": -191.65887451171875, |
|
"loss": 0.7912, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.42360594868659973, |
|
"rewards/margins": -0.1400626003742218, |
|
"rewards/rejected": -0.28354334831237793, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9727019373635895e-05, |
|
"logits/chosen": -2.612367630004883, |
|
"logits/rejected": -2.617859125137329, |
|
"logps/chosen": -210.05746459960938, |
|
"logps/rejected": -199.4877166748047, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12031766027212143, |
|
"rewards/margins": 0.193304181098938, |
|
"rewards/rejected": -0.3136218190193176, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.972548211326235e-05, |
|
"logits/chosen": -2.5540568828582764, |
|
"logits/rejected": -2.587779998779297, |
|
"logps/chosen": -183.94273376464844, |
|
"logps/rejected": -219.0413818359375, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1985998898744583, |
|
"rewards/margins": 0.12929418683052063, |
|
"rewards/rejected": -0.32789406180381775, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9723940560472705e-05, |
|
"logits/chosen": -2.573051691055298, |
|
"logits/rejected": -2.6791186332702637, |
|
"logps/chosen": -193.5257568359375, |
|
"logps/rejected": -189.68743896484375, |
|
"loss": 0.7393, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.31839922070503235, |
|
"rewards/margins": -0.04755366966128349, |
|
"rewards/rejected": -0.27084553241729736, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.972239471553457e-05, |
|
"logits/chosen": -2.127181053161621, |
|
"logits/rejected": -2.1366987228393555, |
|
"logps/chosen": -180.3720703125, |
|
"logps/rejected": -206.37628173828125, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14177769422531128, |
|
"rewards/margins": 0.04955483227968216, |
|
"rewards/rejected": -0.19133250415325165, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.97208445787163e-05, |
|
"logits/chosen": -2.4841840267181396, |
|
"logits/rejected": -2.576199531555176, |
|
"logps/chosen": -175.8242645263672, |
|
"logps/rejected": -205.9224090576172, |
|
"loss": 0.7229, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.39802610874176025, |
|
"rewards/margins": -0.00046849995851516724, |
|
"rewards/rejected": -0.3975576162338257, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9719290150287026e-05, |
|
"logits/chosen": -2.4974851608276367, |
|
"logits/rejected": -2.552859306335449, |
|
"logps/chosen": -204.64480590820312, |
|
"logps/rejected": -229.373291015625, |
|
"loss": 0.7028, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09486077725887299, |
|
"rewards/margins": 0.017538849264383316, |
|
"rewards/rejected": -0.1123996153473854, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9717731430516576e-05, |
|
"logits/chosen": -2.5486257076263428, |
|
"logits/rejected": -2.505126953125, |
|
"logps/chosen": -216.0121307373047, |
|
"logps/rejected": -204.5452880859375, |
|
"loss": 0.6996, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1612711399793625, |
|
"rewards/margins": 0.06856641173362732, |
|
"rewards/rejected": -0.229837566614151, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9716168419675555e-05, |
|
"logits/chosen": -2.5866286754608154, |
|
"logits/rejected": -2.667116403579712, |
|
"logps/chosen": -198.39340209960938, |
|
"logps/rejected": -250.51675415039062, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3453162908554077, |
|
"rewards/margins": 0.07628250867128372, |
|
"rewards/rejected": -0.42159876227378845, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9714601118035325e-05, |
|
"logits/chosen": -2.339556932449341, |
|
"logits/rejected": -2.2843379974365234, |
|
"logps/chosen": -202.150634765625, |
|
"logps/rejected": -192.74267578125, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1799849569797516, |
|
"rewards/margins": 0.09412900358438492, |
|
"rewards/rejected": -0.2741139531135559, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.971302952586796e-05, |
|
"logits/chosen": -2.3514528274536133, |
|
"logits/rejected": -2.313333749771118, |
|
"logps/chosen": -216.48309326171875, |
|
"logps/rejected": -205.1566925048828, |
|
"loss": 0.7625, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3306673467159271, |
|
"rewards/margins": -0.07343533635139465, |
|
"rewards/rejected": -0.25723204016685486, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.971145364344628e-05, |
|
"logits/chosen": -2.475745916366577, |
|
"logits/rejected": -2.5058016777038574, |
|
"logps/chosen": -198.23568725585938, |
|
"logps/rejected": -226.68544006347656, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.38944318890571594, |
|
"rewards/margins": 0.15158149600028992, |
|
"rewards/rejected": -0.5410246849060059, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.970987347104389e-05, |
|
"logits/chosen": -2.490628242492676, |
|
"logits/rejected": -2.5108678340911865, |
|
"logps/chosen": -180.23779296875, |
|
"logps/rejected": -232.67515563964844, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3212130665779114, |
|
"rewards/margins": 0.09368535876274109, |
|
"rewards/rejected": -0.41489848494529724, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9708289008935096e-05, |
|
"logits/chosen": -2.6224091053009033, |
|
"logits/rejected": -2.6361618041992188, |
|
"logps/chosen": -254.05960083007812, |
|
"logps/rejected": -247.78634643554688, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40414828062057495, |
|
"rewards/margins": 0.051116712391376495, |
|
"rewards/rejected": -0.4552650451660156, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9706700257394966e-05, |
|
"logits/chosen": -2.6673390865325928, |
|
"logits/rejected": -2.6409029960632324, |
|
"logps/chosen": -160.94146728515625, |
|
"logps/rejected": -228.09808349609375, |
|
"loss": 0.7103, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4117045998573303, |
|
"rewards/margins": 0.047504693269729614, |
|
"rewards/rejected": -0.45920926332473755, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.970510721669932e-05, |
|
"logits/chosen": -2.12448787689209, |
|
"logits/rejected": -2.209928512573242, |
|
"logps/chosen": -247.7496337890625, |
|
"logps/rejected": -256.54254150390625, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13797114789485931, |
|
"rewards/margins": 0.25247055292129517, |
|
"rewards/rejected": -0.3904416561126709, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.97035098871247e-05, |
|
"logits/chosen": -2.531524896621704, |
|
"logits/rejected": -2.5506911277770996, |
|
"logps/chosen": -239.89340209960938, |
|
"logps/rejected": -232.53662109375, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2409316897392273, |
|
"rewards/margins": 0.11869845539331436, |
|
"rewards/rejected": -0.35963016748428345, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.970190826894842e-05, |
|
"logits/chosen": -2.4936788082122803, |
|
"logits/rejected": -2.6132445335388184, |
|
"logps/chosen": -193.3863067626953, |
|
"logps/rejected": -196.8754119873047, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.33218735456466675, |
|
"rewards/margins": 0.244198739528656, |
|
"rewards/rejected": -0.5763860940933228, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.9700302362448517e-05, |
|
"logits/chosen": -2.7358896732330322, |
|
"logits/rejected": -2.860752582550049, |
|
"logps/chosen": -193.43789672851562, |
|
"logps/rejected": -240.47451782226562, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2744450271129608, |
|
"rewards/margins": 0.07577859610319138, |
|
"rewards/rejected": -0.3502236306667328, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.9698692167903794e-05, |
|
"logits/chosen": -2.581242799758911, |
|
"logits/rejected": -2.7139995098114014, |
|
"logps/chosen": -206.38645935058594, |
|
"logps/rejected": -210.1461639404297, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3665695786476135, |
|
"rewards/margins": 0.26593247056007385, |
|
"rewards/rejected": -0.632502019405365, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.9697077685593766e-05, |
|
"logits/chosen": -2.6083261966705322, |
|
"logits/rejected": -2.6498053073883057, |
|
"logps/chosen": -131.37237548828125, |
|
"logps/rejected": -151.5311737060547, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15027746558189392, |
|
"rewards/margins": 0.126561239361763, |
|
"rewards/rejected": -0.2768386900424957, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.969545891579873e-05, |
|
"logits/chosen": -2.359971046447754, |
|
"logits/rejected": -2.3982698917388916, |
|
"logps/chosen": -157.39181518554688, |
|
"logps/rejected": -158.3177032470703, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10072011500597, |
|
"rewards/margins": 0.24248063564300537, |
|
"rewards/rejected": -0.34320077300071716, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.9693835858799696e-05, |
|
"logits/chosen": -2.408668279647827, |
|
"logits/rejected": -2.4540035724639893, |
|
"logps/chosen": -190.36312866210938, |
|
"logps/rejected": -204.20359802246094, |
|
"loss": 0.6997, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18467962741851807, |
|
"rewards/margins": 0.015133664011955261, |
|
"rewards/rejected": -0.19981329143047333, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.9692208514878444e-05, |
|
"logits/chosen": -2.270054340362549, |
|
"logits/rejected": -2.177553653717041, |
|
"logps/chosen": -197.311767578125, |
|
"logps/rejected": -205.99948120117188, |
|
"loss": 0.7255, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2719980776309967, |
|
"rewards/margins": -0.018820375204086304, |
|
"rewards/rejected": -0.2531776428222656, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.969057688431748e-05, |
|
"logits/chosen": -2.4876270294189453, |
|
"logits/rejected": -2.516055107116699, |
|
"logps/chosen": -150.5498046875, |
|
"logps/rejected": -173.20225524902344, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2806074023246765, |
|
"rewards/margins": 0.18431711196899414, |
|
"rewards/rejected": -0.46492451429367065, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.968894096740006e-05, |
|
"logits/chosen": -2.699005365371704, |
|
"logits/rejected": -2.706855297088623, |
|
"logps/chosen": -208.8563232421875, |
|
"logps/rejected": -229.45355224609375, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3221612572669983, |
|
"rewards/margins": 0.1469050496816635, |
|
"rewards/rejected": -0.4690663516521454, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.968730076441017e-05, |
|
"logits/chosen": -2.752363681793213, |
|
"logits/rejected": -2.8258612155914307, |
|
"logps/chosen": -233.5675811767578, |
|
"logps/rejected": -261.7062683105469, |
|
"loss": 0.7128, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.604475200176239, |
|
"rewards/margins": 0.022365085780620575, |
|
"rewards/rejected": -0.6268402338027954, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.9685656275632575e-05, |
|
"logits/chosen": -2.5177550315856934, |
|
"logits/rejected": -2.510917901992798, |
|
"logps/chosen": -217.02195739746094, |
|
"logps/rejected": -189.55174255371094, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3391067683696747, |
|
"rewards/margins": 0.13497701287269592, |
|
"rewards/rejected": -0.4740837812423706, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.968400750135276e-05, |
|
"logits/chosen": -2.540860652923584, |
|
"logits/rejected": -2.582563877105713, |
|
"logps/chosen": -194.17721557617188, |
|
"logps/rejected": -198.79684448242188, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.574460506439209, |
|
"rewards/margins": 0.10229263454675674, |
|
"rewards/rejected": -0.6767531633377075, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.968235444185695e-05, |
|
"logits/chosen": -2.7020184993743896, |
|
"logits/rejected": -2.7175564765930176, |
|
"logps/chosen": -211.08863830566406, |
|
"logps/rejected": -235.41836547851562, |
|
"loss": 0.7235, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5369959473609924, |
|
"rewards/margins": 0.011504769325256348, |
|
"rewards/rejected": -0.5485007762908936, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.968069709743212e-05, |
|
"logits/chosen": -2.4362404346466064, |
|
"logits/rejected": -2.5247411727905273, |
|
"logps/chosen": -149.7013397216797, |
|
"logps/rejected": -185.23251342773438, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1017133966088295, |
|
"rewards/margins": 0.3292151391506195, |
|
"rewards/rejected": -0.4309285283088684, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.9679035468365986e-05, |
|
"logits/chosen": -2.3711907863616943, |
|
"logits/rejected": -2.412506580352783, |
|
"logps/chosen": -202.7202911376953, |
|
"logps/rejected": -219.67977905273438, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3317970931529999, |
|
"rewards/margins": 0.12470696866512299, |
|
"rewards/rejected": -0.45650404691696167, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.967736955494703e-05, |
|
"logits/chosen": -2.204392910003662, |
|
"logits/rejected": -2.2046332359313965, |
|
"logps/chosen": -195.41537475585938, |
|
"logps/rejected": -196.8472442626953, |
|
"loss": 0.7358, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43655478954315186, |
|
"rewards/margins": -0.000651337206363678, |
|
"rewards/rejected": -0.4359034597873688, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.9675699357464445e-05, |
|
"logits/chosen": -2.198075771331787, |
|
"logits/rejected": -2.1894662380218506, |
|
"logps/chosen": -248.62149047851562, |
|
"logps/rejected": -267.9733581542969, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14261871576309204, |
|
"rewards/margins": 0.31586068868637085, |
|
"rewards/rejected": -0.4584794044494629, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.967402487620818e-05, |
|
"logits/chosen": -2.5536584854125977, |
|
"logits/rejected": -2.577322006225586, |
|
"logps/chosen": -224.8876190185547, |
|
"logps/rejected": -227.71383666992188, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40421125292778015, |
|
"rewards/margins": 0.11385571211576462, |
|
"rewards/rejected": -0.5180670022964478, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.9672346111468934e-05, |
|
"logits/chosen": -2.508915424346924, |
|
"logits/rejected": -2.624011993408203, |
|
"logps/chosen": -181.42379760742188, |
|
"logps/rejected": -231.30458068847656, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31741198897361755, |
|
"rewards/margins": 0.24159038066864014, |
|
"rewards/rejected": -0.5590023994445801, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.967066306353816e-05, |
|
"logits/chosen": -2.4504952430725098, |
|
"logits/rejected": -2.4388234615325928, |
|
"logps/chosen": -229.2549591064453, |
|
"logps/rejected": -238.2850341796875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.47322726249694824, |
|
"rewards/margins": 0.07009989768266678, |
|
"rewards/rejected": -0.5433271527290344, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.966897573270801e-05, |
|
"logits/chosen": -2.555962562561035, |
|
"logits/rejected": -2.5629987716674805, |
|
"logps/chosen": -172.14407348632812, |
|
"logps/rejected": -168.1295166015625, |
|
"loss": 0.7069, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2661295533180237, |
|
"rewards/margins": 0.07032840698957443, |
|
"rewards/rejected": -0.3364579379558563, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.966728411927144e-05, |
|
"logits/chosen": -2.4634742736816406, |
|
"logits/rejected": -2.4717013835906982, |
|
"logps/chosen": -206.756591796875, |
|
"logps/rejected": -202.884033203125, |
|
"loss": 0.7013, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5119624733924866, |
|
"rewards/margins": 0.043263014405965805, |
|
"rewards/rejected": -0.5552253723144531, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.9665588223522096e-05, |
|
"logits/chosen": -2.333207607269287, |
|
"logits/rejected": -2.3922958374023438, |
|
"logps/chosen": -210.04153442382812, |
|
"logps/rejected": -239.76979064941406, |
|
"loss": 0.7059, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.32687515020370483, |
|
"rewards/margins": 0.09048546105623245, |
|
"rewards/rejected": -0.41736066341400146, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.96638880457544e-05, |
|
"logits/chosen": -2.414531707763672, |
|
"logits/rejected": -2.4198668003082275, |
|
"logps/chosen": -225.68203735351562, |
|
"logps/rejected": -252.04661560058594, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44954052567481995, |
|
"rewards/margins": 0.19488830864429474, |
|
"rewards/rejected": -0.6444287896156311, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.9662183586263514e-05, |
|
"logits/chosen": -2.523651361465454, |
|
"logits/rejected": -2.4278194904327393, |
|
"logps/chosen": -222.92274475097656, |
|
"logps/rejected": -203.31668090820312, |
|
"loss": 0.8255, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6954134106636047, |
|
"rewards/margins": -0.12540754675865173, |
|
"rewards/rejected": -0.5700058937072754, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.966047484534533e-05, |
|
"logits/chosen": -2.465467929840088, |
|
"logits/rejected": -2.4773805141448975, |
|
"logps/chosen": -215.38780212402344, |
|
"logps/rejected": -222.23593139648438, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.43675118684768677, |
|
"rewards/margins": 0.19925275444984436, |
|
"rewards/rejected": -0.6360039710998535, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.965876182329648e-05, |
|
"logits/chosen": -2.4241979122161865, |
|
"logits/rejected": -2.4904754161834717, |
|
"logps/chosen": -176.40281677246094, |
|
"logps/rejected": -189.89398193359375, |
|
"loss": 0.7316, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.35698071122169495, |
|
"rewards/margins": 0.12830819189548492, |
|
"rewards/rejected": -0.48528894782066345, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.965704452041437e-05, |
|
"logits/chosen": -2.4424967765808105, |
|
"logits/rejected": -2.447554588317871, |
|
"logps/chosen": -186.7346954345703, |
|
"logps/rejected": -202.56729125976562, |
|
"loss": 0.6988, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.379626601934433, |
|
"rewards/margins": 0.05914265662431717, |
|
"rewards/rejected": -0.43876922130584717, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.9655322936997115e-05, |
|
"logits/chosen": -2.384314775466919, |
|
"logits/rejected": -2.6753270626068115, |
|
"logps/chosen": -168.5001220703125, |
|
"logps/rejected": -201.58462524414062, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.30074769258499146, |
|
"rewards/margins": 0.31708773970603943, |
|
"rewards/rejected": -0.6178354024887085, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.9653597073343594e-05, |
|
"logits/chosen": -2.44107723236084, |
|
"logits/rejected": -2.518571615219116, |
|
"logps/chosen": -190.8749542236328, |
|
"logps/rejected": -195.59471130371094, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3541903495788574, |
|
"rewards/margins": 0.2186165750026703, |
|
"rewards/rejected": -0.5728069543838501, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 7640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|