|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 14.58157977905889, |
|
"learning_rate": 1.199040767386091e-10, |
|
"logits/chosen": -1.901450514793396, |
|
"logits/rejected": -1.9076323509216309, |
|
"logps/chosen": -0.8524526953697205, |
|
"logps/rejected": -0.9626365900039673, |
|
"loss": 1.1927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.704905390739441, |
|
"rewards/margins": 0.22036786377429962, |
|
"rewards/rejected": -1.9252731800079346, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 17.76736608782741, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -2.0206170082092285, |
|
"logits/rejected": -2.0063347816467285, |
|
"logps/chosen": -1.0049196481704712, |
|
"logps/rejected": -1.1093952655792236, |
|
"loss": 1.2168, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -2.0098392963409424, |
|
"rewards/margins": 0.2089509218931198, |
|
"rewards/rejected": -2.2187905311584473, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 22.614753087644292, |
|
"learning_rate": 2.398081534772182e-09, |
|
"logits/chosen": -2.026459217071533, |
|
"logits/rejected": -2.0231809616088867, |
|
"logps/chosen": -1.051859736442566, |
|
"logps/rejected": -1.1832743883132935, |
|
"loss": 1.1863, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.103719472885132, |
|
"rewards/margins": 0.2628290057182312, |
|
"rewards/rejected": -2.366548776626587, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 17.824346372572926, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -1.981697678565979, |
|
"logits/rejected": -1.9744222164154053, |
|
"logps/chosen": -1.053879976272583, |
|
"logps/rejected": -1.1511423587799072, |
|
"loss": 1.2353, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.107759952545166, |
|
"rewards/margins": 0.19452433288097382, |
|
"rewards/rejected": -2.3022847175598145, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 19.247706292689507, |
|
"learning_rate": 4.796163069544364e-09, |
|
"logits/chosen": -2.0287587642669678, |
|
"logits/rejected": -2.028596878051758, |
|
"logps/chosen": -1.0359481573104858, |
|
"logps/rejected": -1.1375384330749512, |
|
"loss": 1.2355, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0718963146209717, |
|
"rewards/margins": 0.20318038761615753, |
|
"rewards/rejected": -2.2750768661499023, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 14.992901360893413, |
|
"learning_rate": 5.995203836930456e-09, |
|
"logits/chosen": -1.962505578994751, |
|
"logits/rejected": -1.9632362127304077, |
|
"logps/chosen": -0.9416370391845703, |
|
"logps/rejected": -1.0078415870666504, |
|
"loss": 1.2545, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8832740783691406, |
|
"rewards/margins": 0.13240887224674225, |
|
"rewards/rejected": -2.015683174133301, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 21.508515110852976, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -2.0391106605529785, |
|
"logits/rejected": -2.034660816192627, |
|
"logps/chosen": -1.0891697406768799, |
|
"logps/rejected": -1.145775556564331, |
|
"loss": 1.2676, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1783394813537598, |
|
"rewards/margins": 0.11321155726909637, |
|
"rewards/rejected": -2.291551113128662, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 20.688044326046224, |
|
"learning_rate": 8.393285371702639e-09, |
|
"logits/chosen": -2.029348373413086, |
|
"logits/rejected": -2.016831636428833, |
|
"logps/chosen": -1.1090962886810303, |
|
"logps/rejected": -1.204714059829712, |
|
"loss": 1.226, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.2181925773620605, |
|
"rewards/margins": 0.19123554229736328, |
|
"rewards/rejected": -2.409428119659424, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 24.41033214526541, |
|
"learning_rate": 9.592326139088728e-09, |
|
"logits/chosen": -2.046764850616455, |
|
"logits/rejected": -2.043759822845459, |
|
"logps/chosen": -1.166001558303833, |
|
"logps/rejected": -1.237687110900879, |
|
"loss": 1.2535, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.332003116607666, |
|
"rewards/margins": 0.1433708667755127, |
|
"rewards/rejected": -2.475374221801758, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 15.594986746473925, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -2.0026838779449463, |
|
"logits/rejected": -2.00419545173645, |
|
"logps/chosen": -1.0416425466537476, |
|
"logps/rejected": -1.148652195930481, |
|
"loss": 1.215, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.083285093307495, |
|
"rewards/margins": 0.2140192985534668, |
|
"rewards/rejected": -2.297304391860962, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 19.00699314951204, |
|
"learning_rate": 1.1990407673860912e-08, |
|
"logits/chosen": -2.040858268737793, |
|
"logits/rejected": -2.0346200466156006, |
|
"logps/chosen": -1.0072879791259766, |
|
"logps/rejected": -1.1140906810760498, |
|
"loss": 1.2176, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.014575958251953, |
|
"rewards/margins": 0.21360567212104797, |
|
"rewards/rejected": -2.2281813621520996, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 16.51858878389513, |
|
"learning_rate": 1.3189448441247003e-08, |
|
"logits/chosen": -1.9792842864990234, |
|
"logits/rejected": -1.9680954217910767, |
|
"logps/chosen": -1.0292143821716309, |
|
"logps/rejected": -1.1284914016723633, |
|
"loss": 1.2285, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0584287643432617, |
|
"rewards/margins": 0.19855372607707977, |
|
"rewards/rejected": -2.2569828033447266, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 18.233760151089655, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -1.972887396812439, |
|
"logits/rejected": -1.9710506200790405, |
|
"logps/chosen": -0.9646250009536743, |
|
"logps/rejected": -1.0660240650177002, |
|
"loss": 1.2089, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9292500019073486, |
|
"rewards/margins": 0.20279808342456818, |
|
"rewards/rejected": -2.1320481300354004, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 17.354185009707173, |
|
"learning_rate": 1.5587529976019183e-08, |
|
"logits/chosen": -2.062894105911255, |
|
"logits/rejected": -2.0622401237487793, |
|
"logps/chosen": -1.0803730487823486, |
|
"logps/rejected": -1.1523029804229736, |
|
"loss": 1.2547, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.1607460975646973, |
|
"rewards/margins": 0.14385986328125, |
|
"rewards/rejected": -2.3046059608459473, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 20.84722939621763, |
|
"learning_rate": 1.6786570743405277e-08, |
|
"logits/chosen": -1.9781713485717773, |
|
"logits/rejected": -1.971671462059021, |
|
"logps/chosen": -0.9779410362243652, |
|
"logps/rejected": -1.1225957870483398, |
|
"loss": 1.1689, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9558820724487305, |
|
"rewards/margins": 0.28930944204330444, |
|
"rewards/rejected": -2.2451915740966797, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 20.178997351363016, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -1.9949369430541992, |
|
"logits/rejected": -1.990666389465332, |
|
"logps/chosen": -1.0193713903427124, |
|
"logps/rejected": -1.136603593826294, |
|
"loss": 1.2076, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.038742780685425, |
|
"rewards/margins": 0.23446419835090637, |
|
"rewards/rejected": -2.273207187652588, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 17.43558543499963, |
|
"learning_rate": 1.9184652278177456e-08, |
|
"logits/chosen": -2.002195358276367, |
|
"logits/rejected": -1.9960581064224243, |
|
"logps/chosen": -0.948249340057373, |
|
"logps/rejected": -1.0968583822250366, |
|
"loss": 1.1513, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.896498680114746, |
|
"rewards/margins": 0.2972180247306824, |
|
"rewards/rejected": -2.1937167644500732, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 22.73064399272494, |
|
"learning_rate": 2.038369304556355e-08, |
|
"logits/chosen": -2.005837917327881, |
|
"logits/rejected": -1.9983062744140625, |
|
"logps/chosen": -1.0370620489120483, |
|
"logps/rejected": -1.1609737873077393, |
|
"loss": 1.2056, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0741240978240967, |
|
"rewards/margins": 0.24782316386699677, |
|
"rewards/rejected": -2.3219475746154785, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 23.537767016698364, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -2.0367612838745117, |
|
"logits/rejected": -2.029956817626953, |
|
"logps/chosen": -1.02077317237854, |
|
"logps/rejected": -1.1086028814315796, |
|
"loss": 1.2477, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.04154634475708, |
|
"rewards/margins": 0.1756592094898224, |
|
"rewards/rejected": -2.217205762863159, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 23.18810653891807, |
|
"learning_rate": 2.278177458033573e-08, |
|
"logits/chosen": -2.077205181121826, |
|
"logits/rejected": -2.0750718116760254, |
|
"logps/chosen": -0.9699970483779907, |
|
"logps/rejected": -1.065187692642212, |
|
"loss": 1.2125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9399940967559814, |
|
"rewards/margins": 0.19038136303424835, |
|
"rewards/rejected": -2.130375385284424, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 22.445024845318002, |
|
"learning_rate": 2.3980815347721823e-08, |
|
"logits/chosen": -2.0375380516052246, |
|
"logits/rejected": -2.034369945526123, |
|
"logps/chosen": -1.026186227798462, |
|
"logps/rejected": -1.1526433229446411, |
|
"loss": 1.1878, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.052372455596924, |
|
"rewards/margins": 0.2529138922691345, |
|
"rewards/rejected": -2.3052866458892822, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 21.106523936582494, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -2.036905288696289, |
|
"logits/rejected": -2.0340917110443115, |
|
"logps/chosen": -1.073853611946106, |
|
"logps/rejected": -1.150638461112976, |
|
"loss": 1.2507, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.147707223892212, |
|
"rewards/margins": 0.1535697877407074, |
|
"rewards/rejected": -2.301276922225952, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 15.517023295570512, |
|
"learning_rate": 2.6378896882494006e-08, |
|
"logits/chosen": -1.9886398315429688, |
|
"logits/rejected": -1.9846597909927368, |
|
"logps/chosen": -1.0078786611557007, |
|
"logps/rejected": -1.1769925355911255, |
|
"loss": 1.1505, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0157573223114014, |
|
"rewards/margins": 0.3382275700569153, |
|
"rewards/rejected": -2.353985071182251, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 17.085486504816398, |
|
"learning_rate": 2.7577937649880097e-08, |
|
"logits/chosen": -2.0190815925598145, |
|
"logits/rejected": -2.0195024013519287, |
|
"logps/chosen": -1.01227605342865, |
|
"logps/rejected": -1.1264681816101074, |
|
"loss": 1.2015, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0245521068573, |
|
"rewards/margins": 0.2283841073513031, |
|
"rewards/rejected": -2.252936363220215, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 22.24970353019487, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -2.0530002117156982, |
|
"logits/rejected": -2.0478739738464355, |
|
"logps/chosen": -1.0617554187774658, |
|
"logps/rejected": -1.1395084857940674, |
|
"loss": 1.2618, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1235108375549316, |
|
"rewards/margins": 0.15550628304481506, |
|
"rewards/rejected": -2.2790169715881348, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 19.11674829624308, |
|
"learning_rate": 2.997601918465228e-08, |
|
"logits/chosen": -1.9721157550811768, |
|
"logits/rejected": -1.968205451965332, |
|
"logps/chosen": -1.0830333232879639, |
|
"logps/rejected": -1.1736047267913818, |
|
"loss": 1.2384, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1660666465759277, |
|
"rewards/margins": 0.18114277720451355, |
|
"rewards/rejected": -2.3472094535827637, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 21.26207716688974, |
|
"learning_rate": 3.1175059952038366e-08, |
|
"logits/chosen": -1.9892946481704712, |
|
"logits/rejected": -1.997536063194275, |
|
"logps/chosen": -1.1055234670639038, |
|
"logps/rejected": -1.2160685062408447, |
|
"loss": 1.2139, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2110469341278076, |
|
"rewards/margins": 0.22109034657478333, |
|
"rewards/rejected": -2.4321370124816895, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 20.68682788684347, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -2.064192295074463, |
|
"logits/rejected": -2.0562119483947754, |
|
"logps/chosen": -1.0712614059448242, |
|
"logps/rejected": -1.2003023624420166, |
|
"loss": 1.1803, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1425228118896484, |
|
"rewards/margins": 0.2580817937850952, |
|
"rewards/rejected": -2.400604724884033, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 25.11253609162999, |
|
"learning_rate": 3.3573141486810555e-08, |
|
"logits/chosen": -2.008389472961426, |
|
"logits/rejected": -2.0066072940826416, |
|
"logps/chosen": -0.9357258677482605, |
|
"logps/rejected": -1.049773097038269, |
|
"loss": 1.1981, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.871451735496521, |
|
"rewards/margins": 0.22809453308582306, |
|
"rewards/rejected": -2.099546194076538, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 21.796986905635144, |
|
"learning_rate": 3.477218225419664e-08, |
|
"logits/chosen": -2.0430212020874023, |
|
"logits/rejected": -2.044867992401123, |
|
"logps/chosen": -1.0136518478393555, |
|
"logps/rejected": -1.1080281734466553, |
|
"loss": 1.2347, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.027303695678711, |
|
"rewards/margins": 0.1887526512145996, |
|
"rewards/rejected": -2.2160563468933105, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 20.372987042015918, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -2.0230350494384766, |
|
"logits/rejected": -2.0147769451141357, |
|
"logps/chosen": -1.0902057886123657, |
|
"logps/rejected": -1.191245436668396, |
|
"loss": 1.2137, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1804115772247314, |
|
"rewards/margins": 0.20207944512367249, |
|
"rewards/rejected": -2.382490873336792, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 18.57882925465559, |
|
"learning_rate": 3.717026378896883e-08, |
|
"logits/chosen": -1.9549649953842163, |
|
"logits/rejected": -1.9548736810684204, |
|
"logps/chosen": -1.0871379375457764, |
|
"logps/rejected": -1.1725897789001465, |
|
"loss": 1.2377, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1742758750915527, |
|
"rewards/margins": 0.17090332508087158, |
|
"rewards/rejected": -2.345179557800293, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 15.975684555438873, |
|
"learning_rate": 3.836930455635491e-08, |
|
"logits/chosen": -2.0300118923187256, |
|
"logits/rejected": -2.0213980674743652, |
|
"logps/chosen": -1.0087685585021973, |
|
"logps/rejected": -1.1406135559082031, |
|
"loss": 1.1934, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0175371170043945, |
|
"rewards/margins": 0.2636898159980774, |
|
"rewards/rejected": -2.2812271118164062, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 15.772574632019396, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -2.0156402587890625, |
|
"logits/rejected": -2.0179450511932373, |
|
"logps/chosen": -1.0460145473480225, |
|
"logps/rejected": -1.069695234298706, |
|
"loss": 1.3364, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.092029094696045, |
|
"rewards/margins": 0.04736141860485077, |
|
"rewards/rejected": -2.139390468597412, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 18.38190578321181, |
|
"learning_rate": 4.07673860911271e-08, |
|
"logits/chosen": -2.0608153343200684, |
|
"logits/rejected": -2.055126667022705, |
|
"logps/chosen": -1.0875434875488281, |
|
"logps/rejected": -1.167794108390808, |
|
"loss": 1.2366, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1750869750976562, |
|
"rewards/margins": 0.16050121188163757, |
|
"rewards/rejected": -2.335588216781616, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 19.343882527589155, |
|
"learning_rate": 4.1966426858513185e-08, |
|
"logits/chosen": -1.9883911609649658, |
|
"logits/rejected": -1.9827582836151123, |
|
"logps/chosen": -0.9889104962348938, |
|
"logps/rejected": -1.1158192157745361, |
|
"loss": 1.1858, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9778209924697876, |
|
"rewards/margins": 0.25381720066070557, |
|
"rewards/rejected": -2.2316384315490723, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 21.595830091643787, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -1.9964408874511719, |
|
"logits/rejected": -1.9924728870391846, |
|
"logps/chosen": -1.0861265659332275, |
|
"logps/rejected": -1.2027567625045776, |
|
"loss": 1.1971, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.172253131866455, |
|
"rewards/margins": 0.23326051235198975, |
|
"rewards/rejected": -2.4055135250091553, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 18.205487526741695, |
|
"learning_rate": 4.4364508393285374e-08, |
|
"logits/chosen": -2.007871389389038, |
|
"logits/rejected": -2.007930278778076, |
|
"logps/chosen": -1.05240797996521, |
|
"logps/rejected": -1.1806955337524414, |
|
"loss": 1.1777, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.10481595993042, |
|
"rewards/margins": 0.25657495856285095, |
|
"rewards/rejected": -2.361391067504883, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 16.5239346092299, |
|
"learning_rate": 4.556354916067146e-08, |
|
"logits/chosen": -2.0331404209136963, |
|
"logits/rejected": -2.0373125076293945, |
|
"logps/chosen": -1.0126136541366577, |
|
"logps/rejected": -1.0856488943099976, |
|
"loss": 1.2688, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0252273082733154, |
|
"rewards/margins": 0.14607074856758118, |
|
"rewards/rejected": -2.171297788619995, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 15.274744597058485, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -2.0328099727630615, |
|
"logits/rejected": -2.0266880989074707, |
|
"logps/chosen": -1.0222868919372559, |
|
"logps/rejected": -1.1483510732650757, |
|
"loss": 1.1822, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0445737838745117, |
|
"rewards/margins": 0.2521281838417053, |
|
"rewards/rejected": -2.2967021465301514, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 19.044775104319285, |
|
"learning_rate": 4.796163069544365e-08, |
|
"logits/chosen": -2.0326080322265625, |
|
"logits/rejected": -2.0330114364624023, |
|
"logps/chosen": -0.9962165951728821, |
|
"logps/rejected": -1.049239993095398, |
|
"loss": 1.2716, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9924331903457642, |
|
"rewards/margins": 0.106046661734581, |
|
"rewards/rejected": -2.098479986190796, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 18.59587302480316, |
|
"learning_rate": 4.916067146282973e-08, |
|
"logits/chosen": -2.0307698249816895, |
|
"logits/rejected": -2.028846025466919, |
|
"logps/chosen": -1.0742970705032349, |
|
"logps/rejected": -1.1461079120635986, |
|
"loss": 1.2611, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1485941410064697, |
|
"rewards/margins": 0.1436215192079544, |
|
"rewards/rejected": -2.2922158241271973, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 16.815742736953002, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -2.0101230144500732, |
|
"logits/rejected": -2.0144143104553223, |
|
"logps/chosen": -1.0453675985336304, |
|
"logps/rejected": -1.1239204406738281, |
|
"loss": 1.2427, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0907351970672607, |
|
"rewards/margins": 0.1571054756641388, |
|
"rewards/rejected": -2.2478408813476562, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 17.75452942549568, |
|
"learning_rate": 4.999851500573209e-08, |
|
"logits/chosen": -1.9898436069488525, |
|
"logits/rejected": -1.9907649755477905, |
|
"logps/chosen": -1.0584254264831543, |
|
"logps/rejected": -1.0997257232666016, |
|
"loss": 1.3009, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -2.1168508529663086, |
|
"rewards/margins": 0.08260075747966766, |
|
"rewards/rejected": -2.199451446533203, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 15.929802428494124, |
|
"learning_rate": 4.999535180235972e-08, |
|
"logits/chosen": -1.9864879846572876, |
|
"logits/rejected": -1.9866752624511719, |
|
"logps/chosen": -1.0216079950332642, |
|
"logps/rejected": -1.143937110900879, |
|
"loss": 1.1961, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0432159900665283, |
|
"rewards/margins": 0.2446581870317459, |
|
"rewards/rejected": -2.287874221801758, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 17.924032675250256, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -2.0104360580444336, |
|
"logits/rejected": -2.006624221801758, |
|
"logps/chosen": -1.1455620527267456, |
|
"logps/rejected": -1.1853464841842651, |
|
"loss": 1.3022, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.291124105453491, |
|
"rewards/margins": 0.07956884801387787, |
|
"rewards/rejected": -2.3706929683685303, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 24.17220460895587, |
|
"learning_rate": 4.9983754531428326e-08, |
|
"logits/chosen": -2.0079081058502197, |
|
"logits/rejected": -2.00258731842041, |
|
"logps/chosen": -1.1706523895263672, |
|
"logps/rejected": -1.2871944904327393, |
|
"loss": 1.2011, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3413047790527344, |
|
"rewards/margins": 0.23308411240577698, |
|
"rewards/rejected": -2.5743889808654785, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 22.958829143377635, |
|
"learning_rate": 4.997532127910954e-08, |
|
"logits/chosen": -2.04119873046875, |
|
"logits/rejected": -2.029076099395752, |
|
"logps/chosen": -1.100618839263916, |
|
"logps/rejected": -1.202358365058899, |
|
"loss": 1.2196, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.201237678527832, |
|
"rewards/margins": 0.20347890257835388, |
|
"rewards/rejected": -2.404716730117798, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 21.220771734165737, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -2.10054087638855, |
|
"logits/rejected": -2.090383768081665, |
|
"logps/chosen": -0.9847520589828491, |
|
"logps/rejected": -1.1071968078613281, |
|
"loss": 1.1835, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.9695041179656982, |
|
"rewards/margins": 0.24488914012908936, |
|
"rewards/rejected": -2.2143936157226562, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 18.815281247411335, |
|
"learning_rate": 4.9953188504838225e-08, |
|
"logits/chosen": -2.023686408996582, |
|
"logits/rejected": -2.0228590965270996, |
|
"logps/chosen": -0.9880355596542358, |
|
"logps/rejected": -1.1021173000335693, |
|
"loss": 1.1932, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9760711193084717, |
|
"rewards/margins": 0.22816362977027893, |
|
"rewards/rejected": -2.2042346000671387, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 18.652142843140656, |
|
"learning_rate": 4.993949053872834e-08, |
|
"logits/chosen": -2.0161242485046387, |
|
"logits/rejected": -2.0025501251220703, |
|
"logps/chosen": -1.012613296508789, |
|
"logps/rejected": -1.140053391456604, |
|
"loss": 1.1806, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.025226593017578, |
|
"rewards/margins": 0.25488021969795227, |
|
"rewards/rejected": -2.280106782913208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 19.243693238325108, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -2.087251663208008, |
|
"logits/rejected": -2.080854654312134, |
|
"logps/chosen": -1.0440865755081177, |
|
"logps/rejected": -1.158582091331482, |
|
"loss": 1.2076, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0881731510162354, |
|
"rewards/margins": 0.2289910614490509, |
|
"rewards/rejected": -2.317164182662964, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 16.084908380302842, |
|
"learning_rate": 4.990683626304467e-08, |
|
"logits/chosen": -2.010878801345825, |
|
"logits/rejected": -2.009476900100708, |
|
"logps/chosen": -1.1068270206451416, |
|
"logps/rejected": -1.2030669450759888, |
|
"loss": 1.2196, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.213654041290283, |
|
"rewards/margins": 0.19247998297214508, |
|
"rewards/rejected": -2.4061338901519775, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 17.74791604713886, |
|
"learning_rate": 4.9887882248931646e-08, |
|
"logits/chosen": -1.9751720428466797, |
|
"logits/rejected": -1.9651315212249756, |
|
"logps/chosen": -0.9842063188552856, |
|
"logps/rejected": -1.0612623691558838, |
|
"loss": 1.25, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9684126377105713, |
|
"rewards/margins": 0.1541123390197754, |
|
"rewards/rejected": -2.1225247383117676, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 22.74397828503106, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -2.0001473426818848, |
|
"logits/rejected": -1.9932626485824585, |
|
"logps/chosen": -1.0306423902511597, |
|
"logps/rejected": -1.132361650466919, |
|
"loss": 1.2207, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0612847805023193, |
|
"rewards/margins": 0.20343880355358124, |
|
"rewards/rejected": -2.264723300933838, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 17.28890965818364, |
|
"learning_rate": 4.984472713001416e-08, |
|
"logits/chosen": -1.9692100286483765, |
|
"logits/rejected": -1.9698715209960938, |
|
"logps/chosen": -1.0003505945205688, |
|
"logps/rejected": -1.0772594213485718, |
|
"loss": 1.2685, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0007011890411377, |
|
"rewards/margins": 0.1538175493478775, |
|
"rewards/rejected": -2.1545188426971436, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 17.119812277596985, |
|
"learning_rate": 4.982052905883637e-08, |
|
"logits/chosen": -2.0280909538269043, |
|
"logits/rejected": -2.0286812782287598, |
|
"logps/chosen": -1.0809977054595947, |
|
"logps/rejected": -1.1807363033294678, |
|
"loss": 1.2255, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1619954109191895, |
|
"rewards/margins": 0.1994771659374237, |
|
"rewards/rejected": -2.3614726066589355, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 16.296045615559738, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -2.0341217517852783, |
|
"logits/rejected": -2.0200934410095215, |
|
"logps/chosen": -1.0948175191879272, |
|
"logps/rejected": -1.2078857421875, |
|
"loss": 1.2102, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1896350383758545, |
|
"rewards/margins": 0.22613653540611267, |
|
"rewards/rejected": -2.415771484375, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 19.547805034076156, |
|
"learning_rate": 4.976690039804555e-08, |
|
"logits/chosen": -2.0328009128570557, |
|
"logits/rejected": -2.03126859664917, |
|
"logps/chosen": -0.9873042106628418, |
|
"logps/rejected": -1.0673751831054688, |
|
"loss": 1.2467, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9746084213256836, |
|
"rewards/margins": 0.16014157235622406, |
|
"rewards/rejected": -2.1347503662109375, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 21.43040480262364, |
|
"learning_rate": 4.973747357830592e-08, |
|
"logits/chosen": -2.020263195037842, |
|
"logits/rejected": -2.0205166339874268, |
|
"logps/chosen": -1.0274614095687866, |
|
"logps/rejected": -1.164903998374939, |
|
"loss": 1.1672, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0549228191375732, |
|
"rewards/margins": 0.2748851776123047, |
|
"rewards/rejected": -2.329807996749878, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 19.58018590074545, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -2.076049566268921, |
|
"logits/rejected": -2.072026491165161, |
|
"logps/chosen": -1.0331029891967773, |
|
"logps/rejected": -1.1752078533172607, |
|
"loss": 1.1687, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0662059783935547, |
|
"rewards/margins": 0.2842100262641907, |
|
"rewards/rejected": -2.3504157066345215, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 21.116047152924747, |
|
"learning_rate": 4.967340530291242e-08, |
|
"logits/chosen": -2.02950382232666, |
|
"logits/rejected": -2.01965594291687, |
|
"logps/chosen": -1.09267258644104, |
|
"logps/rejected": -1.150689959526062, |
|
"loss": 1.2681, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.18534517288208, |
|
"rewards/margins": 0.11603420972824097, |
|
"rewards/rejected": -2.301379919052124, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 24.714444992958434, |
|
"learning_rate": 4.9638768350993755e-08, |
|
"logits/chosen": -2.0273375511169434, |
|
"logits/rejected": -2.019911289215088, |
|
"logps/chosen": -0.9958856701850891, |
|
"logps/rejected": -1.082914113998413, |
|
"loss": 1.2353, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.9917713403701782, |
|
"rewards/margins": 0.1740569919347763, |
|
"rewards/rejected": -2.165828227996826, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 20.802905839756523, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -2.040907859802246, |
|
"logits/rejected": -2.040799379348755, |
|
"logps/chosen": -1.0272337198257446, |
|
"logps/rejected": -1.1544668674468994, |
|
"loss": 1.1828, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0544674396514893, |
|
"rewards/margins": 0.2544659674167633, |
|
"rewards/rejected": -2.308933734893799, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 16.14908066968672, |
|
"learning_rate": 4.9564300992714914e-08, |
|
"logits/chosen": -1.9591153860092163, |
|
"logits/rejected": -1.9602371454238892, |
|
"logps/chosen": -1.0113928318023682, |
|
"logps/rejected": -1.1170381307601929, |
|
"loss": 1.2104, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0227856636047363, |
|
"rewards/margins": 0.211290642619133, |
|
"rewards/rejected": -2.2340762615203857, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 21.97651048317045, |
|
"learning_rate": 4.952447582110253e-08, |
|
"logits/chosen": -2.0540075302124023, |
|
"logits/rejected": -2.039557933807373, |
|
"logps/chosen": -1.037952184677124, |
|
"logps/rejected": -1.117681860923767, |
|
"loss": 1.2477, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.075904369354248, |
|
"rewards/margins": 0.15945938229560852, |
|
"rewards/rejected": -2.235363721847534, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 23.865036653626944, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -2.033937454223633, |
|
"logits/rejected": -2.0348212718963623, |
|
"logps/chosen": -1.0879608392715454, |
|
"logps/rejected": -1.1745898723602295, |
|
"loss": 1.2453, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.175921678543091, |
|
"rewards/margins": 0.17325839400291443, |
|
"rewards/rejected": -2.349179744720459, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 20.3891833338485, |
|
"learning_rate": 4.943965649046064e-08, |
|
"logits/chosen": -2.00368332862854, |
|
"logits/rejected": -1.994360327720642, |
|
"logps/chosen": -1.0627825260162354, |
|
"logps/rejected": -1.1664403676986694, |
|
"loss": 1.2155, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1255650520324707, |
|
"rewards/margins": 0.2073155641555786, |
|
"rewards/rejected": -2.332880735397339, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 19.075566809881007, |
|
"learning_rate": 4.9394668293879835e-08, |
|
"logits/chosen": -1.9593700170516968, |
|
"logits/rejected": -1.950269341468811, |
|
"logps/chosen": -1.0373146533966064, |
|
"logps/rejected": -1.1066360473632812, |
|
"loss": 1.2628, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.074629306793213, |
|
"rewards/margins": 0.13864275813102722, |
|
"rewards/rejected": -2.2132720947265625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 24.96158275365681, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -2.0235979557037354, |
|
"logits/rejected": -2.0184168815612793, |
|
"logps/chosen": -1.1050894260406494, |
|
"logps/rejected": -1.2094438076019287, |
|
"loss": 1.226, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.210178852081299, |
|
"rewards/margins": 0.20870868861675262, |
|
"rewards/rejected": -2.4188876152038574, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 20.196803734367005, |
|
"learning_rate": 4.929955065039848e-08, |
|
"logits/chosen": -2.0198333263397217, |
|
"logits/rejected": -2.014254093170166, |
|
"logps/chosen": -1.0191075801849365, |
|
"logps/rejected": -1.1514732837677002, |
|
"loss": 1.183, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.038215160369873, |
|
"rewards/margins": 0.2647314965724945, |
|
"rewards/rejected": -2.3029465675354004, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 19.12481684007211, |
|
"learning_rate": 4.92494278898755e-08, |
|
"logits/chosen": -1.9860668182373047, |
|
"logits/rejected": -1.9829334020614624, |
|
"logps/chosen": -0.8973162770271301, |
|
"logps/rejected": -1.0221717357635498, |
|
"loss": 1.1965, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.7946325540542603, |
|
"rewards/margins": 0.2497110813856125, |
|
"rewards/rejected": -2.0443434715270996, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 18.885298336896575, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -2.0109028816223145, |
|
"logits/rejected": -2.0047824382781982, |
|
"logps/chosen": -1.042280912399292, |
|
"logps/rejected": -1.1657941341400146, |
|
"loss": 1.1849, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.084561824798584, |
|
"rewards/margins": 0.2470264732837677, |
|
"rewards/rejected": -2.3315882682800293, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 20.51392204287621, |
|
"learning_rate": 4.9144072108132725e-08, |
|
"logits/chosen": -2.0101022720336914, |
|
"logits/rejected": -1.9990341663360596, |
|
"logps/chosen": -1.0220484733581543, |
|
"logps/rejected": -1.1054035425186157, |
|
"loss": 1.2509, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0440969467163086, |
|
"rewards/margins": 0.16670992970466614, |
|
"rewards/rejected": -2.2108070850372314, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 17.784848123993633, |
|
"learning_rate": 4.908884649298937e-08, |
|
"logits/chosen": -2.0006046295166016, |
|
"logits/rejected": -2.0075409412384033, |
|
"logps/chosen": -1.0191365480422974, |
|
"logps/rejected": -1.079302430152893, |
|
"loss": 1.2835, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.0382730960845947, |
|
"rewards/margins": 0.12033157050609589, |
|
"rewards/rejected": -2.158604860305786, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 23.05051802988211, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -2.0201849937438965, |
|
"logits/rejected": -2.0148415565490723, |
|
"logps/chosen": -1.1010781526565552, |
|
"logps/rejected": -1.1906977891921997, |
|
"loss": 1.2389, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2021563053131104, |
|
"rewards/margins": 0.1792391687631607, |
|
"rewards/rejected": -2.3813955783843994, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 19.69096268460173, |
|
"learning_rate": 4.897331922454931e-08, |
|
"logits/chosen": -1.9745019674301147, |
|
"logits/rejected": -1.9782997369766235, |
|
"logps/chosen": -1.0038034915924072, |
|
"logps/rejected": -1.1137539148330688, |
|
"loss": 1.216, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0076069831848145, |
|
"rewards/margins": 0.21990080177783966, |
|
"rewards/rejected": -2.2275078296661377, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 20.603497657086212, |
|
"learning_rate": 4.891302569234256e-08, |
|
"logits/chosen": -1.9754327535629272, |
|
"logits/rejected": -1.978355050086975, |
|
"logps/chosen": -0.9768314361572266, |
|
"logps/rejected": -1.1296206712722778, |
|
"loss": 1.1631, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9536628723144531, |
|
"rewards/margins": 0.30557847023010254, |
|
"rewards/rejected": -2.2592413425445557, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 22.064873844463353, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -1.9894813299179077, |
|
"logits/rejected": -1.988013505935669, |
|
"logps/chosen": -1.0405890941619873, |
|
"logps/rejected": -1.1219489574432373, |
|
"loss": 1.2394, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0811781883239746, |
|
"rewards/margins": 0.1627195179462433, |
|
"rewards/rejected": -2.2438979148864746, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 17.44205394859882, |
|
"learning_rate": 4.87874000235894e-08, |
|
"logits/chosen": -2.015829563140869, |
|
"logits/rejected": -2.010009765625, |
|
"logps/chosen": -1.075958490371704, |
|
"logps/rejected": -1.2331421375274658, |
|
"loss": 1.1593, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.151916980743408, |
|
"rewards/margins": 0.3143673837184906, |
|
"rewards/rejected": -2.4662842750549316, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 19.511392524751066, |
|
"learning_rate": 4.872207671800876e-08, |
|
"logits/chosen": -2.0384624004364014, |
|
"logits/rejected": -2.0348961353302, |
|
"logps/chosen": -1.0448932647705078, |
|
"logps/rejected": -1.1220283508300781, |
|
"loss": 1.2573, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0897865295410156, |
|
"rewards/margins": 0.1542699635028839, |
|
"rewards/rejected": -2.2440567016601562, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 16.00392428501834, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -2.020700693130493, |
|
"logits/rejected": -2.0231757164001465, |
|
"logps/chosen": -1.0133837461471558, |
|
"logps/rejected": -1.103539228439331, |
|
"loss": 1.2329, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0267674922943115, |
|
"rewards/margins": 0.18031062185764313, |
|
"rewards/rejected": -2.207078456878662, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 21.25089832156162, |
|
"learning_rate": 4.858643212390985e-08, |
|
"logits/chosen": -2.02546763420105, |
|
"logits/rejected": -2.015793561935425, |
|
"logps/chosen": -1.029206395149231, |
|
"logps/rejected": -1.115379810333252, |
|
"loss": 1.2494, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.058412790298462, |
|
"rewards/margins": 0.1723467856645584, |
|
"rewards/rejected": -2.230759620666504, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 18.36102437546264, |
|
"learning_rate": 4.851612037064643e-08, |
|
"logits/chosen": -1.9968347549438477, |
|
"logits/rejected": -1.9947017431259155, |
|
"logps/chosen": -0.9606590270996094, |
|
"logps/rejected": -1.0800572633743286, |
|
"loss": 1.2043, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9213180541992188, |
|
"rewards/margins": 0.2387961596250534, |
|
"rewards/rejected": -2.1601145267486572, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 15.958170019881827, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -2.0284435749053955, |
|
"logits/rejected": -2.028543472290039, |
|
"logps/chosen": -1.0880385637283325, |
|
"logps/rejected": -1.1600936651229858, |
|
"loss": 1.2625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.176077127456665, |
|
"rewards/margins": 0.14411017298698425, |
|
"rewards/rejected": -2.3201873302459717, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 15.447742706439062, |
|
"learning_rate": 4.8370542664473805e-08, |
|
"logits/chosen": -2.034883499145508, |
|
"logits/rejected": -2.029095411300659, |
|
"logps/chosen": -1.0500915050506592, |
|
"logps/rejected": -1.1546186208724976, |
|
"loss": 1.2247, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1001830101013184, |
|
"rewards/margins": 0.20905427634716034, |
|
"rewards/rejected": -2.309237241744995, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 17.975604985927344, |
|
"learning_rate": 4.829528694507624e-08, |
|
"logits/chosen": -2.0074715614318848, |
|
"logits/rejected": -2.003307342529297, |
|
"logps/chosen": -1.1618878841400146, |
|
"logps/rejected": -1.2185773849487305, |
|
"loss": 1.2792, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.3237757682800293, |
|
"rewards/margins": 0.11337918043136597, |
|
"rewards/rejected": -2.437154769897461, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 20.210558301206753, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -2.0488550662994385, |
|
"logits/rejected": -2.0428953170776367, |
|
"logps/chosen": -1.004872441291809, |
|
"logps/rejected": -1.1229604482650757, |
|
"loss": 1.1965, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.009744882583618, |
|
"rewards/margins": 0.2361760139465332, |
|
"rewards/rejected": -2.2459208965301514, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 19.796801319824315, |
|
"learning_rate": 4.813986822411833e-08, |
|
"logits/chosen": -2.0358963012695312, |
|
"logits/rejected": -2.0338990688323975, |
|
"logps/chosen": -1.0155360698699951, |
|
"logps/rejected": -1.0795470476150513, |
|
"loss": 1.2675, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0310721397399902, |
|
"rewards/margins": 0.12802186608314514, |
|
"rewards/rejected": -2.1590940952301025, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 19.799887695381567, |
|
"learning_rate": 4.805971614785231e-08, |
|
"logits/chosen": -2.0680534839630127, |
|
"logits/rejected": -2.0668766498565674, |
|
"logps/chosen": -1.015794038772583, |
|
"logps/rejected": -1.1115624904632568, |
|
"loss": 1.2198, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.031588077545166, |
|
"rewards/margins": 0.19153663516044617, |
|
"rewards/rejected": -2.2231249809265137, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 20.13934417953023, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -1.9745270013809204, |
|
"logits/rejected": -1.9761186838150024, |
|
"logps/chosen": -1.1419258117675781, |
|
"logps/rejected": -1.2145435810089111, |
|
"loss": 1.2677, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2838516235351562, |
|
"rewards/margins": 0.14523524045944214, |
|
"rewards/rejected": -2.4290871620178223, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 20.244043065077097, |
|
"learning_rate": 4.7894554735150076e-08, |
|
"logits/chosen": -1.9853111505508423, |
|
"logits/rejected": -1.9889112710952759, |
|
"logps/chosen": -1.0432493686676025, |
|
"logps/rejected": -1.1088694334030151, |
|
"loss": 1.2634, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.086498737335205, |
|
"rewards/margins": 0.13123992085456848, |
|
"rewards/rejected": -2.2177388668060303, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 23.228641230109883, |
|
"learning_rate": 4.7809557008879185e-08, |
|
"logits/chosen": -2.0164051055908203, |
|
"logits/rejected": -2.0110771656036377, |
|
"logps/chosen": -0.9736042022705078, |
|
"logps/rejected": -1.061522126197815, |
|
"loss": 1.2385, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9472084045410156, |
|
"rewards/margins": 0.17583578824996948, |
|
"rewards/rejected": -2.12304425239563, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 18.047096637073768, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -2.056691884994507, |
|
"logits/rejected": -2.0538461208343506, |
|
"logps/chosen": -1.0346996784210205, |
|
"logps/rejected": -1.1515998840332031, |
|
"loss": 1.1922, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.069399356842041, |
|
"rewards/margins": 0.23380064964294434, |
|
"rewards/rejected": -2.3031997680664062, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 19.617510099326854, |
|
"learning_rate": 4.763475739102374e-08, |
|
"logits/chosen": -2.0092320442199707, |
|
"logits/rejected": -2.015103816986084, |
|
"logps/chosen": -1.1271753311157227, |
|
"logps/rejected": -1.1942684650421143, |
|
"loss": 1.2568, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2543506622314453, |
|
"rewards/margins": 0.13418647646903992, |
|
"rewards/rejected": -2.3885369300842285, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 15.419611013936972, |
|
"learning_rate": 4.754496778713054e-08, |
|
"logits/chosen": -1.9684407711029053, |
|
"logits/rejected": -1.9725189208984375, |
|
"logps/chosen": -1.011788249015808, |
|
"logps/rejected": -1.1345211267471313, |
|
"loss": 1.2007, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.023576498031616, |
|
"rewards/margins": 0.24546551704406738, |
|
"rewards/rejected": -2.2690422534942627, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 21.26727298981788, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -2.0424587726593018, |
|
"logits/rejected": -2.0415501594543457, |
|
"logps/chosen": -1.0493271350860596, |
|
"logps/rejected": -1.1593568325042725, |
|
"loss": 1.2079, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.098654270172119, |
|
"rewards/margins": 0.2200593203306198, |
|
"rewards/rejected": -2.318713665008545, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 21.287126597379253, |
|
"learning_rate": 4.736064054816145e-08, |
|
"logits/chosen": -2.0447025299072266, |
|
"logits/rejected": -2.040843963623047, |
|
"logps/chosen": -0.9683746099472046, |
|
"logps/rejected": -1.0945460796356201, |
|
"loss": 1.1791, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9367492198944092, |
|
"rewards/margins": 0.25234299898147583, |
|
"rewards/rejected": -2.1890921592712402, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 17.20400787363182, |
|
"learning_rate": 4.726611587052933e-08, |
|
"logits/chosen": -1.9701964855194092, |
|
"logits/rejected": -1.9697643518447876, |
|
"logps/chosen": -1.1084095239639282, |
|
"logps/rejected": -1.2358492612838745, |
|
"loss": 1.1795, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2168190479278564, |
|
"rewards/margins": 0.25487983226776123, |
|
"rewards/rejected": -2.471698522567749, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 22.283752817420396, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -2.0302042961120605, |
|
"logits/rejected": -2.027015447616577, |
|
"logps/chosen": -1.1091606616973877, |
|
"logps/rejected": -1.2056801319122314, |
|
"loss": 1.234, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.2183213233947754, |
|
"rewards/margins": 0.19303929805755615, |
|
"rewards/rejected": -2.411360263824463, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 19.81391793524057, |
|
"learning_rate": 4.707237762197549e-08, |
|
"logits/chosen": -2.0068042278289795, |
|
"logits/rejected": -2.0036396980285645, |
|
"logps/chosen": -1.0078332424163818, |
|
"logps/rejected": -1.1272451877593994, |
|
"loss": 1.2127, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0156664848327637, |
|
"rewards/margins": 0.23882417380809784, |
|
"rewards/rejected": -2.254490375518799, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 23.576990411457878, |
|
"learning_rate": 4.697317767005265e-08, |
|
"logits/chosen": -2.0253381729125977, |
|
"logits/rejected": -2.0218160152435303, |
|
"logps/chosen": -1.0018768310546875, |
|
"logps/rejected": -1.0942353010177612, |
|
"loss": 1.2568, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.003753662109375, |
|
"rewards/margins": 0.18471679091453552, |
|
"rewards/rejected": -2.1884706020355225, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 17.3299238751784, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -2.0205771923065186, |
|
"logits/rejected": -2.0157430171966553, |
|
"logps/chosen": -1.0319100618362427, |
|
"logps/rejected": -1.1281745433807373, |
|
"loss": 1.2108, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0638201236724854, |
|
"rewards/margins": 0.19252923130989075, |
|
"rewards/rejected": -2.2563490867614746, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 16.502866342573686, |
|
"learning_rate": 4.677015097715994e-08, |
|
"logits/chosen": -1.9677197933197021, |
|
"logits/rejected": -1.9671709537506104, |
|
"logps/chosen": -1.0225335359573364, |
|
"logps/rejected": -1.1552186012268066, |
|
"loss": 1.1991, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.045067071914673, |
|
"rewards/margins": 0.26537007093429565, |
|
"rewards/rejected": -2.3104372024536133, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 17.494781512319758, |
|
"learning_rate": 4.666633850812825e-08, |
|
"logits/chosen": -2.0190138816833496, |
|
"logits/rejected": -2.0128960609436035, |
|
"logps/chosen": -1.0130321979522705, |
|
"logps/rejected": -1.0945827960968018, |
|
"loss": 1.2371, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.026064395904541, |
|
"rewards/margins": 0.163101464509964, |
|
"rewards/rejected": -2.1891655921936035, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 17.52008871289576, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -1.9722799062728882, |
|
"logits/rejected": -1.9686037302017212, |
|
"logps/chosen": -0.9384390711784363, |
|
"logps/rejected": -1.0692068338394165, |
|
"loss": 1.1854, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.8768781423568726, |
|
"rewards/margins": 0.2615353763103485, |
|
"rewards/rejected": -2.138413667678833, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 17.92122527797355, |
|
"learning_rate": 4.6454151812320715e-08, |
|
"logits/chosen": -2.002856969833374, |
|
"logits/rejected": -1.9969203472137451, |
|
"logps/chosen": -1.0393486022949219, |
|
"logps/rejected": -1.1481153964996338, |
|
"loss": 1.2176, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0786972045898438, |
|
"rewards/margins": 0.21753337979316711, |
|
"rewards/rejected": -2.2962307929992676, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 20.81341856841254, |
|
"learning_rate": 4.6345792501393434e-08, |
|
"logits/chosen": -2.0019800662994385, |
|
"logits/rejected": -2.0001296997070312, |
|
"logps/chosen": -1.074857234954834, |
|
"logps/rejected": -1.201908826828003, |
|
"loss": 1.2046, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.149714469909668, |
|
"rewards/margins": 0.25410330295562744, |
|
"rewards/rejected": -2.403817653656006, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 20.6783251824387, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -2.026952028274536, |
|
"logits/rejected": -2.02778697013855, |
|
"logps/chosen": -1.087725043296814, |
|
"logps/rejected": -1.1969935894012451, |
|
"loss": 1.218, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.175450086593628, |
|
"rewards/margins": 0.2185373604297638, |
|
"rewards/rejected": -2.3939871788024902, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 24.297736541402326, |
|
"learning_rate": 4.612458003901698e-08, |
|
"logits/chosen": -2.035487174987793, |
|
"logits/rejected": -2.0278096199035645, |
|
"logps/chosen": -1.1088950634002686, |
|
"logps/rejected": -1.2112846374511719, |
|
"loss": 1.2279, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.217790126800537, |
|
"rewards/margins": 0.2047790288925171, |
|
"rewards/rejected": -2.4225692749023438, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 23.424012674763112, |
|
"learning_rate": 4.6011742437890476e-08, |
|
"logits/chosen": -2.028799533843994, |
|
"logits/rejected": -2.023322582244873, |
|
"logps/chosen": -1.0456212759017944, |
|
"logps/rejected": -1.179602026939392, |
|
"loss": 1.1772, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.091242551803589, |
|
"rewards/margins": 0.2679617702960968, |
|
"rewards/rejected": -2.359204053878784, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 16.84111836422693, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -2.0229039192199707, |
|
"logits/rejected": -2.015812397003174, |
|
"logps/chosen": -1.008448839187622, |
|
"logps/rejected": -1.1298694610595703, |
|
"loss": 1.1935, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.016897678375244, |
|
"rewards/margins": 0.2428409308195114, |
|
"rewards/rejected": -2.2597389221191406, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 18.417073733768728, |
|
"learning_rate": 4.5781644155290486e-08, |
|
"logits/chosen": -1.9799926280975342, |
|
"logits/rejected": -1.9722731113433838, |
|
"logps/chosen": -1.0475791692733765, |
|
"logps/rejected": -1.1082106828689575, |
|
"loss": 1.2711, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.095158338546753, |
|
"rewards/margins": 0.12126290798187256, |
|
"rewards/rejected": -2.216421365737915, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 18.07389739764431, |
|
"learning_rate": 4.566439964877613e-08, |
|
"logits/chosen": -2.0103983879089355, |
|
"logits/rejected": -2.0063552856445312, |
|
"logps/chosen": -0.9987322092056274, |
|
"logps/rejected": -1.0854665040969849, |
|
"loss": 1.2431, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9974644184112549, |
|
"rewards/margins": 0.17346863448619843, |
|
"rewards/rejected": -2.1709330081939697, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 16.27730585147678, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -2.0533928871154785, |
|
"logits/rejected": -2.0521607398986816, |
|
"logps/chosen": -1.0483338832855225, |
|
"logps/rejected": -1.164975881576538, |
|
"loss": 1.1998, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.096667766571045, |
|
"rewards/margins": 0.2332839071750641, |
|
"rewards/rejected": -2.329951763153076, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 17.887214280151717, |
|
"learning_rate": 4.542556111376274e-08, |
|
"logits/chosen": -2.045485496520996, |
|
"logits/rejected": -2.039069175720215, |
|
"logps/chosen": -1.0746331214904785, |
|
"logps/rejected": -1.1668939590454102, |
|
"loss": 1.2386, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.149266242980957, |
|
"rewards/margins": 0.1845216453075409, |
|
"rewards/rejected": -2.3337879180908203, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 23.056739915075795, |
|
"learning_rate": 4.5303983874626506e-08, |
|
"logits/chosen": -1.9926433563232422, |
|
"logits/rejected": -1.9910094738006592, |
|
"logps/chosen": -1.0387976169586182, |
|
"logps/rejected": -1.1165183782577515, |
|
"loss": 1.2645, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0775952339172363, |
|
"rewards/margins": 0.15544185042381287, |
|
"rewards/rejected": -2.233036756515503, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 20.255328662941217, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.9955031871795654, |
|
"logits/rejected": -1.9868882894515991, |
|
"logps/chosen": -1.01637601852417, |
|
"logps/rejected": -1.1265536546707153, |
|
"loss": 1.2046, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.03275203704834, |
|
"rewards/margins": 0.22035527229309082, |
|
"rewards/rejected": -2.2531073093414307, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 22.820031947974105, |
|
"learning_rate": 4.505655618438363e-08, |
|
"logits/chosen": -1.9624055624008179, |
|
"logits/rejected": -1.958373785018921, |
|
"logps/chosen": -1.0602306127548218, |
|
"logps/rejected": -1.1650116443634033, |
|
"loss": 1.2288, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1204612255096436, |
|
"rewards/margins": 0.20956222712993622, |
|
"rewards/rejected": -2.3300232887268066, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 17.32188484657031, |
|
"learning_rate": 4.4930723126421945e-08, |
|
"logits/chosen": -2.052605152130127, |
|
"logits/rejected": -2.045747995376587, |
|
"logps/chosen": -1.0718796253204346, |
|
"logps/rejected": -1.1474034786224365, |
|
"loss": 1.2515, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.143759250640869, |
|
"rewards/margins": 0.1510476917028427, |
|
"rewards/rejected": -2.294806957244873, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 22.29345313385636, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -1.9834630489349365, |
|
"logits/rejected": -1.9713430404663086, |
|
"logps/chosen": -1.0877046585083008, |
|
"logps/rejected": -1.1743061542510986, |
|
"loss": 1.2302, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1754093170166016, |
|
"rewards/margins": 0.17320279777050018, |
|
"rewards/rejected": -2.3486123085021973, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 18.453910031808036, |
|
"learning_rate": 4.4674862811918155e-08, |
|
"logits/chosen": -1.9687246084213257, |
|
"logits/rejected": -1.9770466089248657, |
|
"logps/chosen": -0.9387677907943726, |
|
"logps/rejected": -1.091802954673767, |
|
"loss": 1.1595, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8775355815887451, |
|
"rewards/margins": 0.30607035756111145, |
|
"rewards/rejected": -2.183605909347534, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 17.380965389789868, |
|
"learning_rate": 4.454485354129966e-08, |
|
"logits/chosen": -1.9993393421173096, |
|
"logits/rejected": -1.9949222803115845, |
|
"logps/chosen": -1.0104951858520508, |
|
"logps/rejected": -1.115613579750061, |
|
"loss": 1.2192, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0209903717041016, |
|
"rewards/margins": 0.21023674309253693, |
|
"rewards/rejected": -2.231227159500122, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 17.212065292460622, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -1.9702112674713135, |
|
"logits/rejected": -1.957925796508789, |
|
"logps/chosen": -0.9851275682449341, |
|
"logps/rejected": -1.076827883720398, |
|
"loss": 1.2315, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.9702551364898682, |
|
"rewards/margins": 0.1834007203578949, |
|
"rewards/rejected": -2.153655767440796, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 21.132247964323447, |
|
"learning_rate": 4.42807224682615e-08, |
|
"logits/chosen": -1.9841238260269165, |
|
"logits/rejected": -1.9821048974990845, |
|
"logps/chosen": -0.9365342855453491, |
|
"logps/rejected": -1.0724506378173828, |
|
"loss": 1.1805, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.8730685710906982, |
|
"rewards/margins": 0.2718326449394226, |
|
"rewards/rejected": -2.1449012756347656, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 18.771464131402308, |
|
"learning_rate": 4.4146619233165604e-08, |
|
"logits/chosen": -2.0202784538269043, |
|
"logits/rejected": -2.022472858428955, |
|
"logps/chosen": -1.0653743743896484, |
|
"logps/rejected": -1.2193849086761475, |
|
"loss": 1.1677, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.130748748779297, |
|
"rewards/margins": 0.3080212473869324, |
|
"rewards/rejected": -2.438769817352295, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 24.969245977085052, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -2.016045331954956, |
|
"logits/rejected": -2.0211358070373535, |
|
"logps/chosen": -1.1183704137802124, |
|
"logps/rejected": -1.2435810565948486, |
|
"loss": 1.1935, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.236740827560425, |
|
"rewards/margins": 0.2504214644432068, |
|
"rewards/rejected": -2.4871621131896973, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 17.906289261915187, |
|
"learning_rate": 4.387438449967594e-08, |
|
"logits/chosen": -1.981329321861267, |
|
"logits/rejected": -1.9747323989868164, |
|
"logps/chosen": -0.966105580329895, |
|
"logps/rejected": -1.086094856262207, |
|
"loss": 1.1911, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.93221116065979, |
|
"rewards/margins": 0.2399786412715912, |
|
"rewards/rejected": -2.172189712524414, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 21.194952039998515, |
|
"learning_rate": 4.373627213825983e-08, |
|
"logits/chosen": -2.0677618980407715, |
|
"logits/rejected": -2.063303232192993, |
|
"logps/chosen": -1.0270278453826904, |
|
"logps/rejected": -1.1622191667556763, |
|
"loss": 1.1834, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.054055690765381, |
|
"rewards/margins": 0.27038270235061646, |
|
"rewards/rejected": -2.3244383335113525, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 16.751477190296892, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -2.038684368133545, |
|
"logits/rejected": -2.03769588470459, |
|
"logps/chosen": -0.9954586029052734, |
|
"logps/rejected": -1.0992056131362915, |
|
"loss": 1.2108, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9909172058105469, |
|
"rewards/margins": 0.20749418437480927, |
|
"rewards/rejected": -2.198411226272583, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 23.518080524189983, |
|
"learning_rate": 4.34561059690461e-08, |
|
"logits/chosen": -2.0750319957733154, |
|
"logits/rejected": -2.0769741535186768, |
|
"logps/chosen": -1.048097014427185, |
|
"logps/rejected": -1.1116466522216797, |
|
"loss": 1.2718, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.09619402885437, |
|
"rewards/margins": 0.12709912657737732, |
|
"rewards/rejected": -2.2232933044433594, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 21.385571779096182, |
|
"learning_rate": 4.3314071855773314e-08, |
|
"logits/chosen": -2.0412631034851074, |
|
"logits/rejected": -2.0419461727142334, |
|
"logps/chosen": -0.9842621684074402, |
|
"logps/rejected": -1.079594612121582, |
|
"loss": 1.2226, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9685243368148804, |
|
"rewards/margins": 0.19066500663757324, |
|
"rewards/rejected": -2.159189224243164, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 20.47235892448916, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -2.015406847000122, |
|
"logits/rejected": -2.0090079307556152, |
|
"logps/chosen": -1.077161431312561, |
|
"logps/rejected": -1.2194509506225586, |
|
"loss": 1.167, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.154322862625122, |
|
"rewards/margins": 0.28457918763160706, |
|
"rewards/rejected": -2.438901901245117, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 14.760860419836652, |
|
"learning_rate": 4.3026151493252414e-08, |
|
"logits/chosen": -2.04630446434021, |
|
"logits/rejected": -2.0420799255371094, |
|
"logps/chosen": -1.0609397888183594, |
|
"logps/rejected": -1.182420253753662, |
|
"loss": 1.1998, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1218795776367188, |
|
"rewards/margins": 0.24296097457408905, |
|
"rewards/rejected": -2.364840507507324, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 25.51297506847031, |
|
"learning_rate": 4.2880285483616895e-08, |
|
"logits/chosen": -2.006889820098877, |
|
"logits/rejected": -2.007575750350952, |
|
"logps/chosen": -1.0171369314193726, |
|
"logps/rejected": -1.1325743198394775, |
|
"loss": 1.2089, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.034273862838745, |
|
"rewards/margins": 0.23087477684020996, |
|
"rewards/rejected": -2.265148639678955, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 16.088184668896073, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.940446138381958, |
|
"logits/rejected": -1.9386374950408936, |
|
"logps/chosen": -1.0138260126113892, |
|
"logps/rejected": -1.0874342918395996, |
|
"loss": 1.2632, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0276520252227783, |
|
"rewards/margins": 0.1472165733575821, |
|
"rewards/rejected": -2.174868583679199, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 16.07758733928266, |
|
"learning_rate": 4.258479307576576e-08, |
|
"logits/chosen": -1.9868743419647217, |
|
"logits/rejected": -1.9846910238265991, |
|
"logps/chosen": -0.9640612602233887, |
|
"logps/rejected": -1.0554001331329346, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9281225204467773, |
|
"rewards/margins": 0.18267770111560822, |
|
"rewards/rejected": -2.110800266265869, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 21.299668948105722, |
|
"learning_rate": 4.243518744944626e-08, |
|
"logits/chosen": -2.015906572341919, |
|
"logits/rejected": -2.0112671852111816, |
|
"logps/chosen": -1.0006954669952393, |
|
"logps/rejected": -1.1211137771606445, |
|
"loss": 1.189, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0013909339904785, |
|
"rewards/margins": 0.24083688855171204, |
|
"rewards/rejected": -2.242227554321289, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 20.892427881150027, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -1.9761593341827393, |
|
"logits/rejected": -1.9725821018218994, |
|
"logps/chosen": -1.0507876873016357, |
|
"logps/rejected": -1.1257398128509521, |
|
"loss": 1.2494, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1015753746032715, |
|
"rewards/margins": 0.14990456402301788, |
|
"rewards/rejected": -2.2514796257019043, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 27.91445112679855, |
|
"learning_rate": 4.2132309934569e-08, |
|
"logits/chosen": -2.0479187965393066, |
|
"logits/rejected": -2.048383951187134, |
|
"logps/chosen": -1.0160915851593018, |
|
"logps/rejected": -1.1285258531570435, |
|
"loss": 1.211, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0321831703186035, |
|
"rewards/margins": 0.22486881911754608, |
|
"rewards/rejected": -2.257051706314087, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 18.441960013603726, |
|
"learning_rate": 4.197905933704989e-08, |
|
"logits/chosen": -1.9482128620147705, |
|
"logits/rejected": -1.9455543756484985, |
|
"logps/chosen": -1.0604915618896484, |
|
"logps/rejected": -1.1943556070327759, |
|
"loss": 1.2012, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.120983123779297, |
|
"rewards/margins": 0.2677280306816101, |
|
"rewards/rejected": -2.3887112140655518, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 23.66473994264621, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -1.9916549921035767, |
|
"logits/rejected": -1.9958763122558594, |
|
"logps/chosen": -1.0126399993896484, |
|
"logps/rejected": -1.138517141342163, |
|
"loss": 1.2028, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.025279998779297, |
|
"rewards/margins": 0.25175410509109497, |
|
"rewards/rejected": -2.277034282684326, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 18.457666274318385, |
|
"learning_rate": 4.1668988325514434e-08, |
|
"logits/chosen": -2.015357494354248, |
|
"logits/rejected": -2.0103044509887695, |
|
"logps/chosen": -1.1170918941497803, |
|
"logps/rejected": -1.232860803604126, |
|
"loss": 1.2242, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.2341837882995605, |
|
"rewards/margins": 0.23153769969940186, |
|
"rewards/rejected": -2.465721607208252, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 21.01670306409276, |
|
"learning_rate": 4.1512189708209844e-08, |
|
"logits/chosen": -2.0597169399261475, |
|
"logits/rejected": -2.058657169342041, |
|
"logps/chosen": -0.9408125877380371, |
|
"logps/rejected": -1.027007818222046, |
|
"loss": 1.2466, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.8816251754760742, |
|
"rewards/margins": 0.17239060997962952, |
|
"rewards/rejected": -2.054015636444092, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 22.302085677116274, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -2.0084290504455566, |
|
"logits/rejected": -2.0017716884613037, |
|
"logps/chosen": -1.1378480195999146, |
|
"logps/rejected": -1.2201100587844849, |
|
"loss": 1.2575, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.275696039199829, |
|
"rewards/margins": 0.1645239144563675, |
|
"rewards/rejected": -2.4402201175689697, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 16.946791101993835, |
|
"learning_rate": 4.119512136122882e-08, |
|
"logits/chosen": -2.07346773147583, |
|
"logits/rejected": -2.0827276706695557, |
|
"logps/chosen": -0.9949871897697449, |
|
"logps/rejected": -1.1448405981063843, |
|
"loss": 1.1711, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9899743795394897, |
|
"rewards/margins": 0.2997070550918579, |
|
"rewards/rejected": -2.2896811962127686, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 15.526038466199521, |
|
"learning_rate": 4.103487392014795e-08, |
|
"logits/chosen": -1.9936816692352295, |
|
"logits/rejected": -1.9814279079437256, |
|
"logps/chosen": -1.0004615783691406, |
|
"logps/rejected": -1.1593652963638306, |
|
"loss": 1.1449, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0009231567382812, |
|
"rewards/margins": 0.31780725717544556, |
|
"rewards/rejected": -2.318730592727661, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 16.90228466492342, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -2.027029514312744, |
|
"logits/rejected": -2.017503261566162, |
|
"logps/chosen": -0.9608215093612671, |
|
"logps/rejected": -1.091578722000122, |
|
"loss": 1.1869, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9216430187225342, |
|
"rewards/margins": 0.261514276266098, |
|
"rewards/rejected": -2.183157444000244, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 17.442053462217785, |
|
"learning_rate": 4.0711008825680645e-08, |
|
"logits/chosen": -1.9791135787963867, |
|
"logits/rejected": -1.978002905845642, |
|
"logps/chosen": -1.006446123123169, |
|
"logps/rejected": -1.1246168613433838, |
|
"loss": 1.2067, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.012892246246338, |
|
"rewards/margins": 0.23634123802185059, |
|
"rewards/rejected": -2.2492337226867676, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 19.86972344245805, |
|
"learning_rate": 4.054741393867306e-08, |
|
"logits/chosen": -1.994312047958374, |
|
"logits/rejected": -1.9914157390594482, |
|
"logps/chosen": -1.1115689277648926, |
|
"logps/rejected": -1.1622049808502197, |
|
"loss": 1.2879, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.223137855529785, |
|
"rewards/margins": 0.10127194970846176, |
|
"rewards/rejected": -2.3244099617004395, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 18.714554731415358, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -2.0033118724823, |
|
"logits/rejected": -1.9902782440185547, |
|
"logps/chosen": -1.0098048448562622, |
|
"logps/rejected": -1.1201963424682617, |
|
"loss": 1.2025, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0196096897125244, |
|
"rewards/margins": 0.2207828313112259, |
|
"rewards/rejected": -2.2403926849365234, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 18.243834119172774, |
|
"learning_rate": 4.0216956984526784e-08, |
|
"logits/chosen": -2.0470855236053467, |
|
"logits/rejected": -2.049050807952881, |
|
"logps/chosen": -1.0156313180923462, |
|
"logps/rejected": -1.1249277591705322, |
|
"loss": 1.2154, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0312626361846924, |
|
"rewards/margins": 0.2185930460691452, |
|
"rewards/rejected": -2.2498555183410645, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 16.15363780677068, |
|
"learning_rate": 4.0050118147147446e-08, |
|
"logits/chosen": -1.9841066598892212, |
|
"logits/rejected": -1.9844478368759155, |
|
"logps/chosen": -1.0981109142303467, |
|
"logps/rejected": -1.1102923154830933, |
|
"loss": 1.3395, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -2.1962218284606934, |
|
"rewards/margins": 0.024362847208976746, |
|
"rewards/rejected": -2.2205846309661865, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 17.76262200063469, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -2.029658317565918, |
|
"logits/rejected": -2.020962953567505, |
|
"logps/chosen": -0.9501702189445496, |
|
"logps/rejected": -1.1164584159851074, |
|
"loss": 1.1391, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9003404378890991, |
|
"rewards/margins": 0.33257636427879333, |
|
"rewards/rejected": -2.232916831970215, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 23.81823709896279, |
|
"learning_rate": 3.9713278391358724e-08, |
|
"logits/chosen": -2.0359702110290527, |
|
"logits/rejected": -2.0298221111297607, |
|
"logps/chosen": -1.0248148441314697, |
|
"logps/rejected": -1.1481475830078125, |
|
"loss": 1.1877, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0496296882629395, |
|
"rewards/margins": 0.24666526913642883, |
|
"rewards/rejected": -2.296295166015625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 17.830908151102502, |
|
"learning_rate": 3.954330115139328e-08, |
|
"logits/chosen": -2.015063762664795, |
|
"logits/rejected": -2.0099833011627197, |
|
"logps/chosen": -1.0277677774429321, |
|
"logps/rejected": -1.1327736377716064, |
|
"loss": 1.2221, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0555355548858643, |
|
"rewards/margins": 0.21001163125038147, |
|
"rewards/rejected": -2.265547275543213, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 25.68887769776998, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -2.070219039916992, |
|
"logits/rejected": -2.064025640487671, |
|
"logps/chosen": -1.0480725765228271, |
|
"logps/rejected": -1.1831330060958862, |
|
"loss": 1.1827, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0961451530456543, |
|
"rewards/margins": 0.27012091875076294, |
|
"rewards/rejected": -2.3662660121917725, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 16.154765963959324, |
|
"learning_rate": 3.920029168997077e-08, |
|
"logits/chosen": -2.0501182079315186, |
|
"logits/rejected": -2.048215389251709, |
|
"logps/chosen": -1.0040074586868286, |
|
"logps/rejected": -1.1317455768585205, |
|
"loss": 1.1863, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0080149173736572, |
|
"rewards/margins": 0.25547635555267334, |
|
"rewards/rejected": -2.263491153717041, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 29.765979988811136, |
|
"learning_rate": 3.9027283580662476e-08, |
|
"logits/chosen": -2.0178141593933105, |
|
"logits/rejected": -2.0118331909179688, |
|
"logps/chosen": -1.047828197479248, |
|
"logps/rejected": -1.193880319595337, |
|
"loss": 1.1762, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.095656394958496, |
|
"rewards/margins": 0.2921043336391449, |
|
"rewards/rejected": -2.387760639190674, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 16.865277551940466, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.9888120889663696, |
|
"logits/rejected": -1.9842865467071533, |
|
"logps/chosen": -0.9665737152099609, |
|
"logps/rejected": -1.1005498170852661, |
|
"loss": 1.1706, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9331474304199219, |
|
"rewards/margins": 0.2679522633552551, |
|
"rewards/rejected": -2.2010996341705322, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 20.89972191635711, |
|
"learning_rate": 3.867832141277539e-08, |
|
"logits/chosen": -2.0299296379089355, |
|
"logits/rejected": -2.020932912826538, |
|
"logps/chosen": -1.0687669515609741, |
|
"logps/rejected": -1.1805663108825684, |
|
"loss": 1.2101, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1375339031219482, |
|
"rewards/margins": 0.2235983908176422, |
|
"rewards/rejected": -2.3611326217651367, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 20.78071767638211, |
|
"learning_rate": 3.850239188479606e-08, |
|
"logits/chosen": -1.9834659099578857, |
|
"logits/rejected": -1.9868577718734741, |
|
"logps/chosen": -1.0097862482070923, |
|
"logps/rejected": -1.1004573106765747, |
|
"loss": 1.237, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0195724964141846, |
|
"rewards/margins": 0.18134194612503052, |
|
"rewards/rejected": -2.2009146213531494, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 22.02097078416428, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -2.057338237762451, |
|
"logits/rejected": -2.0585570335388184, |
|
"logps/chosen": -1.057908296585083, |
|
"logps/rejected": -1.1848082542419434, |
|
"loss": 1.1897, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.115816593170166, |
|
"rewards/margins": 0.25380033254623413, |
|
"rewards/rejected": -2.3696165084838867, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 17.325160620804777, |
|
"learning_rate": 3.81476977754933e-08, |
|
"logits/chosen": -1.9559204578399658, |
|
"logits/rejected": -1.952262282371521, |
|
"logps/chosen": -1.0270769596099854, |
|
"logps/rejected": -1.0972059965133667, |
|
"loss": 1.2578, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0541539192199707, |
|
"rewards/margins": 0.14025799930095673, |
|
"rewards/rejected": -2.1944119930267334, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 16.860559579230735, |
|
"learning_rate": 3.796895812770114e-08, |
|
"logits/chosen": -1.9805179834365845, |
|
"logits/rejected": -1.981414794921875, |
|
"logps/chosen": -1.0173685550689697, |
|
"logps/rejected": -1.1094672679901123, |
|
"loss": 1.2405, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0347371101379395, |
|
"rewards/margins": 0.18419703841209412, |
|
"rewards/rejected": -2.2189345359802246, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 22.22569351927079, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -2.010031223297119, |
|
"logits/rejected": -2.0077967643737793, |
|
"logps/chosen": -1.0069730281829834, |
|
"logps/rejected": -1.0787549018859863, |
|
"loss": 1.261, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.013946056365967, |
|
"rewards/margins": 0.14356335997581482, |
|
"rewards/rejected": -2.1575098037719727, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 18.941008996813895, |
|
"learning_rate": 3.760875646824795e-08, |
|
"logits/chosen": -1.9386460781097412, |
|
"logits/rejected": -1.942348837852478, |
|
"logps/chosen": -0.9752788543701172, |
|
"logps/rejected": -1.0792890787124634, |
|
"loss": 1.2239, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9505577087402344, |
|
"rewards/margins": 0.20802041888237, |
|
"rewards/rejected": -2.1585781574249268, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 22.29470132845054, |
|
"learning_rate": 3.742731977727623e-08, |
|
"logits/chosen": -2.031289577484131, |
|
"logits/rejected": -2.028223991394043, |
|
"logps/chosen": -1.0405927896499634, |
|
"logps/rejected": -1.1778171062469482, |
|
"loss": 1.1781, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0811855792999268, |
|
"rewards/margins": 0.27444881200790405, |
|
"rewards/rejected": -2.3556342124938965, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 19.668800943191464, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.9710372686386108, |
|
"logits/rejected": -1.9631189107894897, |
|
"logps/chosen": -1.0121662616729736, |
|
"logps/rejected": -1.1485233306884766, |
|
"loss": 1.1722, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0243325233459473, |
|
"rewards/margins": 0.2727140784263611, |
|
"rewards/rejected": -2.297046661376953, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 18.855322537148837, |
|
"learning_rate": 3.7061838443196886e-08, |
|
"logits/chosen": -2.0141379833221436, |
|
"logits/rejected": -2.0157604217529297, |
|
"logps/chosen": -1.0264530181884766, |
|
"logps/rejected": -1.149954080581665, |
|
"loss": 1.1888, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.052906036376953, |
|
"rewards/margins": 0.24700184166431427, |
|
"rewards/rejected": -2.29990816116333, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 22.698689716068593, |
|
"learning_rate": 3.68778194919179e-08, |
|
"logits/chosen": -1.984043836593628, |
|
"logits/rejected": -1.9850364923477173, |
|
"logps/chosen": -1.0795161724090576, |
|
"logps/rejected": -1.2015224695205688, |
|
"loss": 1.195, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1590323448181152, |
|
"rewards/margins": 0.24401259422302246, |
|
"rewards/rejected": -2.4030449390411377, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 20.214389140478467, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -2.0348191261291504, |
|
"logits/rejected": -2.023660659790039, |
|
"logps/chosen": -0.9398587346076965, |
|
"logps/rejected": -1.0924385786056519, |
|
"loss": 1.1626, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.879717469215393, |
|
"rewards/margins": 0.3051597476005554, |
|
"rewards/rejected": -2.1848771572113037, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 16.465610751100254, |
|
"learning_rate": 3.6507289698834064e-08, |
|
"logits/chosen": -1.9764940738677979, |
|
"logits/rejected": -1.9729808568954468, |
|
"logps/chosen": -0.9838182330131531, |
|
"logps/rejected": -1.1163631677627563, |
|
"loss": 1.1955, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9676364660263062, |
|
"rewards/margins": 0.2650895118713379, |
|
"rewards/rejected": -2.2327263355255127, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 25.365894303851952, |
|
"learning_rate": 3.6320804903743684e-08, |
|
"logits/chosen": -2.0223116874694824, |
|
"logits/rejected": -2.0218966007232666, |
|
"logps/chosen": -1.0339914560317993, |
|
"logps/rejected": -1.159183144569397, |
|
"loss": 1.1983, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0679829120635986, |
|
"rewards/margins": 0.250383585691452, |
|
"rewards/rejected": -2.318366289138794, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 17.275189136976564, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -2.011654853820801, |
|
"logits/rejected": -2.01637601852417, |
|
"logps/chosen": -1.0918588638305664, |
|
"logps/rejected": -1.2234910726547241, |
|
"loss": 1.1905, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.183717727661133, |
|
"rewards/margins": 0.26326465606689453, |
|
"rewards/rejected": -2.4469821453094482, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 18.5007118428664, |
|
"learning_rate": 3.5945461061099736e-08, |
|
"logits/chosen": -1.9712812900543213, |
|
"logits/rejected": -1.9578218460083008, |
|
"logps/chosen": -1.0444309711456299, |
|
"logps/rejected": -1.1218526363372803, |
|
"loss": 1.2707, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0888619422912598, |
|
"rewards/margins": 0.1548432558774948, |
|
"rewards/rejected": -2.2437052726745605, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 19.826773581846037, |
|
"learning_rate": 3.5756628398668446e-08, |
|
"logits/chosen": -2.0560269355773926, |
|
"logits/rejected": -2.061149835586548, |
|
"logps/chosen": -1.1327307224273682, |
|
"logps/rejected": -1.2321850061416626, |
|
"loss": 1.2409, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.2654614448547363, |
|
"rewards/margins": 0.1989085078239441, |
|
"rewards/rejected": -2.464370012283325, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 17.659111449492986, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -2.042252779006958, |
|
"logits/rejected": -2.0375871658325195, |
|
"logps/chosen": -1.051990270614624, |
|
"logps/rejected": -1.1855313777923584, |
|
"loss": 1.1891, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.103980541229248, |
|
"rewards/margins": 0.2670823037624359, |
|
"rewards/rejected": -2.371062755584717, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 24.065221888255977, |
|
"learning_rate": 3.5376707961436297e-08, |
|
"logits/chosen": -2.0278120040893555, |
|
"logits/rejected": -2.022207498550415, |
|
"logps/chosen": -1.1405036449432373, |
|
"logps/rejected": -1.2026771306991577, |
|
"loss": 1.2719, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2810072898864746, |
|
"rewards/margins": 0.12434691190719604, |
|
"rewards/rejected": -2.4053542613983154, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 12.853822919695737, |
|
"learning_rate": 3.51856468934734e-08, |
|
"logits/chosen": -1.9812190532684326, |
|
"logits/rejected": -1.9826923608779907, |
|
"logps/chosen": -0.9758992195129395, |
|
"logps/rejected": -1.0700486898422241, |
|
"loss": 1.2226, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.951798439025879, |
|
"rewards/margins": 0.18829897046089172, |
|
"rewards/rejected": -2.1400973796844482, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 20.14401512444606, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -2.0630898475646973, |
|
"logits/rejected": -2.0576171875, |
|
"logps/chosen": -1.018842101097107, |
|
"logps/rejected": -1.209272027015686, |
|
"loss": 1.1238, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.037684202194214, |
|
"rewards/margins": 0.3808597922325134, |
|
"rewards/rejected": -2.418544054031372, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 20.90969448735332, |
|
"learning_rate": 3.480139021193057e-08, |
|
"logits/chosen": -1.9834129810333252, |
|
"logits/rejected": -1.985131859779358, |
|
"logps/chosen": -0.9966486692428589, |
|
"logps/rejected": -1.1168452501296997, |
|
"loss": 1.2127, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9932973384857178, |
|
"rewards/margins": 0.24039287865161896, |
|
"rewards/rejected": -2.2336905002593994, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 28.592045906928604, |
|
"learning_rate": 3.4608221610008666e-08, |
|
"logits/chosen": -2.018594264984131, |
|
"logits/rejected": -2.0142102241516113, |
|
"logps/chosen": -0.9736968278884888, |
|
"logps/rejected": -1.1203354597091675, |
|
"loss": 1.1713, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9473936557769775, |
|
"rewards/margins": 0.2932773232460022, |
|
"rewards/rejected": -2.240670919418335, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 15.191719500704991, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.9855458736419678, |
|
"logits/rejected": -1.9946119785308838, |
|
"logps/chosen": -1.0181246995925903, |
|
"logps/rejected": -1.1509206295013428, |
|
"loss": 1.1963, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0362493991851807, |
|
"rewards/margins": 0.2655918300151825, |
|
"rewards/rejected": -2.3018412590026855, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 18.19541860204369, |
|
"learning_rate": 3.4219871777684745e-08, |
|
"logits/chosen": -1.9971675872802734, |
|
"logits/rejected": -1.984905481338501, |
|
"logps/chosen": -0.9933854937553406, |
|
"logps/rejected": -1.1145892143249512, |
|
"loss": 1.2078, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9867709875106812, |
|
"rewards/margins": 0.24240756034851074, |
|
"rewards/rejected": -2.2291784286499023, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 17.72178124025082, |
|
"learning_rate": 3.4024717846672364e-08, |
|
"logits/chosen": -2.0331177711486816, |
|
"logits/rejected": -2.026477336883545, |
|
"logps/chosen": -0.9942334890365601, |
|
"logps/rejected": -1.1216745376586914, |
|
"loss": 1.1967, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9884669780731201, |
|
"rewards/margins": 0.25488215684890747, |
|
"rewards/rejected": -2.243349075317383, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 17.801651438890392, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -2.018220901489258, |
|
"logits/rejected": -2.0154216289520264, |
|
"logps/chosen": -1.0521572828292847, |
|
"logps/rejected": -1.2008370161056519, |
|
"loss": 1.163, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1043145656585693, |
|
"rewards/margins": 0.2973593771457672, |
|
"rewards/rejected": -2.4016740322113037, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 20.318968041064025, |
|
"learning_rate": 3.3632520546559974e-08, |
|
"logits/chosen": -1.9867897033691406, |
|
"logits/rejected": -1.9751968383789062, |
|
"logps/chosen": -0.926361083984375, |
|
"logps/rejected": -1.0954601764678955, |
|
"loss": 1.1271, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.85272216796875, |
|
"rewards/margins": 0.3381980061531067, |
|
"rewards/rejected": -2.190920352935791, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 19.630592165862417, |
|
"learning_rate": 3.34355047473107e-08, |
|
"logits/chosen": -2.0014548301696777, |
|
"logits/rejected": -1.997385025024414, |
|
"logps/chosen": -1.0289537906646729, |
|
"logps/rejected": -1.118239164352417, |
|
"loss": 1.245, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0579075813293457, |
|
"rewards/margins": 0.1785707026720047, |
|
"rewards/rejected": -2.236478328704834, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 22.9649823694943, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -1.9707273244857788, |
|
"logits/rejected": -1.9716689586639404, |
|
"logps/chosen": -1.0208336114883423, |
|
"logps/rejected": -1.1546828746795654, |
|
"loss": 1.1792, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0416672229766846, |
|
"rewards/margins": 0.26769858598709106, |
|
"rewards/rejected": -2.309365749359131, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 15.719014755563348, |
|
"learning_rate": 3.303970809643828e-08, |
|
"logits/chosen": -2.000805139541626, |
|
"logits/rejected": -2.0052528381347656, |
|
"logps/chosen": -1.0358805656433105, |
|
"logps/rejected": -1.164954423904419, |
|
"loss": 1.1925, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.071761131286621, |
|
"rewards/margins": 0.2581479847431183, |
|
"rewards/rejected": -2.329908847808838, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 20.784381707823112, |
|
"learning_rate": 3.2840955067685356e-08, |
|
"logits/chosen": -2.0275561809539795, |
|
"logits/rejected": -2.031751871109009, |
|
"logps/chosen": -1.0541309118270874, |
|
"logps/rejected": -1.2037460803985596, |
|
"loss": 1.1612, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.108261823654175, |
|
"rewards/margins": 0.29923057556152344, |
|
"rewards/rejected": -2.407492160797119, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 16.91937122091795, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -2.0380663871765137, |
|
"logits/rejected": -2.0380921363830566, |
|
"logps/chosen": -0.9351627230644226, |
|
"logps/rejected": -1.1040947437286377, |
|
"loss": 1.1393, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.8703254461288452, |
|
"rewards/margins": 0.3378642499446869, |
|
"rewards/rejected": -2.2081894874572754, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 18.773223856961657, |
|
"learning_rate": 3.244180946015008e-08, |
|
"logits/chosen": -1.9662561416625977, |
|
"logits/rejected": -1.96682608127594, |
|
"logps/chosen": -1.0346488952636719, |
|
"logps/rejected": -1.0978872776031494, |
|
"loss": 1.2739, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0692977905273438, |
|
"rewards/margins": 0.12647677958011627, |
|
"rewards/rejected": -2.195774555206299, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 15.42949606476335, |
|
"learning_rate": 3.224144493965578e-08, |
|
"logits/chosen": -2.0522544384002686, |
|
"logits/rejected": -2.0557808876037598, |
|
"logps/chosen": -0.9907134175300598, |
|
"logps/rejected": -1.095879316329956, |
|
"loss": 1.217, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9814268350601196, |
|
"rewards/margins": 0.2103317677974701, |
|
"rewards/rejected": -2.191758632659912, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 17.867258553909902, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -2.0167171955108643, |
|
"logits/rejected": -2.011385202407837, |
|
"logps/chosen": -0.9775940179824829, |
|
"logps/rejected": -1.0835435390472412, |
|
"loss": 1.2152, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9551880359649658, |
|
"rewards/margins": 0.21189892292022705, |
|
"rewards/rejected": -2.1670870780944824, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 19.490021831335156, |
|
"learning_rate": 3.183920288821597e-08, |
|
"logits/chosen": -1.9968830347061157, |
|
"logits/rejected": -1.993549108505249, |
|
"logps/chosen": -1.0021111965179443, |
|
"logps/rejected": -1.163733959197998, |
|
"loss": 1.1461, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0042223930358887, |
|
"rewards/margins": 0.3232455551624298, |
|
"rewards/rejected": -2.327467918395996, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 23.55301780513215, |
|
"learning_rate": 3.1637353633225735e-08, |
|
"logits/chosen": -2.042677879333496, |
|
"logits/rejected": -2.0366151332855225, |
|
"logps/chosen": -1.0290377140045166, |
|
"logps/rejected": -1.1746978759765625, |
|
"loss": 1.1717, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.058075428009033, |
|
"rewards/margins": 0.2913200259208679, |
|
"rewards/rejected": -2.349395751953125, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 19.629394462964214, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -2.011504650115967, |
|
"logits/rejected": -2.011737823486328, |
|
"logps/chosen": -1.015564203262329, |
|
"logps/rejected": -1.1511462926864624, |
|
"loss": 1.196, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.031128406524658, |
|
"rewards/margins": 0.27116426825523376, |
|
"rewards/rejected": -2.302292585372925, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 18.105081870748133, |
|
"learning_rate": 3.1232269609552875e-08, |
|
"logits/chosen": -1.9979522228240967, |
|
"logits/rejected": -1.995548963546753, |
|
"logps/chosen": -0.9974485635757446, |
|
"logps/rejected": -1.1187690496444702, |
|
"loss": 1.2004, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9948971271514893, |
|
"rewards/margins": 0.24264100193977356, |
|
"rewards/rejected": -2.2375380992889404, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 16.05671541036881, |
|
"learning_rate": 3.102906331660444e-08, |
|
"logits/chosen": -2.0580544471740723, |
|
"logits/rejected": -2.0497653484344482, |
|
"logps/chosen": -0.9930634498596191, |
|
"logps/rejected": -1.1649951934814453, |
|
"loss": 1.1345, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9861268997192383, |
|
"rewards/margins": 0.3438633382320404, |
|
"rewards/rejected": -2.3299903869628906, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 16.003021454074023, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -1.9997708797454834, |
|
"logits/rejected": -1.992846131324768, |
|
"logps/chosen": -1.0060840845108032, |
|
"logps/rejected": -1.1501317024230957, |
|
"loss": 1.1672, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0121681690216064, |
|
"rewards/margins": 0.28809523582458496, |
|
"rewards/rejected": -2.3002634048461914, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 17.99920026821121, |
|
"learning_rate": 3.062139359029599e-08, |
|
"logits/chosen": -2.029757022857666, |
|
"logits/rejected": -2.029585361480713, |
|
"logps/chosen": -1.0290124416351318, |
|
"logps/rejected": -1.1135772466659546, |
|
"loss": 1.2472, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.0580248832702637, |
|
"rewards/margins": 0.16912977397441864, |
|
"rewards/rejected": -2.227154493331909, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 18.76605541973094, |
|
"learning_rate": 3.041695881443437e-08, |
|
"logits/chosen": -2.051182270050049, |
|
"logits/rejected": -2.04660964012146, |
|
"logps/chosen": -0.9734565019607544, |
|
"logps/rejected": -1.1085374355316162, |
|
"loss": 1.1781, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9469130039215088, |
|
"rewards/margins": 0.27016210556030273, |
|
"rewards/rejected": -2.2170748710632324, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 22.16241778931301, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -2.0314226150512695, |
|
"logits/rejected": -2.0317978858947754, |
|
"logps/chosen": -0.9990888833999634, |
|
"logps/rejected": -1.1364792585372925, |
|
"loss": 1.1757, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9981777667999268, |
|
"rewards/margins": 0.2747807502746582, |
|
"rewards/rejected": -2.272958517074585, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 19.71057724214497, |
|
"learning_rate": 3.0006961290889077e-08, |
|
"logits/chosen": -2.0183329582214355, |
|
"logits/rejected": -2.009127378463745, |
|
"logps/chosen": -1.1189202070236206, |
|
"logps/rejected": -1.286902904510498, |
|
"loss": 1.1652, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.237840414047241, |
|
"rewards/margins": 0.33596524596214294, |
|
"rewards/rejected": -2.573805809020996, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 21.95290350074783, |
|
"learning_rate": 2.980142736433833e-08, |
|
"logits/chosen": -2.008192777633667, |
|
"logits/rejected": -2.001173496246338, |
|
"logps/chosen": -1.0314289331436157, |
|
"logps/rejected": -1.0944207906723022, |
|
"loss": 1.2767, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.0628578662872314, |
|
"rewards/margins": 0.12598386406898499, |
|
"rewards/rejected": -2.1888415813446045, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 24.378073368269256, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -2.039536952972412, |
|
"logits/rejected": -2.0248141288757324, |
|
"logps/chosen": -1.140825867652893, |
|
"logps/rejected": -1.2375624179840088, |
|
"loss": 1.2147, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.281651735305786, |
|
"rewards/margins": 0.19347305595874786, |
|
"rewards/rejected": -2.4751248359680176, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 18.478093665467945, |
|
"learning_rate": 2.9389361421596725e-08, |
|
"logits/chosen": -1.9539821147918701, |
|
"logits/rejected": -1.9563089609146118, |
|
"logps/chosen": -1.0598758459091187, |
|
"logps/rejected": -1.1930882930755615, |
|
"loss": 1.1842, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1197516918182373, |
|
"rewards/margins": 0.26642483472824097, |
|
"rewards/rejected": -2.386176586151123, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 20.36561616479061, |
|
"learning_rate": 2.9182858371940126e-08, |
|
"logits/chosen": -2.0380711555480957, |
|
"logits/rejected": -2.032642364501953, |
|
"logps/chosen": -1.046942949295044, |
|
"logps/rejected": -1.1760826110839844, |
|
"loss": 1.1872, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.093885898590088, |
|
"rewards/margins": 0.2582792639732361, |
|
"rewards/rejected": -2.3521652221679688, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 18.97930654154894, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.9913969039916992, |
|
"logits/rejected": -2.0002284049987793, |
|
"logps/chosen": -0.9360917806625366, |
|
"logps/rejected": -1.0654878616333008, |
|
"loss": 1.194, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8721835613250732, |
|
"rewards/margins": 0.2587924301624298, |
|
"rewards/rejected": -2.1309757232666016, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 21.429455246868766, |
|
"learning_rate": 2.8768984696593384e-08, |
|
"logits/chosen": -1.978727102279663, |
|
"logits/rejected": -1.9692051410675049, |
|
"logps/chosen": -1.0171012878417969, |
|
"logps/rejected": -1.1342874765396118, |
|
"loss": 1.2159, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0342025756835938, |
|
"rewards/margins": 0.23437246680259705, |
|
"rewards/rejected": -2.2685749530792236, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 17.997474835260586, |
|
"learning_rate": 2.8561643164513637e-08, |
|
"logits/chosen": -1.9045627117156982, |
|
"logits/rejected": -1.900713324546814, |
|
"logps/chosen": -1.0493916273117065, |
|
"logps/rejected": -1.1679325103759766, |
|
"loss": 1.2012, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.098783254623413, |
|
"rewards/margins": 0.2370818853378296, |
|
"rewards/rejected": -2.335865020751953, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 18.947013598861446, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -1.9892809391021729, |
|
"logits/rejected": -1.9950027465820312, |
|
"logps/chosen": -1.0602303743362427, |
|
"logps/rejected": -1.1739084720611572, |
|
"loss": 1.2059, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1204607486724854, |
|
"rewards/margins": 0.22735624015331268, |
|
"rewards/rejected": -2.3478169441223145, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 19.64777274554225, |
|
"learning_rate": 2.8146223586784573e-08, |
|
"logits/chosen": -1.980348825454712, |
|
"logits/rejected": -1.9723879098892212, |
|
"logps/chosen": -1.065375566482544, |
|
"logps/rejected": -1.1992082595825195, |
|
"loss": 1.1877, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.130751132965088, |
|
"rewards/margins": 0.2676653265953064, |
|
"rewards/rejected": -2.398416519165039, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 25.24267359813586, |
|
"learning_rate": 2.7938174743416205e-08, |
|
"logits/chosen": -1.9437439441680908, |
|
"logits/rejected": -1.9405949115753174, |
|
"logps/chosen": -1.0510722398757935, |
|
"logps/rejected": -1.1613985300064087, |
|
"loss": 1.2136, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.102144479751587, |
|
"rewards/margins": 0.22065265476703644, |
|
"rewards/rejected": -2.3227970600128174, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 19.677128525439006, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -2.002791404724121, |
|
"logits/rejected": -2.0038230419158936, |
|
"logps/chosen": -1.112272024154663, |
|
"logps/rejected": -1.1909116506576538, |
|
"loss": 1.2588, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.224544048309326, |
|
"rewards/margins": 0.15727964043617249, |
|
"rewards/rejected": -2.3818233013153076, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 19.250624519916794, |
|
"learning_rate": 2.7521472071516772e-08, |
|
"logits/chosen": -1.997267484664917, |
|
"logits/rejected": -1.9961631298065186, |
|
"logps/chosen": -0.9451554417610168, |
|
"logps/rejected": -1.0597981214523315, |
|
"loss": 1.2086, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.8903108835220337, |
|
"rewards/margins": 0.22928544878959656, |
|
"rewards/rejected": -2.119596242904663, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 21.883332512382378, |
|
"learning_rate": 2.731284753546289e-08, |
|
"logits/chosen": -1.987908959388733, |
|
"logits/rejected": -1.9858767986297607, |
|
"logps/chosen": -1.081416130065918, |
|
"logps/rejected": -1.222598671913147, |
|
"loss": 1.1737, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.162832260131836, |
|
"rewards/margins": 0.28236496448516846, |
|
"rewards/rejected": -2.445197343826294, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 21.934933640290577, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -2.0529561042785645, |
|
"logits/rejected": -2.049743175506592, |
|
"logps/chosen": -1.0325100421905518, |
|
"logps/rejected": -1.1861141920089722, |
|
"loss": 1.1625, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0650200843811035, |
|
"rewards/margins": 0.3072081506252289, |
|
"rewards/rejected": -2.3722283840179443, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 18.04514565686392, |
|
"learning_rate": 2.6895125389333017e-08, |
|
"logits/chosen": -2.01184344291687, |
|
"logits/rejected": -2.0075669288635254, |
|
"logps/chosen": -1.026865839958191, |
|
"logps/rejected": -1.1786205768585205, |
|
"loss": 1.1613, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.053731679916382, |
|
"rewards/margins": 0.30350956320762634, |
|
"rewards/rejected": -2.357241153717041, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 17.097050510727, |
|
"learning_rate": 2.6686057143399028e-08, |
|
"logits/chosen": -2.0109105110168457, |
|
"logits/rejected": -2.0125486850738525, |
|
"logps/chosen": -1.0616767406463623, |
|
"logps/rejected": -1.1599900722503662, |
|
"loss": 1.2429, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.1233534812927246, |
|
"rewards/margins": 0.1966264694929123, |
|
"rewards/rejected": -2.3199801445007324, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 19.369226164400942, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -2.0144858360290527, |
|
"logits/rejected": -2.0138607025146484, |
|
"logps/chosen": -1.0874732732772827, |
|
"logps/rejected": -1.2833433151245117, |
|
"loss": 1.1241, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1749465465545654, |
|
"rewards/margins": 0.39173993468284607, |
|
"rewards/rejected": -2.5666866302490234, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 20.354220577910063, |
|
"learning_rate": 2.626757978793187e-08, |
|
"logits/chosen": -2.025035858154297, |
|
"logits/rejected": -2.018566846847534, |
|
"logps/chosen": -1.0852900743484497, |
|
"logps/rejected": -1.2093784809112549, |
|
"loss": 1.2036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1705801486968994, |
|
"rewards/margins": 0.24817702174186707, |
|
"rewards/rejected": -2.4187569618225098, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 23.587038296125034, |
|
"learning_rate": 2.6058200095628797e-08, |
|
"logits/chosen": -1.9932403564453125, |
|
"logits/rejected": -1.9965318441390991, |
|
"logps/chosen": -0.917451024055481, |
|
"logps/rejected": -1.0861783027648926, |
|
"loss": 1.1444, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.834902048110962, |
|
"rewards/margins": 0.33745482563972473, |
|
"rewards/rejected": -2.172356605529785, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 18.85455738747805, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -2.0577220916748047, |
|
"logits/rejected": -2.048609972000122, |
|
"logps/chosen": -1.0844237804412842, |
|
"logps/rejected": -1.2160663604736328, |
|
"loss": 1.2066, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1688475608825684, |
|
"rewards/margins": 0.26328495144844055, |
|
"rewards/rejected": -2.4321327209472656, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 21.402470423847422, |
|
"learning_rate": 2.5639232273487993e-08, |
|
"logits/chosen": -1.9839709997177124, |
|
"logits/rejected": -1.9742904901504517, |
|
"logps/chosen": -0.9783967137336731, |
|
"logps/rejected": -1.1004703044891357, |
|
"loss": 1.2014, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9567934274673462, |
|
"rewards/margins": 0.24414721131324768, |
|
"rewards/rejected": -2.2009406089782715, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 20.835507437389413, |
|
"learning_rate": 2.5429673595358142e-08, |
|
"logits/chosen": -2.0180060863494873, |
|
"logits/rejected": -2.0165977478027344, |
|
"logps/chosen": -1.044081687927246, |
|
"logps/rejected": -1.1655093431472778, |
|
"loss": 1.198, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.088163375854492, |
|
"rewards/margins": 0.24285531044006348, |
|
"rewards/rejected": -2.3310186862945557, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 23.508207659870397, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.9833030700683594, |
|
"logits/rejected": -1.9724918603897095, |
|
"logps/chosen": -1.1200716495513916, |
|
"logps/rejected": -1.2384498119354248, |
|
"loss": 1.1901, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.240143299102783, |
|
"rewards/margins": 0.2367565631866455, |
|
"rewards/rejected": -2.4768996238708496, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 19.732301868207852, |
|
"learning_rate": 2.5010480359492838e-08, |
|
"logits/chosen": -1.9684820175170898, |
|
"logits/rejected": -1.9655864238739014, |
|
"logps/chosen": -1.0514031648635864, |
|
"logps/rejected": -1.1109318733215332, |
|
"loss": 1.287, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.102806329727173, |
|
"rewards/margins": 0.11905747652053833, |
|
"rewards/rejected": -2.2218637466430664, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 21.18423154150406, |
|
"learning_rate": 2.480087526931091e-08, |
|
"logits/chosen": -2.0103702545166016, |
|
"logits/rejected": -1.99822998046875, |
|
"logps/chosen": -1.0027062892913818, |
|
"logps/rejected": -1.1175159215927124, |
|
"loss": 1.2155, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0054125785827637, |
|
"rewards/margins": 0.22961954772472382, |
|
"rewards/rejected": -2.235031843185425, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 19.472737759749933, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -1.969321608543396, |
|
"logits/rejected": -1.9655838012695312, |
|
"logps/chosen": -1.0758287906646729, |
|
"logps/rejected": -1.1590468883514404, |
|
"loss": 1.2565, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1516575813293457, |
|
"rewards/margins": 0.16643603146076202, |
|
"rewards/rejected": -2.318093776702881, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 21.89430802713767, |
|
"learning_rate": 2.4381721815274443e-08, |
|
"logits/chosen": -2.043560028076172, |
|
"logits/rejected": -2.0437939167022705, |
|
"logps/chosen": -1.0199779272079468, |
|
"logps/rejected": -1.1517260074615479, |
|
"loss": 1.1926, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0399558544158936, |
|
"rewards/margins": 0.26349639892578125, |
|
"rewards/rejected": -2.3034520149230957, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 19.55460769848809, |
|
"learning_rate": 2.4172202916176936e-08, |
|
"logits/chosen": -2.046525478363037, |
|
"logits/rejected": -2.048698663711548, |
|
"logps/chosen": -0.9680402874946594, |
|
"logps/rejected": -1.1348791122436523, |
|
"loss": 1.1576, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9360805749893188, |
|
"rewards/margins": 0.33367738127708435, |
|
"rewards/rejected": -2.2697582244873047, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 19.2299692018981, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -1.9817912578582764, |
|
"logits/rejected": -1.9795825481414795, |
|
"logps/chosen": -0.9565426707267761, |
|
"logps/rejected": -1.1191436052322388, |
|
"loss": 1.1591, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9130853414535522, |
|
"rewards/margins": 0.325202077627182, |
|
"rewards/rejected": -2.2382872104644775, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 22.728076530066133, |
|
"learning_rate": 2.3753354414355334e-08, |
|
"logits/chosen": -1.950277328491211, |
|
"logits/rejected": -1.9395864009857178, |
|
"logps/chosen": -1.0648443698883057, |
|
"logps/rejected": -1.1816734075546265, |
|
"loss": 1.2133, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1296887397766113, |
|
"rewards/margins": 0.23365814983844757, |
|
"rewards/rejected": -2.363346815109253, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 18.628936490431073, |
|
"learning_rate": 2.3544054254951408e-08, |
|
"logits/chosen": -1.9891974925994873, |
|
"logits/rejected": -1.9805711507797241, |
|
"logps/chosen": -0.9382593035697937, |
|
"logps/rejected": -1.1349287033081055, |
|
"loss": 1.1143, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8765186071395874, |
|
"rewards/margins": 0.3933386504650116, |
|
"rewards/rejected": -2.269857406616211, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 18.210045869943635, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -2.037346124649048, |
|
"logits/rejected": -2.0299322605133057, |
|
"logps/chosen": -1.0963926315307617, |
|
"logps/rejected": -1.1663601398468018, |
|
"loss": 1.2702, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1927852630615234, |
|
"rewards/margins": 0.1399351954460144, |
|
"rewards/rejected": -2.3327202796936035, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 19.872042875254735, |
|
"learning_rate": 2.3125775682807826e-08, |
|
"logits/chosen": -2.0520217418670654, |
|
"logits/rejected": -2.051881790161133, |
|
"logps/chosen": -1.166526198387146, |
|
"logps/rejected": -1.2666442394256592, |
|
"loss": 1.232, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.333052396774292, |
|
"rewards/margins": 0.20023572444915771, |
|
"rewards/rejected": -2.5332884788513184, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 20.61950838692453, |
|
"learning_rate": 2.291682667332464e-08, |
|
"logits/chosen": -2.0643744468688965, |
|
"logits/rejected": -2.059324264526367, |
|
"logps/chosen": -1.0485190153121948, |
|
"logps/rejected": -1.1792809963226318, |
|
"loss": 1.1918, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0970380306243896, |
|
"rewards/margins": 0.26152390241622925, |
|
"rewards/rejected": -2.3585619926452637, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 15.170314603632118, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -2.0271968841552734, |
|
"logits/rejected": -2.0215301513671875, |
|
"logps/chosen": -1.0329598188400269, |
|
"logps/rejected": -1.2096232175827026, |
|
"loss": 1.1496, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0659196376800537, |
|
"rewards/margins": 0.3533265292644501, |
|
"rewards/rejected": -2.4192464351654053, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 22.430906564092297, |
|
"learning_rate": 2.2499382647765797e-08, |
|
"logits/chosen": -2.0221495628356934, |
|
"logits/rejected": -2.018479347229004, |
|
"logps/chosen": -1.0721267461776733, |
|
"logps/rejected": -1.1613738536834717, |
|
"loss": 1.2462, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1442534923553467, |
|
"rewards/margins": 0.17849409580230713, |
|
"rewards/rejected": -2.3227477073669434, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 21.08508798237925, |
|
"learning_rate": 2.2290916976281427e-08, |
|
"logits/chosen": -2.003178119659424, |
|
"logits/rejected": -1.996995210647583, |
|
"logps/chosen": -0.9998496174812317, |
|
"logps/rejected": -1.1318352222442627, |
|
"loss": 1.2149, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9996992349624634, |
|
"rewards/margins": 0.2639711797237396, |
|
"rewards/rejected": -2.2636704444885254, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 18.07451059922042, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -1.9808975458145142, |
|
"logits/rejected": -1.9742380380630493, |
|
"logps/chosen": -1.016533613204956, |
|
"logps/rejected": -1.2077422142028809, |
|
"loss": 1.1153, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.033067226409912, |
|
"rewards/margins": 0.3824174702167511, |
|
"rewards/rejected": -2.4154844284057617, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 20.71956116817728, |
|
"learning_rate": 2.1874571586252177e-08, |
|
"logits/chosen": -2.029297351837158, |
|
"logits/rejected": -2.02240252494812, |
|
"logps/chosen": -1.0277835130691528, |
|
"logps/rejected": -1.1069574356079102, |
|
"loss": 1.2557, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0555670261383057, |
|
"rewards/margins": 0.1583479940891266, |
|
"rewards/rejected": -2.2139148712158203, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 20.82700891746505, |
|
"learning_rate": 2.1666721135069037e-08, |
|
"logits/chosen": -2.014781951904297, |
|
"logits/rejected": -2.0112876892089844, |
|
"logps/chosen": -1.1093708276748657, |
|
"logps/rejected": -1.2040464878082275, |
|
"loss": 1.2422, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2187416553497314, |
|
"rewards/margins": 0.18935146927833557, |
|
"rewards/rejected": -2.408092975616455, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 15.571889556211572, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -2.065460681915283, |
|
"logits/rejected": -2.0574898719787598, |
|
"logps/chosen": -0.9591764211654663, |
|
"logps/rejected": -1.1014626026153564, |
|
"loss": 1.1832, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9183528423309326, |
|
"rewards/margins": 0.2845722734928131, |
|
"rewards/rejected": -2.202925205230713, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 23.742748347267703, |
|
"learning_rate": 2.1251737774480915e-08, |
|
"logits/chosen": -2.0418546199798584, |
|
"logits/rejected": -2.032393455505371, |
|
"logps/chosen": -1.169003963470459, |
|
"logps/rejected": -1.2600603103637695, |
|
"loss": 1.2691, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.338007926940918, |
|
"rewards/margins": 0.18211248517036438, |
|
"rewards/rejected": -2.520120620727539, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 17.445595980137064, |
|
"learning_rate": 2.104463403669264e-08, |
|
"logits/chosen": -2.0002996921539307, |
|
"logits/rejected": -1.9975649118423462, |
|
"logps/chosen": -1.0450685024261475, |
|
"logps/rejected": -1.189254879951477, |
|
"loss": 1.1814, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.090137004852295, |
|
"rewards/margins": 0.288372665643692, |
|
"rewards/rejected": -2.378509759902954, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 17.073648259711092, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.982496976852417, |
|
"logits/rejected": -1.9782216548919678, |
|
"logps/chosen": -0.9407302141189575, |
|
"logps/rejected": -1.0727360248565674, |
|
"loss": 1.1832, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.881460428237915, |
|
"rewards/margins": 0.26401159167289734, |
|
"rewards/rejected": -2.1454720497131348, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 18.275713028107056, |
|
"learning_rate": 2.063127523779219e-08, |
|
"logits/chosen": -1.9809592962265015, |
|
"logits/rejected": -1.9768626689910889, |
|
"logps/chosen": -1.0079666376113892, |
|
"logps/rejected": -1.1944630146026611, |
|
"loss": 1.1139, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0159332752227783, |
|
"rewards/margins": 0.372992604970932, |
|
"rewards/rejected": -2.3889260292053223, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 19.734595343704587, |
|
"learning_rate": 2.0425049234096737e-08, |
|
"logits/chosen": -1.9899470806121826, |
|
"logits/rejected": -1.9840829372406006, |
|
"logps/chosen": -1.0095808506011963, |
|
"logps/rejected": -1.1258000135421753, |
|
"loss": 1.2167, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0191617012023926, |
|
"rewards/margins": 0.23243825137615204, |
|
"rewards/rejected": -2.2516000270843506, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 19.367202320168705, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.9627516269683838, |
|
"logits/rejected": -1.9618648290634155, |
|
"logps/chosen": -1.0147254467010498, |
|
"logps/rejected": -1.1612242460250854, |
|
"loss": 1.1826, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0294508934020996, |
|
"rewards/margins": 0.29299798607826233, |
|
"rewards/rejected": -2.322448492050171, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 19.099230695899127, |
|
"learning_rate": 2.0013576501378823e-08, |
|
"logits/chosen": -1.9792697429656982, |
|
"logits/rejected": -1.9728553295135498, |
|
"logps/chosen": -1.0101633071899414, |
|
"logps/rejected": -1.144830584526062, |
|
"loss": 1.1941, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.020326614379883, |
|
"rewards/margins": 0.26933470368385315, |
|
"rewards/rejected": -2.289661169052124, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 20.166585492166963, |
|
"learning_rate": 1.9808358697190426e-08, |
|
"logits/chosen": -1.972886085510254, |
|
"logits/rejected": -1.969310998916626, |
|
"logps/chosen": -0.9306098222732544, |
|
"logps/rejected": -1.065213918685913, |
|
"loss": 1.1987, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8612196445465088, |
|
"rewards/margins": 0.26920828223228455, |
|
"rewards/rejected": -2.130427837371826, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 21.09479586621852, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -2.017627239227295, |
|
"logits/rejected": -2.0076773166656494, |
|
"logps/chosen": -0.9474620819091797, |
|
"logps/rejected": -1.1191534996032715, |
|
"loss": 1.1394, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8949241638183594, |
|
"rewards/margins": 0.3433830738067627, |
|
"rewards/rejected": -2.238306999206543, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 20.423562577336806, |
|
"learning_rate": 1.9399032341961886e-08, |
|
"logits/chosen": -1.9809995889663696, |
|
"logits/rejected": -1.965026617050171, |
|
"logps/chosen": -0.9898856282234192, |
|
"logps/rejected": -1.0629730224609375, |
|
"loss": 1.2699, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.9797712564468384, |
|
"rewards/margins": 0.14617487788200378, |
|
"rewards/rejected": -2.125946044921875, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 26.09646394087967, |
|
"learning_rate": 1.9194952564874323e-08, |
|
"logits/chosen": -2.0236237049102783, |
|
"logits/rejected": -2.0175366401672363, |
|
"logps/chosen": -1.0653067827224731, |
|
"logps/rejected": -1.2080482244491577, |
|
"loss": 1.1687, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1306135654449463, |
|
"rewards/margins": 0.2854826748371124, |
|
"rewards/rejected": -2.4160964488983154, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 20.54864054770584, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.9798238277435303, |
|
"logits/rejected": -1.9740060567855835, |
|
"logps/chosen": -1.0638706684112549, |
|
"logps/rejected": -1.1493985652923584, |
|
"loss": 1.2503, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1277413368225098, |
|
"rewards/margins": 0.17105570435523987, |
|
"rewards/rejected": -2.298797130584717, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 16.856180614276347, |
|
"learning_rate": 1.8788031540572327e-08, |
|
"logits/chosen": -1.9806411266326904, |
|
"logits/rejected": -1.97232985496521, |
|
"logps/chosen": -0.9993877410888672, |
|
"logps/rejected": -1.1446069478988647, |
|
"loss": 1.1723, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9987754821777344, |
|
"rewards/margins": 0.2904384136199951, |
|
"rewards/rejected": -2.2892138957977295, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 17.03483106236852, |
|
"learning_rate": 1.858521889822565e-08, |
|
"logits/chosen": -1.99444580078125, |
|
"logits/rejected": -1.9968360662460327, |
|
"logps/chosen": -0.9734609723091125, |
|
"logps/rejected": -1.0829355716705322, |
|
"loss": 1.2235, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.946921944618225, |
|
"rewards/margins": 0.21894919872283936, |
|
"rewards/rejected": -2.1658711433410645, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 16.460944487260154, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -1.987308144569397, |
|
"logits/rejected": -1.9824047088623047, |
|
"logps/chosen": -0.9851255416870117, |
|
"logps/rejected": -1.1126196384429932, |
|
"loss": 1.1823, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9702510833740234, |
|
"rewards/margins": 0.2549881935119629, |
|
"rewards/rejected": -2.2252392768859863, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 22.305541216871692, |
|
"learning_rate": 1.8180960636595234e-08, |
|
"logits/chosen": -1.9680871963500977, |
|
"logits/rejected": -1.9659591913223267, |
|
"logps/chosen": -1.0361220836639404, |
|
"logps/rejected": -1.1791760921478271, |
|
"loss": 1.1801, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.072244167327881, |
|
"rewards/margins": 0.2861078977584839, |
|
"rewards/rejected": -2.3583521842956543, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 20.388566247239506, |
|
"learning_rate": 1.7979543434998015e-08, |
|
"logits/chosen": -2.036707639694214, |
|
"logits/rejected": -2.041584014892578, |
|
"logps/chosen": -1.1235167980194092, |
|
"logps/rejected": -1.2117664813995361, |
|
"loss": 1.238, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2470335960388184, |
|
"rewards/margins": 0.17649903893470764, |
|
"rewards/rejected": -2.4235329627990723, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 26.051372198929382, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -2.0007290840148926, |
|
"logits/rejected": -1.9938418865203857, |
|
"logps/chosen": -1.0931371450424194, |
|
"logps/rejected": -1.1865342855453491, |
|
"loss": 1.2551, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.186274290084839, |
|
"rewards/margins": 0.186794251203537, |
|
"rewards/rejected": -2.3730685710906982, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 23.4369600871899, |
|
"learning_rate": 1.757820368323213e-08, |
|
"logits/chosen": -1.9935007095336914, |
|
"logits/rejected": -1.9837433099746704, |
|
"logps/chosen": -1.1060357093811035, |
|
"logps/rejected": -1.265039324760437, |
|
"loss": 1.1608, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.212071418762207, |
|
"rewards/margins": 0.31800705194473267, |
|
"rewards/rejected": -2.530078649520874, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 22.386136161896175, |
|
"learning_rate": 1.7378309345590803e-08, |
|
"logits/chosen": -2.011643409729004, |
|
"logits/rejected": -2.0210494995117188, |
|
"logps/chosen": -1.0864661931991577, |
|
"logps/rejected": -1.2283092737197876, |
|
"loss": 1.1822, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1729323863983154, |
|
"rewards/margins": 0.28368598222732544, |
|
"rewards/rejected": -2.456618547439575, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 20.06093474919507, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -2.058295726776123, |
|
"logits/rejected": -2.054426670074463, |
|
"logps/chosen": -1.0596580505371094, |
|
"logps/rejected": -1.2001304626464844, |
|
"loss": 1.1836, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1193161010742188, |
|
"rewards/margins": 0.2809443771839142, |
|
"rewards/rejected": -2.4002609252929688, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 21.13851274050523, |
|
"learning_rate": 1.698014200453624e-08, |
|
"logits/chosen": -2.0123298168182373, |
|
"logits/rejected": -2.0198843479156494, |
|
"logps/chosen": -1.0314449071884155, |
|
"logps/rejected": -1.162095308303833, |
|
"loss": 1.1778, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.062889814376831, |
|
"rewards/margins": 0.26130083203315735, |
|
"rewards/rejected": -2.324190616607666, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 24.618506838094923, |
|
"learning_rate": 1.6781896990642964e-08, |
|
"logits/chosen": -1.9457242488861084, |
|
"logits/rejected": -1.9430221319198608, |
|
"logps/chosen": -1.1473594903945923, |
|
"logps/rejected": -1.237646460533142, |
|
"loss": 1.2441, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.2947189807891846, |
|
"rewards/margins": 0.18057429790496826, |
|
"rewards/rejected": -2.475292921066284, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 24.327220053141147, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -2.047414541244507, |
|
"logits/rejected": -2.0341315269470215, |
|
"logps/chosen": -1.0049512386322021, |
|
"logps/rejected": -1.1183750629425049, |
|
"loss": 1.2203, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.0099024772644043, |
|
"rewards/margins": 0.22684772312641144, |
|
"rewards/rejected": -2.2367501258850098, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 20.72145176972204, |
|
"learning_rate": 1.638715395417418e-08, |
|
"logits/chosen": -2.023325204849243, |
|
"logits/rejected": -2.0211892127990723, |
|
"logps/chosen": -1.068524956703186, |
|
"logps/rejected": -1.2049812078475952, |
|
"loss": 1.1841, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.137049913406372, |
|
"rewards/margins": 0.2729126214981079, |
|
"rewards/rejected": -2.4099624156951904, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 22.430027131821607, |
|
"learning_rate": 1.619068368040416e-08, |
|
"logits/chosen": -2.0218818187713623, |
|
"logits/rejected": -2.0176615715026855, |
|
"logps/chosen": -1.0006544589996338, |
|
"logps/rejected": -1.178510069847107, |
|
"loss": 1.1299, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0013089179992676, |
|
"rewards/margins": 0.3557109236717224, |
|
"rewards/rejected": -2.357020139694214, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 17.44373356290383, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -1.9683250188827515, |
|
"logits/rejected": -1.9688348770141602, |
|
"logps/chosen": -1.0345633029937744, |
|
"logps/rejected": -1.1477251052856445, |
|
"loss": 1.2115, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.069126605987549, |
|
"rewards/margins": 0.22632364928722382, |
|
"rewards/rejected": -2.295450210571289, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 20.229969474249543, |
|
"learning_rate": 1.5799614676066906e-08, |
|
"logits/chosen": -2.069178819656372, |
|
"logits/rejected": -2.0663199424743652, |
|
"logps/chosen": -0.9492548108100891, |
|
"logps/rejected": -1.086529016494751, |
|
"loss": 1.1756, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8985096216201782, |
|
"rewards/margins": 0.2745482623577118, |
|
"rewards/rejected": -2.173058032989502, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 15.993803030514755, |
|
"learning_rate": 1.560504343603587e-08, |
|
"logits/chosen": -1.9762630462646484, |
|
"logits/rejected": -1.9768295288085938, |
|
"logps/chosen": -1.0684893131256104, |
|
"logps/rejected": -1.2240431308746338, |
|
"loss": 1.1605, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.1369786262512207, |
|
"rewards/margins": 0.3111076056957245, |
|
"rewards/rejected": -2.4480862617492676, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 18.752561218885347, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -2.069488286972046, |
|
"logits/rejected": -2.0672833919525146, |
|
"logps/chosen": -1.0279682874679565, |
|
"logps/rejected": -1.1458810567855835, |
|
"loss": 1.2067, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.055936574935913, |
|
"rewards/margins": 0.23582550883293152, |
|
"rewards/rejected": -2.291762113571167, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 25.86621954412604, |
|
"learning_rate": 1.5217895867061227e-08, |
|
"logits/chosen": -2.0054798126220703, |
|
"logits/rejected": -1.9995949268341064, |
|
"logps/chosen": -1.083843469619751, |
|
"logps/rejected": -1.1836421489715576, |
|
"loss": 1.2456, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.167686939239502, |
|
"rewards/margins": 0.1995975375175476, |
|
"rewards/rejected": -2.3672842979431152, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0028818443804033, |
|
"grad_norm": 22.661974621135478, |
|
"learning_rate": 1.5025346752993098e-08, |
|
"logits/chosen": -1.9982630014419556, |
|
"logits/rejected": -1.9999568462371826, |
|
"logps/chosen": -1.072928547859192, |
|
"logps/rejected": -1.1990954875946045, |
|
"loss": 1.2011, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.145857095718384, |
|
"rewards/margins": 0.2523340880870819, |
|
"rewards/rejected": -2.398190975189209, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.010086455331412, |
|
"grad_norm": 23.303370032175714, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -2.052605390548706, |
|
"logits/rejected": -2.0548830032348633, |
|
"logps/chosen": -0.9998857378959656, |
|
"logps/rejected": -1.1840670108795166, |
|
"loss": 1.1499, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9997714757919312, |
|
"rewards/margins": 0.3683624267578125, |
|
"rewards/rejected": -2.368134021759033, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 18.168458664217237, |
|
"learning_rate": 1.4642365541781993e-08, |
|
"logits/chosen": -1.9642353057861328, |
|
"logits/rejected": -1.9557300806045532, |
|
"logps/chosen": -1.0272754430770874, |
|
"logps/rejected": -1.1926220655441284, |
|
"loss": 1.1518, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.054550886154175, |
|
"rewards/margins": 0.33069342374801636, |
|
"rewards/rejected": -2.385244131088257, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0244956772334293, |
|
"grad_norm": 17.943557689678926, |
|
"learning_rate": 1.4451960366636745e-08, |
|
"logits/chosen": -2.021503448486328, |
|
"logits/rejected": -2.032627582550049, |
|
"logps/chosen": -1.0408049821853638, |
|
"logps/rejected": -1.1747071743011475, |
|
"loss": 1.1831, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0816099643707275, |
|
"rewards/margins": 0.2678046226501465, |
|
"rewards/rejected": -2.349414348602295, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.031700288184438, |
|
"grad_norm": 19.245950456686252, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -2.014530658721924, |
|
"logits/rejected": -2.0107593536376953, |
|
"logps/chosen": -1.0312591791152954, |
|
"logps/rejected": -1.1913511753082275, |
|
"loss": 1.1526, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.062518358230591, |
|
"rewards/margins": 0.32018426060676575, |
|
"rewards/rejected": -2.382702350616455, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0389048991354466, |
|
"grad_norm": 17.81271779194038, |
|
"learning_rate": 1.407338779981389e-08, |
|
"logits/chosen": -1.9946855306625366, |
|
"logits/rejected": -1.9926750659942627, |
|
"logps/chosen": -0.9136768579483032, |
|
"logps/rejected": -1.0949757099151611, |
|
"loss": 1.1116, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8273537158966064, |
|
"rewards/margins": 0.3625979423522949, |
|
"rewards/rejected": -2.1899514198303223, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0461095100864553, |
|
"grad_norm": 21.372732915554494, |
|
"learning_rate": 1.3885247020224534e-08, |
|
"logits/chosen": -2.0047404766082764, |
|
"logits/rejected": -1.9999935626983643, |
|
"logps/chosen": -1.0015006065368652, |
|
"logps/rejected": -1.1312209367752075, |
|
"loss": 1.1909, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0030012130737305, |
|
"rewards/margins": 0.2594410181045532, |
|
"rewards/rejected": -2.262441873550415, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.053314121037464, |
|
"grad_norm": 17.00779028228168, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -2.0100817680358887, |
|
"logits/rejected": -2.006643056869507, |
|
"logps/chosen": -1.0269567966461182, |
|
"logps/rejected": -1.1220487356185913, |
|
"loss": 1.2348, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.0539135932922363, |
|
"rewards/margins": 0.1901838779449463, |
|
"rewards/rejected": -2.2440974712371826, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0605187319884726, |
|
"grad_norm": 18.64801286449761, |
|
"learning_rate": 1.3511322595359925e-08, |
|
"logits/chosen": -2.035876750946045, |
|
"logits/rejected": -2.0276687145233154, |
|
"logps/chosen": -0.9376466870307922, |
|
"logps/rejected": -1.1057026386260986, |
|
"loss": 1.1393, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8752933740615845, |
|
"rewards/margins": 0.3361119329929352, |
|
"rewards/rejected": -2.2114052772521973, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0677233429394812, |
|
"grad_norm": 17.383929031131085, |
|
"learning_rate": 1.3325565235427716e-08, |
|
"logits/chosen": -2.0277578830718994, |
|
"logits/rejected": -2.026214122772217, |
|
"logps/chosen": -0.982566237449646, |
|
"logps/rejected": -1.1270108222961426, |
|
"loss": 1.1768, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.965132474899292, |
|
"rewards/margins": 0.2888889014720917, |
|
"rewards/rejected": -2.254021644592285, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 17.047214261003482, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -1.9946644306182861, |
|
"logits/rejected": -1.9967546463012695, |
|
"logps/chosen": -0.9750404357910156, |
|
"logps/rejected": -1.1103841066360474, |
|
"loss": 1.1831, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9500808715820312, |
|
"rewards/margins": 0.2706873416900635, |
|
"rewards/rejected": -2.2207682132720947, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.0821325648414986, |
|
"grad_norm": 19.26958654488348, |
|
"learning_rate": 1.2956525509522451e-08, |
|
"logits/chosen": -1.9811160564422607, |
|
"logits/rejected": -1.9807733297348022, |
|
"logps/chosen": -1.111905574798584, |
|
"logps/rejected": -1.2153931856155396, |
|
"loss": 1.2339, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.223811149597168, |
|
"rewards/margins": 0.2069750279188156, |
|
"rewards/rejected": -2.430786371231079, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.089337175792507, |
|
"grad_norm": 19.78025222762576, |
|
"learning_rate": 1.2773269085518267e-08, |
|
"logits/chosen": -2.0117239952087402, |
|
"logits/rejected": -2.0130703449249268, |
|
"logps/chosen": -1.0760631561279297, |
|
"logps/rejected": -1.206182599067688, |
|
"loss": 1.1842, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1521263122558594, |
|
"rewards/margins": 0.2602389454841614, |
|
"rewards/rejected": -2.412365198135376, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.096541786743516, |
|
"grad_norm": 20.115808813369917, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -2.06766414642334, |
|
"logits/rejected": -2.0607457160949707, |
|
"logps/chosen": -1.0578199625015259, |
|
"logps/rejected": -1.1676528453826904, |
|
"loss": 1.226, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1156399250030518, |
|
"rewards/margins": 0.21966581046581268, |
|
"rewards/rejected": -2.335305690765381, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.1037463976945245, |
|
"grad_norm": 20.584492670465366, |
|
"learning_rate": 1.2409347525350775e-08, |
|
"logits/chosen": -2.0331482887268066, |
|
"logits/rejected": -2.02323842048645, |
|
"logps/chosen": -1.1097338199615479, |
|
"logps/rejected": -1.2564305067062378, |
|
"loss": 1.1674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.2194676399230957, |
|
"rewards/margins": 0.2933935225009918, |
|
"rewards/rejected": -2.5128610134124756, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.110951008645533, |
|
"grad_norm": 22.815499062996494, |
|
"learning_rate": 1.2228707971370421e-08, |
|
"logits/chosen": -2.0209853649139404, |
|
"logits/rejected": -2.0140042304992676, |
|
"logps/chosen": -0.9935145378112793, |
|
"logps/rejected": -1.105963945388794, |
|
"loss": 1.2266, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9870290756225586, |
|
"rewards/margins": 0.22489885985851288, |
|
"rewards/rejected": -2.211927890777588, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.118155619596542, |
|
"grad_norm": 21.166376348681847, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -2.017411708831787, |
|
"logits/rejected": -2.0006022453308105, |
|
"logps/chosen": -1.1170910596847534, |
|
"logps/rejected": -1.2319118976593018, |
|
"loss": 1.2115, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.234182119369507, |
|
"rewards/margins": 0.2296416014432907, |
|
"rewards/rejected": -2.4638237953186035, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.1253602305475505, |
|
"grad_norm": 28.013077396296502, |
|
"learning_rate": 1.187013480579762e-08, |
|
"logits/chosen": -2.0150485038757324, |
|
"logits/rejected": -2.0178308486938477, |
|
"logps/chosen": -1.0425622463226318, |
|
"logps/rejected": -1.1764047145843506, |
|
"loss": 1.2005, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0851244926452637, |
|
"rewards/margins": 0.2676849961280823, |
|
"rewards/rejected": -2.352809429168701, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 39.69497918705247, |
|
"learning_rate": 1.1692226400418073e-08, |
|
"logits/chosen": -1.9483245611190796, |
|
"logits/rejected": -1.9468435049057007, |
|
"logps/chosen": -1.0812904834747314, |
|
"logps/rejected": -1.211723804473877, |
|
"loss": 1.2174, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.162580966949463, |
|
"rewards/margins": 0.2608664035797119, |
|
"rewards/rejected": -2.423447608947754, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.139769452449568, |
|
"grad_norm": 16.186051185633424, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -1.9795656204223633, |
|
"logits/rejected": -1.9738327264785767, |
|
"logps/chosen": -1.0102039575576782, |
|
"logps/rejected": -1.191947102546692, |
|
"loss": 1.1203, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0204079151153564, |
|
"rewards/margins": 0.36348623037338257, |
|
"rewards/rejected": -2.383894205093384, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1469740634005765, |
|
"grad_norm": 19.900013908570074, |
|
"learning_rate": 1.133922847472496e-08, |
|
"logits/chosen": -2.0021681785583496, |
|
"logits/rejected": -2.002943515777588, |
|
"logps/chosen": -1.1099964380264282, |
|
"logps/rejected": -1.2084180116653442, |
|
"loss": 1.2521, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2199928760528564, |
|
"rewards/margins": 0.1968432366847992, |
|
"rewards/rejected": -2.4168360233306885, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.154178674351585, |
|
"grad_norm": 22.72131090745606, |
|
"learning_rate": 1.1164163768707952e-08, |
|
"logits/chosen": -2.0033812522888184, |
|
"logits/rejected": -1.9982162714004517, |
|
"logps/chosen": -1.0048575401306152, |
|
"logps/rejected": -1.1429532766342163, |
|
"loss": 1.1846, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0097150802612305, |
|
"rewards/margins": 0.2761916518211365, |
|
"rewards/rejected": -2.2859065532684326, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.161383285302594, |
|
"grad_norm": 17.87155121185044, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -1.9833234548568726, |
|
"logits/rejected": -1.9822227954864502, |
|
"logps/chosen": -1.0197571516036987, |
|
"logps/rejected": -1.198885202407837, |
|
"loss": 1.1594, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0395143032073975, |
|
"rewards/margins": 0.35825610160827637, |
|
"rewards/rejected": -2.397770404815674, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1685878962536025, |
|
"grad_norm": 18.894758055694265, |
|
"learning_rate": 1.0816964401097739e-08, |
|
"logits/chosen": -1.9618886709213257, |
|
"logits/rejected": -1.9587116241455078, |
|
"logps/chosen": -0.9558318853378296, |
|
"logps/rejected": -1.0796352624893188, |
|
"loss": 1.205, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9116637706756592, |
|
"rewards/margins": 0.24760663509368896, |
|
"rewards/rejected": -2.1592705249786377, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.175792507204611, |
|
"grad_norm": 19.906085929832862, |
|
"learning_rate": 1.0644854146186406e-08, |
|
"logits/chosen": -2.025203227996826, |
|
"logits/rejected": -2.0189619064331055, |
|
"logps/chosen": -1.024359107017517, |
|
"logps/rejected": -1.1828162670135498, |
|
"loss": 1.1636, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.048718214035034, |
|
"rewards/margins": 0.31691429018974304, |
|
"rewards/rejected": -2.3656325340270996, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.18299711815562, |
|
"grad_norm": 19.050052296968172, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -2.0075266361236572, |
|
"logits/rejected": -1.9991521835327148, |
|
"logps/chosen": -1.0193443298339844, |
|
"logps/rejected": -1.1795679330825806, |
|
"loss": 1.1534, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0386886596679688, |
|
"rewards/margins": 0.3204469382762909, |
|
"rewards/rejected": -2.359135866165161, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 19.33190356248751, |
|
"learning_rate": 1.030367298650201e-08, |
|
"logits/chosen": -2.0206215381622314, |
|
"logits/rejected": -2.0204710960388184, |
|
"logps/chosen": -1.039102554321289, |
|
"logps/rejected": -1.191624402999878, |
|
"loss": 1.1575, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.078205108642578, |
|
"rewards/margins": 0.30504345893859863, |
|
"rewards/rejected": -2.383248805999756, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1974063400576367, |
|
"grad_norm": 21.9617981131132, |
|
"learning_rate": 1.0134626065355675e-08, |
|
"logits/chosen": -2.074868679046631, |
|
"logits/rejected": -2.071895122528076, |
|
"logps/chosen": -1.0231993198394775, |
|
"logps/rejected": -1.1662580966949463, |
|
"loss": 1.1883, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.046398639678955, |
|
"rewards/margins": 0.2861180305480957, |
|
"rewards/rejected": -2.3325161933898926, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.2046109510086453, |
|
"grad_norm": 19.55623294249282, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -2.013423442840576, |
|
"logits/rejected": -2.0084660053253174, |
|
"logps/chosen": -1.0631778240203857, |
|
"logps/rejected": -1.2087138891220093, |
|
"loss": 1.1876, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1263556480407715, |
|
"rewards/margins": 0.2910720705986023, |
|
"rewards/rejected": -2.4174277782440186, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.211815561959654, |
|
"grad_norm": 14.867171520984334, |
|
"learning_rate": 9.799678956121976e-09, |
|
"logits/chosen": -1.970645546913147, |
|
"logits/rejected": -1.9662506580352783, |
|
"logps/chosen": -1.030286431312561, |
|
"logps/rejected": -1.1386739015579224, |
|
"loss": 1.2008, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.060572862625122, |
|
"rewards/margins": 0.21677501499652863, |
|
"rewards/rejected": -2.2773478031158447, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.2190201729106627, |
|
"grad_norm": 23.6722369037589, |
|
"learning_rate": 9.633802313433314e-09, |
|
"logits/chosen": -1.942859411239624, |
|
"logits/rejected": -1.9486020803451538, |
|
"logps/chosen": -1.0193486213684082, |
|
"logps/rejected": -1.1251273155212402, |
|
"loss": 1.2058, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0386972427368164, |
|
"rewards/margins": 0.21155771613121033, |
|
"rewards/rejected": -2.2502546310424805, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2262247838616713, |
|
"grad_norm": 20.75677230247985, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -2.0128586292266846, |
|
"logits/rejected": -2.0072181224823, |
|
"logps/chosen": -1.0143338441848755, |
|
"logps/rejected": -1.1310632228851318, |
|
"loss": 1.2348, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.028667688369751, |
|
"rewards/margins": 0.23345866799354553, |
|
"rewards/rejected": -2.2621264457702637, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.23342939481268, |
|
"grad_norm": 18.746796539063972, |
|
"learning_rate": 9.305301153307949e-09, |
|
"logits/chosen": -2.0094423294067383, |
|
"logits/rejected": -2.017090320587158, |
|
"logps/chosen": -0.9450882077217102, |
|
"logps/rejected": -1.1097722053527832, |
|
"loss": 1.157, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8901764154434204, |
|
"rewards/margins": 0.32936811447143555, |
|
"rewards/rejected": -2.2195444107055664, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2406340057636887, |
|
"grad_norm": 18.096061545612795, |
|
"learning_rate": 9.142699728146336e-09, |
|
"logits/chosen": -1.9791491031646729, |
|
"logits/rejected": -1.9722025394439697, |
|
"logps/chosen": -1.0312968492507935, |
|
"logps/rejected": -1.163287878036499, |
|
"loss": 1.2018, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.062593698501587, |
|
"rewards/margins": 0.26398202776908875, |
|
"rewards/rejected": -2.326575756072998, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 16.703535569291137, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -2.0036609172821045, |
|
"logits/rejected": -2.006706714630127, |
|
"logps/chosen": -0.9921053647994995, |
|
"logps/rejected": -1.1647652387619019, |
|
"loss": 1.1486, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.984210729598999, |
|
"rewards/margins": 0.34531962871551514, |
|
"rewards/rejected": -2.3295304775238037, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.255043227665706, |
|
"grad_norm": 21.958272583090416, |
|
"learning_rate": 8.820852337865611e-09, |
|
"logits/chosen": -2.0320816040039062, |
|
"logits/rejected": -2.028298854827881, |
|
"logps/chosen": -0.9958044290542603, |
|
"logps/rejected": -1.1435072422027588, |
|
"loss": 1.1727, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9916088581085205, |
|
"rewards/margins": 0.29540568590164185, |
|
"rewards/rejected": -2.2870144844055176, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.2622478386167146, |
|
"grad_norm": 17.08140631103118, |
|
"learning_rate": 8.661628997289044e-09, |
|
"logits/chosen": -1.974020004272461, |
|
"logits/rejected": -1.9698307514190674, |
|
"logps/chosen": -1.0156090259552002, |
|
"logps/rejected": -1.1709401607513428, |
|
"loss": 1.169, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.0312180519104004, |
|
"rewards/margins": 0.31066226959228516, |
|
"rewards/rejected": -2.3418803215026855, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2694524495677233, |
|
"grad_norm": 16.417561627034264, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.9732913970947266, |
|
"logits/rejected": -1.9715712070465088, |
|
"logps/chosen": -1.0267970561981201, |
|
"logps/rejected": -1.1859861612319946, |
|
"loss": 1.1702, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0535941123962402, |
|
"rewards/margins": 0.3183782696723938, |
|
"rewards/rejected": -2.3719723224639893, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.276657060518732, |
|
"grad_norm": 24.932055477370433, |
|
"learning_rate": 8.346638988193636e-09, |
|
"logits/chosen": -1.9996095895767212, |
|
"logits/rejected": -1.9946562051773071, |
|
"logps/chosen": -0.9252532720565796, |
|
"logps/rejected": -1.0754306316375732, |
|
"loss": 1.1774, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8505065441131592, |
|
"rewards/margins": 0.30035486817359924, |
|
"rewards/rejected": -2.1508612632751465, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.2838616714697406, |
|
"grad_norm": 23.217906478731962, |
|
"learning_rate": 8.19089446217176e-09, |
|
"logits/chosen": -1.9767364263534546, |
|
"logits/rejected": -1.9664764404296875, |
|
"logps/chosen": -1.002582311630249, |
|
"logps/rejected": -1.1916791200637817, |
|
"loss": 1.121, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.005164623260498, |
|
"rewards/margins": 0.378193199634552, |
|
"rewards/rejected": -2.3833582401275635, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2910662824207493, |
|
"grad_norm": 17.141991975793108, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.9834896326065063, |
|
"logits/rejected": -1.9829978942871094, |
|
"logps/chosen": -0.950110912322998, |
|
"logps/rejected": -1.0394455194473267, |
|
"loss": 1.2456, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.900221824645996, |
|
"rewards/margins": 0.17866934835910797, |
|
"rewards/rejected": -2.0788910388946533, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.298270893371758, |
|
"grad_norm": 23.516146286428995, |
|
"learning_rate": 7.882961107395416e-09, |
|
"logits/chosen": -1.9969555139541626, |
|
"logits/rejected": -1.9910898208618164, |
|
"logps/chosen": -1.130173683166504, |
|
"logps/rejected": -1.1781437397003174, |
|
"loss": 1.3138, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.260347366333008, |
|
"rewards/margins": 0.09594009816646576, |
|
"rewards/rejected": -2.3562874794006348, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 25.884981502834442, |
|
"learning_rate": 7.73079392508428e-09, |
|
"logits/chosen": -1.966968297958374, |
|
"logits/rejected": -1.9663887023925781, |
|
"logps/chosen": -1.0911871194839478, |
|
"logps/rejected": -1.2788745164871216, |
|
"loss": 1.1537, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1823742389678955, |
|
"rewards/margins": 0.3753744959831238, |
|
"rewards/rejected": -2.557749032974243, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3126801152737753, |
|
"grad_norm": 21.445267273811133, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.9995644092559814, |
|
"logits/rejected": -1.9965426921844482, |
|
"logps/chosen": -1.0473155975341797, |
|
"logps/rejected": -1.171209692955017, |
|
"loss": 1.2093, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0946311950683594, |
|
"rewards/margins": 0.2477881908416748, |
|
"rewards/rejected": -2.342419385910034, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.319884726224784, |
|
"grad_norm": 21.167143894724592, |
|
"learning_rate": 7.43011203348704e-09, |
|
"logits/chosen": -1.9143447875976562, |
|
"logits/rejected": -1.911238431930542, |
|
"logps/chosen": -1.0506112575531006, |
|
"logps/rejected": -1.1263036727905273, |
|
"loss": 1.2681, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.101222515106201, |
|
"rewards/margins": 0.15138480067253113, |
|
"rewards/rejected": -2.2526073455810547, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.3270893371757926, |
|
"grad_norm": 18.7122492381894, |
|
"learning_rate": 7.281618460896344e-09, |
|
"logits/chosen": -1.994127869606018, |
|
"logits/rejected": -1.9916470050811768, |
|
"logps/chosen": -0.9652446508407593, |
|
"logps/rejected": -1.1070702075958252, |
|
"loss": 1.1731, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9304893016815186, |
|
"rewards/margins": 0.2836512625217438, |
|
"rewards/rejected": -2.2141404151916504, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.3342939481268012, |
|
"grad_norm": 20.398678891005122, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -1.9642471075057983, |
|
"logits/rejected": -1.9637800455093384, |
|
"logps/chosen": -1.0007370710372925, |
|
"logps/rejected": -1.1398149728775024, |
|
"loss": 1.2023, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.001474142074585, |
|
"rewards/margins": 0.27815574407577515, |
|
"rewards/rejected": -2.279629945755005, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.34149855907781, |
|
"grad_norm": 23.240945165202064, |
|
"learning_rate": 6.988378253821981e-09, |
|
"logits/chosen": -1.9668891429901123, |
|
"logits/rejected": -1.9658010005950928, |
|
"logps/chosen": -1.0259102582931519, |
|
"logps/rejected": -1.1435902118682861, |
|
"loss": 1.2094, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0518205165863037, |
|
"rewards/margins": 0.2353595793247223, |
|
"rewards/rejected": -2.2871804237365723, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3487031700288186, |
|
"grad_norm": 20.274147530632312, |
|
"learning_rate": 6.8436522329140186e-09, |
|
"logits/chosen": -1.9758933782577515, |
|
"logits/rejected": -1.9824403524398804, |
|
"logps/chosen": -1.0337318181991577, |
|
"logps/rejected": -1.1588385105133057, |
|
"loss": 1.2104, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0674636363983154, |
|
"rewards/margins": 0.25021329522132874, |
|
"rewards/rejected": -2.3176770210266113, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3559077809798272, |
|
"grad_norm": 21.874255456099494, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.977266550064087, |
|
"logits/rejected": -1.9675136804580688, |
|
"logps/chosen": -1.0283175706863403, |
|
"logps/rejected": -1.1439546346664429, |
|
"loss": 1.2152, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0566351413726807, |
|
"rewards/margins": 0.2312743216753006, |
|
"rewards/rejected": -2.2879092693328857, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 21.384973629502426, |
|
"learning_rate": 6.558039223849668e-09, |
|
"logits/chosen": -2.0306007862091064, |
|
"logits/rejected": -2.0210115909576416, |
|
"logps/chosen": -1.036071538925171, |
|
"logps/rejected": -1.2443821430206299, |
|
"loss": 1.1148, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.072143077850342, |
|
"rewards/margins": 0.41662105917930603, |
|
"rewards/rejected": -2.4887642860412598, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.3703170028818445, |
|
"grad_norm": 22.082670294571745, |
|
"learning_rate": 6.417172313108471e-09, |
|
"logits/chosen": -1.9587681293487549, |
|
"logits/rejected": -1.9533681869506836, |
|
"logps/chosen": -0.9850085973739624, |
|
"logps/rejected": -1.1136410236358643, |
|
"loss": 1.1988, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9700171947479248, |
|
"rewards/margins": 0.2572648227214813, |
|
"rewards/rejected": -2.2272820472717285, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.377521613832853, |
|
"grad_norm": 21.566072441690903, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -2.0187790393829346, |
|
"logits/rejected": -2.010676383972168, |
|
"logps/chosen": -0.9041236042976379, |
|
"logps/rejected": -1.0975037813186646, |
|
"loss": 1.1246, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8082472085952759, |
|
"rewards/margins": 0.3867604732513428, |
|
"rewards/rejected": -2.195007562637329, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3847262247838614, |
|
"grad_norm": 22.686014286093737, |
|
"learning_rate": 6.139367190322714e-09, |
|
"logits/chosen": -2.0076019763946533, |
|
"logits/rejected": -2.0073728561401367, |
|
"logps/chosen": -1.0593101978302002, |
|
"logps/rejected": -1.2181084156036377, |
|
"loss": 1.1614, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1186203956604004, |
|
"rewards/margins": 0.3175966143608093, |
|
"rewards/rejected": -2.4362168312072754, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.39193083573487, |
|
"grad_norm": 17.411733324996547, |
|
"learning_rate": 6.002448506831171e-09, |
|
"logits/chosen": -2.00325083732605, |
|
"logits/rejected": -1.9984287023544312, |
|
"logps/chosen": -0.9814065098762512, |
|
"logps/rejected": -1.124678134918213, |
|
"loss": 1.1735, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9628130197525024, |
|
"rewards/margins": 0.2865433394908905, |
|
"rewards/rejected": -2.249356269836426, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3991354466858787, |
|
"grad_norm": 18.206116423091125, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -2.0245230197906494, |
|
"logits/rejected": -2.02435040473938, |
|
"logps/chosen": -1.016035795211792, |
|
"logps/rejected": -1.1609398126602173, |
|
"loss": 1.1763, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.032071590423584, |
|
"rewards/margins": 0.2898081839084625, |
|
"rewards/rejected": -2.3218796253204346, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.4063400576368874, |
|
"grad_norm": 23.272555194691343, |
|
"learning_rate": 5.7326270190645595e-09, |
|
"logits/chosen": -1.900092363357544, |
|
"logits/rejected": -1.901489496231079, |
|
"logps/chosen": -1.0590754747390747, |
|
"logps/rejected": -1.1687304973602295, |
|
"loss": 1.2178, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1181509494781494, |
|
"rewards/margins": 0.2193101942539215, |
|
"rewards/rejected": -2.337460994720459, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.413544668587896, |
|
"grad_norm": 18.379780103310328, |
|
"learning_rate": 5.599743182125938e-09, |
|
"logits/chosen": -2.0489888191223145, |
|
"logits/rejected": -2.048907518386841, |
|
"logps/chosen": -1.04793381690979, |
|
"logps/rejected": -1.1847532987594604, |
|
"loss": 1.1791, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.09586763381958, |
|
"rewards/margins": 0.2736392021179199, |
|
"rewards/rejected": -2.369506597518921, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 20.327364402710476, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -2.0488951206207275, |
|
"logits/rejected": -2.05851411819458, |
|
"logps/chosen": -1.0903593301773071, |
|
"logps/rejected": -1.1954041719436646, |
|
"loss": 1.2347, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1807186603546143, |
|
"rewards/margins": 0.21008984744548798, |
|
"rewards/rejected": -2.390808343887329, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.4279538904899134, |
|
"grad_norm": 20.74526034057685, |
|
"learning_rate": 5.33807602740658e-09, |
|
"logits/chosen": -2.0205600261688232, |
|
"logits/rejected": -2.0137457847595215, |
|
"logps/chosen": -0.9561742544174194, |
|
"logps/rejected": -1.1597832441329956, |
|
"loss": 1.1117, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9123485088348389, |
|
"rewards/margins": 0.4072180390357971, |
|
"rewards/rejected": -2.319566488265991, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.435158501440922, |
|
"grad_norm": 21.143223128559057, |
|
"learning_rate": 5.209311103746334e-09, |
|
"logits/chosen": -2.000640869140625, |
|
"logits/rejected": -2.0010857582092285, |
|
"logps/chosen": -1.0521432161331177, |
|
"logps/rejected": -1.224646806716919, |
|
"loss": 1.1583, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1042864322662354, |
|
"rewards/margins": 0.34500715136528015, |
|
"rewards/rejected": -2.449293613433838, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4423631123919307, |
|
"grad_norm": 24.24368097300932, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.9737918376922607, |
|
"logits/rejected": -1.9732027053833008, |
|
"logps/chosen": -0.9712947010993958, |
|
"logps/rejected": -1.1367398500442505, |
|
"loss": 1.1475, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9425894021987915, |
|
"rewards/margins": 0.3308902680873871, |
|
"rewards/rejected": -2.273479700088501, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4495677233429394, |
|
"grad_norm": 18.631728815748932, |
|
"learning_rate": 4.955963821400599e-09, |
|
"logits/chosen": -2.028813123703003, |
|
"logits/rejected": -2.0230822563171387, |
|
"logps/chosen": -1.0294839143753052, |
|
"logps/rejected": -1.166856288909912, |
|
"loss": 1.1931, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0589678287506104, |
|
"rewards/margins": 0.2747448980808258, |
|
"rewards/rejected": -2.333712577819824, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.456772334293948, |
|
"grad_norm": 15.405415975081304, |
|
"learning_rate": 4.831399271982928e-09, |
|
"logits/chosen": -1.9567426443099976, |
|
"logits/rejected": -1.9487731456756592, |
|
"logps/chosen": -1.0431255102157593, |
|
"logps/rejected": -1.1727097034454346, |
|
"loss": 1.211, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0862510204315186, |
|
"rewards/margins": 0.2591683268547058, |
|
"rewards/rejected": -2.345419406890869, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.4639769452449567, |
|
"grad_norm": 24.975880962434086, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -2.0264954566955566, |
|
"logits/rejected": -2.0203347206115723, |
|
"logps/chosen": -1.0453736782073975, |
|
"logps/rejected": -1.1925294399261475, |
|
"loss": 1.1967, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.090747356414795, |
|
"rewards/margins": 0.29431161284446716, |
|
"rewards/rejected": -2.385058879852295, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.4711815561959654, |
|
"grad_norm": 25.25311706276655, |
|
"learning_rate": 4.58653213790981e-09, |
|
"logits/chosen": -2.009765863418579, |
|
"logits/rejected": -2.0017480850219727, |
|
"logps/chosen": -1.0253998041152954, |
|
"logps/rejected": -1.1738344430923462, |
|
"loss": 1.1794, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.050799608230591, |
|
"rewards/margins": 0.2968693673610687, |
|
"rewards/rejected": -2.3476688861846924, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 18.143302706074053, |
|
"learning_rate": 4.466246766402773e-09, |
|
"logits/chosen": -1.989457130432129, |
|
"logits/rejected": -1.9831485748291016, |
|
"logps/chosen": -1.039151906967163, |
|
"logps/rejected": -1.1928648948669434, |
|
"loss": 1.1832, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.078303813934326, |
|
"rewards/margins": 0.307425856590271, |
|
"rewards/rejected": -2.3857297897338867, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4855907780979827, |
|
"grad_norm": 22.18491810055598, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -2.036830425262451, |
|
"logits/rejected": -2.0370731353759766, |
|
"logps/chosen": -1.0329066514968872, |
|
"logps/rejected": -1.192126989364624, |
|
"loss": 1.1623, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0658133029937744, |
|
"rewards/margins": 0.31844058632850647, |
|
"rewards/rejected": -2.384253978729248, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4927953890489913, |
|
"grad_norm": 33.05603885847282, |
|
"learning_rate": 4.230014691678016e-09, |
|
"logits/chosen": -1.9939508438110352, |
|
"logits/rejected": -1.9945671558380127, |
|
"logps/chosen": -1.0595440864562988, |
|
"logps/rejected": -1.1260967254638672, |
|
"loss": 1.2725, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1190881729125977, |
|
"rewards/margins": 0.1331052929162979, |
|
"rewards/rejected": -2.2521934509277344, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 17.95056296756793, |
|
"learning_rate": 4.114084594599707e-09, |
|
"logits/chosen": -1.9955661296844482, |
|
"logits/rejected": -1.9954122304916382, |
|
"logps/chosen": -1.0110971927642822, |
|
"logps/rejected": -1.2275745868682861, |
|
"loss": 1.1027, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0221943855285645, |
|
"rewards/margins": 0.4329546391963959, |
|
"rewards/rejected": -2.4551491737365723, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.5072046109510087, |
|
"grad_norm": 22.46057751431382, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -1.9920648336410522, |
|
"logits/rejected": -1.97979736328125, |
|
"logps/chosen": -1.015860915184021, |
|
"logps/rejected": -1.1448417901992798, |
|
"loss": 1.1908, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.031721830368042, |
|
"rewards/margins": 0.25796186923980713, |
|
"rewards/rejected": -2.2896835803985596, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.5144092219020173, |
|
"grad_norm": 17.84685371740395, |
|
"learning_rate": 3.886637027473949e-09, |
|
"logits/chosen": -2.003864049911499, |
|
"logits/rejected": -2.0060501098632812, |
|
"logps/chosen": -1.0762816667556763, |
|
"logps/rejected": -1.239137887954712, |
|
"loss": 1.1561, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1525633335113525, |
|
"rewards/margins": 0.32571229338645935, |
|
"rewards/rejected": -2.478275775909424, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.521613832853026, |
|
"grad_norm": 19.29855967109395, |
|
"learning_rate": 3.775135546051295e-09, |
|
"logits/chosen": -1.9411065578460693, |
|
"logits/rejected": -1.9420562982559204, |
|
"logps/chosen": -1.0252829790115356, |
|
"logps/rejected": -1.1507127285003662, |
|
"loss": 1.1987, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0505659580230713, |
|
"rewards/margins": 0.250859797000885, |
|
"rewards/rejected": -2.3014254570007324, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5288184438040346, |
|
"grad_norm": 23.2883981469754, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.9881235361099243, |
|
"logits/rejected": -1.9923149347305298, |
|
"logps/chosen": -1.1336690187454224, |
|
"logps/rejected": -1.2319626808166504, |
|
"loss": 1.2595, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.2673380374908447, |
|
"rewards/margins": 0.19658716022968292, |
|
"rewards/rejected": -2.463925361633301, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 19.49687492971959, |
|
"learning_rate": 3.556616377404101e-09, |
|
"logits/chosen": -2.005202531814575, |
|
"logits/rejected": -2.002882719039917, |
|
"logps/chosen": -1.078424096107483, |
|
"logps/rejected": -1.2361047267913818, |
|
"loss": 1.1542, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.156848192214966, |
|
"rewards/margins": 0.3153611719608307, |
|
"rewards/rejected": -2.4722094535827637, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.543227665706052, |
|
"grad_norm": 19.79099303892538, |
|
"learning_rate": 3.4496140511748125e-09, |
|
"logits/chosen": -1.9998537302017212, |
|
"logits/rejected": -1.9945876598358154, |
|
"logps/chosen": -1.054868459701538, |
|
"logps/rejected": -1.198381781578064, |
|
"loss": 1.176, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.109736919403076, |
|
"rewards/margins": 0.2870263457298279, |
|
"rewards/rejected": -2.396763563156128, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5504322766570606, |
|
"grad_norm": 31.21393810269857, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.9868720769882202, |
|
"logits/rejected": -1.9875354766845703, |
|
"logps/chosen": -1.0944864749908447, |
|
"logps/rejected": -1.2088205814361572, |
|
"loss": 1.2139, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1889729499816895, |
|
"rewards/margins": 0.2286679744720459, |
|
"rewards/rejected": -2.4176411628723145, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5576368876080693, |
|
"grad_norm": 23.58461815941298, |
|
"learning_rate": 3.2401615234845693e-09, |
|
"logits/chosen": -2.00813627243042, |
|
"logits/rejected": -2.0022683143615723, |
|
"logps/chosen": -1.092397928237915, |
|
"logps/rejected": -1.235439658164978, |
|
"loss": 1.1899, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.18479585647583, |
|
"rewards/margins": 0.28608375787734985, |
|
"rewards/rejected": -2.470879316329956, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.564841498559078, |
|
"grad_norm": 16.081046688546003, |
|
"learning_rate": 3.1377260456714375e-09, |
|
"logits/chosen": -1.8945989608764648, |
|
"logits/rejected": -1.8861172199249268, |
|
"logps/chosen": -1.060025930404663, |
|
"logps/rejected": -1.2014561891555786, |
|
"loss": 1.1701, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.120051860809326, |
|
"rewards/margins": 0.2828606069087982, |
|
"rewards/rejected": -2.4029123783111572, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.5720461095100866, |
|
"grad_norm": 17.90788569435218, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -2.0311574935913086, |
|
"logits/rejected": -2.022732973098755, |
|
"logps/chosen": -1.0490517616271973, |
|
"logps/rejected": -1.1529314517974854, |
|
"loss": 1.2253, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0981035232543945, |
|
"rewards/margins": 0.20775911211967468, |
|
"rewards/rejected": -2.3058629035949707, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5792507204610953, |
|
"grad_norm": 17.350027926948087, |
|
"learning_rate": 2.937472665558541e-09, |
|
"logits/chosen": -2.0183825492858887, |
|
"logits/rejected": -2.019484043121338, |
|
"logps/chosen": -1.0362021923065186, |
|
"logps/rejected": -1.1471149921417236, |
|
"loss": 1.2278, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.072404384613037, |
|
"rewards/margins": 0.22182568907737732, |
|
"rewards/rejected": -2.2942299842834473, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.586455331412104, |
|
"grad_norm": 21.819876934855408, |
|
"learning_rate": 2.8396688402445053e-09, |
|
"logits/chosen": -2.0643913745880127, |
|
"logits/rejected": -2.0568366050720215, |
|
"logps/chosen": -1.0093636512756348, |
|
"logps/rejected": -1.2179429531097412, |
|
"loss": 1.1053, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0187273025512695, |
|
"rewards/margins": 0.41715869307518005, |
|
"rewards/rejected": -2.4358859062194824, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 23.978636486056804, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -2.0086283683776855, |
|
"logits/rejected": -2.002335548400879, |
|
"logps/chosen": -1.1310678720474243, |
|
"logps/rejected": -1.239127516746521, |
|
"loss": 1.2324, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.2621357440948486, |
|
"rewards/margins": 0.2161194384098053, |
|
"rewards/rejected": -2.478255033493042, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.6008645533141213, |
|
"grad_norm": 18.242440029630476, |
|
"learning_rate": 2.6487412946432976e-09, |
|
"logits/chosen": -1.9712591171264648, |
|
"logits/rejected": -1.966138482093811, |
|
"logps/chosen": -1.0688244104385376, |
|
"logps/rejected": -1.2041301727294922, |
|
"loss": 1.1924, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.137648820877075, |
|
"rewards/margins": 0.27061182260513306, |
|
"rewards/rejected": -2.4082603454589844, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.60806916426513, |
|
"grad_norm": 22.81379926161724, |
|
"learning_rate": 2.5556309957742024e-09, |
|
"logits/chosen": -1.9811557531356812, |
|
"logits/rejected": -1.9759891033172607, |
|
"logps/chosen": -1.0247745513916016, |
|
"logps/rejected": -1.220990538597107, |
|
"loss": 1.1157, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.049549102783203, |
|
"rewards/margins": 0.39243215322494507, |
|
"rewards/rejected": -2.441981077194214, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.6152737752161386, |
|
"grad_norm": 22.699037535274126, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -2.03954815864563, |
|
"logits/rejected": -2.040444850921631, |
|
"logps/chosen": -1.1181697845458984, |
|
"logps/rejected": -1.234116554260254, |
|
"loss": 1.2351, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.236339569091797, |
|
"rewards/margins": 0.231893390417099, |
|
"rewards/rejected": -2.468233108520508, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.6224783861671472, |
|
"grad_norm": 16.94171934712139, |
|
"learning_rate": 2.3741500717865987e-09, |
|
"logits/chosen": -1.995910882949829, |
|
"logits/rejected": -2.00685715675354, |
|
"logps/chosen": -1.0068349838256836, |
|
"logps/rejected": -1.1508421897888184, |
|
"loss": 1.1796, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.013669967651367, |
|
"rewards/margins": 0.2880145311355591, |
|
"rewards/rejected": -2.3016843795776367, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.629682997118156, |
|
"grad_norm": 17.359736896500603, |
|
"learning_rate": 2.285792204027678e-09, |
|
"logits/chosen": -1.9837639331817627, |
|
"logits/rejected": -1.9810622930526733, |
|
"logps/chosen": -1.0128896236419678, |
|
"logps/rejected": -1.2106821537017822, |
|
"loss": 1.1023, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.0257792472839355, |
|
"rewards/margins": 0.3955853283405304, |
|
"rewards/rejected": -2.4213643074035645, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.636887608069164, |
|
"grad_norm": 20.82292935112201, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -2.008237838745117, |
|
"logits/rejected": -2.003821611404419, |
|
"logps/chosen": -1.0695806741714478, |
|
"logps/rejected": -1.1950201988220215, |
|
"loss": 1.2201, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1391613483428955, |
|
"rewards/margins": 0.25087904930114746, |
|
"rewards/rejected": -2.390040397644043, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6440922190201728, |
|
"grad_norm": 16.889253562953712, |
|
"learning_rate": 2.113872712509254e-09, |
|
"logits/chosen": -1.993787407875061, |
|
"logits/rejected": -1.9862686395645142, |
|
"logps/chosen": -1.1294848918914795, |
|
"logps/rejected": -1.2411291599273682, |
|
"loss": 1.2278, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.258969783782959, |
|
"rewards/margins": 0.22328904271125793, |
|
"rewards/rejected": -2.4822583198547363, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 14.064327131140953, |
|
"learning_rate": 2.0303231739801143e-09, |
|
"logits/chosen": -1.9686027765274048, |
|
"logits/rejected": -1.957910180091858, |
|
"logps/chosen": -1.0182335376739502, |
|
"logps/rejected": -1.1589828729629517, |
|
"loss": 1.182, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0364670753479004, |
|
"rewards/margins": 0.2814987301826477, |
|
"rewards/rejected": -2.3179657459259033, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.65850144092219, |
|
"grad_norm": 23.602606022846373, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -2.0372962951660156, |
|
"logits/rejected": -2.0287270545959473, |
|
"logps/chosen": -1.063071846961975, |
|
"logps/rejected": -1.1734139919281006, |
|
"loss": 1.2172, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.12614369392395, |
|
"rewards/margins": 0.22068460285663605, |
|
"rewards/rejected": -2.346827983856201, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.6657060518731988, |
|
"grad_norm": 25.359683354788714, |
|
"learning_rate": 1.86807387690692e-09, |
|
"logits/chosen": -2.0645687580108643, |
|
"logits/rejected": -2.061300754547119, |
|
"logps/chosen": -1.0886929035186768, |
|
"logps/rejected": -1.2760602235794067, |
|
"loss": 1.1167, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1773858070373535, |
|
"rewards/margins": 0.37473443150520325, |
|
"rewards/rejected": -2.5521204471588135, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.6729106628242074, |
|
"grad_norm": 19.291599291445024, |
|
"learning_rate": 1.789385523818493e-09, |
|
"logits/chosen": -2.024766206741333, |
|
"logits/rejected": -2.0262362957000732, |
|
"logps/chosen": -1.0400424003601074, |
|
"logps/rejected": -1.2077550888061523, |
|
"loss": 1.1498, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.080084800720215, |
|
"rewards/margins": 0.33542555570602417, |
|
"rewards/rejected": -2.4155101776123047, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.680115273775216, |
|
"grad_norm": 25.661768967793073, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -2.051741123199463, |
|
"logits/rejected": -2.046497344970703, |
|
"logps/chosen": -1.1223233938217163, |
|
"logps/rejected": -1.2178256511688232, |
|
"loss": 1.2398, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2446467876434326, |
|
"rewards/margins": 0.19100406765937805, |
|
"rewards/rejected": -2.4356513023376465, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.6873198847262247, |
|
"grad_norm": 21.329919320711415, |
|
"learning_rate": 1.6369090654806543e-09, |
|
"logits/chosen": -2.0555293560028076, |
|
"logits/rejected": -2.0489516258239746, |
|
"logps/chosen": -1.0201804637908936, |
|
"logps/rejected": -1.163674235343933, |
|
"loss": 1.1691, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.040360927581787, |
|
"rewards/margins": 0.28698763251304626, |
|
"rewards/rejected": -2.327348470687866, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.6945244956772334, |
|
"grad_norm": 19.141258585549746, |
|
"learning_rate": 1.5631316786966498e-09, |
|
"logits/chosen": -1.9855105876922607, |
|
"logits/rejected": -1.978864312171936, |
|
"logps/chosen": -1.0213624238967896, |
|
"logps/rejected": -1.1611801385879517, |
|
"loss": 1.1975, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.042724847793579, |
|
"rewards/margins": 0.27963531017303467, |
|
"rewards/rejected": -2.3223602771759033, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.701729106628242, |
|
"grad_norm": 18.560557441799045, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -2.0349512100219727, |
|
"logits/rejected": -2.028040647506714, |
|
"logps/chosen": -1.0765492916107178, |
|
"logps/rejected": -1.225642442703247, |
|
"loss": 1.1737, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1530985832214355, |
|
"rewards/margins": 0.2981860637664795, |
|
"rewards/rejected": -2.451284885406494, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 29.217027349904072, |
|
"learning_rate": 1.4205245207621508e-09, |
|
"logits/chosen": -1.9789804220199585, |
|
"logits/rejected": -1.9764864444732666, |
|
"logps/chosen": -1.1173272132873535, |
|
"logps/rejected": -1.2862274646759033, |
|
"loss": 1.1533, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.234654426574707, |
|
"rewards/margins": 0.337800532579422, |
|
"rewards/rejected": -2.5724549293518066, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.7161383285302594, |
|
"grad_norm": 17.685903552737507, |
|
"learning_rate": 1.3517047743059978e-09, |
|
"logits/chosen": -2.0163912773132324, |
|
"logits/rejected": -2.0196452140808105, |
|
"logps/chosen": -1.073099136352539, |
|
"logps/rejected": -1.2338473796844482, |
|
"loss": 1.1562, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.146198272705078, |
|
"rewards/margins": 0.3214961588382721, |
|
"rewards/rejected": -2.4676947593688965, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.723342939481268, |
|
"grad_norm": 17.086412835946355, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -2.011781692504883, |
|
"logits/rejected": -2.007628917694092, |
|
"logps/chosen": -1.0312448740005493, |
|
"logps/rejected": -1.1682651042938232, |
|
"loss": 1.2025, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0624897480010986, |
|
"rewards/margins": 0.27404046058654785, |
|
"rewards/rejected": -2.3365302085876465, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7305475504322767, |
|
"grad_norm": 22.568118558934447, |
|
"learning_rate": 1.2190571347958422e-09, |
|
"logits/chosen": -2.0422775745391846, |
|
"logits/rejected": -2.0436275005340576, |
|
"logps/chosen": -0.9664519429206848, |
|
"logps/rejected": -1.167764663696289, |
|
"loss": 1.1102, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9329038858413696, |
|
"rewards/margins": 0.40262526273727417, |
|
"rewards/rejected": -2.335529327392578, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.7377521613832854, |
|
"grad_norm": 18.280458594650423, |
|
"learning_rate": 1.1552385663231634e-09, |
|
"logits/chosen": -1.9983785152435303, |
|
"logits/rejected": -1.9888427257537842, |
|
"logps/chosen": -1.0933729410171509, |
|
"logps/rejected": -1.1888386011123657, |
|
"loss": 1.2396, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.1867458820343018, |
|
"rewards/margins": 0.19093120098114014, |
|
"rewards/rejected": -2.3776772022247314, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.744956772334294, |
|
"grad_norm": 19.071247762838464, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.9584522247314453, |
|
"logits/rejected": -1.9630699157714844, |
|
"logps/chosen": -1.0375924110412598, |
|
"logps/rejected": -1.1800395250320435, |
|
"loss": 1.2, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0751848220825195, |
|
"rewards/margins": 0.28489404916763306, |
|
"rewards/rejected": -2.360079050064087, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7521613832853027, |
|
"grad_norm": 16.698045888323442, |
|
"learning_rate": 1.0326343625364608e-09, |
|
"logits/chosen": -1.9668670892715454, |
|
"logits/rejected": -1.9615176916122437, |
|
"logps/chosen": -1.040906310081482, |
|
"logps/rejected": -1.2131140232086182, |
|
"loss": 1.1383, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.081812620162964, |
|
"rewards/margins": 0.3444153070449829, |
|
"rewards/rejected": -2.4262280464172363, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7593659942363113, |
|
"grad_norm": 18.443004885087902, |
|
"learning_rate": 9.738573457917066e-10, |
|
"logits/chosen": -2.0455007553100586, |
|
"logits/rejected": -2.0438742637634277, |
|
"logps/chosen": -1.0494908094406128, |
|
"logps/rejected": -1.2397874593734741, |
|
"loss": 1.1107, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0989816188812256, |
|
"rewards/margins": 0.3805932402610779, |
|
"rewards/rejected": -2.4795749187469482, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 18.699877901633396, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -2.012563705444336, |
|
"logits/rejected": -2.0103211402893066, |
|
"logps/chosen": -1.0743488073349, |
|
"logps/rejected": -1.1493780612945557, |
|
"loss": 1.2646, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1486976146698, |
|
"rewards/margins": 0.1500580608844757, |
|
"rewards/rejected": -2.2987561225891113, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.7737752161383287, |
|
"grad_norm": 19.950649690108477, |
|
"learning_rate": 8.613741412168113e-10, |
|
"logits/chosen": -2.024034261703491, |
|
"logits/rejected": -2.023303270339966, |
|
"logps/chosen": -1.0807751417160034, |
|
"logps/rejected": -1.209153413772583, |
|
"loss": 1.1806, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.161550283432007, |
|
"rewards/margins": 0.2567565441131592, |
|
"rewards/rejected": -2.418306827545166, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7809798270893373, |
|
"grad_norm": 19.864173044191876, |
|
"learning_rate": 8.076758604914802e-10, |
|
"logits/chosen": -1.9579622745513916, |
|
"logits/rejected": -1.9533469676971436, |
|
"logps/chosen": -0.9816028475761414, |
|
"logps/rejected": -1.1145174503326416, |
|
"loss": 1.1989, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9632056951522827, |
|
"rewards/margins": 0.2658289074897766, |
|
"rewards/rejected": -2.229034900665283, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.7881844380403455, |
|
"grad_norm": 22.85284631990654, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.9850330352783203, |
|
"logits/rejected": -1.9783369302749634, |
|
"logps/chosen": -1.032204031944275, |
|
"logps/rejected": -1.1751976013183594, |
|
"loss": 1.1758, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.06440806388855, |
|
"rewards/margins": 0.28598710894584656, |
|
"rewards/rejected": -2.3503952026367188, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.795389048991354, |
|
"grad_norm": 17.089433378243903, |
|
"learning_rate": 7.053848157367315e-10, |
|
"logits/chosen": -2.0007100105285645, |
|
"logits/rejected": -1.9952503442764282, |
|
"logps/chosen": -1.0419762134552002, |
|
"logps/rejected": -1.1898977756500244, |
|
"loss": 1.1845, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0839524269104004, |
|
"rewards/margins": 0.2958431541919708, |
|
"rewards/rejected": -2.379795551300049, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.802593659942363, |
|
"grad_norm": 15.839145055220296, |
|
"learning_rate": 6.567992423453794e-10, |
|
"logits/chosen": -2.015761375427246, |
|
"logits/rejected": -2.0144906044006348, |
|
"logps/chosen": -0.9628134965896606, |
|
"logps/rejected": -1.0785901546478271, |
|
"loss": 1.2027, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9256269931793213, |
|
"rewards/margins": 0.23155340552330017, |
|
"rewards/rejected": -2.1571803092956543, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.8097982708933715, |
|
"grad_norm": 19.561769738169332, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -1.9572585821151733, |
|
"logits/rejected": -1.9550421237945557, |
|
"logps/chosen": -1.0681164264678955, |
|
"logps/rejected": -1.228930115699768, |
|
"loss": 1.157, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.136232852935791, |
|
"rewards/margins": 0.321627140045166, |
|
"rewards/rejected": -2.457860231399536, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.81700288184438, |
|
"grad_norm": 22.777244684824645, |
|
"learning_rate": 5.647650701205653e-10, |
|
"logits/chosen": -2.024953842163086, |
|
"logits/rejected": -2.016838550567627, |
|
"logps/chosen": -1.1103287935256958, |
|
"logps/rejected": -1.2667860984802246, |
|
"loss": 1.1763, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.2206575870513916, |
|
"rewards/margins": 0.31291496753692627, |
|
"rewards/rejected": -2.533572196960449, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 16.303046104621277, |
|
"learning_rate": 5.213229409093856e-10, |
|
"logits/chosen": -2.0344924926757812, |
|
"logits/rejected": -2.0291943550109863, |
|
"logps/chosen": -1.0526678562164307, |
|
"logps/rejected": -1.1854225397109985, |
|
"loss": 1.1995, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1053357124328613, |
|
"rewards/margins": 0.2655092179775238, |
|
"rewards/rejected": -2.370845079421997, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.8314121037463975, |
|
"grad_norm": 20.917034597157283, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -1.9968492984771729, |
|
"logits/rejected": -1.9910831451416016, |
|
"logps/chosen": -1.0663249492645264, |
|
"logps/rejected": -1.1775870323181152, |
|
"loss": 1.2153, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1326498985290527, |
|
"rewards/margins": 0.22252389788627625, |
|
"rewards/rejected": -2.3551740646362305, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.838616714697406, |
|
"grad_norm": 16.746348704994205, |
|
"learning_rate": 4.3960386508631595e-10, |
|
"logits/chosen": -1.9347660541534424, |
|
"logits/rejected": -1.9273744821548462, |
|
"logps/chosen": -0.9666959643363953, |
|
"logps/rejected": -1.0863577127456665, |
|
"loss": 1.2257, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9333919286727905, |
|
"rewards/margins": 0.23932373523712158, |
|
"rewards/rejected": -2.172715425491333, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.845821325648415, |
|
"grad_norm": 35.59868847583538, |
|
"learning_rate": 4.013326629880243e-10, |
|
"logits/chosen": -1.9773681163787842, |
|
"logits/rejected": -1.9677212238311768, |
|
"logps/chosen": -1.1061880588531494, |
|
"logps/rejected": -1.2337268590927124, |
|
"loss": 1.2042, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.212376117706299, |
|
"rewards/margins": 0.2550778090953827, |
|
"rewards/rejected": -2.467453718185425, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8530259365994235, |
|
"grad_norm": 19.68089141096437, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -1.9358896017074585, |
|
"logits/rejected": -1.9352163076400757, |
|
"logps/chosen": -1.0549217462539673, |
|
"logps/rejected": -1.1247824430465698, |
|
"loss": 1.2634, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1098434925079346, |
|
"rewards/margins": 0.13972175121307373, |
|
"rewards/rejected": -2.2495648860931396, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.860230547550432, |
|
"grad_norm": 21.305782765629946, |
|
"learning_rate": 3.2998038176619e-10, |
|
"logits/chosen": -1.9779675006866455, |
|
"logits/rejected": -1.9694792032241821, |
|
"logps/chosen": -1.0569480657577515, |
|
"logps/rejected": -1.1792137622833252, |
|
"loss": 1.2079, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.113896131515503, |
|
"rewards/margins": 0.24453163146972656, |
|
"rewards/rejected": -2.3584275245666504, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.867435158501441, |
|
"grad_norm": 20.77606365899596, |
|
"learning_rate": 2.969043184133907e-10, |
|
"logits/chosen": -2.0462427139282227, |
|
"logits/rejected": -2.0448436737060547, |
|
"logps/chosen": -0.9707571268081665, |
|
"logps/rejected": -1.1868783235549927, |
|
"loss": 1.0771, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.941514253616333, |
|
"rewards/margins": 0.4322422444820404, |
|
"rewards/rejected": -2.3737566471099854, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.8746397694524495, |
|
"grad_norm": 17.888841507835842, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -1.9706792831420898, |
|
"logits/rejected": -1.968033790588379, |
|
"logps/chosen": -0.9736353158950806, |
|
"logps/rejected": -1.1013442277908325, |
|
"loss": 1.2103, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.9472706317901611, |
|
"rewards/margins": 0.25541773438453674, |
|
"rewards/rejected": -2.202688455581665, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 20.827982084787728, |
|
"learning_rate": 2.3596397161395607e-10, |
|
"logits/chosen": -2.0459811687469482, |
|
"logits/rejected": -2.0342445373535156, |
|
"logps/chosen": -1.0675297975540161, |
|
"logps/rejected": -1.2322208881378174, |
|
"loss": 1.1585, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1350595951080322, |
|
"rewards/margins": 0.32938265800476074, |
|
"rewards/rejected": -2.4644417762756348, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.889048991354467, |
|
"grad_norm": 26.363176469996105, |
|
"learning_rate": 2.0810397202206399e-10, |
|
"logits/chosen": -1.9519503116607666, |
|
"logits/rejected": -1.9571945667266846, |
|
"logps/chosen": -1.0638792514801025, |
|
"logps/rejected": -1.1936867237091064, |
|
"loss": 1.1899, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.127758502960205, |
|
"rewards/margins": 0.25961530208587646, |
|
"rewards/rejected": -2.387373447418213, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.8962536023054755, |
|
"grad_norm": 22.435915119282647, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -2.0360004901885986, |
|
"logits/rejected": -2.0333070755004883, |
|
"logps/chosen": -1.0682737827301025, |
|
"logps/rejected": -1.1997547149658203, |
|
"loss": 1.2021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.136547565460205, |
|
"rewards/margins": 0.262962281703949, |
|
"rewards/rejected": -2.3995094299316406, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.903458213256484, |
|
"grad_norm": 21.73593175736094, |
|
"learning_rate": 1.5761411253092382e-10, |
|
"logits/chosen": -1.9650490283966064, |
|
"logits/rejected": -1.9548593759536743, |
|
"logps/chosen": -0.9868356585502625, |
|
"logps/rejected": -1.1082721948623657, |
|
"loss": 1.2004, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.973671317100525, |
|
"rewards/margins": 0.24287304282188416, |
|
"rewards/rejected": -2.2165443897247314, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.910662824207493, |
|
"grad_norm": 20.195616773045355, |
|
"learning_rate": 1.3498780186031455e-10, |
|
"logits/chosen": -2.0080840587615967, |
|
"logits/rejected": -2.0045909881591797, |
|
"logps/chosen": -1.161108136177063, |
|
"logps/rejected": -1.2804962396621704, |
|
"loss": 1.2261, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.322216272354126, |
|
"rewards/margins": 0.23877570033073425, |
|
"rewards/rejected": -2.560992479324341, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.9178674351585014, |
|
"grad_norm": 15.481345158642924, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -2.0009922981262207, |
|
"logits/rejected": -2.002737522125244, |
|
"logps/chosen": -1.044710397720337, |
|
"logps/rejected": -1.1732409000396729, |
|
"loss": 1.2109, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.089420795440674, |
|
"rewards/margins": 0.2570609450340271, |
|
"rewards/rejected": -2.3464818000793457, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.92507204610951, |
|
"grad_norm": 18.467501751315456, |
|
"learning_rate": 9.498037123825686e-11, |
|
"logits/chosen": -2.008939743041992, |
|
"logits/rejected": -2.0055575370788574, |
|
"logps/chosen": -1.0212924480438232, |
|
"logps/rejected": -1.1463903188705444, |
|
"loss": 1.1994, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0425848960876465, |
|
"rewards/margins": 0.2501956522464752, |
|
"rewards/rejected": -2.292780637741089, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.9322766570605188, |
|
"grad_norm": 21.297612250322334, |
|
"learning_rate": 7.760206364398614e-11, |
|
"logits/chosen": -2.0672459602355957, |
|
"logits/rejected": -2.0643718242645264, |
|
"logps/chosen": -1.0762133598327637, |
|
"logps/rejected": -1.2181751728057861, |
|
"loss": 1.1851, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1524267196655273, |
|
"rewards/margins": 0.283923476934433, |
|
"rewards/rejected": -2.4363503456115723, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.9394812680115274, |
|
"grad_norm": 21.284898715739565, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -1.9935872554779053, |
|
"logits/rejected": -1.990228295326233, |
|
"logps/chosen": -1.0278215408325195, |
|
"logps/rejected": -1.2337336540222168, |
|
"loss": 1.1105, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.055643081665039, |
|
"rewards/margins": 0.4118243157863617, |
|
"rewards/rejected": -2.4674673080444336, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.946685878962536, |
|
"grad_norm": 20.87599043089686, |
|
"learning_rate": 4.810237191940625e-11, |
|
"logits/chosen": -1.976012945175171, |
|
"logits/rejected": -1.97482168674469, |
|
"logps/chosen": -1.038079857826233, |
|
"logps/rejected": -1.1688039302825928, |
|
"loss": 1.2176, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.076159715652466, |
|
"rewards/margins": 0.2614482045173645, |
|
"rewards/rejected": -2.3376078605651855, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.9538904899135447, |
|
"grad_norm": 20.210725111790943, |
|
"learning_rate": 3.5983061495617476e-11, |
|
"logits/chosen": -2.032045841217041, |
|
"logits/rejected": -2.0321781635284424, |
|
"logps/chosen": -1.1231155395507812, |
|
"logps/rejected": -1.270812749862671, |
|
"loss": 1.1826, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2462310791015625, |
|
"rewards/margins": 0.295394629240036, |
|
"rewards/rejected": -2.541625499725342, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.9610951008645534, |
|
"grad_norm": 21.355044904847375, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -2.018833637237549, |
|
"logits/rejected": -2.008314371109009, |
|
"logps/chosen": -1.0017220973968506, |
|
"logps/rejected": -1.1643407344818115, |
|
"loss": 1.1725, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.003444194793701, |
|
"rewards/margins": 0.32523733377456665, |
|
"rewards/rejected": -2.328681468963623, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.968299711815562, |
|
"grad_norm": 22.742418454002138, |
|
"learning_rate": 1.700977115254576e-11, |
|
"logits/chosen": -1.9938160181045532, |
|
"logits/rejected": -1.9904816150665283, |
|
"logps/chosen": -0.9965242147445679, |
|
"logps/rejected": -1.1439779996871948, |
|
"loss": 1.1688, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9930484294891357, |
|
"rewards/margins": 0.29490748047828674, |
|
"rewards/rejected": -2.2879559993743896, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9755043227665707, |
|
"grad_norm": 20.73702179826127, |
|
"learning_rate": 1.0157124977230868e-11, |
|
"logits/chosen": -1.9752233028411865, |
|
"logits/rejected": -1.9736427068710327, |
|
"logps/chosen": -0.9688889384269714, |
|
"logps/rejected": -1.117297649383545, |
|
"loss": 1.1686, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9377778768539429, |
|
"rewards/margins": 0.2968176603317261, |
|
"rewards/rejected": -2.23459529876709, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9827089337175794, |
|
"grad_norm": 21.969537635189976, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -1.9961363077163696, |
|
"logits/rejected": -1.9917857646942139, |
|
"logps/chosen": -1.0561162233352661, |
|
"logps/rejected": -1.1413224935531616, |
|
"loss": 1.261, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1122324466705322, |
|
"rewards/margins": 0.17041274905204773, |
|
"rewards/rejected": -2.2826449871063232, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.989913544668588, |
|
"grad_norm": 20.739431552772885, |
|
"learning_rate": 1.7222391488297406e-12, |
|
"logits/chosen": -2.016247510910034, |
|
"logits/rejected": -2.0124571323394775, |
|
"logps/chosen": -1.1069406270980835, |
|
"logps/rejected": -1.2540051937103271, |
|
"loss": 1.1761, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.213881254196167, |
|
"rewards/margins": 0.2941294014453888, |
|
"rewards/rejected": -2.5080103874206543, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.9971181556195967, |
|
"grad_norm": 19.721112564343052, |
|
"learning_rate": 1.4059243338693238e-13, |
|
"logits/chosen": -1.9906642436981201, |
|
"logits/rejected": -1.9835201501846313, |
|
"logps/chosen": -1.0591213703155518, |
|
"logps/rejected": -1.182531714439392, |
|
"loss": 1.1942, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1182427406311035, |
|
"rewards/margins": 0.24682076275348663, |
|
"rewards/rejected": -2.365063428878784, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4164, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2025406490027275, |
|
"train_runtime": 5488.6376, |
|
"train_samples_per_second": 12.135, |
|
"train_steps_per_second": 0.759 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4164, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|