diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,20 +1,20 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.0, + "epoch": 1.0, "eval_steps": 100, - "global_step": 2907, + "global_step": 1911, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 1.7182130584192438e-09, - "logits/chosen": -2.1327524185180664, - "logits/rejected": -1.609220027923584, - "logps/chosen": -91.77880859375, - "logps/rejected": -81.204345703125, + "learning_rate": 2.6041666666666667e-08, + "logits/chosen": -2.7462317943573, + "logits/rejected": -2.425077199935913, + "logps/chosen": -250.54042053222656, + "logps/rejected": -177.74742126464844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, @@ -24,4127 +24,2999 @@ }, { "epoch": 0.01, - "learning_rate": 1.718213058419244e-08, - "logits/chosen": -2.3409039974212646, - "logits/rejected": -2.2914419174194336, - "logps/chosen": -218.2519989013672, - "logps/rejected": -192.96514892578125, - "loss": 0.6949, - "rewards/accuracies": 0.6111111044883728, - "rewards/chosen": 0.008321777917444706, - "rewards/margins": 0.005371565464884043, - "rewards/rejected": 0.0029502129182219505, + "learning_rate": 2.604166666666667e-07, + "logits/chosen": -2.343383312225342, + "logits/rejected": -2.2455639839172363, + "logps/chosen": -311.4956970214844, + "logps/rejected": -254.3400115966797, + "loss": 0.6932, + "rewards/accuracies": 0.3611111044883728, + "rewards/chosen": 0.0004503716481849551, + "rewards/margins": 0.0002595583500806242, + "rewards/rejected": 0.00019081326900050044, "step": 10 }, { - "epoch": 0.02, - "learning_rate": 3.436426116838488e-08, - "logits/chosen": -2.259495735168457, - "logits/rejected": -2.44956636428833, - "logps/chosen": -274.8431091308594, - "logps/rejected": -225.97793579101562, - "loss": 0.6933, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.01440061442553997, - "rewards/margins": 0.013972322456538677, - "rewards/rejected": 0.00042829400626942515, + "epoch": 0.01, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -2.3990731239318848, + "logits/rejected": -2.242825984954834, + "logps/chosen": -307.36834716796875, + "logps/rejected": -239.515625, + "loss": 0.6931, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.00018046621698886156, + "rewards/margins": -0.0010750549845397472, + "rewards/rejected": 0.0012555213179439306, "step": 20 }, { - "epoch": 0.03, - "learning_rate": 5.154639175257731e-08, - "logits/chosen": -2.5081024169921875, - "logits/rejected": -2.4530844688415527, - "logps/chosen": -292.6970520019531, - "logps/rejected": -215.64553833007812, - "loss": 0.6933, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.008286448195576668, - "rewards/margins": 0.008681153878569603, - "rewards/rejected": -0.00039470643969252706, + "epoch": 0.02, + "learning_rate": 7.8125e-07, + "logits/chosen": -2.156728506088257, + "logits/rejected": -2.1305534839630127, + "logps/chosen": -258.18896484375, + "logps/rejected": -222.45346069335938, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.000468552578240633, + "rewards/margins": -0.0007666439050808549, + "rewards/rejected": 0.0002980913850478828, "step": 30 }, { - "epoch": 0.04, - "learning_rate": 6.872852233676976e-08, - "logits/chosen": -2.6037235260009766, - "logits/rejected": -2.4282469749450684, - "logps/chosen": -378.5215759277344, - "logps/rejected": -226.55136108398438, - "loss": 0.6933, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.004302692599594593, - "rewards/margins": 0.008366527035832405, - "rewards/rejected": -0.004063835833221674, + "epoch": 0.02, + "learning_rate": 1.0416666666666667e-06, + "logits/chosen": -2.175902843475342, + "logits/rejected": -2.1458325386047363, + "logps/chosen": -202.2835693359375, + "logps/rejected": -207.637939453125, + "loss": 0.6931, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.00036091814399696887, + "rewards/margins": 0.00010843136260518804, + "rewards/rejected": 0.0002524866722524166, "step": 40 }, { - "epoch": 0.05, - "learning_rate": 8.59106529209622e-08, - "logits/chosen": -2.3304362297058105, - "logits/rejected": -2.30436372756958, - "logps/chosen": -275.41754150390625, - "logps/rejected": -209.0893096923828, - "loss": 0.6934, - "rewards/accuracies": 0.30000001192092896, - "rewards/chosen": -0.01541445404291153, - "rewards/margins": -0.023714235052466393, - "rewards/rejected": 0.008299780078232288, + "epoch": 0.03, + "learning_rate": 1.3020833333333335e-06, + "logits/chosen": -2.3326008319854736, + "logits/rejected": -2.227440118789673, + "logps/chosen": -309.99859619140625, + "logps/rejected": -249.27401733398438, + "loss": 0.693, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.001949362107552588, + "rewards/margins": 0.0011079810792580247, + "rewards/rejected": 0.0008413810282945633, "step": 50 }, { - "epoch": 0.06, - "learning_rate": 1.0309278350515462e-07, - "logits/chosen": -2.408499240875244, - "logits/rejected": -2.518784284591675, - "logps/chosen": -298.5494689941406, - "logps/rejected": -267.29962158203125, - "loss": 0.6947, - "rewards/accuracies": 0.44999998807907104, - "rewards/chosen": -0.01625905930995941, - "rewards/margins": -0.008457355201244354, - "rewards/rejected": -0.007801705040037632, + "epoch": 0.03, + "learning_rate": 1.5625e-06, + "logits/chosen": -2.3864035606384277, + "logits/rejected": -2.252150774002075, + "logps/chosen": -264.1552429199219, + "logps/rejected": -251.7004852294922, + "loss": 0.693, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": 0.0011251050746068358, + "rewards/margins": -0.0001833140559028834, + "rewards/rejected": 0.0013084192760288715, "step": 60 }, { - "epoch": 0.07, - "learning_rate": 1.202749140893471e-07, - "logits/chosen": -2.4770851135253906, - "logits/rejected": -2.2523791790008545, - "logps/chosen": -296.0211486816406, - "logps/rejected": -145.6873321533203, - "loss": 0.6918, - "rewards/accuracies": 0.30000001192092896, - "rewards/chosen": -0.010021962225437164, - "rewards/margins": -0.006689900998026133, - "rewards/rejected": -0.00333206239156425, + "epoch": 0.04, + "learning_rate": 1.8229166666666666e-06, + "logits/chosen": -2.3157763481140137, + "logits/rejected": -2.1782267093658447, + "logps/chosen": -279.8276062011719, + "logps/rejected": -251.36514282226562, + "loss": 0.6927, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0017941547557711601, + "rewards/margins": 0.0011580453719943762, + "rewards/rejected": 0.0006361090345308185, "step": 70 }, { - "epoch": 0.08, - "learning_rate": 1.3745704467353952e-07, - "logits/chosen": -2.4601337909698486, - "logits/rejected": -2.488020181655884, - "logps/chosen": -243.9931182861328, - "logps/rejected": -210.5398406982422, - "loss": 0.6928, - "rewards/accuracies": 0.5, - "rewards/chosen": 0.0015067579224705696, - "rewards/margins": 0.0036226934753358364, - "rewards/rejected": -0.002115936018526554, + "epoch": 0.04, + "learning_rate": 2.0833333333333334e-06, + "logits/chosen": -2.3494324684143066, + "logits/rejected": -2.265619993209839, + "logps/chosen": -257.0757141113281, + "logps/rejected": -238.935791015625, + "loss": 0.6922, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0022330707870423794, + "rewards/margins": 0.002052165335044265, + "rewards/rejected": 0.00018090553930960596, "step": 80 }, { - "epoch": 0.09, - "learning_rate": 1.5463917525773197e-07, - "logits/chosen": -2.45121431350708, - "logits/rejected": -2.3661465644836426, - "logps/chosen": -214.99539184570312, - "logps/rejected": -186.57984924316406, - "loss": 0.6917, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.017697982490062714, - "rewards/margins": 0.007963714189827442, - "rewards/rejected": 0.009734268300235271, + "epoch": 0.05, + "learning_rate": 2.3437500000000002e-06, + "logits/chosen": -2.3054585456848145, + "logits/rejected": -2.407442569732666, + "logps/chosen": -230.22653198242188, + "logps/rejected": -270.58624267578125, + "loss": 0.6925, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0011901370016857982, + "rewards/margins": 1.3230135664343834e-05, + "rewards/rejected": 0.0011769067496061325, "step": 90 }, { - "epoch": 0.1, - "learning_rate": 1.718213058419244e-07, - "logits/chosen": -2.476562261581421, - "logits/rejected": -2.409389019012451, - "logps/chosen": -310.9223327636719, - "logps/rejected": -162.9552764892578, - "loss": 0.6911, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.007375421933829784, - "rewards/margins": 0.01819022186100483, - "rewards/rejected": -0.010814799927175045, + "epoch": 0.05, + "learning_rate": 2.604166666666667e-06, + "logits/chosen": -2.317207098007202, + "logits/rejected": -2.224195957183838, + "logps/chosen": -275.38079833984375, + "logps/rejected": -239.62570190429688, + "loss": 0.6916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046446667984128, + "rewards/margins": 0.0029630656354129314, + "rewards/rejected": 0.001681601395830512, "step": 100 }, { - "epoch": 0.11, - "learning_rate": 1.8900343642611682e-07, - "logits/chosen": -2.426025867462158, - "logits/rejected": -2.0674071311950684, - "logps/chosen": -303.84112548828125, - "logps/rejected": -193.99363708496094, - "loss": 0.6914, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.01276397705078125, - "rewards/margins": 0.010143814608454704, - "rewards/rejected": 0.0026201631408184767, + "epoch": 0.05, + "eval_logits/chosen": -2.262146234512329, + "eval_logits/rejected": -2.2040350437164307, + "eval_logps/chosen": -249.55955505371094, + "eval_logps/rejected": -251.6075439453125, + "eval_loss": 0.6912463307380676, + "eval_rewards/accuracies": 0.6484375, + "eval_rewards/chosen": 0.0059408266097307205, + "eval_rewards/margins": 0.004077494610100985, + "eval_rewards/rejected": 0.0018633321160450578, + "eval_runtime": 100.3497, + "eval_samples_per_second": 19.93, + "eval_steps_per_second": 0.319, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 2.8645833333333334e-06, + "logits/chosen": -2.23917818069458, + "logits/rejected": -2.2198050022125244, + "logps/chosen": -259.9859924316406, + "logps/rejected": -238.02651977539062, + "loss": 0.6911, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.005757027771323919, + "rewards/margins": 0.003131809178739786, + "rewards/rejected": 0.002625218825414777, "step": 110 }, { - "epoch": 0.12, - "learning_rate": 2.0618556701030925e-07, - "logits/chosen": -2.2423105239868164, - "logits/rejected": -2.5096383094787598, - "logps/chosen": -295.35986328125, - "logps/rejected": -273.8446350097656, - "loss": 0.6914, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.015526200644671917, - "rewards/margins": -0.02725866064429283, - "rewards/rejected": 0.011732463724911213, + "epoch": 0.06, + "learning_rate": 3.125e-06, + "logits/chosen": -2.327700138092041, + "logits/rejected": -2.237442970275879, + "logps/chosen": -261.9295349121094, + "logps/rejected": -265.9600524902344, + "loss": 0.6896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.010207553394138813, + "rewards/margins": 0.004325833171606064, + "rewards/rejected": 0.005881720222532749, "step": 120 }, { - "epoch": 0.13, - "learning_rate": 2.2336769759450173e-07, - "logits/chosen": -2.538625955581665, - "logits/rejected": -2.2574923038482666, - "logps/chosen": -289.69061279296875, - "logps/rejected": -183.3281707763672, - "loss": 0.69, - "rewards/accuracies": 0.3499999940395355, - "rewards/chosen": -0.0016457748133689165, - "rewards/margins": -0.00430142879486084, - "rewards/rejected": 0.0026556537486612797, + "epoch": 0.07, + "learning_rate": 3.385416666666667e-06, + "logits/chosen": -2.362593173980713, + "logits/rejected": -2.199610471725464, + "logps/chosen": -341.5224304199219, + "logps/rejected": -274.25823974609375, + "loss": 0.6874, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.020573467016220093, + "rewards/margins": 0.017041604965925217, + "rewards/rejected": 0.003531861351802945, "step": 130 }, { - "epoch": 0.14, - "learning_rate": 2.405498281786942e-07, - "logits/chosen": -2.540769338607788, - "logits/rejected": -2.2134933471679688, - "logps/chosen": -221.8517303466797, - "logps/rejected": -183.58389282226562, - "loss": 0.6897, - "rewards/accuracies": 0.5, - "rewards/chosen": 0.004911613650619984, - "rewards/margins": 0.005466667469590902, - "rewards/rejected": -0.0005550530040636659, + "epoch": 0.07, + "learning_rate": 3.6458333333333333e-06, + "logits/chosen": -2.2749550342559814, + "logits/rejected": -2.189138650894165, + "logps/chosen": -267.3360900878906, + "logps/rejected": -275.9014587402344, + "loss": 0.6851, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.03031373955309391, + "rewards/margins": 0.014086397364735603, + "rewards/rejected": 0.016227342188358307, "step": 140 }, { - "epoch": 0.15, - "learning_rate": 2.5773195876288655e-07, - "logits/chosen": -2.1684834957122803, - "logits/rejected": -2.418734073638916, - "logps/chosen": -366.19830322265625, - "logps/rejected": -190.3058319091797, - "loss": 0.6865, + "epoch": 0.08, + "learning_rate": 3.90625e-06, + "logits/chosen": -2.2647595405578613, + "logits/rejected": -2.031757354736328, + "logps/chosen": -302.0655212402344, + "logps/rejected": -256.95208740234375, + "loss": 0.6793, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.025678252801299095, - "rewards/margins": 0.029336413368582726, - "rewards/rejected": -0.0036581619642674923, + "rewards/chosen": 0.02602318860590458, + "rewards/margins": 0.022284485399723053, + "rewards/rejected": 0.003738699946552515, "step": 150 }, { - "epoch": 0.17, - "learning_rate": 2.7491408934707903e-07, - "logits/chosen": -2.698945999145508, - "logits/rejected": -2.6068835258483887, - "logps/chosen": -441.375, - "logps/rejected": -300.767822265625, - "loss": 0.6841, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.030073052272200584, - "rewards/margins": 0.047654617577791214, - "rewards/rejected": -0.017581569030880928, + "epoch": 0.08, + "learning_rate": 4.166666666666667e-06, + "logits/chosen": -2.0317959785461426, + "logits/rejected": -1.9486854076385498, + "logps/chosen": -263.51666259765625, + "logps/rejected": -237.7399139404297, + "loss": 0.6759, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.034223657101392746, + "rewards/margins": 0.04548361897468567, + "rewards/rejected": -0.011259960941970348, "step": 160 }, { - "epoch": 0.18, - "learning_rate": 2.9209621993127146e-07, - "logits/chosen": -2.3280515670776367, - "logits/rejected": -2.320730686187744, - "logps/chosen": -271.07452392578125, - "logps/rejected": -224.6231231689453, - "loss": 0.6864, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.02470027096569538, - "rewards/margins": 0.01814831793308258, - "rewards/rejected": 0.006551951169967651, + "epoch": 0.09, + "learning_rate": 4.427083333333334e-06, + "logits/chosen": -2.257275342941284, + "logits/rejected": -2.2866082191467285, + "logps/chosen": -302.7249450683594, + "logps/rejected": -286.78643798828125, + "loss": 0.6743, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.03409689664840698, + "rewards/margins": 0.028191696852445602, + "rewards/rejected": 0.005905195139348507, "step": 170 }, { - "epoch": 0.19, - "learning_rate": 3.0927835051546394e-07, - "logits/chosen": -2.5555853843688965, - "logits/rejected": -2.6365621089935303, - "logps/chosen": -271.2387390136719, - "logps/rejected": -235.908935546875, - "loss": 0.6835, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.03413509577512741, - "rewards/margins": 0.046364832669496536, - "rewards/rejected": -0.012229740619659424, + "epoch": 0.09, + "learning_rate": 4.6875000000000004e-06, + "logits/chosen": -2.2668297290802, + "logits/rejected": -2.165213108062744, + "logps/chosen": -275.1216735839844, + "logps/rejected": -266.6285705566406, + "loss": 0.6638, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.05305374786257744, + "rewards/margins": 0.08881576359272003, + "rewards/rejected": -0.0357620045542717, "step": 180 }, { - "epoch": 0.2, - "learning_rate": 3.2646048109965636e-07, - "logits/chosen": -2.7220354080200195, - "logits/rejected": -2.5860915184020996, - "logps/chosen": -302.74267578125, - "logps/rejected": -208.666015625, - "loss": 0.6814, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.01786138489842415, - "rewards/margins": 0.0315425843000412, - "rewards/rejected": -0.013681203126907349, + "epoch": 0.1, + "learning_rate": 4.947916666666667e-06, + "logits/chosen": -2.131401777267456, + "logits/rejected": -2.138788938522339, + "logps/chosen": -251.2366943359375, + "logps/rejected": -256.0220031738281, + "loss": 0.6622, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0315600261092186, + "rewards/margins": 0.050549279898405075, + "rewards/rejected": -0.08210931718349457, "step": 190 }, { - "epoch": 0.21, - "learning_rate": 3.436426116838488e-07, - "logits/chosen": -2.4432010650634766, - "logits/rejected": -2.346902370452881, - "logps/chosen": -259.94525146484375, - "logps/rejected": -188.97792053222656, - "loss": 0.679, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.021963179111480713, - "rewards/margins": 0.03701996058225632, - "rewards/rejected": -0.015056787058711052, + "epoch": 0.1, + "learning_rate": 4.9997328038213385e-06, + "logits/chosen": -2.2219460010528564, + "logits/rejected": -2.232409954071045, + "logps/chosen": -310.5309143066406, + "logps/rejected": -345.9176330566406, + "loss": 0.655, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03254920616745949, + "rewards/margins": 0.11690758168697357, + "rewards/rejected": -0.14945678412914276, "step": 200 }, { - "epoch": 0.22, - "learning_rate": 3.608247422680412e-07, - "logits/chosen": -2.793883800506592, - "logits/rejected": -2.4634127616882324, - "logps/chosen": -378.5939025878906, - "logps/rejected": -226.9193115234375, - "loss": 0.6767, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.013226890936493874, - "rewards/margins": 0.02839084342122078, - "rewards/rejected": -0.015163958072662354, + "epoch": 0.1, + "eval_logits/chosen": -2.161393165588379, + "eval_logits/rejected": -2.101147174835205, + "eval_logps/chosen": -255.74208068847656, + "eval_logps/rejected": -269.41058349609375, + "eval_loss": 0.6498140692710876, + "eval_rewards/accuracies": 0.70703125, + "eval_rewards/chosen": -0.055884458124637604, + "eval_rewards/margins": 0.1202826276421547, + "eval_rewards/rejected": -0.1761670857667923, + "eval_runtime": 100.2506, + "eval_samples_per_second": 19.95, + "eval_steps_per_second": 0.319, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.998647417232375e-06, + "logits/chosen": -2.235889434814453, + "logits/rejected": -2.119898796081543, + "logps/chosen": -243.06777954101562, + "logps/rejected": -230.5707244873047, + "loss": 0.6378, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10976459830999374, + "rewards/margins": 0.07079415768384933, + "rewards/rejected": -0.18055877089500427, "step": 210 }, { - "epoch": 0.23, - "learning_rate": 3.7800687285223364e-07, - "logits/chosen": -2.3424696922302246, - "logits/rejected": -2.2594411373138428, - "logps/chosen": -166.11044311523438, - "logps/rejected": -209.3076934814453, - "loss": 0.6758, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.006952253170311451, - "rewards/margins": 0.02028687857091427, - "rewards/rejected": -0.027239132672548294, + "epoch": 0.12, + "learning_rate": 4.996727502703358e-06, + "logits/chosen": -2.109752893447876, + "logits/rejected": -1.9883226156234741, + "logps/chosen": -282.5257263183594, + "logps/rejected": -262.3667907714844, + "loss": 0.6403, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.185433030128479, + "rewards/margins": 0.17308147251605988, + "rewards/rejected": -0.3585144877433777, "step": 220 }, { - "epoch": 0.24, - "learning_rate": 3.9518900343642607e-07, - "logits/chosen": -2.5350894927978516, - "logits/rejected": -2.4550373554229736, - "logps/chosen": -335.16162109375, - "logps/rejected": -251.5087890625, - "loss": 0.674, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.0555986650288105, - "rewards/margins": 0.072585329413414, - "rewards/rejected": -0.01698666624724865, + "epoch": 0.12, + "learning_rate": 4.993973701470142e-06, + "logits/chosen": -2.0258140563964844, + "logits/rejected": -1.9273954629898071, + "logps/chosen": -300.9564208984375, + "logps/rejected": -278.96063232421875, + "loss": 0.6426, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10224632173776627, + "rewards/margins": 0.19058939814567566, + "rewards/rejected": -0.29283568263053894, "step": 230 }, { - "epoch": 0.25, - "learning_rate": 4.123711340206185e-07, - "logits/chosen": -2.4425277709960938, - "logits/rejected": -2.4560999870300293, - "logps/chosen": -364.9286193847656, - "logps/rejected": -251.4180450439453, - "loss": 0.6683, - "rewards/accuracies": 0.5, - "rewards/chosen": 0.006999121513217688, - "rewards/margins": 0.015280758030712605, - "rewards/rejected": -0.00828163605183363, + "epoch": 0.13, + "learning_rate": 4.990386933279973e-06, + "logits/chosen": -2.1792609691619873, + "logits/rejected": -2.0059762001037598, + "logps/chosen": -245.21896362304688, + "logps/rejected": -228.4906463623047, + "loss": 0.6284, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21190258860588074, + "rewards/margins": 0.14619460701942444, + "rewards/rejected": -0.3580971658229828, "step": 240 }, { - "epoch": 0.26, - "learning_rate": 4.2955326460481097e-07, - "logits/chosen": -2.358187675476074, - "logits/rejected": -2.3916361331939697, - "logps/chosen": -278.9718322753906, - "logps/rejected": -262.45458984375, - "loss": 0.6684, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.025192927569150925, - "rewards/margins": 0.08774158358573914, - "rewards/rejected": -0.06254865229129791, + "epoch": 0.13, + "learning_rate": 4.985968396084284e-06, + "logits/chosen": -2.1072471141815186, + "logits/rejected": -2.025906801223755, + "logps/chosen": -308.6363220214844, + "logps/rejected": -286.33856201171875, + "loss": 0.6333, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.31365102529525757, + "rewards/margins": 0.10651323944330215, + "rewards/rejected": -0.4201642870903015, "step": 250 }, { - "epoch": 0.27, - "learning_rate": 4.4673539518900345e-07, - "logits/chosen": -2.117164134979248, - "logits/rejected": -2.433020830154419, - "logps/chosen": -203.03009033203125, - "logps/rejected": -217.13827514648438, - "loss": 0.6679, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.003108497243374586, - "rewards/margins": 0.0338444858789444, - "rewards/rejected": -0.030735988169908524, + "epoch": 0.14, + "learning_rate": 4.980719565638611e-06, + "logits/chosen": -2.294565439224243, + "logits/rejected": -2.056588649749756, + "logps/chosen": -355.87286376953125, + "logps/rejected": -298.9410095214844, + "loss": 0.6238, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19843360781669617, + "rewards/margins": 0.2138761281967163, + "rewards/rejected": -0.41230979561805725, "step": 260 }, { - "epoch": 0.28, - "learning_rate": 4.639175257731959e-07, - "logits/chosen": -2.236057996749878, - "logits/rejected": -2.4115004539489746, - "logps/chosen": -236.7349395751953, - "logps/rejected": -238.6702880859375, - "loss": 0.6612, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.0009882673621177673, - "rewards/margins": 0.07193086296319962, - "rewards/rejected": -0.07094259560108185, + "epoch": 0.14, + "learning_rate": 4.974642195009681e-06, + "logits/chosen": -2.1215481758117676, + "logits/rejected": -2.0347511768341064, + "logps/chosen": -221.60385131835938, + "logps/rejected": -250.5578155517578, + "loss": 0.5957, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19110050797462463, + "rewards/margins": 0.27878326177597046, + "rewards/rejected": -0.4698837697505951, "step": 270 }, { - "epoch": 0.29, - "learning_rate": 4.810996563573884e-07, - "logits/chosen": -2.474492311477661, - "logits/rejected": -2.3905348777770996, - "logps/chosen": -330.3529357910156, - "logps/rejected": -231.5391082763672, - "loss": 0.6578, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.03507215529680252, - "rewards/margins": 0.10007444769144058, - "rewards/rejected": -0.06500230729579926, + "epoch": 0.15, + "learning_rate": 4.967738313989918e-06, + "logits/chosen": -1.9943937063217163, + "logits/rejected": -1.8106372356414795, + "logps/chosen": -285.83843994140625, + "logps/rejected": -252.4984893798828, + "loss": 0.631, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25647464394569397, + "rewards/margins": 0.044119417667388916, + "rewards/rejected": -0.3005940318107605, "step": 280 }, { - "epoch": 0.3, - "learning_rate": 4.982817869415807e-07, - "logits/chosen": -2.564732074737549, - "logits/rejected": -2.3240487575531006, - "logps/chosen": -277.9839172363281, - "logps/rejected": -197.73281860351562, - "loss": 0.6567, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.05555497482419014, - "rewards/margins": 0.15978679060935974, - "rewards/rejected": -0.1042318120598793, + "epoch": 0.15, + "learning_rate": 4.960010228419499e-06, + "logits/chosen": -1.949554204940796, + "logits/rejected": -2.05960750579834, + "logps/chosen": -259.6241760253906, + "logps/rejected": -297.25799560546875, + "loss": 0.6349, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2845107913017273, + "rewards/margins": 0.24759821593761444, + "rewards/rejected": -0.5321090221405029, "step": 290 }, { - "epoch": 0.31, - "learning_rate": 4.982798165137615e-07, - "logits/chosen": -2.3241677284240723, - "logits/rejected": -2.348520040512085, - "logps/chosen": -209.7371063232422, - "logps/rejected": -238.69595336914062, - "loss": 0.6501, + "epoch": 0.16, + "learning_rate": 4.951460519416228e-06, + "logits/chosen": -1.9813953638076782, + "logits/rejected": -1.8782306909561157, + "logps/chosen": -343.69256591796875, + "logps/rejected": -342.38494873046875, + "loss": 0.6342, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.041191719472408295, - "rewards/margins": 0.12445306777954102, - "rewards/rejected": -0.08326133340597153, + "rewards/chosen": -0.33312922716140747, + "rewards/margins": 0.11881290376186371, + "rewards/rejected": -0.45194211602211, "step": 300 }, { - "epoch": 0.32, - "learning_rate": 4.963685015290519e-07, - "logits/chosen": -2.466608762741089, - "logits/rejected": -2.338318347930908, - "logps/chosen": -360.8646545410156, - "logps/rejected": -246.0187530517578, - "loss": 0.6505, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.04523809999227524, - "rewards/margins": 0.1772933453321457, - "rewards/rejected": -0.13205525279045105, + "epoch": 0.16, + "eval_logits/chosen": -1.9793298244476318, + "eval_logits/rejected": -1.903726577758789, + "eval_logps/chosen": -284.22235107421875, + "eval_logps/rejected": -314.48388671875, + "eval_loss": 0.6145588159561157, + "eval_rewards/accuracies": 0.703125, + "eval_rewards/chosen": -0.3406871259212494, + "eval_rewards/margins": 0.2862129211425781, + "eval_rewards/rejected": -0.6268999576568604, + "eval_runtime": 100.8595, + "eval_samples_per_second": 19.83, + "eval_steps_per_second": 0.317, + "step": 300 + }, + { + "epoch": 0.16, + "learning_rate": 4.942092042513459e-06, + "logits/chosen": -2.022141933441162, + "logits/rejected": -1.8856008052825928, + "logps/chosen": -315.6492919921875, + "logps/rejected": -310.83612060546875, + "loss": 0.5955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2830706834793091, + "rewards/margins": 0.34754693508148193, + "rewards/rejected": -0.630617618560791, "step": 310 }, { - "epoch": 0.33, - "learning_rate": 4.944571865443424e-07, - "logits/chosen": -2.318599224090576, - "logits/rejected": -2.3406758308410645, - "logps/chosen": -217.85122680664062, - "logps/rejected": -143.0299072265625, - "loss": 0.6362, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.05060691758990288, - "rewards/margins": 0.1060304194688797, - "rewards/rejected": -0.05542349815368652, + "epoch": 0.17, + "learning_rate": 4.931907926706374e-06, + "logits/chosen": -2.045828342437744, + "logits/rejected": -1.9249250888824463, + "logps/chosen": -309.50921630859375, + "logps/rejected": -360.74273681640625, + "loss": 0.5891, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.31072211265563965, + "rewards/margins": 0.24828410148620605, + "rewards/rejected": -0.5590062141418457, "step": 320 }, { - "epoch": 0.34, - "learning_rate": 4.92545871559633e-07, - "logits/chosen": -2.477591037750244, - "logits/rejected": -2.4527573585510254, - "logps/chosen": -326.4366455078125, - "logps/rejected": -284.9319763183594, - "loss": 0.6424, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.0541137270629406, - "rewards/margins": 0.10549300909042358, - "rewards/rejected": -0.051379282027482986, + "epoch": 0.17, + "learning_rate": 4.920911573406925e-06, + "logits/chosen": -2.1581883430480957, + "logits/rejected": -2.04437255859375, + "logps/chosen": -274.237548828125, + "logps/rejected": -290.13446044921875, + "loss": 0.604, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.25217825174331665, + "rewards/margins": 0.2510914206504822, + "rewards/rejected": -0.5032696723937988, "step": 330 }, { - "epoch": 0.35, - "learning_rate": 4.906345565749235e-07, - "logits/chosen": -2.402550220489502, - "logits/rejected": -2.2470858097076416, - "logps/chosen": -232.6081085205078, - "logps/rejected": -135.73312377929688, - "loss": 0.6441, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.018155479803681374, - "rewards/margins": 0.245174378156662, - "rewards/rejected": -0.22701887786388397, + "epoch": 0.18, + "learning_rate": 4.9091066553077875e-06, + "logits/chosen": -1.9913349151611328, + "logits/rejected": -1.752171277999878, + "logps/chosen": -317.76812744140625, + "logps/rejected": -265.75146484375, + "loss": 0.6041, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3602680563926697, + "rewards/margins": 0.32529839873313904, + "rewards/rejected": -0.6855664253234863, "step": 340 }, { - "epoch": 0.36, - "learning_rate": 4.88723241590214e-07, - "logits/chosen": -2.4398138523101807, - "logits/rejected": -2.2956748008728027, - "logps/chosen": -379.9845275878906, - "logps/rejected": -224.34408569335938, - "loss": 0.6282, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.09958186000585556, - "rewards/margins": 0.3024463653564453, - "rewards/rejected": -0.20286449790000916, + "epoch": 0.18, + "learning_rate": 4.8964971151557095e-06, + "logits/chosen": -2.0480399131774902, + "logits/rejected": -2.013430118560791, + "logps/chosen": -273.3092346191406, + "logps/rejected": -343.83270263671875, + "loss": 0.5753, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5287691354751587, + "rewards/margins": 0.2307828664779663, + "rewards/rejected": -0.7595520615577698, "step": 350 }, { - "epoch": 0.37, - "learning_rate": 4.868119266055046e-07, - "logits/chosen": -2.2719640731811523, - "logits/rejected": -2.3680129051208496, - "logps/chosen": -213.4238739013672, - "logps/rejected": -282.51495361328125, - "loss": 0.6226, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.02809527888894081, - "rewards/margins": 0.08343404531478882, - "rewards/rejected": -0.11152933537960052, + "epoch": 0.19, + "learning_rate": 4.883087164434672e-06, + "logits/chosen": -1.888958215713501, + "logits/rejected": -1.835721731185913, + "logps/chosen": -251.35986328125, + "logps/rejected": -312.13140869140625, + "loss": 0.6004, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.4788152277469635, + "rewards/margins": 0.3157396912574768, + "rewards/rejected": -0.7945548295974731, "step": 360 }, { - "epoch": 0.38, - "learning_rate": 4.849006116207951e-07, - "logits/chosen": -2.400339365005493, - "logits/rejected": -2.400993824005127, - "logps/chosen": -310.1695861816406, - "logps/rejected": -258.10113525390625, - "loss": 0.6198, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.06970132142305374, - "rewards/margins": 0.2196888029575348, - "rewards/rejected": -0.14998750388622284, + "epoch": 0.19, + "learning_rate": 4.868881281959282e-06, + "logits/chosen": -2.2465357780456543, + "logits/rejected": -2.0700020790100098, + "logps/chosen": -337.7215881347656, + "logps/rejected": -335.09942626953125, + "loss": 0.5924, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2437172830104828, + "rewards/margins": 0.3737484812736511, + "rewards/rejected": -0.6174657940864563, "step": 370 }, { - "epoch": 0.39, - "learning_rate": 4.829892966360856e-07, - "logits/chosen": -2.42053484916687, - "logits/rejected": -2.3133673667907715, - "logps/chosen": -279.19329833984375, - "logps/rejected": -253.25363159179688, - "loss": 0.6356, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.14709529280662537, - "rewards/margins": 0.32831987738609314, - "rewards/rejected": -0.18122461438179016, + "epoch": 0.2, + "learning_rate": 4.853884212378889e-06, + "logits/chosen": -2.1131327152252197, + "logits/rejected": -1.9588050842285156, + "logps/chosen": -246.9884033203125, + "logps/rejected": -242.58206176757812, + "loss": 0.5758, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.261677622795105, + "rewards/margins": 0.31416845321655273, + "rewards/rejected": -0.5758460760116577, "step": 380 }, { - "epoch": 0.4, - "learning_rate": 4.810779816513762e-07, - "logits/chosen": -2.3398146629333496, - "logits/rejected": -2.497467041015625, - "logps/chosen": -199.78628540039062, - "logps/rejected": -176.40805053710938, - "loss": 0.6268, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.019132133573293686, - "rewards/margins": 0.16773070394992828, - "rewards/rejected": -0.18686284124851227, + "epoch": 0.2, + "learning_rate": 4.8381009645929044e-06, + "logits/chosen": -2.05610990524292, + "logits/rejected": -1.873307466506958, + "logps/chosen": -315.9834289550781, + "logps/rejected": -398.42181396484375, + "loss": 0.5601, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.45816677808761597, + "rewards/margins": 0.4635772705078125, + "rewards/rejected": -0.9217440485954285, "step": 390 }, { - "epoch": 0.41, - "learning_rate": 4.791666666666667e-07, - "logits/chosen": -2.5169873237609863, - "logits/rejected": -2.568854808807373, - "logps/chosen": -289.6071472167969, - "logps/rejected": -184.77845764160156, - "loss": 0.6193, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.08133763074874878, - "rewards/margins": 0.25977957248687744, - "rewards/rejected": -0.17844195663928986, + "epoch": 0.21, + "learning_rate": 4.821536810077878e-06, + "logits/chosen": -1.9345449209213257, + "logits/rejected": -1.9163599014282227, + "logps/chosen": -311.0472106933594, + "logps/rejected": -330.16021728515625, + "loss": 0.6121, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.48705682158470154, + "rewards/margins": 0.1697099208831787, + "rewards/rejected": -0.6567667722702026, "step": 400 }, { - "epoch": 0.42, - "learning_rate": 4.772553516819572e-07, - "logits/chosen": -2.1512584686279297, - "logits/rejected": -2.186790943145752, - "logps/chosen": -119.762451171875, - "logps/rejected": -174.71890258789062, - "loss": 0.6209, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.12059535086154938, - "rewards/margins": 0.047021061182022095, - "rewards/rejected": -0.16761639714241028, + "epoch": 0.21, + "eval_logits/chosen": -1.9542992115020752, + "eval_logits/rejected": -1.8716992139816284, + "eval_logps/chosen": -296.7203063964844, + "eval_logps/rejected": -340.9551086425781, + "eval_loss": 0.5946135520935059, + "eval_rewards/accuracies": 0.703125, + "eval_rewards/chosen": -0.4656665325164795, + "eval_rewards/margins": 0.42594534158706665, + "eval_rewards/rejected": -0.8916119337081909, + "eval_runtime": 100.2068, + "eval_samples_per_second": 19.959, + "eval_steps_per_second": 0.319, + "step": 400 + }, + { + "epoch": 0.21, + "learning_rate": 4.804197281126862e-06, + "logits/chosen": -2.094160795211792, + "logits/rejected": -1.9404535293579102, + "logps/chosen": -356.2447814941406, + "logps/rejected": -329.25543212890625, + "loss": 0.6201, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4695979058742523, + "rewards/margins": 0.3254204988479614, + "rewards/rejected": -0.7950183153152466, "step": 410 }, { - "epoch": 0.43, - "learning_rate": 4.753440366972477e-07, - "logits/chosen": -2.5430076122283936, - "logits/rejected": -2.671963691711426, - "logps/chosen": -280.49481201171875, - "logps/rejected": -277.0762939453125, - "loss": 0.6283, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.05606144666671753, - "rewards/margins": 0.046925198286771774, - "rewards/rejected": -0.10298663377761841, + "epoch": 0.22, + "learning_rate": 4.786088169001671e-06, + "logits/chosen": -2.0321784019470215, + "logits/rejected": -1.6601848602294922, + "logps/chosen": -354.99945068359375, + "logps/rejected": -307.051025390625, + "loss": 0.5817, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.3245474398136139, + "rewards/margins": 0.6246824860572815, + "rewards/rejected": -0.949229896068573, "step": 420 }, { - "epoch": 0.44, - "learning_rate": 4.7343272171253825e-07, - "logits/chosen": -2.290693759918213, - "logits/rejected": -2.2883286476135254, - "logps/chosen": -248.0079345703125, - "logps/rejected": -237.4206085205078, - "loss": 0.6154, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.013114357367157936, - "rewards/margins": 0.2169400155544281, - "rewards/rejected": -0.2038256675004959, + "epoch": 0.23, + "learning_rate": 4.767215521998649e-06, + "logits/chosen": -1.9951956272125244, + "logits/rejected": -1.843869924545288, + "logps/chosen": -316.816162109375, + "logps/rejected": -329.95159912109375, + "loss": 0.5718, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3314690589904785, + "rewards/margins": 0.5325202345848083, + "rewards/rejected": -0.8639893531799316, "step": 430 }, { - "epoch": 0.45, - "learning_rate": 4.715214067278288e-07, - "logits/chosen": -2.404177188873291, - "logits/rejected": -2.43928861618042, - "logps/chosen": -233.565185546875, - "logps/rejected": -212.0560302734375, - "loss": 0.6021, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.052248239517211914, - "rewards/margins": 0.1868211030960083, - "rewards/rejected": -0.23906934261322021, + "epoch": 0.23, + "learning_rate": 4.747585643428586e-06, + "logits/chosen": -1.8855489492416382, + "logits/rejected": -1.8727896213531494, + "logps/chosen": -270.5180969238281, + "logps/rejected": -311.8236999511719, + "loss": 0.6055, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.30184242129325867, + "rewards/margins": 0.3076760172843933, + "rewards/rejected": -0.6095184087753296, "step": 440 }, { - "epoch": 0.46, - "learning_rate": 4.696100917431192e-07, - "logits/chosen": -2.405780792236328, - "logits/rejected": -2.449907064437866, - "logps/chosen": -254.3538818359375, - "logps/rejected": -255.2841796875, - "loss": 0.6353, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.08873947709798813, - "rewards/margins": 0.23247499763965607, - "rewards/rejected": -0.14373552799224854, + "epoch": 0.24, + "learning_rate": 4.727205089511466e-06, + "logits/chosen": -1.9391940832138062, + "logits/rejected": -1.7669153213500977, + "logps/chosen": -310.2550048828125, + "logps/rejected": -332.0259094238281, + "loss": 0.6037, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5411502718925476, + "rewards/margins": 0.3224232792854309, + "rewards/rejected": -0.8635735511779785, "step": 450 }, { - "epoch": 0.47, - "learning_rate": 4.6769877675840974e-07, - "logits/chosen": -2.3749642372131348, - "logits/rejected": -2.4225552082061768, - "logps/chosen": -226.59994506835938, - "logps/rejected": -198.53065490722656, - "loss": 0.5953, + "epoch": 0.24, + "learning_rate": 4.7060806671867386e-06, + "logits/chosen": -1.8398698568344116, + "logits/rejected": -1.672258973121643, + "logps/chosen": -349.76824951171875, + "logps/rejected": -309.33258056640625, + "loss": 0.5768, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.0910528227686882, - "rewards/margins": 0.3178223967552185, - "rewards/rejected": -0.2267695963382721, + "rewards/chosen": -0.4927810728549957, + "rewards/margins": 0.44175705313682556, + "rewards/rejected": -0.9345381855964661, "step": 460 }, { - "epoch": 0.49, - "learning_rate": 4.6578746177370027e-07, - "logits/chosen": -2.500232696533203, - "logits/rejected": -2.5585453510284424, - "logps/chosen": -290.7154541015625, - "logps/rejected": -141.37062072753906, - "loss": 0.598, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.04495004937052727, - "rewards/margins": 0.2956233620643616, - "rewards/rejected": -0.2506733536720276, + "epoch": 0.25, + "learning_rate": 4.68421943183986e-06, + "logits/chosen": -1.790315866470337, + "logits/rejected": -1.7293519973754883, + "logps/chosen": -277.6053771972656, + "logps/rejected": -304.4560852050781, + "loss": 0.5733, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.48922044038772583, + "rewards/margins": 0.4518005847930908, + "rewards/rejected": -0.9410210847854614, "step": 470 }, { - "epoch": 0.5, - "learning_rate": 4.638761467889908e-07, - "logits/chosen": -2.41825532913208, - "logits/rejected": -2.539612054824829, - "logps/chosen": -201.26425170898438, - "logps/rejected": -197.85702514648438, - "loss": 0.6148, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.03925345838069916, - "rewards/margins": 0.20073261857032776, - "rewards/rejected": -0.2399860918521881, + "epoch": 0.25, + "learning_rate": 4.661628684945851e-06, + "logits/chosen": -1.8333717584609985, + "logits/rejected": -1.5805283784866333, + "logps/chosen": -326.5656433105469, + "logps/rejected": -273.71636962890625, + "loss": 0.5815, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.5415780544281006, + "rewards/margins": 0.23381216824054718, + "rewards/rejected": -0.7753901481628418, "step": 480 }, { - "epoch": 0.51, - "learning_rate": 4.6196483180428133e-07, - "logits/chosen": -2.517301559448242, - "logits/rejected": -2.471219539642334, - "logps/chosen": -248.6940460205078, - "logps/rejected": -149.48056030273438, - "loss": 0.5953, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.005128766410052776, - "rewards/margins": 0.24176044762134552, - "rewards/rejected": -0.24688920378684998, + "epoch": 0.26, + "learning_rate": 4.638315971630662e-06, + "logits/chosen": -1.9222495555877686, + "logits/rejected": -1.776019811630249, + "logps/chosen": -284.45068359375, + "logps/rejected": -410.24859619140625, + "loss": 0.5644, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4614255428314209, + "rewards/margins": 0.6256455779075623, + "rewards/rejected": -1.0870710611343384, "step": 490 }, { - "epoch": 0.52, - "learning_rate": 4.600535168195718e-07, - "logits/chosen": -2.421499729156494, - "logits/rejected": -2.313544511795044, - "logps/chosen": -198.4822540283203, - "logps/rejected": -208.656982421875, - "loss": 0.5988, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.08058812469244003, - "rewards/margins": 0.1655338704586029, - "rewards/rejected": -0.24612200260162354, + "epoch": 0.26, + "learning_rate": 4.614289078151164e-06, + "logits/chosen": -1.8076536655426025, + "logits/rejected": -1.6980488300323486, + "logps/chosen": -335.1206970214844, + "logps/rejected": -314.8514099121094, + "loss": 0.5973, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3053855001926422, + "rewards/margins": 0.3548199236392975, + "rewards/rejected": -0.6602053642272949, "step": 500 }, { - "epoch": 0.53, - "learning_rate": 4.5814220183486234e-07, - "logits/chosen": -2.4364054203033447, - "logits/rejected": -2.474491834640503, - "logps/chosen": -357.8948059082031, - "logps/rejected": -313.86407470703125, - "loss": 0.5964, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09066192060709, - "rewards/margins": 0.11807866394519806, - "rewards/rejected": -0.20874059200286865, + "epoch": 0.26, + "eval_logits/chosen": -1.9282360076904297, + "eval_logits/rejected": -1.843964695930481, + "eval_logps/chosen": -286.9665832519531, + "eval_logps/rejected": -329.4521789550781, + "eval_loss": 0.593827486038208, + "eval_rewards/accuracies": 0.73046875, + "eval_rewards/chosen": -0.36812952160835266, + "eval_rewards/margins": 0.40845340490341187, + "eval_rewards/rejected": -0.7765828967094421, + "eval_runtime": 99.738, + "eval_samples_per_second": 20.053, + "eval_steps_per_second": 0.321, + "step": 500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5895560292946e-06, + "logits/chosen": -1.807668685913086, + "logits/rejected": -1.859532356262207, + "logps/chosen": -280.341552734375, + "logps/rejected": -335.6524963378906, + "loss": 0.5652, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.39417344331741333, + "rewards/margins": 0.301259309053421, + "rewards/rejected": -0.695432722568512, "step": 510 }, { - "epoch": 0.54, - "learning_rate": 4.562308868501529e-07, - "logits/chosen": -2.4329869747161865, - "logits/rejected": -2.3959157466888428, - "logps/chosen": -252.12997436523438, - "logps/rejected": -272.59930419921875, - "loss": 0.5992, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.09437114745378494, - "rewards/margins": 0.3432508707046509, - "rewards/rejected": -0.24887971580028534, + "epoch": 0.27, + "learning_rate": 4.564125085698375e-06, + "logits/chosen": -1.920212984085083, + "logits/rejected": -1.680850625038147, + "logps/chosen": -333.3331604003906, + "logps/rejected": -321.528076171875, + "loss": 0.6101, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.49174195528030396, + "rewards/margins": 0.35194143652915955, + "rewards/rejected": -0.8436833620071411, "step": 520 }, { - "epoch": 0.55, - "learning_rate": 4.543195718654434e-07, - "logits/chosen": -2.5292797088623047, - "logits/rejected": -2.472195625305176, - "logps/chosen": -311.61810302734375, - "logps/rejected": -204.62269592285156, - "loss": 0.6052, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.022383611649274826, - "rewards/margins": 0.4391644597053528, - "rewards/rejected": -0.4615480303764343, + "epoch": 0.28, + "learning_rate": 4.538004741091066e-06, + "logits/chosen": -1.8313194513320923, + "logits/rejected": -1.844010591506958, + "logps/chosen": -266.2292175292969, + "logps/rejected": -343.5267639160156, + "loss": 0.546, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.45850616693496704, + "rewards/margins": 0.576072633266449, + "rewards/rejected": -1.0345789194107056, "step": 530 }, { - "epoch": 0.56, - "learning_rate": 4.5240825688073394e-07, - "logits/chosen": -2.4026424884796143, - "logits/rejected": -2.410029649734497, - "logps/chosen": -254.65774536132812, - "logps/rejected": -220.6438751220703, - "loss": 0.6005, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.06496790796518326, - "rewards/margins": 0.32214078307151794, - "rewards/rejected": -0.2571728527545929, + "epoch": 0.28, + "learning_rate": 4.511203719455588e-06, + "logits/chosen": -1.9197680950164795, + "logits/rejected": -1.5895801782608032, + "logps/chosen": -410.54620361328125, + "logps/rejected": -347.0699157714844, + "loss": 0.5822, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.7892423868179321, + "rewards/margins": 0.38583868741989136, + "rewards/rejected": -1.1750810146331787, "step": 540 }, { - "epoch": 0.57, - "learning_rate": 4.504969418960244e-07, - "logits/chosen": -2.489189624786377, - "logits/rejected": -2.509007215499878, - "logps/chosen": -304.0901184082031, - "logps/rejected": -276.55523681640625, - "loss": 0.6, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.08526446670293808, - "rewards/margins": 0.23875808715820312, - "rewards/rejected": -0.15349361300468445, + "epoch": 0.29, + "learning_rate": 4.483730972115454e-06, + "logits/chosen": -1.8053057193756104, + "logits/rejected": -1.6272900104522705, + "logps/chosen": -340.8621520996094, + "logps/rejected": -371.2051696777344, + "loss": 0.5906, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9878808856010437, + "rewards/margins": 0.36620211601257324, + "rewards/rejected": -1.3540828227996826, "step": 550 }, { - "epoch": 0.58, - "learning_rate": 4.4858562691131495e-07, - "logits/chosen": -2.626237392425537, - "logits/rejected": -2.4529411792755127, - "logps/chosen": -250.4290008544922, - "logps/rejected": -236.1934814453125, - "loss": 0.5868, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.14224590361118317, - "rewards/margins": 0.5076346397399902, - "rewards/rejected": -0.3653886914253235, + "epoch": 0.29, + "learning_rate": 4.455595674745107e-06, + "logits/chosen": -2.011946201324463, + "logits/rejected": -1.9660618305206299, + "logps/chosen": -399.44403076171875, + "logps/rejected": -416.68109130859375, + "loss": 0.5921, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.7561608552932739, + "rewards/margins": 0.454878032207489, + "rewards/rejected": -1.2110389471054077, "step": 560 }, { - "epoch": 0.59, - "learning_rate": 4.466743119266055e-07, - "logits/chosen": -2.608710527420044, - "logits/rejected": -2.4026219844818115, - "logps/chosen": -322.64434814453125, - "logps/rejected": -315.7530212402344, - "loss": 0.6015, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.017860546708106995, - "rewards/margins": 0.29392343759536743, - "rewards/rejected": -0.27606287598609924, + "epoch": 0.3, + "learning_rate": 4.426807224305315e-06, + "logits/chosen": -1.841407060623169, + "logits/rejected": -1.7657171487808228, + "logps/chosen": -358.94525146484375, + "logps/rejected": -366.9837341308594, + "loss": 0.5917, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.8852140307426453, + "rewards/margins": 0.26677149534225464, + "rewards/rejected": -1.1519855260849, "step": 570 }, { - "epoch": 0.6, - "learning_rate": 4.44762996941896e-07, - "logits/chosen": -2.3832476139068604, - "logits/rejected": -2.451291561126709, - "logps/chosen": -298.6803894042969, - "logps/rejected": -213.0013885498047, - "loss": 0.5851, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.08139020204544067, - "rewards/margins": 0.48270922899246216, - "rewards/rejected": -0.4013189375400543, + "epoch": 0.3, + "learning_rate": 4.39737523590467e-06, + "logits/chosen": -1.8554290533065796, + "logits/rejected": -1.747036337852478, + "logps/chosen": -346.8446350097656, + "logps/rejected": -310.76336669921875, + "loss": 0.5537, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7551018595695496, + "rewards/margins": 0.27851471304893494, + "rewards/rejected": -1.033616542816162, "step": 580 }, { - "epoch": 0.61, - "learning_rate": 4.4285168195718655e-07, - "logits/chosen": -2.4148542881011963, - "logits/rejected": -2.6071524620056152, - "logps/chosen": -235.28567504882812, - "logps/rejected": -229.70938110351562, - "loss": 0.5857, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.05157552286982536, - "rewards/margins": 0.3185363709926605, - "rewards/rejected": -0.3701118528842926, + "epoch": 0.31, + "learning_rate": 4.367309539588208e-06, + "logits/chosen": -1.7877289056777954, + "logits/rejected": -1.508073091506958, + "logps/chosen": -303.0578308105469, + "logps/rejected": -299.3207702636719, + "loss": 0.5666, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.6448020935058594, + "rewards/margins": 0.3490978181362152, + "rewards/rejected": -0.993899941444397, "step": 590 }, { - "epoch": 0.62, - "learning_rate": 4.40940366972477e-07, - "logits/chosen": -2.6201112270355225, - "logits/rejected": -2.5222606658935547, - "logps/chosen": -187.01014709472656, - "logps/rejected": -190.72042846679688, - "loss": 0.591, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.02282007224857807, - "rewards/margins": 0.2915310561656952, - "rewards/rejected": -0.3143511116504669, + "epoch": 0.31, + "learning_rate": 4.336620177054269e-06, + "logits/chosen": -1.7982432842254639, + "logits/rejected": -1.6228100061416626, + "logps/chosen": -265.236572265625, + "logps/rejected": -344.7894287109375, + "loss": 0.5473, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.5600973963737488, + "rewards/margins": 0.6167066693305969, + "rewards/rejected": -1.1768039464950562, "step": 600 }, { - "epoch": 0.63, - "learning_rate": 4.3902905198776756e-07, - "logits/chosen": -2.701819896697998, - "logits/rejected": -2.6200711727142334, - "logps/chosen": -400.83453369140625, - "logps/rejected": -312.381591796875, - "loss": 0.5893, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.1581762284040451, - "rewards/margins": 0.5981796979904175, - "rewards/rejected": -0.4400033950805664, + "epoch": 0.31, + "eval_logits/chosen": -1.7725939750671387, + "eval_logits/rejected": -1.6815264225006104, + "eval_logps/chosen": -319.0811767578125, + "eval_logps/rejected": -374.43408203125, + "eval_loss": 0.5774475932121277, + "eval_rewards/accuracies": 0.734375, + "eval_rewards/chosen": -0.6892752647399902, + "eval_rewards/margins": 0.5371266603469849, + "eval_rewards/rejected": -1.226401925086975, + "eval_runtime": 100.1017, + "eval_samples_per_second": 19.98, + "eval_steps_per_second": 0.32, + "step": 600 + }, + { + "epoch": 0.32, + "learning_rate": 4.30531739830064e-06, + "logits/chosen": -1.600541353225708, + "logits/rejected": -1.671954870223999, + "logps/chosen": -301.27410888671875, + "logps/rejected": -428.30255126953125, + "loss": 0.563, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8286434412002563, + "rewards/margins": 0.6544786095619202, + "rewards/rejected": -1.4831221103668213, "step": 610 }, { - "epoch": 0.64, - "learning_rate": 4.371177370030581e-07, - "logits/chosen": -2.5501413345336914, - "logits/rejected": -2.316926956176758, - "logps/chosen": -223.07455444335938, - "logps/rejected": -188.2316436767578, - "loss": 0.5698, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.12886852025985718, - "rewards/margins": 0.3416977822780609, - "rewards/rejected": -0.47056636214256287, + "epoch": 0.32, + "learning_rate": 4.273411658201141e-06, + "logits/chosen": -1.7578967809677124, + "logits/rejected": -1.3492388725280762, + "logps/chosen": -380.08013916015625, + "logps/rejected": -386.08056640625, + "loss": 0.5446, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.8161942362785339, + "rewards/margins": 0.5750104188919067, + "rewards/rejected": -1.391204595565796, "step": 620 }, { - "epoch": 0.65, - "learning_rate": 4.352064220183486e-07, - "logits/chosen": -2.261867046356201, - "logits/rejected": -2.159829616546631, - "logps/chosen": -219.3220977783203, - "logps/rejected": -205.33786010742188, - "loss": 0.5838, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.04584250971674919, - "rewards/margins": 0.3203400671482086, - "rewards/rejected": -0.2744975686073303, + "epoch": 0.33, + "learning_rate": 4.240913613013785e-06, + "logits/chosen": -1.5625998973846436, + "logits/rejected": -1.537353754043579, + "logps/chosen": -315.82696533203125, + "logps/rejected": -375.628173828125, + "loss": 0.5564, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8845146298408508, + "rewards/margins": 0.37904268503189087, + "rewards/rejected": -1.2635573148727417, "step": 630 }, { - "epoch": 0.66, - "learning_rate": 4.3329510703363915e-07, - "logits/chosen": -2.319148063659668, - "logits/rejected": -2.3123698234558105, - "logps/chosen": -306.0313415527344, - "logps/rejected": -234.1546630859375, - "loss": 0.5699, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.04760587960481644, - "rewards/margins": 0.6112427711486816, - "rewards/rejected": -0.563636839389801, + "epoch": 0.33, + "learning_rate": 4.207834116821673e-06, + "logits/chosen": -1.7625129222869873, + "logits/rejected": -1.4166629314422607, + "logps/chosen": -409.44677734375, + "logps/rejected": -451.19329833984375, + "loss": 0.5548, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.7660303115844727, + "rewards/margins": 0.753090500831604, + "rewards/rejected": -1.5191209316253662, "step": 640 }, { - "epoch": 0.67, - "learning_rate": 4.313837920489297e-07, - "logits/chosen": -2.3105361461639404, - "logits/rejected": -2.3982295989990234, - "logps/chosen": -284.56072998046875, - "logps/rejected": -315.3321838378906, - "loss": 0.58, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.0843208059668541, - "rewards/margins": 0.43474799394607544, - "rewards/rejected": -0.35042712092399597, + "epoch": 0.34, + "learning_rate": 4.174184217907818e-06, + "logits/chosen": -1.6399860382080078, + "logits/rejected": -1.3593322038650513, + "logps/chosen": -379.9228515625, + "logps/rejected": -356.4607849121094, + "loss": 0.6129, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.91173255443573, + "rewards/margins": 0.4682820737361908, + "rewards/rejected": -1.3800146579742432, "step": 650 }, { - "epoch": 0.68, - "learning_rate": 4.2947247706422016e-07, - "logits/chosen": -2.4114909172058105, - "logits/rejected": -2.4491775035858154, - "logps/chosen": -219.2405548095703, - "logps/rejected": -253.74575805664062, - "loss": 0.5704, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.01176774688065052, - "rewards/margins": 0.2667906582355499, - "rewards/rejected": -0.2785584330558777, + "epoch": 0.35, + "learning_rate": 4.139975155065109e-06, + "logits/chosen": -1.6585413217544556, + "logits/rejected": -1.4335598945617676, + "logps/chosen": -307.7510681152344, + "logps/rejected": -313.7562255859375, + "loss": 0.5703, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.705053448677063, + "rewards/margins": 0.5841482877731323, + "rewards/rejected": -1.2892017364501953, "step": 660 }, { - "epoch": 0.69, - "learning_rate": 4.275611620795107e-07, - "logits/chosen": -2.519127368927002, - "logits/rejected": -2.356675863265991, - "logps/chosen": -315.9088439941406, - "logps/rejected": -185.24163818359375, - "loss": 0.5741, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.10262980312108994, - "rewards/margins": 0.5418930053710938, - "rewards/rejected": -0.6445227861404419, + "epoch": 0.35, + "learning_rate": 4.105218353842643e-06, + "logits/chosen": -1.6327035427093506, + "logits/rejected": -1.3608360290527344, + "logps/chosen": -330.2358093261719, + "logps/rejected": -306.1988220214844, + "loss": 0.6016, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8032588958740234, + "rewards/margins": 0.30609965324401855, + "rewards/rejected": -1.1093586683273315, "step": 670 }, { - "epoch": 0.7, - "learning_rate": 4.2564984709480123e-07, - "logits/chosen": -2.520284652709961, - "logits/rejected": -2.5212578773498535, - "logps/chosen": -339.563720703125, - "logps/rejected": -229.5682373046875, - "loss": 0.6126, + "epoch": 0.36, + "learning_rate": 4.069925422729689e-06, + "logits/chosen": -1.4646321535110474, + "logits/rejected": -1.5063197612762451, + "logps/chosen": -266.6871337890625, + "logps/rejected": -328.95428466796875, + "loss": 0.6057, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.15836027264595032, - "rewards/margins": 0.2690756916999817, - "rewards/rejected": -0.427435964345932, + "rewards/chosen": -0.8702457547187805, + "rewards/margins": 0.41110268235206604, + "rewards/rejected": -1.281348466873169, "step": 680 }, { - "epoch": 0.71, - "learning_rate": 4.2373853211009176e-07, - "logits/chosen": -2.461088180541992, - "logits/rejected": -2.431992292404175, - "logps/chosen": -314.1544494628906, - "logps/rejected": -220.8463897705078, - "loss": 0.6011, + "epoch": 0.36, + "learning_rate": 4.034108149278544e-06, + "logits/chosen": -1.8260358572006226, + "logits/rejected": -1.7864515781402588, + "logps/chosen": -367.08892822265625, + "logps/rejected": -439.1949157714844, + "loss": 0.5596, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0033186853397637606, - "rewards/margins": 0.4757082462310791, - "rewards/rejected": -0.4790269732475281, + "rewards/chosen": -0.8357939720153809, + "rewards/margins": 0.6055955290794373, + "rewards/rejected": -1.441389560699463, "step": 690 }, { - "epoch": 0.72, - "learning_rate": 4.2182721712538224e-07, - "logits/chosen": -2.554452657699585, - "logits/rejected": -2.2479560375213623, - "logps/chosen": -234.7042236328125, - "logps/rejected": -188.83786010742188, - "loss": 0.5883, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.0076250312849879265, - "rewards/margins": 0.5563846230506897, - "rewards/rejected": -0.5487595796585083, - "step": 700 + "epoch": 0.37, + "learning_rate": 3.997778496167584e-06, + "logits/chosen": -1.2935597896575928, + "logits/rejected": -1.4118752479553223, + "logps/chosen": -245.8689422607422, + "logps/rejected": -354.35906982421875, + "loss": 0.5792, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6742351055145264, + "rewards/margins": 0.6851714849472046, + "rewards/rejected": -1.3594067096710205, + "step": 700 }, { - "epoch": 0.73, - "learning_rate": 4.199159021406727e-07, - "logits/chosen": -2.318946361541748, - "logits/rejected": -2.453735828399658, - "logps/chosen": -209.1856231689453, - "logps/rejected": -227.36953735351562, - "loss": 0.5861, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.13343460857868195, - "rewards/margins": 0.22984579205513, - "rewards/rejected": -0.36328038573265076, + "epoch": 0.37, + "eval_logits/chosen": -1.5775055885314941, + "eval_logits/rejected": -1.4782546758651733, + "eval_logps/chosen": -316.5071716308594, + "eval_logps/rejected": -372.79888916015625, + "eval_loss": 0.570942759513855, + "eval_rewards/accuracies": 0.7578125, + "eval_rewards/chosen": -0.6635350584983826, + "eval_rewards/margins": 0.5465149879455566, + "eval_rewards/rejected": -1.2100499868392944, + "eval_runtime": 99.8188, + "eval_samples_per_second": 20.036, + "eval_steps_per_second": 0.321, + "step": 700 + }, + { + "epoch": 0.37, + "learning_rate": 3.96094859720583e-06, + "logits/chosen": -1.645870566368103, + "logits/rejected": -1.5308467149734497, + "logps/chosen": -324.1588439941406, + "logps/rejected": -402.2452697753906, + "loss": 0.5502, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.775097131729126, + "rewards/margins": 0.43130356073379517, + "rewards/rejected": -1.2064006328582764, "step": 710 }, { - "epoch": 0.74, - "learning_rate": 4.1800458715596325e-07, - "logits/chosen": -2.4781854152679443, - "logits/rejected": -2.2781224250793457, - "logps/chosen": -226.40469360351562, - "logps/rejected": -289.3362121582031, - "loss": 0.5966, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.2653411030769348, - "rewards/margins": 0.09751178324222565, - "rewards/rejected": -0.36285287141799927, + "epoch": 0.38, + "learning_rate": 3.923630753280358e-06, + "logits/chosen": -1.6445233821868896, + "logits/rejected": -1.3163498640060425, + "logps/chosen": -396.59014892578125, + "logps/rejected": -368.18463134765625, + "loss": 0.5609, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8597933650016785, + "rewards/margins": 0.5147794485092163, + "rewards/rejected": -1.37457275390625, "step": 720 }, { - "epoch": 0.75, - "learning_rate": 4.160932721712538e-07, - "logits/chosen": -2.4424407482147217, - "logits/rejected": -2.4211478233337402, - "logps/chosen": -273.59869384765625, - "logps/rejected": -269.41717529296875, - "loss": 0.5795, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.003456842852756381, - "rewards/margins": 0.21872258186340332, - "rewards/rejected": -0.21526578068733215, + "epoch": 0.38, + "learning_rate": 3.88583742824789e-06, + "logits/chosen": -1.1861711740493774, + "logits/rejected": -1.015647530555725, + "logps/chosen": -354.11944580078125, + "logps/rejected": -418.89886474609375, + "loss": 0.5447, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8276666402816772, + "rewards/margins": 0.6120607256889343, + "rewards/rejected": -1.4397274255752563, "step": 730 }, { - "epoch": 0.76, - "learning_rate": 4.141819571865443e-07, - "logits/chosen": -2.2280640602111816, - "logits/rejected": -2.307762622833252, - "logps/chosen": -247.0033721923828, - "logps/rejected": -227.2200927734375, - "loss": 0.5638, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06740056723356247, - "rewards/margins": 0.43576961755752563, - "rewards/rejected": -0.5031701326370239, + "epoch": 0.39, + "learning_rate": 3.847581244771983e-06, + "logits/chosen": -1.0957353115081787, + "logits/rejected": -1.0387994050979614, + "logps/chosen": -300.3524169921875, + "logps/rejected": -353.12310791015625, + "loss": 0.5477, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.9017397165298462, + "rewards/margins": 0.2911619544029236, + "rewards/rejected": -1.1929017305374146, "step": 740 }, { - "epoch": 0.77, - "learning_rate": 4.1227064220183485e-07, - "logits/chosen": -2.397576332092285, - "logits/rejected": -2.354574680328369, - "logps/chosen": -214.11892700195312, - "logps/rejected": -215.72268676757812, - "loss": 0.5816, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.17300334572792053, - "rewards/margins": 0.3608870208263397, - "rewards/rejected": -0.5338903665542603, + "epoch": 0.39, + "learning_rate": 3.80887498010715e-06, + "logits/chosen": -1.4777683019638062, + "logits/rejected": -1.2283952236175537, + "logps/chosen": -393.16693115234375, + "logps/rejected": -372.19317626953125, + "loss": 0.5624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9122506976127625, + "rewards/margins": 0.29486000537872314, + "rewards/rejected": -1.2071107625961304, "step": 750 }, { - "epoch": 0.78, - "learning_rate": 4.103593272171253e-07, - "logits/chosen": -2.33254337310791, - "logits/rejected": -2.3918721675872803, - "logps/chosen": -340.80072021484375, - "logps/rejected": -347.1974792480469, - "loss": 0.5778, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.130618155002594, - "rewards/margins": 0.32134371995925903, - "rewards/rejected": -0.4519619047641754, + "epoch": 0.4, + "learning_rate": 3.769731561831365e-06, + "logits/chosen": -0.9933805465698242, + "logits/rejected": -1.0841389894485474, + "logps/chosen": -360.70294189453125, + "logps/rejected": -431.0218200683594, + "loss": 0.5184, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.8388063311576843, + "rewards/margins": 0.7050133943557739, + "rewards/rejected": -1.5438196659088135, "step": 760 }, { - "epoch": 0.79, - "learning_rate": 4.0844801223241586e-07, - "logits/chosen": -2.3820478916168213, - "logits/rejected": -2.4310269355773926, - "logps/chosen": -228.812744140625, - "logps/rejected": -197.6693115234375, - "loss": 0.5567, - "rewards/accuracies": 0.44999998807907104, - "rewards/chosen": -0.0885491669178009, - "rewards/margins": 0.15732769668102264, - "rewards/rejected": -0.24587683379650116, + "epoch": 0.4, + "learning_rate": 3.730164063528359e-06, + "logits/chosen": -0.7796175479888916, + "logits/rejected": -0.5043349266052246, + "logps/chosen": -303.9163513183594, + "logps/rejected": -400.3309020996094, + "loss": 0.5436, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8382329940795898, + "rewards/margins": 0.8575286865234375, + "rewards/rejected": -1.6957619190216064, "step": 770 }, { - "epoch": 0.8, - "learning_rate": 4.065366972477064e-07, - "logits/chosen": -2.5541110038757324, - "logits/rejected": -2.3786189556121826, - "logps/chosen": -273.0157165527344, - "logps/rejected": -203.12271118164062, - "loss": 0.5676, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07696160674095154, - "rewards/margins": 0.5536447763442993, - "rewards/rejected": -0.6306063532829285, + "epoch": 0.41, + "learning_rate": 3.690185700421145e-06, + "logits/chosen": -0.8145714998245239, + "logits/rejected": -0.3118314743041992, + "logps/chosen": -349.3692932128906, + "logps/rejected": -309.53875732421875, + "loss": 0.5767, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8476495742797852, + "rewards/margins": 0.572998046875, + "rewards/rejected": -1.420647382736206, "step": 780 }, { - "epoch": 0.82, - "learning_rate": 4.046253822629969e-07, - "logits/chosen": -2.2037081718444824, - "logits/rejected": -2.20261549949646, - "logps/chosen": -158.65432739257812, - "logps/rejected": -185.50302124023438, - "loss": 0.5411, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.19409586489200592, - "rewards/margins": 0.23793645203113556, - "rewards/rejected": -0.4320322871208191, + "epoch": 0.41, + "learning_rate": 3.649809824958245e-06, + "logits/chosen": -1.1090258359909058, + "logits/rejected": -0.552297055721283, + "logps/chosen": -440.61883544921875, + "logps/rejected": -392.18914794921875, + "loss": 0.5308, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0068585872650146, + "rewards/margins": 0.43958479166030884, + "rewards/rejected": -1.4464435577392578, "step": 790 }, { - "epoch": 0.83, - "learning_rate": 4.0271406727828745e-07, - "logits/chosen": -2.465301990509033, - "logits/rejected": -2.5763792991638184, - "logps/chosen": -271.5575256347656, - "logps/rejected": -209.1376953125, - "loss": 0.5711, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.005071232561022043, - "rewards/margins": 0.3356703221797943, - "rewards/rejected": -0.3407415747642517, + "epoch": 0.42, + "learning_rate": 3.609049922354076e-06, + "logits/chosen": -1.044739842414856, + "logits/rejected": -0.796644389629364, + "logps/chosen": -399.64404296875, + "logps/rejected": -408.13934326171875, + "loss": 0.5194, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0027108192443848, + "rewards/margins": 0.539101779460907, + "rewards/rejected": -1.541812777519226, "step": 800 }, { - "epoch": 0.84, - "learning_rate": 4.00802752293578e-07, - "logits/chosen": -2.3711938858032227, - "logits/rejected": -2.4800353050231934, - "logps/chosen": -240.88314819335938, - "logps/rejected": -233.5401153564453, - "loss": 0.5449, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.08367098867893219, - "rewards/margins": 0.7126539945602417, - "rewards/rejected": -0.7963249683380127, + "epoch": 0.42, + "eval_logits/chosen": -0.5486251711845398, + "eval_logits/rejected": -0.37914562225341797, + "eval_logps/chosen": -352.2356872558594, + "eval_logps/rejected": -416.3268737792969, + "eval_loss": 0.559037983417511, + "eval_rewards/accuracies": 0.74609375, + "eval_rewards/chosen": -1.0208208560943604, + "eval_rewards/margins": 0.6245089769363403, + "eval_rewards/rejected": -1.6453297138214111, + "eval_runtime": 99.895, + "eval_samples_per_second": 20.021, + "eval_steps_per_second": 0.32, + "step": 800 + }, + { + "epoch": 0.42, + "learning_rate": 3.567919606085004e-06, + "logits/chosen": -0.8196202516555786, + "logits/rejected": -0.5285523533821106, + "logps/chosen": -433.35369873046875, + "logps/rejected": -463.96240234375, + "loss": 0.5493, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.1318707466125488, + "rewards/margins": 0.6623066663742065, + "rewards/rejected": -1.7941772937774658, "step": 810 }, { - "epoch": 0.85, - "learning_rate": 3.9889143730886847e-07, - "logits/chosen": -2.42783784866333, - "logits/rejected": -2.4193737506866455, - "logps/chosen": -281.68548583984375, - "logps/rejected": -190.40451049804688, - "loss": 0.5617, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.049332987517118454, - "rewards/margins": 0.5959105491638184, - "rewards/rejected": -0.5465775728225708, + "epoch": 0.43, + "learning_rate": 3.5264326133425467e-06, + "logits/chosen": -1.0270545482635498, + "logits/rejected": -1.041868805885315, + "logps/chosen": -427.3218688964844, + "logps/rejected": -445.3553771972656, + "loss": 0.581, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8407033085823059, + "rewards/margins": 0.44703882932662964, + "rewards/rejected": -1.2877418994903564, "step": 820 }, { - "epoch": 0.86, - "learning_rate": 3.96980122324159e-07, - "logits/chosen": -2.5133376121520996, - "logits/rejected": -2.4929747581481934, - "logps/chosen": -226.41513061523438, - "logps/rejected": -218.5712890625, - "loss": 0.5419, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.004038684070110321, - "rewards/margins": 0.6471161842346191, - "rewards/rejected": -0.643077552318573, + "epoch": 0.43, + "learning_rate": 3.4846028004452696e-06, + "logits/chosen": -0.4200538992881775, + "logits/rejected": -0.20024017989635468, + "logps/chosen": -317.58538818359375, + "logps/rejected": -362.42376708984375, + "loss": 0.5375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.0300816297531128, + "rewards/margins": 0.4815269112586975, + "rewards/rejected": -1.5116084814071655, "step": 830 }, { - "epoch": 0.87, - "learning_rate": 3.9506880733944953e-07, - "logits/chosen": -2.366255760192871, - "logits/rejected": -2.1581902503967285, - "logps/chosen": -243.42214965820312, - "logps/rejected": -193.34178161621094, - "loss": 0.5987, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.29814183712005615, - "rewards/margins": 0.10912153869867325, - "rewards/rejected": -0.4072634279727936, + "epoch": 0.44, + "learning_rate": 3.442444138210883e-06, + "logits/chosen": -0.5636943578720093, + "logits/rejected": -0.2242840826511383, + "logps/chosen": -356.8027648925781, + "logps/rejected": -383.00006103515625, + "loss": 0.5785, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1958321332931519, + "rewards/margins": 0.46510282158851624, + "rewards/rejected": -1.6609351634979248, "step": 840 }, { - "epoch": 0.88, - "learning_rate": 3.9315749235474006e-07, - "logits/chosen": -2.4702069759368896, - "logits/rejected": -2.2905399799346924, - "logps/chosen": -202.70205688476562, - "logps/rejected": -229.86154174804688, - "loss": 0.5651, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.14235912263393402, - "rewards/margins": 0.43106895685195923, - "rewards/rejected": -0.5734280347824097, + "epoch": 0.44, + "learning_rate": 3.399970707290105e-06, + "logits/chosen": -0.6056006550788879, + "logits/rejected": -0.5171287655830383, + "logps/chosen": -286.99462890625, + "logps/rejected": -349.02764892578125, + "loss": 0.5842, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9791563749313354, + "rewards/margins": 0.45784440636634827, + "rewards/rejected": -1.4370006322860718, "step": 850 }, { - "epoch": 0.89, - "learning_rate": 3.912461773700306e-07, - "logits/chosen": -2.488694429397583, - "logits/rejected": -2.2790422439575195, - "logps/chosen": -393.3223876953125, - "logps/rejected": -257.35986328125, - "loss": 0.5666, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.01125104445964098, - "rewards/margins": 0.7673934698104858, - "rewards/rejected": -0.7561424374580383, + "epoch": 0.45, + "learning_rate": 3.3571966934638378e-06, + "logits/chosen": -0.5877082943916321, + "logits/rejected": -0.630211591720581, + "logps/chosen": -363.6180725097656, + "logps/rejected": -374.6347961425781, + "loss": 0.5743, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0287179946899414, + "rewards/margins": 0.5056768655776978, + "rewards/rejected": -1.5343949794769287, "step": 860 }, { - "epoch": 0.9, - "learning_rate": 3.8933486238532107e-07, - "logits/chosen": -2.2432663440704346, - "logits/rejected": -2.2947564125061035, - "logps/chosen": -359.70989990234375, - "logps/rejected": -240.7715301513672, - "loss": 0.5618, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.03136935830116272, - "rewards/margins": 0.8124542236328125, - "rewards/rejected": -0.8438236117362976, + "epoch": 0.46, + "learning_rate": 3.314136382905234e-06, + "logits/chosen": -0.5021811723709106, + "logits/rejected": -0.5260487794876099, + "logps/chosen": -368.32452392578125, + "logps/rejected": -409.72064208984375, + "loss": 0.5672, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.0683351755142212, + "rewards/margins": 0.44290056824684143, + "rewards/rejected": -1.5112355947494507, "step": 870 }, { - "epoch": 0.91, - "learning_rate": 3.874235474006116e-07, - "logits/chosen": -2.173022747039795, - "logits/rejected": -2.1610169410705566, - "logps/chosen": -201.29830932617188, - "logps/rejected": -236.1139678955078, - "loss": 0.5597, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.15942071378231049, - "rewards/margins": 0.318124383687973, - "rewards/rejected": -0.4775451123714447, + "epoch": 0.46, + "learning_rate": 3.2708041574082257e-06, + "logits/chosen": -0.44730886816978455, + "logits/rejected": -0.31664231419563293, + "logps/chosen": -322.88885498046875, + "logps/rejected": -367.3641662597656, + "loss": 0.5537, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.011106014251709, + "rewards/margins": 0.5238951444625854, + "rewards/rejected": -1.535001277923584, "step": 880 }, { - "epoch": 0.92, - "learning_rate": 3.8551223241590214e-07, - "logits/chosen": -2.410968542098999, - "logits/rejected": -2.1644670963287354, - "logps/chosen": -223.5022735595703, - "logps/rejected": -212.260986328125, - "loss": 0.5794, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.12731042504310608, - "rewards/margins": 0.8949457406997681, - "rewards/rejected": -1.0222561359405518, + "epoch": 0.47, + "learning_rate": 3.2272144895841285e-06, + "logits/chosen": -0.01936722919344902, + "logits/rejected": 0.09176506102085114, + "logps/chosen": -342.02069091796875, + "logps/rejected": -354.09027099609375, + "loss": 0.565, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2459288835525513, + "rewards/margins": 0.2770703434944153, + "rewards/rejected": -1.5229991674423218, "step": 890 }, { - "epoch": 0.93, - "learning_rate": 3.8360091743119267e-07, - "logits/chosen": -2.505922794342041, - "logits/rejected": -2.4096271991729736, - "logps/chosen": -276.6337890625, - "logps/rejected": -244.76632690429688, - "loss": 0.5492, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.0883268266916275, - "rewards/margins": 0.560738205909729, - "rewards/rejected": -0.4724113345146179, + "epoch": 0.47, + "learning_rate": 3.1833819380279028e-06, + "logits/chosen": -0.14645493030548096, + "logits/rejected": -0.09734384715557098, + "logps/chosen": -320.1854553222656, + "logps/rejected": -436.3653259277344, + "loss": 0.5367, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9370415806770325, + "rewards/margins": 0.9560844302177429, + "rewards/rejected": -1.893126130104065, "step": 900 }, { - "epoch": 0.94, - "learning_rate": 3.816896024464832e-07, - "logits/chosen": -2.4202027320861816, - "logits/rejected": -2.3245034217834473, - "logps/chosen": -171.32737731933594, - "logps/rejected": -179.85043334960938, - "loss": 0.5671, + "epoch": 0.47, + "eval_logits/chosen": -0.28988412022590637, + "eval_logits/rejected": -0.09080193191766739, + "eval_logps/chosen": -364.9275817871094, + "eval_logps/rejected": -437.00396728515625, + "eval_loss": 0.5491830110549927, + "eval_rewards/accuracies": 0.7265625, + "eval_rewards/chosen": -1.1477394104003906, + "eval_rewards/margins": 0.7043614387512207, + "eval_rewards/rejected": -1.8521009683609009, + "eval_runtime": 99.6659, + "eval_samples_per_second": 20.067, + "eval_steps_per_second": 0.321, + "step": 900 + }, + { + "epoch": 0.48, + "learning_rate": 3.1393211424557037e-06, + "logits/chosen": -0.49690476059913635, + "logits/rejected": -0.2139798104763031, + "logps/chosen": -381.86834716796875, + "logps/rejected": -435.0732421875, + "loss": 0.5364, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.12893405556678772, - "rewards/margins": 0.2722606360912323, - "rewards/rejected": -0.4011947214603424, + "rewards/chosen": -1.218109369277954, + "rewards/margins": 0.5727220177650452, + "rewards/rejected": -1.7908313274383545, "step": 910 }, { - "epoch": 0.95, - "learning_rate": 3.797782874617737e-07, - "logits/chosen": -2.489109754562378, - "logits/rejected": -2.5008530616760254, - "logps/chosen": -306.02740478515625, - "logps/rejected": -235.5460662841797, - "loss": 0.5567, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.11618290096521378, - "rewards/margins": 0.5509032011032104, - "rewards/rejected": -0.6670862436294556, + "epoch": 0.48, + "learning_rate": 3.095046818815331e-06, + "logits/chosen": -0.5411738157272339, + "logits/rejected": -0.3583293855190277, + "logps/chosen": -398.50579833984375, + "logps/rejected": -460.13031005859375, + "loss": 0.5766, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.183455467224121, + "rewards/margins": 0.5352784395217896, + "rewards/rejected": -1.7187340259552002, "step": 920 }, { - "epoch": 0.96, - "learning_rate": 3.778669724770642e-07, - "logits/chosen": -2.308039426803589, - "logits/rejected": -2.1885459423065186, - "logps/chosen": -318.63189697265625, - "logps/rejected": -247.6229248046875, - "loss": 0.566, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.18256303668022156, - "rewards/margins": 0.7064282298088074, - "rewards/rejected": -0.8889913558959961, + "epoch": 0.49, + "learning_rate": 3.050573754371228e-06, + "logits/chosen": -0.6063122153282166, + "logits/rejected": -0.1856008768081665, + "logps/chosen": -353.6801452636719, + "logps/rejected": -411.11004638671875, + "loss": 0.5375, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1633942127227783, + "rewards/margins": 0.6123510599136353, + "rewards/rejected": -1.775745153427124, "step": 930 }, { - "epoch": 0.97, - "learning_rate": 3.7595565749235474e-07, - "logits/chosen": -2.522726535797119, - "logits/rejected": -2.366619110107422, - "logps/chosen": -268.7926025390625, - "logps/rejected": -197.31088256835938, - "loss": 0.5705, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.130192369222641, - "rewards/margins": 0.7135027647018433, - "rewards/rejected": -0.8436950445175171, + "epoch": 0.49, + "learning_rate": 3.0059168027656475e-06, + "logits/chosen": -0.4713048040866852, + "logits/rejected": 0.16548386216163635, + "logps/chosen": -429.6309509277344, + "logps/rejected": -403.17755126953125, + "loss": 0.5238, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1189515590667725, + "rewards/margins": 0.7205164432525635, + "rewards/rejected": -1.8394676446914673, "step": 940 }, { - "epoch": 0.98, - "learning_rate": 3.740443425076452e-07, - "logits/chosen": -2.3446176052093506, - "logits/rejected": -2.3508477210998535, - "logps/chosen": -267.4695739746094, - "logps/rejected": -273.3948974609375, - "loss": 0.57, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.022189509123563766, - "rewards/margins": 0.3890397548675537, - "rewards/rejected": -0.3668502867221832, + "epoch": 0.5, + "learning_rate": 2.9610908790576664e-06, + "logits/chosen": -0.19411665201187134, + "logits/rejected": 0.067843496799469, + "logps/chosen": -363.87176513671875, + "logps/rejected": -404.2000427246094, + "loss": 0.5985, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3660383224487305, + "rewards/margins": 0.4443749487400055, + "rewards/rejected": -1.810413122177124, "step": 950 }, { - "epoch": 0.99, - "learning_rate": 3.7213302752293575e-07, - "logits/chosen": -2.5760252475738525, - "logits/rejected": -2.461642026901245, - "logps/chosen": -264.9154052734375, - "logps/rejected": -198.0699005126953, - "loss": 0.5491, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.11176663637161255, - "rewards/margins": 0.19738611578941345, - "rewards/rejected": -0.309152752161026, + "epoch": 0.5, + "learning_rate": 2.916110954741667e-06, + "logits/chosen": 0.5834629535675049, + "logits/rejected": 0.7395158410072327, + "logps/chosen": -338.3902282714844, + "logps/rejected": -374.72113037109375, + "loss": 0.5698, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.2995877265930176, + "rewards/margins": 0.37612777948379517, + "rewards/rejected": -1.675715684890747, "step": 960 }, { - "epoch": 1.0, - "eval_logits/chosen": -2.1374547481536865, - "eval_logits/rejected": -1.9755022525787354, - "eval_logps/chosen": -248.40455627441406, - "eval_logps/rejected": -195.833251953125, - "eval_loss": 0.5562577247619629, - "eval_rewards/accuracies": 0.78125, - "eval_rewards/chosen": -0.09624822437763214, - "eval_rewards/margins": 0.6263414025306702, - "eval_rewards/rejected": -0.7225896120071411, - "eval_runtime": 49.7794, - "eval_samples_per_second": 40.177, - "eval_steps_per_second": 0.321, - "step": 969 - }, - { - "epoch": 1.0, - "learning_rate": 3.702217125382263e-07, - "logits/chosen": -2.383577823638916, - "logits/rejected": -2.2432830333709717, - "logps/chosen": -269.8841247558594, - "logps/rejected": -266.1859436035156, - "loss": 0.5489, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.028007108718156815, - "rewards/margins": 0.6903571486473083, - "rewards/rejected": -0.662350058555603, + "epoch": 0.51, + "learning_rate": 2.8709920527469836e-06, + "logits/chosen": -0.29962724447250366, + "logits/rejected": -0.045618630945682526, + "logps/chosen": -361.26055908203125, + "logps/rejected": -428.3038024902344, + "loss": 0.5458, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.0471857786178589, + "rewards/margins": 0.619658887386322, + "rewards/rejected": -1.6668447256088257, "step": 970 }, { - "epoch": 1.01, - "learning_rate": 3.6831039755351677e-07, - "logits/chosen": -2.481004238128662, - "logits/rejected": -2.3184635639190674, - "logps/chosen": -239.337890625, - "logps/rejected": -234.6234893798828, - "loss": 0.5453, + "epoch": 0.51, + "learning_rate": 2.8257492424203685e-06, + "logits/chosen": -0.21448250114917755, + "logits/rejected": 0.2300967425107956, + "logps/chosen": -361.10284423828125, + "logps/rejected": -420.21875, + "loss": 0.5348, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.06412211060523987, - "rewards/margins": 0.32830190658569336, - "rewards/rejected": -0.3924240171909332, + "rewards/chosen": -1.2196528911590576, + "rewards/margins": 0.626075267791748, + "rewards/rejected": -1.8457282781600952, "step": 980 }, { - "epoch": 1.02, - "learning_rate": 3.663990825688073e-07, - "logits/chosen": -2.437516689300537, - "logits/rejected": -2.390587091445923, - "logps/chosen": -252.07528686523438, - "logps/rejected": -195.83224487304688, - "loss": 0.6027, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1875399798154831, - "rewards/margins": 0.6106002330780029, - "rewards/rejected": -0.7981401681900024, + "epoch": 0.52, + "learning_rate": 2.7803976344929497e-06, + "logits/chosen": 0.016325589269399643, + "logits/rejected": 0.5622560977935791, + "logps/chosen": -393.4544982910156, + "logps/rejected": -495.8486328125, + "loss": 0.537, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -1.3306463956832886, + "rewards/margins": 0.8722470998764038, + "rewards/rejected": -2.2028937339782715, "step": 990 }, { - "epoch": 1.03, - "learning_rate": 3.6448776758409783e-07, - "logits/chosen": -2.3859527111053467, - "logits/rejected": -2.4744107723236084, - "logps/chosen": -246.77200317382812, - "logps/rejected": -183.67501831054688, - "loss": 0.5812, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.18464355170726776, - "rewards/margins": 0.5135353803634644, - "rewards/rejected": -0.6981789469718933, + "epoch": 0.52, + "learning_rate": 2.734952376033368e-06, + "logits/chosen": -0.3442438244819641, + "logits/rejected": -0.028740787878632545, + "logps/chosen": -397.14544677734375, + "logps/rejected": -438.4039001464844, + "loss": 0.5575, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.145361065864563, + "rewards/margins": 0.8945510983467102, + "rewards/rejected": -2.039912462234497, "step": 1000 }, { - "epoch": 1.04, - "learning_rate": 3.6257645259938836e-07, - "logits/chosen": -2.596139669418335, - "logits/rejected": -2.5789005756378174, - "logps/chosen": -280.490966796875, - "logps/rejected": -216.73489379882812, - "loss": 0.5545, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08334894478321075, - "rewards/margins": 0.8474000692367554, - "rewards/rejected": -0.7640511393547058, + "epoch": 0.52, + "eval_logits/chosen": 0.049825601279735565, + "eval_logits/rejected": 0.27614572644233704, + "eval_logps/chosen": -367.19635009765625, + "eval_logps/rejected": -442.27545166015625, + "eval_loss": 0.5450169444084167, + "eval_rewards/accuracies": 0.734375, + "eval_rewards/chosen": -1.1704269647598267, + "eval_rewards/margins": 0.7343888282775879, + "eval_rewards/rejected": -1.904815673828125, + "eval_runtime": 100.5701, + "eval_samples_per_second": 19.887, + "eval_steps_per_second": 0.318, + "step": 1000 + }, + { + "epoch": 0.53, + "learning_rate": 2.689428645388783e-06, + "logits/chosen": 0.0017841160297393799, + "logits/rejected": 0.5289415717124939, + "logps/chosen": -351.3257751464844, + "logps/rejected": -382.60260009765625, + "loss": 0.4955, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.1659106016159058, + "rewards/margins": 0.8750098347663879, + "rewards/rejected": -2.0409202575683594, "step": 1010 }, { - "epoch": 1.05, - "learning_rate": 3.606651376146789e-07, - "logits/chosen": -2.3291637897491455, - "logits/rejected": -2.3714513778686523, - "logps/chosen": -224.9653778076172, - "logps/rejected": -184.2610321044922, - "loss": 0.5464, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.2414972335100174, - "rewards/margins": 0.3471061587333679, - "rewards/rejected": -0.5886033773422241, + "epoch": 0.53, + "learning_rate": 2.6438416471154277e-06, + "logits/chosen": -0.42786794900894165, + "logits/rejected": 0.2951677441596985, + "logps/chosen": -416.7127990722656, + "logps/rejected": -424.31915283203125, + "loss": 0.54, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.281354546546936, + "rewards/margins": 0.35356560349464417, + "rewards/rejected": -1.6349201202392578, "step": 1020 }, { - "epoch": 1.06, - "learning_rate": 3.5875382262996937e-07, - "logits/chosen": -2.275418758392334, - "logits/rejected": -2.099208354949951, - "logps/chosen": -197.90513610839844, - "logps/rejected": -201.25003051757812, - "loss": 0.5503, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.23300638794898987, - "rewards/margins": 0.42851170897483826, - "rewards/rejected": -0.6615180969238281, + "epoch": 0.54, + "learning_rate": 2.598206606900406e-06, + "logits/chosen": -0.01613428071141243, + "logits/rejected": -0.0008750840788707137, + "logps/chosen": -384.22772216796875, + "logps/rejected": -421.3248596191406, + "loss": 0.5156, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9642124176025391, + "rewards/margins": 0.5817215442657471, + "rewards/rejected": -1.5459339618682861, "step": 1030 }, { - "epoch": 1.07, - "learning_rate": 3.568425076452599e-07, - "logits/chosen": -2.285912036895752, - "logits/rejected": -2.2500832080841064, - "logps/chosen": -213.13992309570312, - "logps/rejected": -196.5510711669922, - "loss": 0.551, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.3874604105949402, - "rewards/margins": 0.24448566138744354, - "rewards/rejected": -0.6319460868835449, + "epoch": 0.54, + "learning_rate": 2.5525387664764433e-06, + "logits/chosen": 0.11487326771020889, + "logits/rejected": 0.6310429573059082, + "logps/chosen": -388.2038269042969, + "logps/rejected": -442.6739807128906, + "loss": 0.5192, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2305266857147217, + "rewards/margins": 0.7809251546859741, + "rewards/rejected": -2.0114519596099854, "step": 1040 }, { - "epoch": 1.08, - "learning_rate": 3.5493119266055044e-07, - "logits/chosen": -2.257537364959717, - "logits/rejected": -2.1868162155151367, - "logps/chosen": -262.30859375, - "logps/rejected": -174.8310546875, - "loss": 0.5329, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.09565829485654831, - "rewards/margins": 0.5677075982093811, - "rewards/rejected": -0.6633658409118652, + "epoch": 0.55, + "learning_rate": 2.5068533785312673e-06, + "logits/chosen": 0.09778528660535812, + "logits/rejected": 0.4881245195865631, + "logps/chosen": -411.2027893066406, + "logps/rejected": -440.90020751953125, + "loss": 0.522, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5704927444458008, + "rewards/margins": 0.5099351406097412, + "rewards/rejected": -2.080427646636963, "step": 1050 }, { - "epoch": 1.09, - "learning_rate": 3.5301987767584097e-07, - "logits/chosen": -2.5957443714141846, - "logits/rejected": -2.3026747703552246, - "logps/chosen": -248.4452667236328, - "logps/rejected": -291.78692626953125, - "loss": 0.5419, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.01751767471432686, - "rewards/margins": 1.0502655506134033, - "rewards/rejected": -1.0327479839324951, + "epoch": 0.55, + "learning_rate": 2.4611657016133334e-06, + "logits/chosen": -0.07164221256971359, + "logits/rejected": 0.257876992225647, + "logps/chosen": -467.0708923339844, + "logps/rejected": -494.01055908203125, + "loss": 0.5114, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4244937896728516, + "rewards/margins": 0.6371169686317444, + "rewards/rejected": -2.061610698699951, "step": 1060 }, { - "epoch": 1.1, - "learning_rate": 3.511085626911315e-07, - "logits/chosen": -2.495492458343506, - "logits/rejected": -2.360352039337158, - "logps/chosen": -379.7645568847656, - "logps/rejected": -288.1813049316406, - "loss": 0.5355, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.29672688245773315, - "rewards/margins": 0.2822774350643158, - "rewards/rejected": -0.5790044069290161, + "epoch": 0.56, + "learning_rate": 2.4154909950355966e-06, + "logits/chosen": -0.08780699223279953, + "logits/rejected": 0.5813466906547546, + "logps/chosen": -457.4397888183594, + "logps/rejected": -534.4034423828125, + "loss": 0.5268, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.403752326965332, + "rewards/margins": 0.8715044260025024, + "rewards/rejected": -2.275256633758545, "step": 1070 }, { - "epoch": 1.11, - "learning_rate": 3.49197247706422e-07, - "logits/chosen": -2.5327889919281006, - "logits/rejected": -2.4286463260650635, - "logps/chosen": -283.114501953125, - "logps/rejected": -184.7639923095703, - "loss": 0.5419, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.13121911883354187, - "rewards/margins": 0.8478299975395203, - "rewards/rejected": -0.716610848903656, + "epoch": 0.57, + "learning_rate": 2.369844513779026e-06, + "logits/chosen": -0.05808568745851517, + "logits/rejected": 0.19921842217445374, + "logps/chosen": -365.045654296875, + "logps/rejected": -452.080810546875, + "loss": 0.5272, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1917533874511719, + "rewards/margins": 0.8068798184394836, + "rewards/rejected": -1.9986331462860107, "step": 1080 }, { - "epoch": 1.12, - "learning_rate": 3.472859327217125e-07, - "logits/chosen": -2.3253655433654785, - "logits/rejected": -2.4310760498046875, - "logps/chosen": -363.16827392578125, - "logps/rejected": -301.8953552246094, - "loss": 0.5571, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.21858613193035126, - "rewards/margins": 0.8502481579780579, - "rewards/rejected": -0.631662130355835, + "epoch": 0.57, + "learning_rate": 2.3242415033975575e-06, + "logits/chosen": 0.1933274269104004, + "logits/rejected": 0.7321079969406128, + "logps/chosen": -426.7176818847656, + "logps/rejected": -412.4091796875, + "loss": 0.5474, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3316107988357544, + "rewards/margins": 0.5840722322463989, + "rewards/rejected": -1.9156830310821533, "step": 1090 }, { - "epoch": 1.14, - "learning_rate": 3.4537461773700304e-07, - "logits/chosen": -2.407773494720459, - "logits/rejected": -2.4023048877716064, - "logps/chosen": -173.5346221923828, - "logps/rejected": -123.51615905761719, - "loss": 0.563, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0015801489353179932, - "rewards/margins": 0.45527520775794983, - "rewards/rejected": -0.4568553566932678, + "epoch": 0.58, + "learning_rate": 2.2786971949262137e-06, + "logits/chosen": -0.10808311402797699, + "logits/rejected": 0.449955552816391, + "logps/chosen": -346.48138427734375, + "logps/rejected": -401.5810241699219, + "loss": 0.5507, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.9797190427780151, + "rewards/margins": 0.8783604502677917, + "rewards/rejected": -1.8580795526504517, "step": 1100 }, { - "epoch": 1.15, - "learning_rate": 3.434633027522936e-07, - "logits/chosen": -2.2176413536071777, - "logits/rejected": -2.1938424110412598, - "logps/chosen": -279.9801330566406, - "logps/rejected": -209.1507568359375, - "loss": 0.5494, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.01898578554391861, - "rewards/margins": 0.47637319564819336, - "rewards/rejected": -0.45738738775253296, + "epoch": 0.58, + "eval_logits/chosen": 0.2876562774181366, + "eval_logits/rejected": 0.533876895904541, + "eval_logps/chosen": -360.5550842285156, + "eval_logps/rejected": -438.5025939941406, + "eval_loss": 0.542937159538269, + "eval_rewards/accuracies": 0.7421875, + "eval_rewards/chosen": -1.1040146350860596, + "eval_rewards/margins": 0.7630726099014282, + "eval_rewards/rejected": -1.8670872449874878, + "eval_runtime": 99.3891, + "eval_samples_per_second": 20.123, + "eval_steps_per_second": 0.322, + "step": 1100 + }, + { + "epoch": 0.58, + "learning_rate": 2.2332267997940514e-06, + "logits/chosen": 0.33531707525253296, + "logits/rejected": 0.6124697923660278, + "logps/chosen": -344.9618835449219, + "logps/rejected": -386.95452880859375, + "loss": 0.5489, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.142901062965393, + "rewards/margins": 0.5753956437110901, + "rewards/rejected": -1.7182966470718384, "step": 1110 }, { - "epoch": 1.16, - "learning_rate": 3.415519877675841e-07, - "logits/chosen": -2.5352399349212646, - "logits/rejected": -2.4146199226379395, - "logps/chosen": -213.72891235351562, - "logps/rejected": -270.6646728515625, - "loss": 0.5422, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.11530435085296631, - "rewards/margins": 0.4574407935142517, - "rewards/rejected": -0.5727452039718628, + "epoch": 0.59, + "learning_rate": 2.1878455047436754e-06, + "logits/chosen": 0.24967947602272034, + "logits/rejected": 0.7071189880371094, + "logps/chosen": -394.3273620605469, + "logps/rejected": -389.794677734375, + "loss": 0.5209, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.2302659749984741, + "rewards/margins": 0.572158694267273, + "rewards/rejected": -1.802424669265747, "step": 1120 }, { - "epoch": 1.17, - "learning_rate": 3.3964067278287464e-07, - "logits/chosen": -2.37849760055542, - "logits/rejected": -2.2588951587677, - "logps/chosen": -306.17840576171875, - "logps/rejected": -276.8265075683594, - "loss": 0.5349, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.07973052561283112, - "rewards/margins": 0.7714017033576965, - "rewards/rejected": -0.6916711926460266, + "epoch": 0.59, + "learning_rate": 2.1425684667589853e-06, + "logits/chosen": 0.5281914472579956, + "logits/rejected": 0.8983514904975891, + "logps/chosen": -355.4177551269531, + "logps/rejected": -408.59014892578125, + "loss": 0.5686, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3293302059173584, + "rewards/margins": 0.5205702185630798, + "rewards/rejected": -1.849900245666504, "step": 1130 }, { - "epoch": 1.18, - "learning_rate": 3.377293577981651e-07, - "logits/chosen": -2.130993366241455, - "logits/rejected": -2.4299304485321045, - "logps/chosen": -229.61618041992188, - "logps/rejected": -189.0056915283203, - "loss": 0.508, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.2313126027584076, - "rewards/margins": 0.4152100682258606, - "rewards/rejected": -0.6465227007865906, + "epoch": 0.6, + "learning_rate": 2.097410808002869e-06, + "logits/chosen": 0.4173739552497864, + "logits/rejected": 0.831757664680481, + "logps/chosen": -352.9256286621094, + "logps/rejected": -429.7771911621094, + "loss": 0.5486, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.1642874479293823, + "rewards/margins": 0.7625770568847656, + "rewards/rejected": -1.9268646240234375, "step": 1140 }, { - "epoch": 1.19, - "learning_rate": 3.3581804281345565e-07, - "logits/chosen": -2.485884428024292, - "logits/rejected": -2.3729846477508545, - "logps/chosen": -346.53497314453125, - "logps/rejected": -233.9870147705078, - "loss": 0.5674, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.24053406715393066, - "rewards/margins": 0.43819236755371094, - "rewards/rejected": -0.6787264943122864, + "epoch": 0.6, + "learning_rate": 2.0523876107665197e-06, + "logits/chosen": 0.33401042222976685, + "logits/rejected": 1.1779800653457642, + "logps/chosen": -382.9471130371094, + "logps/rejected": -419.718994140625, + "loss": 0.5325, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0509642362594604, + "rewards/margins": 0.9581982493400574, + "rewards/rejected": -2.009162425994873, "step": 1150 }, { - "epoch": 1.2, - "learning_rate": 3.339067278287462e-07, - "logits/chosen": -2.3505210876464844, - "logits/rejected": -2.153390645980835, - "logps/chosen": -230.1878204345703, - "logps/rejected": -220.8087158203125, - "loss": 0.532, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.11666283756494522, - "rewards/margins": 0.6117745637893677, - "rewards/rejected": -0.7284374237060547, + "epoch": 0.61, + "learning_rate": 2.007513912432079e-06, + "logits/chosen": 0.5797901153564453, + "logits/rejected": 0.6977930665016174, + "logps/chosen": -363.8152160644531, + "logps/rejected": -425.59124755859375, + "loss": 0.5414, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3096548318862915, + "rewards/margins": 0.6404959559440613, + "rewards/rejected": -1.950150728225708, "step": 1160 }, { - "epoch": 1.21, - "learning_rate": 3.319954128440367e-07, - "logits/chosen": -2.525207281112671, - "logits/rejected": -2.4221832752227783, - "logps/chosen": -354.8018493652344, - "logps/rejected": -194.69482421875, - "loss": 0.5507, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03960106521844864, - "rewards/margins": 0.9505462646484375, - "rewards/rejected": -0.9901474118232727, + "epoch": 0.61, + "learning_rate": 1.962804700450265e-06, + "logits/chosen": 0.45753908157348633, + "logits/rejected": 0.6594685912132263, + "logps/chosen": -306.3339538574219, + "logps/rejected": -329.291015625, + "loss": 0.5564, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1404427289962769, + "rewards/margins": 0.4516426622867584, + "rewards/rejected": -1.592085599899292, "step": 1170 }, { - "epoch": 1.22, - "learning_rate": 3.3008409785932725e-07, - "logits/chosen": -2.316929340362549, - "logits/rejected": -2.223125696182251, - "logps/chosen": -189.4244384765625, - "logps/rejected": -190.21742248535156, - "loss": 0.5381, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.4602813720703125, - "rewards/margins": 0.2248728722333908, - "rewards/rejected": -0.6851542592048645, + "epoch": 0.62, + "learning_rate": 1.9182749073346945e-06, + "logits/chosen": 0.4079570174217224, + "logits/rejected": 0.8342088460922241, + "logps/chosen": -372.5623474121094, + "logps/rejected": -444.697509765625, + "loss": 0.5387, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0847569704055786, + "rewards/margins": 0.6219168901443481, + "rewards/rejected": -1.7066739797592163, "step": 1180 }, { - "epoch": 1.23, - "learning_rate": 3.2817278287461773e-07, - "logits/chosen": -2.5135419368743896, - "logits/rejected": -2.398132801055908, - "logps/chosen": -205.8257293701172, - "logps/rejected": -299.1632385253906, - "loss": 0.5313, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.21163006126880646, - "rewards/margins": 0.4841296672821045, - "rewards/rejected": -0.6957597732543945, + "epoch": 0.62, + "learning_rate": 1.8739394056745375e-06, + "logits/chosen": -0.03631135821342468, + "logits/rejected": 0.29353755712509155, + "logps/chosen": -428.0572204589844, + "logps/rejected": -495.27301025390625, + "loss": 0.5187, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8947092294692993, + "rewards/margins": 0.8324411511421204, + "rewards/rejected": -1.727150321006775, "step": 1190 }, { - "epoch": 1.24, - "learning_rate": 3.262614678899082e-07, - "logits/chosen": -2.211836338043213, - "logits/rejected": -2.2863128185272217, - "logps/chosen": -228.488525390625, - "logps/rejected": -252.99923706054688, - "loss": 0.522, + "epoch": 0.63, + "learning_rate": 1.8298130031671974e-06, + "logits/chosen": 0.3293336033821106, + "logits/rejected": 0.4875457286834717, + "logps/chosen": -369.41693115234375, + "logps/rejected": -485.47552490234375, + "loss": 0.5305, "rewards/accuracies": 0.75, - "rewards/chosen": -0.0907645896077156, - "rewards/margins": 0.7704148292541504, - "rewards/rejected": -0.8611793518066406, + "rewards/chosen": -0.9813504219055176, + "rewards/margins": 0.7564947009086609, + "rewards/rejected": -1.7378450632095337, "step": 1200 }, { - "epoch": 1.25, - "learning_rate": 3.2435015290519874e-07, - "logits/chosen": -2.300359010696411, - "logits/rejected": -2.210038185119629, - "logps/chosen": -193.10565185546875, - "logps/rejected": -223.6150665283203, - "loss": 0.5302, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04345179349184036, - "rewards/margins": 0.6841613054275513, - "rewards/rejected": -0.727613091468811, + "epoch": 0.63, + "eval_logits/chosen": 0.4754948914051056, + "eval_logits/rejected": 0.7349805235862732, + "eval_logps/chosen": -365.72406005859375, + "eval_logps/rejected": -444.2216796875, + "eval_loss": 0.5365983843803406, + "eval_rewards/accuracies": 0.7578125, + "eval_rewards/chosen": -1.155704379081726, + "eval_rewards/margins": 0.7685737609863281, + "eval_rewards/rejected": -1.9242780208587646, + "eval_runtime": 99.8758, + "eval_samples_per_second": 20.025, + "eval_steps_per_second": 0.32, + "step": 1200 + }, + { + "epoch": 0.63, + "learning_rate": 1.7859104376726583e-06, + "logits/chosen": 0.31323331594467163, + "logits/rejected": 0.8091317415237427, + "logps/chosen": -356.2727966308594, + "logps/rejected": -382.131103515625, + "loss": 0.5003, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.319533348083496, + "rewards/margins": 0.5745511651039124, + "rewards/rejected": -1.8940845727920532, "step": 1210 }, { - "epoch": 1.26, - "learning_rate": 3.2243883792048927e-07, - "logits/chosen": -2.5476744174957275, - "logits/rejected": -2.430663585662842, - "logps/chosen": -229.6248321533203, - "logps/rejected": -247.6537322998047, - "loss": 0.5573, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.20007988810539246, - "rewards/margins": 0.406427800655365, - "rewards/rejected": -0.6065077185630798, + "epoch": 0.64, + "learning_rate": 1.7422463722911626e-06, + "logits/chosen": 0.28560835123062134, + "logits/rejected": 0.8169926404953003, + "logps/chosen": -404.62518310546875, + "logps/rejected": -409.4010314941406, + "loss": 0.5401, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2086342573165894, + "rewards/margins": 0.652005672454834, + "rewards/rejected": -1.8606399297714233, "step": 1220 }, { - "epoch": 1.27, - "learning_rate": 3.205275229357798e-07, - "logits/chosen": -2.5259690284729004, - "logits/rejected": -2.2766363620758057, - "logps/chosen": -265.3452453613281, - "logps/rejected": -221.7385711669922, - "loss": 0.5653, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.11124776303768158, - "rewards/margins": 0.5815609693527222, - "rewards/rejected": -0.6928088068962097, + "epoch": 0.64, + "learning_rate": 1.6988353904658495e-06, + "logits/chosen": 0.0763484388589859, + "logits/rejected": 0.5871134400367737, + "logps/chosen": -396.5611572265625, + "logps/rejected": -415.1891174316406, + "loss": 0.5357, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6239595413208008, + "rewards/margins": 0.532719075679779, + "rewards/rejected": -2.1566786766052246, "step": 1230 }, { - "epoch": 1.28, - "learning_rate": 3.186162079510703e-07, - "logits/chosen": -2.3755877017974854, - "logits/rejected": -2.4818127155303955, - "logps/chosen": -253.17007446289062, - "logps/rejected": -265.7911071777344, - "loss": 0.5397, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.14208722114562988, - "rewards/margins": 0.624567985534668, - "rewards/rejected": -0.7666550874710083, + "epoch": 0.65, + "learning_rate": 1.6556919911120084e-06, + "logits/chosen": -0.16248223185539246, + "logits/rejected": 0.2298637330532074, + "logps/chosen": -440.5934143066406, + "logps/rejected": -452.263916015625, + "loss": 0.5228, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2942602634429932, + "rewards/margins": 0.729531466960907, + "rewards/rejected": -2.023791551589966, "step": 1240 }, { - "epoch": 1.29, - "learning_rate": 3.167048929663608e-07, - "logits/chosen": -2.2667508125305176, - "logits/rejected": -2.1175003051757812, - "logps/chosen": -214.2106475830078, - "logps/rejected": -216.2061767578125, - "loss": 0.5402, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.12058589607477188, - "rewards/margins": 0.762450098991394, - "rewards/rejected": -0.8830360174179077, + "epoch": 0.65, + "learning_rate": 1.6128305837745548e-06, + "logits/chosen": 0.4567365050315857, + "logits/rejected": 0.9410026669502258, + "logps/chosen": -421.6625061035156, + "logps/rejected": -500.62939453125, + "loss": 0.4833, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.259197473526001, + "rewards/margins": 1.098459005355835, + "rewards/rejected": -2.357656478881836, "step": 1250 }, { - "epoch": 1.3, - "learning_rate": 3.1479357798165134e-07, - "logits/chosen": -2.475630283355713, - "logits/rejected": -2.5030765533447266, - "logps/chosen": -229.7735595703125, - "logps/rejected": -204.53561401367188, - "loss": 0.545, + "epoch": 0.66, + "learning_rate": 1.5702654838153641e-06, + "logits/chosen": 0.8424333333969116, + "logits/rejected": 1.3221367597579956, + "logps/chosen": -376.1363830566406, + "logps/rejected": -475.8714904785156, + "loss": 0.5373, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.023252641782164574, - "rewards/margins": 0.7630415558815002, - "rewards/rejected": -0.7397890090942383, + "rewards/chosen": -1.4600459337234497, + "rewards/margins": 0.8872382044792175, + "rewards/rejected": -2.3472840785980225, "step": 1260 }, { - "epoch": 1.31, - "learning_rate": 3.128822629969419e-07, - "logits/chosen": -2.7179012298583984, - "logits/rejected": -2.6036970615386963, - "logps/chosen": -276.84918212890625, - "logps/rejected": -254.32498168945312, - "loss": 0.5474, + "epoch": 0.66, + "learning_rate": 1.528010907632051e-06, + "logits/chosen": 0.41781607270240784, + "logits/rejected": 0.819769561290741, + "logps/chosen": -369.557861328125, + "logps/rejected": -404.1021423339844, + "loss": 0.5333, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.22881421446800232, - "rewards/margins": 0.6891492009162903, - "rewards/rejected": -0.9179633259773254, + "rewards/chosen": -1.2966288328170776, + "rewards/margins": 0.6270810961723328, + "rewards/rejected": -1.9237098693847656, "step": 1270 }, { - "epoch": 1.32, - "learning_rate": 3.109709480122324e-07, - "logits/chosen": -2.344045877456665, - "logits/rejected": -2.5483055114746094, - "logps/chosen": -286.34063720703125, - "logps/rejected": -220.1922607421875, - "loss": 0.5663, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.27246418595314026, - "rewards/margins": 0.20061273872852325, - "rewards/rejected": -0.4730769991874695, + "epoch": 0.67, + "learning_rate": 1.486080967909816e-06, + "logits/chosen": 0.532579779624939, + "logits/rejected": 1.2962762117385864, + "logps/chosen": -375.81109619140625, + "logps/rejected": -468.69091796875, + "loss": 0.526, + "rewards/accuracies": 0.875, + "rewards/chosen": -1.2724688053131104, + "rewards/margins": 1.1656290292739868, + "rewards/rejected": -2.4380979537963867, "step": 1280 }, { - "epoch": 1.33, - "learning_rate": 3.0905963302752294e-07, - "logits/chosen": -2.1131272315979004, - "logits/rejected": -2.2049593925476074, - "logps/chosen": -215.07150268554688, - "logps/rejected": -210.6974639892578, - "loss": 0.5606, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.24951574206352234, - "rewards/margins": 0.28748106956481934, - "rewards/rejected": -0.5369968414306641, + "epoch": 0.68, + "learning_rate": 1.4444896689079142e-06, + "logits/chosen": 0.10695119202136993, + "logits/rejected": 0.6434232592582703, + "logps/chosen": -473.42364501953125, + "logps/rejected": -503.74432373046875, + "loss": 0.5469, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.7253440618515015, + "rewards/margins": 0.48180991411209106, + "rewards/rejected": -2.207153797149658, "step": 1290 }, { - "epoch": 1.34, - "learning_rate": 3.071483180428134e-07, - "logits/chosen": -2.578439950942993, - "logits/rejected": -2.587803602218628, - "logps/chosen": -395.8404846191406, - "logps/rejected": -238.2346954345703, - "loss": 0.5476, + "epoch": 0.68, + "learning_rate": 1.403250901782354e-06, + "logits/chosen": -0.03585321083664894, + "logits/rejected": 0.7208360433578491, + "logps/chosen": -407.0667419433594, + "logps/rejected": -425.78289794921875, + "loss": 0.5171, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.028068387880921364, - "rewards/margins": 0.7790915966033936, - "rewards/rejected": -0.7510231733322144, + "rewards/chosen": -1.2429778575897217, + "rewards/margins": 0.7077491283416748, + "rewards/rejected": -1.950727105140686, "step": 1300 }, { - "epoch": 1.35, - "learning_rate": 3.0523700305810395e-07, - "logits/chosen": -2.45281720161438, - "logits/rejected": -2.3570828437805176, - "logps/chosen": -279.39410400390625, - "logps/rejected": -279.7311096191406, - "loss": 0.5405, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.1633271872997284, - "rewards/margins": 0.6046531200408936, - "rewards/rejected": -0.7679802775382996, + "epoch": 0.68, + "eval_logits/chosen": 0.5029312372207642, + "eval_logits/rejected": 0.7685657143592834, + "eval_logps/chosen": -387.5680847167969, + "eval_logps/rejected": -468.57354736328125, + "eval_loss": 0.5304480195045471, + "eval_rewards/accuracies": 0.765625, + "eval_rewards/chosen": -1.3741439580917358, + "eval_rewards/margins": 0.7936526536941528, + "eval_rewards/rejected": -2.1677966117858887, + "eval_runtime": 99.5127, + "eval_samples_per_second": 20.098, + "eval_steps_per_second": 0.322, + "step": 1300 + }, + { + "epoch": 0.69, + "learning_rate": 1.3623784399463585e-06, + "logits/chosen": 0.167133167386055, + "logits/rejected": 0.6624370813369751, + "logps/chosen": -463.7676696777344, + "logps/rejected": -429.35931396484375, + "loss": 0.5453, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3896446228027344, + "rewards/margins": 0.5783199667930603, + "rewards/rejected": -1.96796452999115, "step": 1310 }, { - "epoch": 1.36, - "learning_rate": 3.033256880733945e-07, - "logits/chosen": -2.2366137504577637, - "logits/rejected": -2.1086764335632324, - "logps/chosen": -169.0766143798828, - "logps/rejected": -210.2600555419922, - "loss": 0.5082, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.2424238622188568, - "rewards/margins": 0.16653873026371002, - "rewards/rejected": -0.4089626371860504, + "epoch": 0.69, + "learning_rate": 1.3218859344701634e-06, + "logits/chosen": 0.32800671458244324, + "logits/rejected": 0.7880526185035706, + "logps/chosen": -425.216552734375, + "logps/rejected": -439.70611572265625, + "loss": 0.5281, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4577715396881104, + "rewards/margins": 0.7258911728858948, + "rewards/rejected": -2.1836624145507812, "step": 1320 }, { - "epoch": 1.37, - "learning_rate": 3.01414373088685e-07, - "logits/chosen": -2.2935876846313477, - "logits/rejected": -2.4133498668670654, - "logps/chosen": -272.6535949707031, - "logps/rejected": -256.1263122558594, - "loss": 0.5408, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.06631962209939957, - "rewards/margins": 0.8952635526657104, - "rewards/rejected": -0.8289439082145691, + "epoch": 0.7, + "learning_rate": 1.2817869095216626e-06, + "logits/chosen": 0.4725889265537262, + "logits/rejected": 0.6833093762397766, + "logps/chosen": -400.4358215332031, + "logps/rejected": -443.46063232421875, + "loss": 0.5021, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2056840658187866, + "rewards/margins": 0.6257123947143555, + "rewards/rejected": -1.831396460533142, "step": 1330 }, { - "epoch": 1.38, - "learning_rate": 2.9950305810397555e-07, - "logits/chosen": -2.4236862659454346, - "logits/rejected": -2.5300862789154053, - "logps/chosen": -264.49200439453125, - "logps/rejected": -250.42605590820312, - "loss": 0.5558, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.4723237156867981, - "rewards/margins": 0.4657681882381439, - "rewards/rejected": -0.9380919337272644, + "epoch": 0.7, + "learning_rate": 1.2420947578494524e-06, + "logits/chosen": 0.28971534967422485, + "logits/rejected": 0.6644312739372253, + "logps/chosen": -418.182861328125, + "logps/rejected": -483.86260986328125, + "loss": 0.5371, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.4614604711532593, + "rewards/margins": 0.6954753994941711, + "rewards/rejected": -2.156935930252075, "step": 1340 }, { - "epoch": 1.39, - "learning_rate": 2.9759174311926603e-07, - "logits/chosen": -2.388587236404419, - "logits/rejected": -2.268752336502075, - "logps/chosen": -311.207763671875, - "logps/rejected": -154.0712127685547, - "loss": 0.5471, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.054443489760160446, - "rewards/margins": 0.8568767309188843, - "rewards/rejected": -0.9113203883171082, + "epoch": 0.71, + "learning_rate": 1.2028227363097583e-06, + "logits/chosen": 0.6982226371765137, + "logits/rejected": 0.4474209249019623, + "logps/chosen": -356.05047607421875, + "logps/rejected": -364.0442810058594, + "loss": 0.5642, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -1.2198214530944824, + "rewards/margins": 0.17703430354595184, + "rewards/rejected": -1.3968555927276611, "step": 1350 }, { - "epoch": 1.4, - "learning_rate": 2.9568042813455656e-07, - "logits/chosen": -2.134000301361084, - "logits/rejected": -2.2762789726257324, - "logps/chosen": -236.97683715820312, - "logps/rejected": -211.66000366210938, - "loss": 0.5532, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.23458454012870789, - "rewards/margins": 0.4157086908817291, - "rewards/rejected": -0.650293231010437, + "epoch": 0.71, + "learning_rate": 1.1639839614387575e-06, + "logits/chosen": 0.36424458026885986, + "logits/rejected": 0.7081610560417175, + "logps/chosen": -347.169677734375, + "logps/rejected": -375.45782470703125, + "loss": 0.5351, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.1354833841323853, + "rewards/margins": 0.48198217153549194, + "rewards/rejected": -1.6174657344818115, "step": 1360 }, { - "epoch": 1.41, - "learning_rate": 2.937691131498471e-07, - "logits/chosen": -2.4096202850341797, - "logits/rejected": -2.525867462158203, - "logps/chosen": -311.18206787109375, - "logps/rejected": -282.1830749511719, - "loss": 0.5697, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.14300379157066345, - "rewards/margins": 0.38485369086265564, - "rewards/rejected": -0.5278575420379639, + "epoch": 0.72, + "learning_rate": 1.1255914050717553e-06, + "logits/chosen": 0.20586355030536652, + "logits/rejected": 0.5243974328041077, + "logps/chosen": -338.34124755859375, + "logps/rejected": -414.631103515625, + "loss": 0.4757, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0596474409103394, + "rewards/margins": 0.7905664443969727, + "rewards/rejected": -1.8502140045166016, "step": 1370 }, { - "epoch": 1.42, - "learning_rate": 2.918577981651376e-07, - "logits/chosen": -2.3166534900665283, - "logits/rejected": -2.1045656204223633, - "logps/chosen": -318.7552490234375, - "logps/rejected": -169.04383850097656, - "loss": 0.5451, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.16076600551605225, - "rewards/margins": 0.43872982263565063, - "rewards/rejected": -0.5994957685470581, + "epoch": 0.72, + "learning_rate": 1.0876578900107053e-06, + "logits/chosen": 0.5624727010726929, + "logits/rejected": 1.3232195377349854, + "logps/chosen": -299.00689697265625, + "logps/rejected": -350.9127197265625, + "loss": 0.5566, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0775959491729736, + "rewards/margins": 0.9055012464523315, + "rewards/rejected": -1.9830970764160156, "step": 1380 }, { - "epoch": 1.43, - "learning_rate": 2.8994648318042816e-07, - "logits/chosen": -2.477952718734741, - "logits/rejected": -2.3459110260009766, - "logps/chosen": -246.3095245361328, - "logps/rejected": -196.61399841308594, - "loss": 0.5566, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.06157383322715759, - "rewards/margins": 0.7134921550750732, - "rewards/rejected": -0.6519182920455933, + "epoch": 0.73, + "learning_rate": 1.0501960857414912e-06, + "logits/chosen": 0.11433680355548859, + "logits/rejected": 1.1149139404296875, + "logps/chosen": -396.9518127441406, + "logps/rejected": -397.4958190917969, + "loss": 0.5236, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3632252216339111, + "rewards/margins": 0.8269487619400024, + "rewards/rejected": -2.1901743412017822, "step": 1390 }, { - "epoch": 1.44, - "learning_rate": 2.8803516819571863e-07, - "logits/chosen": -2.489950180053711, - "logits/rejected": -2.50527286529541, - "logps/chosen": -308.19573974609375, - "logps/rejected": -222.0939178466797, - "loss": 0.5669, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.28509682416915894, - "rewards/margins": 0.6225865483283997, - "rewards/rejected": -0.9076833724975586, + "epoch": 0.73, + "learning_rate": 1.0132185042024249e-06, + "logits/chosen": 0.43161505460739136, + "logits/rejected": 0.736209511756897, + "logps/chosen": -331.12628173828125, + "logps/rejected": -450.26202392578125, + "loss": 0.4875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2222540378570557, + "rewards/margins": 0.826834499835968, + "rewards/rejected": -2.049088716506958, "step": 1400 }, { - "epoch": 1.46, - "learning_rate": 2.8612385321100917e-07, - "logits/chosen": -2.2142746448516846, - "logits/rejected": -2.1834685802459717, - "logps/chosen": -199.5335235595703, - "logps/rejected": -232.9855499267578, - "loss": 0.5256, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.20894494652748108, - "rewards/margins": 0.24761509895324707, - "rewards/rejected": -0.45656007528305054, - "step": 1410 + "epoch": 0.73, + "eval_logits/chosen": 0.5925981998443604, + "eval_logits/rejected": 0.8565999269485474, + "eval_logps/chosen": -382.43292236328125, + "eval_logps/rejected": -466.92669677734375, + "eval_loss": 0.5320979356765747, + "eval_rewards/accuracies": 0.7578125, + "eval_rewards/chosen": -1.3227922916412354, + "eval_rewards/margins": 0.8285354375839233, + "eval_rewards/rejected": -2.1513278484344482, + "eval_runtime": 99.9611, + "eval_samples_per_second": 20.008, + "eval_steps_per_second": 0.32, + "step": 1400 }, { - "epoch": 1.47, - "learning_rate": 2.842125382262997e-07, - "logits/chosen": -2.416759490966797, - "logits/rejected": -2.349916934967041, - "logps/chosen": -277.214599609375, - "logps/rejected": -271.69854736328125, - "loss": 0.5399, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.003354682121425867, - "rewards/margins": 0.93927001953125, - "rewards/rejected": -0.9359153509140015, + "epoch": 0.74, + "learning_rate": 9.767374956053584e-07, + "logits/chosen": 0.6243610978126526, + "logits/rejected": 1.1261684894561768, + "logps/chosen": -380.84222412109375, + "logps/rejected": -383.2228698730469, + "loss": 0.5409, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4898316860198975, + "rewards/margins": 0.39106887578964233, + "rewards/rejected": -1.8809006214141846, + "step": 1410 + }, + { + "epoch": 0.74, + "learning_rate": 9.407652443108192e-07, + "logits/chosen": 0.4377509653568268, + "logits/rejected": 0.9771862030029297, + "logps/chosen": -422.27508544921875, + "logps/rejected": -454.2247009277344, + "loss": 0.5151, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -1.375937581062317, + "rewards/margins": 0.815414547920227, + "rewards/rejected": -2.191352367401123, "step": 1420 }, { - "epoch": 1.48, - "learning_rate": 2.8230122324159023e-07, - "logits/chosen": -2.2614686489105225, - "logits/rejected": -2.343787670135498, - "logps/chosen": -205.03573608398438, - "logps/rejected": -297.828369140625, - "loss": 0.5376, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.3863915205001831, - "rewards/margins": 0.32738059759140015, - "rewards/rejected": -0.7137721180915833, + "epoch": 0.75, + "learning_rate": 9.053137647585231e-07, + "logits/chosen": 0.24253082275390625, + "logits/rejected": 0.9329066276550293, + "logps/chosen": -436.6142578125, + "logps/rejected": -496.86151123046875, + "loss": 0.5556, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.5039043426513672, + "rewards/margins": 0.9265894889831543, + "rewards/rejected": -2.4304938316345215, "step": 1430 }, { - "epoch": 1.49, - "learning_rate": 2.8038990825688076e-07, - "logits/chosen": -2.5332446098327637, - "logits/rejected": -2.485593795776367, - "logps/chosen": -204.37960815429688, - "logps/rejected": -191.53567504882812, - "loss": 0.547, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.4227626323699951, - "rewards/margins": 0.1967758685350418, - "rewards/rejected": -0.619538426399231, + "epoch": 0.75, + "learning_rate": 8.703948974546592e-07, + "logits/chosen": 0.5952311158180237, + "logits/rejected": 0.8244959712028503, + "logps/chosen": -419.00579833984375, + "logps/rejected": -488.2655334472656, + "loss": 0.5196, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2945992946624756, + "rewards/margins": 0.7285401225090027, + "rewards/rejected": -2.023139476776123, "step": 1440 }, { - "epoch": 1.5, - "learning_rate": 2.784785932721712e-07, - "logits/chosen": -2.4867165088653564, - "logits/rejected": -2.367835521697998, - "logps/chosen": -264.09783935546875, - "logps/rejected": -226.6724395751953, - "loss": 0.5612, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.03983074054121971, - "rewards/margins": 1.0130517482757568, - "rewards/rejected": -1.052882432937622, + "epoch": 0.76, + "learning_rate": 8.360203050172489e-07, + "logits/chosen": 0.06418957561254501, + "logits/rejected": 0.8408614993095398, + "logps/chosen": -460.03668212890625, + "logps/rejected": -426.32220458984375, + "loss": 0.5277, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3083784580230713, + "rewards/margins": 0.6134061217308044, + "rewards/rejected": -1.9217846393585205, "step": 1450 }, { - "epoch": 1.51, - "learning_rate": 2.765672782874617e-07, - "logits/chosen": -2.376364231109619, - "logits/rejected": -2.221336841583252, - "logps/chosen": -327.6852111816406, - "logps/rejected": -228.00344848632812, - "loss": 0.555, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.287401020526886, - "rewards/margins": 0.42843931913375854, - "rewards/rejected": -0.7158403396606445, + "epoch": 0.76, + "learning_rate": 8.022014682809306e-07, + "logits/chosen": 0.3110508322715759, + "logits/rejected": 0.700973629951477, + "logps/chosen": -397.7454528808594, + "logps/rejected": -399.8217468261719, + "loss": 0.5672, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.165363073348999, + "rewards/margins": 0.5710027813911438, + "rewards/rejected": -1.7363656759262085, "step": 1460 }, { - "epoch": 1.52, - "learning_rate": 2.7465596330275225e-07, - "logits/chosen": -2.417738437652588, - "logits/rejected": -2.3728537559509277, - "logps/chosen": -222.4464111328125, - "logps/rejected": -159.91334533691406, - "loss": 0.5632, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.3034583330154419, - "rewards/margins": 0.43387946486473083, - "rewards/rejected": -0.7373377680778503, + "epoch": 0.77, + "learning_rate": 7.689496824624526e-07, + "logits/chosen": -0.01150925736874342, + "logits/rejected": 1.1124351024627686, + "logps/chosen": -495.40008544921875, + "logps/rejected": -471.8081970214844, + "loss": 0.5214, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -1.3093578815460205, + "rewards/margins": 0.9808523058891296, + "rewards/rejected": -2.290210247039795, "step": 1470 }, { - "epoch": 1.53, - "learning_rate": 2.727446483180428e-07, - "logits/chosen": -2.5264039039611816, - "logits/rejected": -2.549905776977539, - "logps/chosen": -311.6380615234375, - "logps/rejected": -241.33175659179688, - "loss": 0.5474, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.31216129660606384, - "rewards/margins": 0.6543269157409668, - "rewards/rejected": -0.966488242149353, + "epoch": 0.77, + "learning_rate": 7.362760533881649e-07, + "logits/chosen": 0.5060569047927856, + "logits/rejected": 0.566477358341217, + "logps/chosen": -409.0811767578125, + "logps/rejected": -493.974609375, + "loss": 0.5545, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3020764589309692, + "rewards/margins": 0.6977756023406982, + "rewards/rejected": -1.999851942062378, "step": 1480 }, { - "epoch": 1.54, - "learning_rate": 2.708333333333333e-07, - "logits/chosen": -2.4928135871887207, - "logits/rejected": -2.5519299507141113, - "logps/chosen": -278.2198181152344, - "logps/rejected": -230.333984375, - "loss": 0.5281, + "epoch": 0.78, + "learning_rate": 7.041914937847586e-07, + "logits/chosen": -0.16839629411697388, + "logits/rejected": 0.4934666156768799, + "logps/chosen": -408.4761962890625, + "logps/rejected": -467.8258361816406, + "loss": 0.5166, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.3294491767883301, - "rewards/margins": 0.6884486079216003, - "rewards/rejected": -1.0178978443145752, + "rewards/chosen": -1.2998807430267334, + "rewards/margins": 0.7749007940292358, + "rewards/rejected": -2.0747811794281006, "step": 1490 }, { - "epoch": 1.55, - "learning_rate": 2.6892201834862385e-07, - "logits/chosen": -2.3910984992980957, - "logits/rejected": -2.5310654640197754, - "logps/chosen": -318.42059326171875, - "logps/rejected": -227.05142211914062, - "loss": 0.5449, + "epoch": 0.78, + "learning_rate": 6.7270671963451e-07, + "logits/chosen": 0.30948877334594727, + "logits/rejected": 0.4193127155303955, + "logps/chosen": -359.7439270019531, + "logps/rejected": -472.97705078125, + "loss": 0.5216, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03567828983068466, - "rewards/margins": 1.0869849920272827, - "rewards/rejected": -1.1226632595062256, + "rewards/chosen": -1.078048825263977, + "rewards/margins": 0.7132478952407837, + "rewards/rejected": -1.791296362876892, "step": 1500 }, { - "epoch": 1.56, - "learning_rate": 2.6701070336391433e-07, - "logits/chosen": -2.120537042617798, - "logits/rejected": -1.9794002771377563, - "logps/chosen": -287.77618408203125, - "logps/rejected": -180.72586059570312, - "loss": 0.5251, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17248104512691498, - "rewards/margins": 0.49385708570480347, - "rewards/rejected": -0.6663382053375244, + "epoch": 0.78, + "eval_logits/chosen": 0.4629691541194916, + "eval_logits/rejected": 0.7188760638237, + "eval_logps/chosen": -370.21026611328125, + "eval_logps/rejected": -452.12982177734375, + "eval_loss": 0.5326071977615356, + "eval_rewards/accuracies": 0.76171875, + "eval_rewards/chosen": -1.2005664110183716, + "eval_rewards/margins": 0.8027929663658142, + "eval_rewards/rejected": -2.003359317779541, + "eval_runtime": 99.5373, + "eval_samples_per_second": 20.093, + "eval_steps_per_second": 0.321, + "step": 1500 + }, + { + "epoch": 0.79, + "learning_rate": 6.418322465962234e-07, + "logits/chosen": 0.22714094817638397, + "logits/rejected": 0.8290818929672241, + "logps/chosen": -376.62322998046875, + "logps/rejected": -393.87847900390625, + "loss": 0.4974, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.1898367404937744, + "rewards/margins": 0.6498056650161743, + "rewards/rejected": -1.8396425247192383, "step": 1510 }, { - "epoch": 1.57, - "learning_rate": 2.6509938837920486e-07, - "logits/chosen": -2.598473310470581, - "logits/rejected": -2.6752312183380127, - "logps/chosen": -303.84912109375, - "logps/rejected": -220.67562866210938, - "loss": 0.5453, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.2263271063566208, - "rewards/margins": 0.805108904838562, - "rewards/rejected": -1.0314362049102783, + "epoch": 0.8, + "learning_rate": 6.115783864930904e-07, + "logits/chosen": 0.2600030303001404, + "logits/rejected": -0.018900180235505104, + "logps/chosen": -407.98211669921875, + "logps/rejected": -459.68212890625, + "loss": 0.5115, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1808936595916748, + "rewards/margins": 0.5539044141769409, + "rewards/rejected": -1.7347980737686157, "step": 1520 }, { - "epoch": 1.58, - "learning_rate": 2.631880733944954e-07, - "logits/chosen": -2.563636541366577, - "logits/rejected": -2.4898366928100586, - "logps/chosen": -317.47259521484375, - "logps/rejected": -213.1184844970703, - "loss": 0.5185, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.07109083235263824, - "rewards/margins": 0.6358667612075806, - "rewards/rejected": -0.7069576382637024, + "epoch": 0.8, + "learning_rate": 5.819552438686238e-07, + "logits/chosen": 0.11013329029083252, + "logits/rejected": 0.7540661096572876, + "logps/chosen": -472.6468811035156, + "logps/rejected": -463.4947204589844, + "loss": 0.5537, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.388946771621704, + "rewards/margins": 0.4617336690425873, + "rewards/rejected": -1.8506805896759033, "step": 1530 }, { - "epoch": 1.59, - "learning_rate": 2.612767584097859e-07, - "logits/chosen": -2.4102084636688232, - "logits/rejected": -2.329968214035034, - "logps/chosen": -262.5071716308594, - "logps/rejected": -234.21939086914062, - "loss": 0.5491, + "epoch": 0.81, + "learning_rate": 5.529727126118229e-07, + "logits/chosen": 0.35062912106513977, + "logits/rejected": 0.3931676149368286, + "logps/chosen": -419.42193603515625, + "logps/rejected": -476.36865234375, + "loss": 0.5286, "rewards/accuracies": 0.75, - "rewards/chosen": -0.028744656592607498, - "rewards/margins": 0.5971611738204956, - "rewards/rejected": -0.6259058713912964, + "rewards/chosen": -0.9165588617324829, + "rewards/margins": 0.5505474209785461, + "rewards/rejected": -1.4671061038970947, "step": 1540 }, { - "epoch": 1.6, - "learning_rate": 2.5936544342507646e-07, - "logits/chosen": -2.52365779876709, - "logits/rejected": -2.2771174907684326, - "logps/chosen": -292.43560791015625, - "logps/rejected": -262.5408020019531, - "loss": 0.5316, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.04525692015886307, - "rewards/margins": 0.5967898368835449, - "rewards/rejected": -0.5515329241752625, + "epoch": 0.81, + "learning_rate": 5.246404726526918e-07, + "logits/chosen": 0.8175240755081177, + "logits/rejected": 0.8358044624328613, + "logps/chosen": -309.7247009277344, + "logps/rejected": -335.03045654296875, + "loss": 0.514, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1784462928771973, + "rewards/margins": 0.5345664620399475, + "rewards/rejected": -1.713012456893921, "step": 1550 }, { - "epoch": 1.61, - "learning_rate": 2.5745412844036693e-07, - "logits/chosen": -2.4652435779571533, - "logits/rejected": -2.3399059772491455, - "logps/chosen": -295.6546325683594, - "logps/rejected": -239.66946411132812, - "loss": 0.547, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.045461829751729965, - "rewards/margins": 0.6490108370780945, - "rewards/rejected": -0.6944726705551147, + "epoch": 0.82, + "learning_rate": 4.969679867292276e-07, + "logits/chosen": 0.5007575750350952, + "logits/rejected": 0.8389018774032593, + "logps/chosen": -331.74761962890625, + "logps/rejected": -406.36187744140625, + "loss": 0.5403, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0899267196655273, + "rewards/margins": 0.5384773015975952, + "rewards/rejected": -1.6284040212631226, "step": 1560 }, { - "epoch": 1.62, - "learning_rate": 2.5554281345565747e-07, - "logits/chosen": -2.425602674484253, - "logits/rejected": -2.3701093196868896, - "logps/chosen": -333.5090637207031, - "logps/rejected": -249.1916961669922, - "loss": 0.5225, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.054739952087402344, - "rewards/margins": 1.113675832748413, - "rewards/rejected": -1.0589358806610107, + "epoch": 0.82, + "learning_rate": 4.699644972269332e-07, + "logits/chosen": 0.562352180480957, + "logits/rejected": 0.4318207800388336, + "logps/chosen": -398.3438720703125, + "logps/rejected": -474.5340270996094, + "loss": 0.5186, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.354994535446167, + "rewards/margins": 0.6516760587692261, + "rewards/rejected": -2.0066704750061035, "step": 1570 }, { - "epoch": 1.63, - "learning_rate": 2.53631498470948e-07, - "logits/chosen": -2.476578712463379, - "logits/rejected": -2.3098702430725098, - "logps/chosen": -277.14373779296875, - "logps/rejected": -208.9362335205078, - "loss": 0.5154, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.12385344505310059, - "rewards/margins": 0.5760471820831299, - "rewards/rejected": -0.6999006867408752, + "epoch": 0.83, + "learning_rate": 4.4363902309194656e-07, + "logits/chosen": 0.23987403512001038, + "logits/rejected": 0.34921976923942566, + "logps/chosen": -400.57708740234375, + "logps/rejected": -437.4691467285156, + "loss": 0.5214, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2433634996414185, + "rewards/margins": 0.4714573323726654, + "rewards/rejected": -1.7148208618164062, "step": 1580 }, { - "epoch": 1.64, - "learning_rate": 2.5172018348623853e-07, - "logits/chosen": -2.3377368450164795, - "logits/rejected": -2.5816705226898193, - "logps/chosen": -333.68310546875, - "logps/rejected": -167.23974609375, - "loss": 0.564, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.043125562369823456, - "rewards/margins": 0.8181279897689819, - "rewards/rejected": -0.7750024795532227, + "epoch": 0.83, + "learning_rate": 4.1800035681877765e-07, + "logits/chosen": 0.3362785577774048, + "logits/rejected": 1.0109045505523682, + "logps/chosen": -453.203369140625, + "logps/rejected": -483.91009521484375, + "loss": 0.5224, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5157214403152466, + "rewards/margins": 0.7080753445625305, + "rewards/rejected": -2.223797082901001, "step": 1590 }, { - "epoch": 1.65, - "learning_rate": 2.4980886850152906e-07, - "logits/chosen": -2.3544070720672607, - "logits/rejected": -2.522584915161133, - "logps/chosen": -372.16339111328125, - "logps/rejected": -314.8898620605469, - "loss": 0.5526, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.10558883845806122, - "rewards/margins": 1.0972599983215332, - "rewards/rejected": -0.9916712045669556, + "epoch": 0.84, + "learning_rate": 3.9305706151369195e-07, + "logits/chosen": 0.26526278257369995, + "logits/rejected": 0.6012780070304871, + "logps/chosen": -449.33795166015625, + "logps/rejected": -431.3976135253906, + "loss": 0.4894, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0703104734420776, + "rewards/margins": 0.8223675489425659, + "rewards/rejected": -1.8926780223846436, "step": 1600 }, { - "epoch": 1.66, - "learning_rate": 2.478975535168196e-07, - "logits/chosen": -2.4156651496887207, - "logits/rejected": -2.321596622467041, - "logps/chosen": -234.2825164794922, - "logps/rejected": -187.53053283691406, - "loss": 0.5361, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.13712987303733826, - "rewards/margins": 0.6930993795394897, - "rewards/rejected": -0.8302291631698608, + "epoch": 0.84, + "eval_logits/chosen": 0.482819139957428, + "eval_logits/rejected": 0.7405462861061096, + "eval_logps/chosen": -373.15850830078125, + "eval_logps/rejected": -457.3565368652344, + "eval_loss": 0.5327410101890564, + "eval_rewards/accuracies": 0.765625, + "eval_rewards/chosen": -1.2300488948822021, + "eval_rewards/margins": 0.8255774974822998, + "eval_rewards/rejected": -2.055626630783081, + "eval_runtime": 99.941, + "eval_samples_per_second": 20.012, + "eval_steps_per_second": 0.32, + "step": 1600 + }, + { + "epoch": 0.84, + "learning_rate": 3.688174680346976e-07, + "logits/chosen": 0.1651560515165329, + "logits/rejected": 1.0159032344818115, + "logps/chosen": -415.13720703125, + "logps/rejected": -381.99053955078125, + "loss": 0.5122, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3641674518585205, + "rewards/margins": 0.5815728902816772, + "rewards/rejected": -1.9457403421401978, "step": 1610 }, { - "epoch": 1.67, - "learning_rate": 2.459862385321101e-07, - "logits/chosen": -2.536264657974243, - "logits/rejected": -2.373924732208252, - "logps/chosen": -377.7065734863281, - "logps/rejected": -278.75494384765625, - "loss": 0.5685, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.011585181578993797, - "rewards/margins": 0.7624789476394653, - "rewards/rejected": -0.750893771648407, + "epoch": 0.85, + "learning_rate": 3.4528967220911287e-07, + "logits/chosen": 0.38268962502479553, + "logits/rejected": 0.7260398268699646, + "logps/chosen": -401.5191650390625, + "logps/rejected": -462.7931213378906, + "loss": 0.5414, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.452815055847168, + "rewards/margins": 0.45942768454551697, + "rewards/rejected": -1.9122428894042969, "step": 1620 }, { - "epoch": 1.68, - "learning_rate": 2.440749235474006e-07, - "logits/chosen": -2.322601795196533, - "logits/rejected": -2.2549948692321777, - "logps/chosen": -292.2939147949219, - "logps/rejected": -266.98040771484375, - "loss": 0.535, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.1699330061674118, - "rewards/margins": 0.8751620054244995, - "rewards/rejected": -0.7052290439605713, + "epoch": 0.85, + "learning_rate": 3.224815321296168e-07, + "logits/chosen": 0.12002329528331757, + "logits/rejected": 0.9523947834968567, + "logps/chosen": -418.4691467285156, + "logps/rejected": -426.87677001953125, + "loss": 0.5471, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2490535974502563, + "rewards/margins": 0.8716403245925903, + "rewards/rejected": -2.1206939220428467, "step": 1630 }, { - "epoch": 1.69, - "learning_rate": 2.421636085626911e-07, - "logits/chosen": -2.3065285682678223, - "logits/rejected": -2.3722896575927734, - "logps/chosen": -338.43115234375, - "logps/rejected": -282.4959411621094, - "loss": 0.555, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.2029719352722168, - "rewards/margins": 0.37522464990615845, - "rewards/rejected": -0.57819664478302, + "epoch": 0.86, + "learning_rate": 3.004006655297209e-07, + "logits/chosen": 0.5747352242469788, + "logits/rejected": 0.7612552642822266, + "logps/chosen": -385.31268310546875, + "logps/rejected": -435.27960205078125, + "loss": 0.5407, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1729180812835693, + "rewards/margins": 0.7992533445358276, + "rewards/rejected": -1.9721715450286865, "step": 1640 }, { - "epoch": 1.7, - "learning_rate": 2.402522935779816e-07, - "logits/chosen": -2.526841163635254, - "logits/rejected": -2.2794435024261475, - "logps/chosen": -308.1125183105469, - "logps/rejected": -253.7852020263672, - "loss": 0.5306, + "epoch": 0.86, + "learning_rate": 2.7905444723949765e-07, + "logits/chosen": 0.6584094762802124, + "logits/rejected": 0.37713831663131714, + "logps/chosen": -351.07904052734375, + "logps/rejected": -470.117919921875, + "loss": 0.4971, "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.15976569056510925, - "rewards/margins": 0.9058405160903931, - "rewards/rejected": -1.0656063556671143, + "rewards/chosen": -1.173592209815979, + "rewards/margins": 0.6863223314285278, + "rewards/rejected": -1.8599144220352173, "step": 1650 }, { - "epoch": 1.71, - "learning_rate": 2.3834097859327215e-07, - "logits/chosen": -2.308931350708008, - "logits/rejected": -2.195425033569336, - "logps/chosen": -227.3255615234375, - "logps/rejected": -257.3186950683594, - "loss": 0.5538, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.19937238097190857, - "rewards/margins": 0.6596201658248901, - "rewards/rejected": -0.8589925765991211, + "epoch": 0.87, + "learning_rate": 2.5845000672245575e-07, + "logits/chosen": 0.3972054123878479, + "logits/rejected": 0.2828903794288635, + "logps/chosen": -398.77264404296875, + "logps/rejected": -473.8125, + "loss": 0.4996, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.435257911682129, + "rewards/margins": 0.6242495179176331, + "rewards/rejected": -2.059507369995117, "step": 1660 }, { - "epoch": 1.72, - "learning_rate": 2.3642966360856268e-07, - "logits/chosen": -2.505361795425415, - "logits/rejected": -2.2748889923095703, - "logps/chosen": -275.67230224609375, - "logps/rejected": -213.0087890625, - "loss": 0.502, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.023041725158691406, - "rewards/margins": 0.6609295010566711, - "rewards/rejected": -0.6839712262153625, + "epoch": 0.87, + "learning_rate": 2.385942256943499e-07, + "logits/chosen": 0.32778117060661316, + "logits/rejected": 0.6275647282600403, + "logps/chosen": -419.7220153808594, + "logps/rejected": -462.55078125, + "loss": 0.5608, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.428417444229126, + "rewards/margins": 0.5705910325050354, + "rewards/rejected": -1.9990084171295166, "step": 1670 }, { - "epoch": 1.73, - "learning_rate": 2.345183486238532e-07, - "logits/chosen": -2.4382357597351074, - "logits/rejected": -2.337517499923706, - "logps/chosen": -257.69146728515625, - "logps/rejected": -238.8438262939453, - "loss": 0.5522, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17831094563007355, - "rewards/margins": 0.3667464554309845, - "rewards/rejected": -0.5450573563575745, + "epoch": 0.88, + "learning_rate": 2.1949373582475065e-07, + "logits/chosen": 0.4387190341949463, + "logits/rejected": 0.7346469759941101, + "logps/chosen": -376.1452941894531, + "logps/rejected": -415.2806701660156, + "loss": 0.5528, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3217471837997437, + "rewards/margins": 0.45661622285842896, + "rewards/rejected": -1.778363585472107, "step": 1680 }, { - "epoch": 1.74, - "learning_rate": 2.3260703363914372e-07, - "logits/chosen": -2.54907488822937, - "logits/rejected": -2.3391122817993164, - "logps/chosen": -345.74346923828125, - "logps/rejected": -252.90414428710938, - "loss": 0.5239, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.07564310729503632, - "rewards/margins": 0.590453028678894, - "rewards/rejected": -0.6660962104797363, + "epoch": 0.88, + "learning_rate": 2.0115491652211271e-07, + "logits/chosen": 0.17830494046211243, + "logits/rejected": 0.9514158368110657, + "logps/chosen": -427.56378173828125, + "logps/rejected": -450.45306396484375, + "loss": 0.5492, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.1855056285858154, + "rewards/margins": 1.0056660175323486, + "rewards/rejected": -2.191171884536743, "step": 1690 }, { - "epoch": 1.75, - "learning_rate": 2.3069571865443425e-07, - "logits/chosen": -2.417306900024414, - "logits/rejected": -2.506610155105591, - "logps/chosen": -223.8877716064453, - "logps/rejected": -169.25540161132812, - "loss": 0.5207, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.12120304256677628, - "rewards/margins": 0.6357380151748657, - "rewards/rejected": -0.7569410800933838, + "epoch": 0.89, + "learning_rate": 1.8358389280311306e-07, + "logits/chosen": 0.22087359428405762, + "logits/rejected": 0.33987805247306824, + "logps/chosen": -383.9516906738281, + "logps/rejected": -411.02911376953125, + "loss": 0.5179, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.246467113494873, + "rewards/margins": 0.6251389384269714, + "rewards/rejected": -1.8716061115264893, "step": 1700 }, { - "epoch": 1.76, - "learning_rate": 2.2878440366972476e-07, - "logits/chosen": -2.6201210021972656, - "logits/rejected": -2.4392848014831543, - "logps/chosen": -270.24615478515625, - "logps/rejected": -343.59423828125, - "loss": 0.533, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.28634923696517944, - "rewards/margins": 0.4523712992668152, - "rewards/rejected": -0.7387205958366394, + "epoch": 0.89, + "eval_logits/chosen": 0.5011742115020752, + "eval_logits/rejected": 0.760403573513031, + "eval_logps/chosen": -373.2859802246094, + "eval_logps/rejected": -457.37200927734375, + "eval_loss": 0.5325719118118286, + "eval_rewards/accuracies": 0.765625, + "eval_rewards/chosen": -1.2313237190246582, + "eval_rewards/margins": 0.824457585811615, + "eval_rewards/rejected": -2.055781364440918, + "eval_runtime": 99.393, + "eval_samples_per_second": 20.122, + "eval_steps_per_second": 0.322, + "step": 1700 + }, + { + "epoch": 0.89, + "learning_rate": 1.667865332469379e-07, + "logits/chosen": 0.4138943552970886, + "logits/rejected": 0.8224202394485474, + "logps/chosen": -369.52764892578125, + "logps/rejected": -423.6180725097656, + "loss": 0.538, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.311859369277954, + "rewards/margins": 0.8248014450073242, + "rewards/rejected": -2.1366608142852783, "step": 1710 }, { - "epoch": 1.78, - "learning_rate": 2.268730886850153e-07, - "logits/chosen": -2.069031000137329, - "logits/rejected": -1.9285329580307007, - "logps/chosen": -210.6043243408203, - "logps/rejected": -177.20159912109375, - "loss": 0.5444, + "epoch": 0.9, + "learning_rate": 1.507684480352292e-07, + "logits/chosen": 0.2538720965385437, + "logits/rejected": 0.32807546854019165, + "logps/chosen": -413.42755126953125, + "logps/rejected": -484.99957275390625, + "loss": 0.5225, "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.03490941971540451, - "rewards/margins": 0.9221685528755188, - "rewards/rejected": -0.8872591853141785, + "rewards/chosen": -1.2163430452346802, + "rewards/margins": 0.8377116322517395, + "rewards/rejected": -2.0540547370910645, "step": 1720 }, { - "epoch": 1.79, - "learning_rate": 2.249617737003058e-07, - "logits/chosen": -2.2896201610565186, - "logits/rejected": -2.356297254562378, - "logps/chosen": -292.9852294921875, - "logps/rejected": -277.8864440917969, - "loss": 0.5467, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.21859049797058105, - "rewards/margins": 0.5577865839004517, - "rewards/rejected": -0.7763770818710327, + "epoch": 0.91, + "learning_rate": 1.3553498707832763e-07, + "logits/chosen": 0.40428122878074646, + "logits/rejected": 0.49123507738113403, + "logps/chosen": -334.6162109375, + "logps/rejected": -424.0606384277344, + "loss": 0.5422, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1268707513809204, + "rewards/margins": 0.7507694959640503, + "rewards/rejected": -1.8776404857635498, "step": 1730 }, { - "epoch": 1.8, - "learning_rate": 2.2305045871559633e-07, - "logits/chosen": -2.3246283531188965, - "logits/rejected": -2.1089653968811035, - "logps/chosen": -273.15118408203125, - "logps/rejected": -292.574951171875, - "loss": 0.5207, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.37160351872444153, - "rewards/margins": 0.06535868346691132, - "rewards/rejected": -0.43696221709251404, + "epoch": 0.91, + "learning_rate": 1.2109123822844653e-07, + "logits/chosen": 0.3417002558708191, + "logits/rejected": 0.7004715800285339, + "logps/chosen": -364.5450439453125, + "logps/rejected": -460.31304931640625, + "loss": 0.5338, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.8657018542289734, + "rewards/margins": 1.1912907361984253, + "rewards/rejected": -2.056992769241333, "step": 1740 }, { - "epoch": 1.81, - "learning_rate": 2.2113914373088686e-07, - "logits/chosen": -2.4427847862243652, - "logits/rejected": -2.560529947280884, - "logps/chosen": -153.76699829101562, - "logps/rejected": -248.28408813476562, - "loss": 0.5375, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.2880316376686096, - "rewards/margins": 0.30386772751808167, - "rewards/rejected": -0.5918993949890137, + "epoch": 0.92, + "learning_rate": 1.0744202558037014e-07, + "logits/chosen": -0.01776253618299961, + "logits/rejected": 0.4788368344306946, + "logps/chosen": -474.26715087890625, + "logps/rejected": -514.1068725585938, + "loss": 0.4864, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3969725370407104, + "rewards/margins": 0.6193428635597229, + "rewards/rejected": -2.016315460205078, "step": 1750 }, { - "epoch": 1.82, - "learning_rate": 2.1922782874617736e-07, - "logits/chosen": -2.6262247562408447, - "logits/rejected": -2.416951894760132, - "logps/chosen": -257.9002990722656, - "logps/rejected": -275.48553466796875, - "loss": 0.5315, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.2771896719932556, - "rewards/margins": 0.2873901128768921, - "rewards/rejected": -0.5645797848701477, + "epoch": 0.92, + "learning_rate": 9.459190786024696e-08, + "logits/chosen": 0.476068913936615, + "logits/rejected": 0.7961768507957458, + "logps/chosen": -385.49945068359375, + "logps/rejected": -416.2054748535156, + "loss": 0.5388, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.323490023612976, + "rewards/margins": 0.5129523277282715, + "rewards/rejected": -1.836442232131958, "step": 1760 }, { - "epoch": 1.83, - "learning_rate": 2.1731651376146787e-07, - "logits/chosen": -2.1698267459869385, - "logits/rejected": -2.3672404289245605, - "logps/chosen": -256.00689697265625, - "logps/rejected": -232.8870849609375, - "loss": 0.5626, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.13111671805381775, - "rewards/margins": 0.5583267211914062, - "rewards/rejected": -0.6894434690475464, + "epoch": 0.93, + "learning_rate": 8.254517690300946e-08, + "logits/chosen": 0.15927091240882874, + "logits/rejected": 0.30330100655555725, + "logps/chosen": -370.7587585449219, + "logps/rejected": -503.95654296875, + "loss": 0.5648, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -1.0612715482711792, + "rewards/margins": 1.1672163009643555, + "rewards/rejected": -2.228487730026245, "step": 1770 }, { - "epoch": 1.84, - "learning_rate": 2.154051987767584e-07, - "logits/chosen": -2.2943813800811768, - "logits/rejected": -2.3611464500427246, - "logps/chosen": -289.64422607421875, - "logps/rejected": -286.1085205078125, - "loss": 0.5383, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.17674392461776733, - "rewards/margins": 0.36700400710105896, - "rewards/rejected": -0.5437479615211487, + "epoch": 0.93, + "learning_rate": 7.13058562189381e-08, + "logits/chosen": 0.4144035875797272, + "logits/rejected": 0.5552290081977844, + "logps/chosen": -342.7335510253906, + "logps/rejected": -409.74969482421875, + "loss": 0.5131, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2526248693466187, + "rewards/margins": 0.5200544595718384, + "rewards/rejected": -1.772679090499878, "step": 1780 }, { - "epoch": 1.85, - "learning_rate": 2.134938837920489e-07, - "logits/chosen": -2.2364659309387207, - "logits/rejected": -2.2148382663726807, - "logps/chosen": -202.55038452148438, - "logps/rejected": -206.96762084960938, - "loss": 0.5432, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.26416245102882385, - "rewards/margins": 0.6946617960929871, - "rewards/rejected": -0.9588242769241333, + "epoch": 0.94, + "learning_rate": 6.087769964984058e-08, + "logits/chosen": 0.705097496509552, + "logits/rejected": 1.3561115264892578, + "logps/chosen": -428.72918701171875, + "logps/rejected": -445.7135314941406, + "loss": 0.5319, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4447662830352783, + "rewards/margins": 0.6180745959281921, + "rewards/rejected": -2.0628409385681152, "step": 1790 }, { - "epoch": 1.86, - "learning_rate": 2.1158256880733944e-07, - "logits/chosen": -2.305535316467285, - "logits/rejected": -2.4840779304504395, - "logps/chosen": -338.34271240234375, - "logps/rejected": -185.28201293945312, - "loss": 0.5132, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.16436448693275452, - "rewards/margins": 0.42132654786109924, - "rewards/rejected": -0.5856910943984985, + "epoch": 0.94, + "learning_rate": 5.126419011529993e-08, + "logits/chosen": 0.38589420914649963, + "logits/rejected": 0.7930299043655396, + "logps/chosen": -413.413818359375, + "logps/rejected": -465.1924743652344, + "loss": 0.5534, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.2359144687652588, + "rewards/margins": 0.5394344329833984, + "rewards/rejected": -1.7753490209579468, "step": 1800 }, { - "epoch": 1.87, - "learning_rate": 2.0967125382262994e-07, - "logits/chosen": -2.7777082920074463, - "logits/rejected": -2.5519793033599854, - "logps/chosen": -233.9488067626953, - "logps/rejected": -237.7256622314453, - "loss": 0.5536, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.48247796297073364, - "rewards/margins": 0.04958399385213852, - "rewards/rejected": -0.5320619344711304, + "epoch": 0.94, + "eval_logits/chosen": 0.4956907331943512, + "eval_logits/rejected": 0.7550484538078308, + "eval_logps/chosen": -373.24365234375, + "eval_logps/rejected": -457.37786865234375, + "eval_loss": 0.5324857234954834, + "eval_rewards/accuracies": 0.765625, + "eval_rewards/chosen": -1.2308998107910156, + "eval_rewards/margins": 0.8249397277832031, + "eval_rewards/rejected": -2.0558395385742188, + "eval_runtime": 99.9672, + "eval_samples_per_second": 20.007, + "eval_steps_per_second": 0.32, + "step": 1800 + }, + { + "epoch": 0.95, + "learning_rate": 4.246853844940724e-08, + "logits/chosen": 0.599514365196228, + "logits/rejected": 0.8959542512893677, + "logps/chosen": -335.34039306640625, + "logps/rejected": -418.07080078125, + "loss": 0.5122, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.3220350742340088, + "rewards/margins": 0.5795026421546936, + "rewards/rejected": -1.9015376567840576, "step": 1810 }, { - "epoch": 1.88, - "learning_rate": 2.0775993883792048e-07, - "logits/chosen": -2.5612552165985107, - "logits/rejected": -2.420253276824951, - "logps/chosen": -339.1038818359375, - "logps/rejected": -225.16360473632812, - "loss": 0.5151, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.03660213202238083, - "rewards/margins": 1.2038971185684204, - "rewards/rejected": -1.2404992580413818, + "epoch": 0.95, + "learning_rate": 3.4493682328368696e-08, + "logits/chosen": 0.3638584315776825, + "logits/rejected": 0.6814897656440735, + "logps/chosen": -426.3352966308594, + "logps/rejected": -463.9798278808594, + "loss": 0.5181, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.2269675731658936, + "rewards/margins": 0.786018967628479, + "rewards/rejected": -2.012986421585083, "step": 1820 }, { - "epoch": 1.89, - "learning_rate": 2.05848623853211e-07, - "logits/chosen": -2.586907386779785, - "logits/rejected": -2.5101799964904785, - "logps/chosen": -310.6505432128906, - "logps/rejected": -244.5113983154297, - "loss": 0.5221, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.411550909280777, - "rewards/margins": 0.3417314291000366, - "rewards/rejected": -0.7532823085784912, + "epoch": 0.96, + "learning_rate": 2.734228528934679e-08, + "logits/chosen": 0.6217674016952515, + "logits/rejected": 1.3591673374176025, + "logps/chosen": -390.75262451171875, + "logps/rejected": -452.92840576171875, + "loss": 0.5142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.369799017906189, + "rewards/margins": 0.7118135690689087, + "rewards/rejected": -2.0816123485565186, "step": 1830 }, { - "epoch": 1.9, - "learning_rate": 2.0393730886850151e-07, - "logits/chosen": -2.2709717750549316, - "logits/rejected": -2.39669132232666, - "logps/chosen": -226.12057495117188, - "logps/rejected": -200.52218627929688, - "loss": 0.5242, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.046432264149188995, - "rewards/margins": 1.110090732574463, - "rewards/rejected": -1.0636584758758545, + "epoch": 0.96, + "learning_rate": 2.1016735840859447e-08, + "logits/chosen": 0.09014968574047089, + "logits/rejected": 0.6385570168495178, + "logps/chosen": -421.1087951660156, + "logps/rejected": -426.6112365722656, + "loss": 0.528, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2675201892852783, + "rewards/margins": 0.579815149307251, + "rewards/rejected": -1.8473352193832397, "step": 1840 }, { - "epoch": 1.91, - "learning_rate": 2.0202599388379205e-07, - "logits/chosen": -2.3261749744415283, - "logits/rejected": -2.5248923301696777, - "logps/chosen": -222.13772583007812, - "logps/rejected": -425.5648498535156, - "loss": 0.5189, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.3807342052459717, - "rewards/margins": 0.5107225775718689, - "rewards/rejected": -0.8914567232131958, + "epoch": 0.97, + "learning_rate": 1.551914666503812e-08, + "logits/chosen": 0.30124786496162415, + "logits/rejected": 0.8672188520431519, + "logps/chosen": -329.217529296875, + "logps/rejected": -450.61468505859375, + "loss": 0.4952, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.0372583866119385, + "rewards/margins": 1.0680160522460938, + "rewards/rejected": -2.1052744388580322, "step": 1850 }, { - "epoch": 1.92, - "learning_rate": 2.0011467889908258e-07, - "logits/chosen": -2.398069381713867, - "logits/rejected": -2.2855401039123535, - "logps/chosen": -314.5137939453125, - "logps/rejected": -217.12588500976562, - "loss": 0.5264, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.029106635600328445, - "rewards/margins": 1.0325161218643188, - "rewards/rejected": -1.003409504890442, + "epoch": 0.97, + "learning_rate": 1.0851353912008644e-08, + "logits/chosen": 0.28355082869529724, + "logits/rejected": 0.5023699998855591, + "logps/chosen": -317.22869873046875, + "logps/rejected": -411.1966857910156, + "loss": 0.4757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.067916750907898, + "rewards/margins": 1.0416524410247803, + "rewards/rejected": -2.1095690727233887, "step": 1860 }, { - "epoch": 1.93, - "learning_rate": 1.9820336391437308e-07, - "logits/chosen": -2.290491819381714, - "logits/rejected": -2.378493070602417, - "logps/chosen": -286.273681640625, - "logps/rejected": -229.33828735351562, - "loss": 0.5646, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.08369754254817963, - "rewards/margins": 0.795507550239563, - "rewards/rejected": -0.879205048084259, + "epoch": 0.98, + "learning_rate": 7.014916586632336e-09, + "logits/chosen": 0.004716170020401478, + "logits/rejected": 0.6306796073913574, + "logps/chosen": -464.43048095703125, + "logps/rejected": -509.4078063964844, + "loss": 0.5422, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.4036610126495361, + "rewards/margins": 0.6888295412063599, + "rewards/rejected": -2.0924906730651855, "step": 1870 }, { - "epoch": 1.94, - "learning_rate": 1.9629204892966362e-07, - "logits/chosen": -2.601147413253784, - "logits/rejected": -2.4275565147399902, - "logps/chosen": -390.74151611328125, - "logps/rejected": -276.0337829589844, - "loss": 0.53, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.009929287247359753, - "rewards/margins": 0.7174602746963501, - "rewards/rejected": -0.7075310945510864, + "epoch": 0.98, + "learning_rate": 4.011116027811956e-09, + "logits/chosen": 0.32270532846450806, + "logits/rejected": 0.7638577818870544, + "logps/chosen": -407.8437194824219, + "logps/rejected": -471.29180908203125, + "loss": 0.4853, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3231006860733032, + "rewards/margins": 0.6853188276290894, + "rewards/rejected": -2.0084195137023926, "step": 1880 }, { - "epoch": 1.95, - "learning_rate": 1.943807339449541e-07, - "logits/chosen": -2.18162202835083, - "logits/rejected": -1.9625205993652344, - "logps/chosen": -205.7775421142578, - "logps/rejected": -185.28477478027344, - "loss": 0.5423, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.2230360060930252, - "rewards/margins": 0.8223746418952942, - "rewards/rejected": -1.0454107522964478, + "epoch": 0.99, + "learning_rate": 1.8409554805329243e-09, + "logits/chosen": 0.35667330026626587, + "logits/rejected": 0.8502413630485535, + "logps/chosen": -365.97418212890625, + "logps/rejected": -510.9142150878906, + "loss": 0.5214, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.2438390254974365, + "rewards/margins": 1.2221014499664307, + "rewards/rejected": -2.465939998626709, "step": 1890 }, { - "epoch": 1.96, - "learning_rate": 1.9246941896024463e-07, - "logits/chosen": -2.2929797172546387, - "logits/rejected": -2.081751823425293, - "logps/chosen": -176.2759246826172, - "logps/rejected": -127.28376770019531, - "loss": 0.5754, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.16625508666038513, - "rewards/margins": 0.5883089900016785, - "rewards/rejected": -0.754564106464386, + "epoch": 0.99, + "learning_rate": 5.051597607894088e-10, + "logits/chosen": 0.5023200511932373, + "logits/rejected": 0.7432835698127747, + "logps/chosen": -382.82867431640625, + "logps/rejected": -409.9466857910156, + "loss": 0.5539, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4910869598388672, + "rewards/margins": 0.6681244373321533, + "rewards/rejected": -2.1592116355895996, "step": 1900 }, { - "epoch": 1.97, - "learning_rate": 1.9055810397553516e-07, - "logits/chosen": -2.3370862007141113, - "logits/rejected": -2.5263562202453613, - "logps/chosen": -305.7495422363281, - "logps/rejected": -238.85726928710938, - "loss": 0.5508, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1391083151102066, - "rewards/margins": 0.8672167062759399, - "rewards/rejected": -1.0063250064849854, - "step": 1910 - }, - { - "epoch": 1.98, - "learning_rate": 1.8864678899082566e-07, - "logits/chosen": -2.6837048530578613, - "logits/rejected": -2.4654018878936768, - "logps/chosen": -278.2798767089844, - "logps/rejected": -200.07553100585938, - "loss": 0.5311, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.21458300948143005, - "rewards/margins": 0.6255909204483032, - "rewards/rejected": -0.8401739001274109, - "step": 1920 - }, - { - "epoch": 1.99, - "learning_rate": 1.867354740061162e-07, - "logits/chosen": -2.3841135501861572, - "logits/rejected": -2.4274752140045166, - "logps/chosen": -241.267333984375, - "logps/rejected": -294.39300537109375, - "loss": 0.5454, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.15124601125717163, - "rewards/margins": 1.2222580909729004, - "rewards/rejected": -1.3735041618347168, - "step": 1930 - }, - { - "epoch": 2.0, - "eval_logits/chosen": -2.0971014499664307, - "eval_logits/rejected": -1.931637167930603, - "eval_logps/chosen": -248.69100952148438, - "eval_logps/rejected": -198.20773315429688, - "eval_loss": 0.531248927116394, - "eval_rewards/accuracies": 0.796875, - "eval_rewards/chosen": -0.12489113211631775, - "eval_rewards/margins": 0.8351471424102783, - "eval_rewards/rejected": -0.9600383639335632, - "eval_runtime": 49.837, - "eval_samples_per_second": 40.131, - "eval_steps_per_second": 0.321, - "step": 1938 + "epoch": 0.99, + "eval_logits/chosen": 0.5000983476638794, + "eval_logits/rejected": 0.7595670819282532, + "eval_logps/chosen": -373.40216064453125, + "eval_logps/rejected": -457.4398498535156, + "eval_loss": 0.5325239300727844, + "eval_rewards/accuracies": 0.765625, + "eval_rewards/chosen": -1.2324851751327515, + "eval_rewards/margins": 0.8239741921424866, + "eval_rewards/rejected": -2.056459426879883, + "eval_runtime": 99.4948, + "eval_samples_per_second": 20.102, + "eval_steps_per_second": 0.322, + "step": 1900 }, { - "epoch": 2.0, - "learning_rate": 1.8482415902140673e-07, - "logits/chosen": -2.248683452606201, - "logits/rejected": -2.3425230979919434, - "logps/chosen": -222.09530639648438, - "logps/rejected": -244.0989227294922, - "loss": 0.5393, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.25359851121902466, - "rewards/margins": 0.6999626159667969, - "rewards/rejected": -0.9535611271858215, - "step": 1940 - }, - { - "epoch": 2.01, - "learning_rate": 1.8291284403669723e-07, - "logits/chosen": -2.3472368717193604, - "logits/rejected": -2.4190945625305176, - "logps/chosen": -202.47897338867188, - "logps/rejected": -223.9008331298828, - "loss": 0.5342, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06839686632156372, - "rewards/margins": 0.7683246731758118, - "rewards/rejected": -0.8367214202880859, - "step": 1950 - }, - { - "epoch": 2.02, - "learning_rate": 1.8100152905198777e-07, - "logits/chosen": -2.1655468940734863, - "logits/rejected": -2.06404185295105, - "logps/chosen": -259.51666259765625, - "logps/rejected": -317.1017150878906, - "loss": 0.5335, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.24679192900657654, - "rewards/margins": 1.218878984451294, - "rewards/rejected": -0.9720870852470398, - "step": 1960 - }, - { - "epoch": 2.03, - "learning_rate": 1.7909021406727827e-07, - "logits/chosen": -2.5240676403045654, - "logits/rejected": -2.360107660293579, - "logps/chosen": -175.0189971923828, - "logps/rejected": -180.5626983642578, - "loss": 0.5445, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.4591894745826721, - "rewards/margins": 0.5135048627853394, - "rewards/rejected": -0.9726942777633667, - "step": 1970 - }, - { - "epoch": 2.04, - "learning_rate": 1.771788990825688e-07, - "logits/chosen": -2.3183510303497314, - "logits/rejected": -2.134965658187866, - "logps/chosen": -302.5520324707031, - "logps/rejected": -210.1514434814453, - "loss": 0.5426, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.04827744886279106, - "rewards/margins": 1.1514790058135986, - "rewards/rejected": -1.1032016277313232, - "step": 1980 - }, - { - "epoch": 2.05, - "learning_rate": 1.7526758409785934e-07, - "logits/chosen": -2.512765884399414, - "logits/rejected": -2.167898416519165, - "logps/chosen": -355.231201171875, - "logps/rejected": -272.0666809082031, - "loss": 0.5404, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.12298504263162613, - "rewards/margins": 1.1620323657989502, - "rewards/rejected": -1.2850173711776733, - "step": 1990 - }, - { - "epoch": 2.06, - "learning_rate": 1.7335626911314984e-07, - "logits/chosen": -2.495887517929077, - "logits/rejected": -2.2603235244750977, - "logps/chosen": -236.11611938476562, - "logps/rejected": -186.68002319335938, - "loss": 0.5362, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.14387288689613342, - "rewards/margins": 0.7451364398002625, - "rewards/rejected": -0.8890093564987183, - "step": 2000 - }, - { - "epoch": 2.07, - "learning_rate": 1.7144495412844037e-07, - "logits/chosen": -2.6951799392700195, - "logits/rejected": -2.600787878036499, - "logps/chosen": -388.97418212890625, - "logps/rejected": -281.6431884765625, - "loss": 0.5252, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.054711710661649704, - "rewards/margins": 1.36897611618042, - "rewards/rejected": -1.4236878156661987, - "step": 2010 - }, - { - "epoch": 2.08, - "learning_rate": 1.6953363914373088e-07, - "logits/chosen": -2.5666658878326416, - "logits/rejected": -2.512883186340332, - "logps/chosen": -324.7785949707031, - "logps/rejected": -228.8123016357422, - "loss": 0.5496, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09128103405237198, - "rewards/margins": 1.144072413444519, - "rewards/rejected": -1.2353535890579224, - "step": 2020 - }, - { - "epoch": 2.09, - "learning_rate": 1.6762232415902138e-07, - "logits/chosen": -2.499027729034424, - "logits/rejected": -2.298178195953369, - "logps/chosen": -235.67837524414062, - "logps/rejected": -202.81117248535156, - "loss": 0.5436, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.24017953872680664, - "rewards/margins": 0.8434773683547974, - "rewards/rejected": -1.083656907081604, - "step": 2030 - }, - { - "epoch": 2.11, - "learning_rate": 1.6571100917431192e-07, - "logits/chosen": -2.5607171058654785, - "logits/rejected": -2.353574514389038, - "logps/chosen": -240.97384643554688, - "logps/rejected": -170.1258544921875, - "loss": 0.5185, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.20230352878570557, - "rewards/margins": 0.6808130741119385, - "rewards/rejected": -0.883116602897644, - "step": 2040 - }, - { - "epoch": 2.12, - "learning_rate": 1.6379969418960242e-07, - "logits/chosen": -2.5361034870147705, - "logits/rejected": -2.513338565826416, - "logps/chosen": -381.1001892089844, - "logps/rejected": -299.3647155761719, - "loss": 0.5277, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.016599655151367188, - "rewards/margins": 0.782288134098053, - "rewards/rejected": -0.7988878488540649, - "step": 2050 - }, - { - "epoch": 2.13, - "learning_rate": 1.6188837920489295e-07, - "logits/chosen": -2.3150038719177246, - "logits/rejected": -2.09513783454895, - "logps/chosen": -262.87286376953125, - "logps/rejected": -209.9288330078125, - "loss": 0.5178, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.19605524837970734, - "rewards/margins": 0.7449124455451965, - "rewards/rejected": -0.9409675598144531, - "step": 2060 - }, - { - "epoch": 2.14, - "learning_rate": 1.5997706422018349e-07, - "logits/chosen": -2.9330039024353027, - "logits/rejected": -2.6582858562469482, - "logps/chosen": -341.77850341796875, - "logps/rejected": -298.8485412597656, - "loss": 0.5397, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.009139073081314564, - "rewards/margins": 0.28165051341056824, - "rewards/rejected": -0.27251148223876953, - "step": 2070 - }, - { - "epoch": 2.15, - "learning_rate": 1.58065749235474e-07, - "logits/chosen": -2.320667266845703, - "logits/rejected": -2.201702356338501, - "logps/chosen": -286.5201416015625, - "logps/rejected": -217.8232421875, - "loss": 0.5418, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.11670849472284317, - "rewards/margins": 0.8405148386955261, - "rewards/rejected": -0.7238063216209412, - "step": 2080 - }, - { - "epoch": 2.16, - "learning_rate": 1.5615443425076452e-07, - "logits/chosen": -2.363339900970459, - "logits/rejected": -2.3478615283966064, - "logps/chosen": -314.27325439453125, - "logps/rejected": -253.44631958007812, - "loss": 0.5247, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.10530459880828857, - "rewards/margins": 1.0353710651397705, - "rewards/rejected": -0.9300664067268372, - "step": 2090 - }, - { - "epoch": 2.17, - "learning_rate": 1.5424311926605506e-07, - "logits/chosen": -2.2379679679870605, - "logits/rejected": -2.2152559757232666, - "logps/chosen": -170.92782592773438, - "logps/rejected": -166.93072509765625, - "loss": 0.5114, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.21574795246124268, - "rewards/margins": 1.1248795986175537, - "rewards/rejected": -1.3406277894973755, - "step": 2100 - }, - { - "epoch": 2.18, - "learning_rate": 1.5233180428134556e-07, - "logits/chosen": -2.4279611110687256, - "logits/rejected": -2.4034111499786377, - "logps/chosen": -271.7957763671875, - "logps/rejected": -260.822265625, - "loss": 0.5237, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07596953958272934, - "rewards/margins": 0.9050176739692688, - "rewards/rejected": -0.9809872508049011, - "step": 2110 - }, - { - "epoch": 2.19, - "learning_rate": 1.504204892966361e-07, - "logits/chosen": -2.213738441467285, - "logits/rejected": -2.2372987270355225, - "logps/chosen": -295.6419677734375, - "logps/rejected": -206.6561279296875, - "loss": 0.5314, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.19369874894618988, - "rewards/margins": 0.6303919553756714, - "rewards/rejected": -0.8240906596183777, - "step": 2120 - }, - { - "epoch": 2.2, - "learning_rate": 1.485091743119266e-07, - "logits/chosen": -2.4710030555725098, - "logits/rejected": -2.3606488704681396, - "logps/chosen": -242.4974822998047, - "logps/rejected": -218.1492156982422, - "loss": 0.5367, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.16626985371112823, - "rewards/margins": 0.7947496771812439, - "rewards/rejected": -0.9610195159912109, - "step": 2130 - }, - { - "epoch": 2.21, - "learning_rate": 1.465978593272171e-07, - "logits/chosen": -2.255868434906006, - "logits/rejected": -2.3592605590820312, - "logps/chosen": -228.03219604492188, - "logps/rejected": -184.50611877441406, - "loss": 0.5165, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.3162696957588196, - "rewards/margins": 0.7572251558303833, - "rewards/rejected": -1.0734949111938477, - "step": 2140 - }, - { - "epoch": 2.22, - "learning_rate": 1.4468654434250764e-07, - "logits/chosen": -2.2551803588867188, - "logits/rejected": -2.189657688140869, - "logps/chosen": -370.01104736328125, - "logps/rejected": -314.4168395996094, - "loss": 0.5523, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.027963241562247276, - "rewards/margins": 0.7318282127380371, - "rewards/rejected": -0.7038649320602417, - "step": 2150 - }, - { - "epoch": 2.23, - "learning_rate": 1.4277522935779814e-07, - "logits/chosen": -2.4878127574920654, - "logits/rejected": -2.42626953125, - "logps/chosen": -275.42144775390625, - "logps/rejected": -220.00137329101562, - "loss": 0.5275, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.14348439872264862, - "rewards/margins": 1.2011865377426147, - "rewards/rejected": -1.0577021837234497, - "step": 2160 - }, - { - "epoch": 2.24, - "learning_rate": 1.4086391437308867e-07, - "logits/chosen": -2.3342137336730957, - "logits/rejected": -2.4700827598571777, - "logps/chosen": -279.27032470703125, - "logps/rejected": -260.35107421875, - "loss": 0.5171, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.21079519391059875, - "rewards/margins": 0.983725368976593, - "rewards/rejected": -1.1945207118988037, - "step": 2170 - }, - { - "epoch": 2.25, - "learning_rate": 1.389525993883792e-07, - "logits/chosen": -2.543966770172119, - "logits/rejected": -2.178934097290039, - "logps/chosen": -354.38348388671875, - "logps/rejected": -278.5998840332031, - "loss": 0.5621, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.17348089814186096, - "rewards/margins": 1.0600968599319458, - "rewards/rejected": -1.2335779666900635, - "step": 2180 - }, - { - "epoch": 2.26, - "learning_rate": 1.370412844036697e-07, - "logits/chosen": -2.370507001876831, - "logits/rejected": -2.4885435104370117, - "logps/chosen": -282.58270263671875, - "logps/rejected": -245.8155517578125, - "loss": 0.5296, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.010824834927916527, - "rewards/margins": 0.7261137962341309, - "rewards/rejected": -0.7152889966964722, - "step": 2190 - }, - { - "epoch": 2.27, - "learning_rate": 1.3512996941896024e-07, - "logits/chosen": -2.373438835144043, - "logits/rejected": -2.3270111083984375, - "logps/chosen": -255.15109252929688, - "logps/rejected": -247.8589324951172, - "loss": 0.5333, - "rewards/accuracies": 0.44999998807907104, - "rewards/chosen": -0.5083593130111694, - "rewards/margins": 0.12943392992019653, - "rewards/rejected": -0.6377933025360107, - "step": 2200 - }, - { - "epoch": 2.28, - "learning_rate": 1.3321865443425075e-07, - "logits/chosen": -2.239222526550293, - "logits/rejected": -2.1923537254333496, - "logps/chosen": -299.39422607421875, - "logps/rejected": -174.023681640625, - "loss": 0.5336, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.11805693060159683, - "rewards/margins": 1.0056555271148682, - "rewards/rejected": -1.1237125396728516, - "step": 2210 - }, - { - "epoch": 2.29, - "learning_rate": 1.3130733944954128e-07, - "logits/chosen": -2.110752820968628, - "logits/rejected": -1.9507097005844116, - "logps/chosen": -329.23724365234375, - "logps/rejected": -225.7882080078125, - "loss": 0.4955, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.32635951042175293, - "rewards/margins": 0.7226699590682983, - "rewards/rejected": -1.0490295886993408, - "step": 2220 - }, - { - "epoch": 2.3, - "learning_rate": 1.293960244648318e-07, - "logits/chosen": -2.5377070903778076, - "logits/rejected": -2.426079750061035, - "logps/chosen": -373.67645263671875, - "logps/rejected": -226.9227294921875, - "loss": 0.5225, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.15080983936786652, - "rewards/margins": 1.0611189603805542, - "rewards/rejected": -1.2119289636611938, - "step": 2230 - }, - { - "epoch": 2.31, - "learning_rate": 1.2748470948012232e-07, - "logits/chosen": -2.362394332885742, - "logits/rejected": -2.197788715362549, - "logps/chosen": -327.0963134765625, - "logps/rejected": -272.4471130371094, - "loss": 0.5111, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.272097110748291, - "rewards/margins": 0.6045467257499695, - "rewards/rejected": -0.8766437768936157, - "step": 2240 - }, - { - "epoch": 2.32, - "learning_rate": 1.2557339449541285e-07, - "logits/chosen": -2.2135844230651855, - "logits/rejected": -2.296194076538086, - "logps/chosen": -314.9928894042969, - "logps/rejected": -249.1458282470703, - "loss": 0.5293, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.059797704219818115, - "rewards/margins": 1.026885747909546, - "rewards/rejected": -1.0866836309432983, - "step": 2250 - }, - { - "epoch": 2.33, - "learning_rate": 1.2366207951070336e-07, - "logits/chosen": -2.468040943145752, - "logits/rejected": -2.395946741104126, - "logps/chosen": -259.4168395996094, - "logps/rejected": -228.13162231445312, - "loss": 0.5639, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.3209591507911682, - "rewards/margins": 0.7097271680831909, - "rewards/rejected": -1.0306862592697144, - "step": 2260 - }, - { - "epoch": 2.34, - "learning_rate": 1.217507645259939e-07, - "logits/chosen": -2.540865182876587, - "logits/rejected": -2.305960178375244, - "logps/chosen": -327.03668212890625, - "logps/rejected": -316.6974792480469, - "loss": 0.4995, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.10364139080047607, - "rewards/margins": 0.5791498422622681, - "rewards/rejected": -0.6827912330627441, - "step": 2270 - }, - { - "epoch": 2.35, - "learning_rate": 1.198394495412844e-07, - "logits/chosen": -2.5025086402893066, - "logits/rejected": -2.4841408729553223, - "logps/chosen": -346.7644958496094, - "logps/rejected": -261.13916015625, - "loss": 0.5339, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.09549558907747269, - "rewards/margins": 0.9585358500480652, - "rewards/rejected": -1.054031491279602, - "step": 2280 - }, - { - "epoch": 2.36, - "learning_rate": 1.1792813455657493e-07, - "logits/chosen": -2.3317127227783203, - "logits/rejected": -2.3077309131622314, - "logps/chosen": -263.4445495605469, - "logps/rejected": -303.0923156738281, - "loss": 0.5097, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.48188915848731995, - "rewards/margins": 0.5629307627677917, - "rewards/rejected": -1.0448198318481445, - "step": 2290 - }, - { - "epoch": 2.37, - "learning_rate": 1.1601681957186543e-07, - "logits/chosen": -2.5383365154266357, - "logits/rejected": -2.3553810119628906, - "logps/chosen": -303.4425964355469, - "logps/rejected": -202.1547088623047, - "loss": 0.516, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.03705354407429695, - "rewards/margins": 1.2515580654144287, - "rewards/rejected": -1.2145044803619385, - "step": 2300 - }, - { - "epoch": 2.38, - "learning_rate": 1.1410550458715595e-07, - "logits/chosen": -2.290337085723877, - "logits/rejected": -2.136826753616333, - "logps/chosen": -309.16717529296875, - "logps/rejected": -254.88674926757812, - "loss": 0.5478, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.010205010883510113, - "rewards/margins": 0.6125646829605103, - "rewards/rejected": -0.6023596525192261, - "step": 2310 - }, - { - "epoch": 2.39, - "learning_rate": 1.1219418960244648e-07, - "logits/chosen": -2.3295750617980957, - "logits/rejected": -2.385953426361084, - "logps/chosen": -215.65237426757812, - "logps/rejected": -212.346435546875, - "loss": 0.518, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.3336523175239563, - "rewards/margins": 0.6867604851722717, - "rewards/rejected": -1.0204129219055176, - "step": 2320 - }, - { - "epoch": 2.4, - "learning_rate": 1.10282874617737e-07, - "logits/chosen": -2.4023945331573486, - "logits/rejected": -2.4153189659118652, - "logps/chosen": -240.5900115966797, - "logps/rejected": -228.1215362548828, - "loss": 0.4989, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.16529302299022675, - "rewards/margins": 0.8604542016983032, - "rewards/rejected": -1.0257470607757568, - "step": 2330 - }, - { - "epoch": 2.41, - "learning_rate": 1.0837155963302752e-07, - "logits/chosen": -2.2563438415527344, - "logits/rejected": -2.125026226043701, - "logps/chosen": -267.7855224609375, - "logps/rejected": -219.22030639648438, - "loss": 0.5213, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.08529398590326309, - "rewards/margins": 0.9822967648506165, - "rewards/rejected": -1.0675907135009766, - "step": 2340 - }, - { - "epoch": 2.43, - "learning_rate": 1.0646024464831804e-07, - "logits/chosen": -2.2471909523010254, - "logits/rejected": -2.319463014602661, - "logps/chosen": -255.18911743164062, - "logps/rejected": -244.0147247314453, - "loss": 0.5512, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.24005265533924103, - "rewards/margins": 0.49893826246261597, - "rewards/rejected": -0.7389909029006958, - "step": 2350 - }, - { - "epoch": 2.44, - "learning_rate": 1.0454892966360856e-07, - "logits/chosen": -2.348158597946167, - "logits/rejected": -2.28670334815979, - "logps/chosen": -271.4364013671875, - "logps/rejected": -196.4866943359375, - "loss": 0.5465, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.04201481118798256, - "rewards/margins": 1.2664215564727783, - "rewards/rejected": -1.3084365129470825, - "step": 2360 - }, - { - "epoch": 2.45, - "learning_rate": 1.0263761467889908e-07, - "logits/chosen": -2.466719150543213, - "logits/rejected": -2.4625251293182373, - "logps/chosen": -284.7028503417969, - "logps/rejected": -224.3902587890625, - "loss": 0.5451, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.19498039782047272, - "rewards/margins": 0.3427335023880005, - "rewards/rejected": -0.537713885307312, - "step": 2370 - }, - { - "epoch": 2.46, - "learning_rate": 1.007262996941896e-07, - "logits/chosen": -2.122545003890991, - "logits/rejected": -2.2347145080566406, - "logps/chosen": -289.37249755859375, - "logps/rejected": -206.8185577392578, - "loss": 0.5535, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.25089073181152344, - "rewards/margins": 0.7263849973678589, - "rewards/rejected": -0.9772756695747375, - "step": 2380 - }, - { - "epoch": 2.47, - "learning_rate": 9.881498470948011e-08, - "logits/chosen": -2.279235363006592, - "logits/rejected": -2.394592523574829, - "logps/chosen": -239.59011840820312, - "logps/rejected": -206.2091827392578, - "loss": 0.5505, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.09624668210744858, - "rewards/margins": 0.9607799649238586, - "rewards/rejected": -1.0570266246795654, - "step": 2390 - }, - { - "epoch": 2.48, - "learning_rate": 9.690366972477065e-08, - "logits/chosen": -2.580003499984741, - "logits/rejected": -2.5049805641174316, - "logps/chosen": -193.94044494628906, - "logps/rejected": -129.92938232421875, - "loss": 0.5501, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.29306405782699585, - "rewards/margins": 0.3221284747123718, - "rewards/rejected": -0.6151925325393677, - "step": 2400 - }, - { - "epoch": 2.49, - "learning_rate": 9.499235474006116e-08, - "logits/chosen": -2.2965879440307617, - "logits/rejected": -2.5327494144439697, - "logps/chosen": -437.3583068847656, - "logps/rejected": -228.34335327148438, - "loss": 0.5281, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.08578985184431076, - "rewards/margins": 0.9632189869880676, - "rewards/rejected": -1.0490089654922485, - "step": 2410 - }, - { - "epoch": 2.5, - "learning_rate": 9.308103975535168e-08, - "logits/chosen": -2.5039114952087402, - "logits/rejected": -2.533884048461914, - "logps/chosen": -328.80194091796875, - "logps/rejected": -245.8604736328125, - "loss": 0.5309, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.1082562655210495, - "rewards/margins": 0.7982146143913269, - "rewards/rejected": -0.9064709544181824, - "step": 2420 - }, - { - "epoch": 2.51, - "learning_rate": 9.116972477064219e-08, - "logits/chosen": -2.3511176109313965, - "logits/rejected": -2.471139430999756, - "logps/chosen": -351.3453063964844, - "logps/rejected": -363.6473693847656, - "loss": 0.5501, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.06536252051591873, - "rewards/margins": 0.8679245710372925, - "rewards/rejected": -0.9332871437072754, - "step": 2430 - }, - { - "epoch": 2.52, - "learning_rate": 8.925840978593272e-08, - "logits/chosen": -2.4342617988586426, - "logits/rejected": -2.3651440143585205, - "logps/chosen": -185.2665252685547, - "logps/rejected": -184.4652099609375, - "loss": 0.5244, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.01564156636595726, - "rewards/margins": 0.836595892906189, - "rewards/rejected": -0.820954442024231, - "step": 2440 - }, - { - "epoch": 2.53, - "learning_rate": 8.734709480122324e-08, - "logits/chosen": -2.180828809738159, - "logits/rejected": -2.3217592239379883, - "logps/chosen": -162.02694702148438, - "logps/rejected": -214.3283233642578, - "loss": 0.5294, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.3506285548210144, - "rewards/margins": 0.32000571489334106, - "rewards/rejected": -0.6706342697143555, - "step": 2450 - }, - { - "epoch": 2.54, - "learning_rate": 8.543577981651376e-08, - "logits/chosen": -2.336467742919922, - "logits/rejected": -2.384082317352295, - "logps/chosen": -436.14337158203125, - "logps/rejected": -267.2066955566406, - "loss": 0.5303, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.0021473728120326996, - "rewards/margins": 1.047534465789795, - "rewards/rejected": -1.0496817827224731, - "step": 2460 - }, - { - "epoch": 2.55, - "learning_rate": 8.352446483180428e-08, - "logits/chosen": -2.4472270011901855, - "logits/rejected": -2.302725315093994, - "logps/chosen": -265.781005859375, - "logps/rejected": -185.17855834960938, - "loss": 0.5353, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.04335325211286545, - "rewards/margins": 0.785746157169342, - "rewards/rejected": -0.8290994763374329, - "step": 2470 - }, - { - "epoch": 2.56, - "learning_rate": 8.161314984709481e-08, - "logits/chosen": -2.546318292617798, - "logits/rejected": -2.435561180114746, - "logps/chosen": -341.5940246582031, - "logps/rejected": -224.41000366210938, - "loss": 0.5387, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08983229100704193, - "rewards/margins": 1.180863380432129, - "rewards/rejected": -1.2706955671310425, - "step": 2480 - }, - { - "epoch": 2.57, - "learning_rate": 7.970183486238531e-08, - "logits/chosen": -2.327275276184082, - "logits/rejected": -2.229645252227783, - "logps/chosen": -198.76148986816406, - "logps/rejected": -200.72247314453125, - "loss": 0.5137, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06232021003961563, - "rewards/margins": 0.635228157043457, - "rewards/rejected": -0.6975484490394592, - "step": 2490 - }, - { - "epoch": 2.58, - "learning_rate": 7.779051987767583e-08, - "logits/chosen": -2.560883045196533, - "logits/rejected": -2.422776699066162, - "logps/chosen": -403.97991943359375, - "logps/rejected": -216.1969451904297, - "loss": 0.4976, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.12103588879108429, - "rewards/margins": 1.1826056241989136, - "rewards/rejected": -1.0615696907043457, - "step": 2500 - }, - { - "epoch": 2.59, - "learning_rate": 7.587920489296635e-08, - "logits/chosen": -2.3078255653381348, - "logits/rejected": -2.3434040546417236, - "logps/chosen": -200.40487670898438, - "logps/rejected": -198.0846405029297, - "loss": 0.5296, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07855480164289474, - "rewards/margins": 0.8178641200065613, - "rewards/rejected": -0.8964190483093262, - "step": 2510 - }, - { - "epoch": 2.6, - "learning_rate": 7.396788990825688e-08, - "logits/chosen": -2.438007354736328, - "logits/rejected": -2.413921594619751, - "logps/chosen": -205.8711395263672, - "logps/rejected": -196.82382202148438, - "loss": 0.5469, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.14088700711727142, - "rewards/margins": 0.9455137252807617, - "rewards/rejected": -1.0864007472991943, - "step": 2520 - }, - { - "epoch": 2.61, - "learning_rate": 7.20565749235474e-08, - "logits/chosen": -2.546194553375244, - "logits/rejected": -2.4628357887268066, - "logps/chosen": -181.03305053710938, - "logps/rejected": -232.1865234375, - "loss": 0.5298, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.23946937918663025, - "rewards/margins": 0.2353799045085907, - "rewards/rejected": -0.47484928369522095, - "step": 2530 - }, - { - "epoch": 2.62, - "learning_rate": 7.014525993883792e-08, - "logits/chosen": -2.6189305782318115, - "logits/rejected": -2.1566860675811768, - "logps/chosen": -291.4128112792969, - "logps/rejected": -229.62069702148438, - "loss": 0.5305, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.10792186111211777, - "rewards/margins": 0.8227887153625488, - "rewards/rejected": -0.7148668169975281, - "step": 2540 - }, - { - "epoch": 2.63, - "learning_rate": 6.823394495412843e-08, - "logits/chosen": -2.3303613662719727, - "logits/rejected": -2.243708610534668, - "logps/chosen": -240.57321166992188, - "logps/rejected": -237.77621459960938, - "loss": 0.53, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.18570654094219208, - "rewards/margins": 0.8395042419433594, - "rewards/rejected": -1.0252107381820679, - "step": 2550 - }, - { - "epoch": 2.64, - "learning_rate": 6.632262996941895e-08, - "logits/chosen": -2.184171438217163, - "logits/rejected": -2.440727949142456, - "logps/chosen": -293.82208251953125, - "logps/rejected": -232.8399658203125, - "loss": 0.5551, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.14512939751148224, - "rewards/margins": 0.7270814180374146, - "rewards/rejected": -0.8722109794616699, - "step": 2560 - }, - { - "epoch": 2.65, - "learning_rate": 6.441131498470948e-08, - "logits/chosen": -2.308842182159424, - "logits/rejected": -2.357908010482788, - "logps/chosen": -276.0211181640625, - "logps/rejected": -183.45028686523438, - "loss": 0.5318, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.06593456864356995, - "rewards/margins": 1.2839319705963135, - "rewards/rejected": -1.3498667478561401, - "step": 2570 - }, - { - "epoch": 2.66, - "learning_rate": 6.25e-08, - "logits/chosen": -2.4197561740875244, - "logits/rejected": -2.3011603355407715, - "logps/chosen": -332.51519775390625, - "logps/rejected": -231.3236846923828, - "loss": 0.5394, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.08808217942714691, - "rewards/margins": 0.9846493005752563, - "rewards/rejected": -0.896567165851593, - "step": 2580 - }, - { - "epoch": 2.67, - "learning_rate": 6.058868501529052e-08, - "logits/chosen": -2.518052577972412, - "logits/rejected": -2.333383083343506, - "logps/chosen": -253.10482788085938, - "logps/rejected": -198.84860229492188, - "loss": 0.516, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.3139076828956604, - "rewards/margins": 0.5393510460853577, - "rewards/rejected": -0.8532587289810181, - "step": 2590 - }, - { - "epoch": 2.68, - "learning_rate": 5.8677370030581035e-08, - "logits/chosen": -2.1512084007263184, - "logits/rejected": -2.312035083770752, - "logps/chosen": -288.53680419921875, - "logps/rejected": -222.78762817382812, - "loss": 0.5122, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.13815611600875854, - "rewards/margins": 1.1774489879608154, - "rewards/rejected": -1.0392926931381226, - "step": 2600 - }, - { - "epoch": 2.69, - "learning_rate": 5.6766055045871554e-08, - "logits/chosen": -2.2546794414520264, - "logits/rejected": -2.3793716430664062, - "logps/chosen": -343.5367736816406, - "logps/rejected": -309.8175354003906, - "loss": 0.5171, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.32955241203308105, - "rewards/margins": 0.6453115940093994, - "rewards/rejected": -0.9748640060424805, - "step": 2610 - }, - { - "epoch": 2.7, - "learning_rate": 5.485474006116208e-08, - "logits/chosen": -2.549440622329712, - "logits/rejected": -2.3844194412231445, - "logps/chosen": -258.51177978515625, - "logps/rejected": -215.14132690429688, - "loss": 0.5318, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.36421215534210205, - "rewards/margins": 1.2265236377716064, - "rewards/rejected": -0.8623113632202148, - "step": 2620 - }, - { - "epoch": 2.71, - "learning_rate": 5.294342507645259e-08, - "logits/chosen": -2.530996561050415, - "logits/rejected": -2.3681914806365967, - "logps/chosen": -335.9801330566406, - "logps/rejected": -263.645263671875, - "loss": 0.5112, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.1152672991156578, - "rewards/margins": 0.8920395970344543, - "rewards/rejected": -0.7767722606658936, - "step": 2630 - }, - { - "epoch": 2.72, - "learning_rate": 5.1032110091743117e-08, - "logits/chosen": -2.185145378112793, - "logits/rejected": -2.3489866256713867, - "logps/chosen": -260.3304138183594, - "logps/rejected": -180.7406768798828, - "loss": 0.5443, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.4966302514076233, - "rewards/margins": 0.4965400695800781, - "rewards/rejected": -0.9931701421737671, - "step": 2640 - }, - { - "epoch": 2.73, - "learning_rate": 4.9120795107033635e-08, - "logits/chosen": -2.3882431983947754, - "logits/rejected": -2.4234635829925537, - "logps/chosen": -267.7854919433594, - "logps/rejected": -259.3836669921875, - "loss": 0.5344, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.1018451452255249, - "rewards/margins": 1.0978671312332153, - "rewards/rejected": -1.1997122764587402, - "step": 2650 - }, - { - "epoch": 2.75, - "learning_rate": 4.7209480122324154e-08, - "logits/chosen": -2.515261650085449, - "logits/rejected": -2.3157753944396973, - "logps/chosen": -299.7487487792969, - "logps/rejected": -317.60516357421875, - "loss": 0.5231, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.024449225515127182, - "rewards/margins": 1.3952572345733643, - "rewards/rejected": -1.3708080053329468, - "step": 2660 - }, - { - "epoch": 2.76, - "learning_rate": 4.529816513761467e-08, - "logits/chosen": -2.3622994422912598, - "logits/rejected": -2.125053644180298, - "logps/chosen": -248.30117797851562, - "logps/rejected": -275.47894287109375, - "loss": 0.4947, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.29935550689697266, - "rewards/margins": 0.4452272355556488, - "rewards/rejected": -0.7445827722549438, - "step": 2670 - }, - { - "epoch": 2.77, - "learning_rate": 4.33868501529052e-08, - "logits/chosen": -2.584453821182251, - "logits/rejected": -2.18440580368042, - "logps/chosen": -242.4678955078125, - "logps/rejected": -205.87344360351562, - "loss": 0.541, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.28912895917892456, - "rewards/margins": 0.5633198618888855, - "rewards/rejected": -0.8524488210678101, - "step": 2680 - }, - { - "epoch": 2.78, - "learning_rate": 4.147553516819572e-08, - "logits/chosen": -2.475851058959961, - "logits/rejected": -2.3260536193847656, - "logps/chosen": -214.3905029296875, - "logps/rejected": -229.3086700439453, - "loss": 0.544, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.2511672079563141, - "rewards/margins": 0.7018988132476807, - "rewards/rejected": -0.9530660510063171, - "step": 2690 - }, - { - "epoch": 2.79, - "learning_rate": 3.9564220183486236e-08, - "logits/chosen": -2.319629192352295, - "logits/rejected": -2.3051748275756836, - "logps/chosen": -242.98965454101562, - "logps/rejected": -185.3482666015625, - "loss": 0.5039, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.17325511574745178, - "rewards/margins": 0.6108760833740234, - "rewards/rejected": -0.7841311693191528, - "step": 2700 - }, - { - "epoch": 2.8, - "learning_rate": 3.7652905198776755e-08, - "logits/chosen": -2.633847713470459, - "logits/rejected": -2.576089382171631, - "logps/chosen": -300.53460693359375, - "logps/rejected": -229.891357421875, - "loss": 0.5421, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.30726686120033264, - "rewards/margins": 0.44638171792030334, - "rewards/rejected": -0.753648579120636, - "step": 2710 - }, - { - "epoch": 2.81, - "learning_rate": 3.574159021406728e-08, - "logits/chosen": -2.354677438735962, - "logits/rejected": -2.4129960536956787, - "logps/chosen": -258.06610107421875, - "logps/rejected": -236.42153930664062, - "loss": 0.549, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.44077181816101074, - "rewards/margins": 0.5614089369773865, - "rewards/rejected": -1.002180814743042, - "step": 2720 - }, - { - "epoch": 2.82, - "learning_rate": 3.383027522935779e-08, - "logits/chosen": -2.444614887237549, - "logits/rejected": -2.2034354209899902, - "logps/chosen": -259.88946533203125, - "logps/rejected": -274.34490966796875, - "loss": 0.5441, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.042647577822208405, - "rewards/margins": 1.146150827407837, - "rewards/rejected": -1.1035032272338867, - "step": 2730 - }, - { - "epoch": 2.83, - "learning_rate": 3.191896024464832e-08, - "logits/chosen": -2.2883150577545166, - "logits/rejected": -2.3496975898742676, - "logps/chosen": -292.2751770019531, - "logps/rejected": -200.29855346679688, - "loss": 0.5666, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.0850500836968422, - "rewards/margins": 1.3057624101638794, - "rewards/rejected": -1.2207123041152954, - "step": 2740 - }, - { - "epoch": 2.84, - "learning_rate": 3.0007645259938836e-08, - "logits/chosen": -2.456864356994629, - "logits/rejected": -2.5583481788635254, - "logps/chosen": -241.4561767578125, - "logps/rejected": -275.2192077636719, - "loss": 0.5165, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.22843988239765167, - "rewards/margins": 0.4716885983943939, - "rewards/rejected": -0.7001284956932068, - "step": 2750 - }, - { - "epoch": 2.85, - "learning_rate": 2.809633027522936e-08, - "logits/chosen": -2.2733702659606934, - "logits/rejected": -2.3403806686401367, - "logps/chosen": -196.03524780273438, - "logps/rejected": -215.77767944335938, - "loss": 0.5142, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.21540656685829163, - "rewards/margins": 0.6115992665290833, - "rewards/rejected": -0.8270059823989868, - "step": 2760 - }, - { - "epoch": 2.86, - "learning_rate": 2.6185015290519877e-08, - "logits/chosen": -2.3207640647888184, - "logits/rejected": -2.3754372596740723, - "logps/chosen": -177.1396026611328, - "logps/rejected": -185.77505493164062, - "loss": 0.5179, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.019303400069475174, - "rewards/margins": 0.6052744388580322, - "rewards/rejected": -0.5859710574150085, - "step": 2770 - }, - { - "epoch": 2.87, - "learning_rate": 2.4273700305810396e-08, - "logits/chosen": -2.4921469688415527, - "logits/rejected": -2.460731029510498, - "logps/chosen": -291.66229248046875, - "logps/rejected": -241.90072631835938, - "loss": 0.5378, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.022645365446805954, - "rewards/margins": 1.2275021076202393, - "rewards/rejected": -1.2048569917678833, - "step": 2780 - }, - { - "epoch": 2.88, - "learning_rate": 2.2362385321100918e-08, - "logits/chosen": -2.3739137649536133, - "logits/rejected": -2.2125802040100098, - "logps/chosen": -286.7039794921875, - "logps/rejected": -244.8232421875, - "loss": 0.5323, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.10285166651010513, - "rewards/margins": 0.4872564375400543, - "rewards/rejected": -0.5901080965995789, - "step": 2790 - }, - { - "epoch": 2.89, - "learning_rate": 2.0451070336391437e-08, - "logits/chosen": -1.8592112064361572, - "logits/rejected": -1.7996069192886353, - "logps/chosen": -194.89865112304688, - "logps/rejected": -183.8830108642578, - "loss": 0.5462, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.18555238842964172, - "rewards/margins": 0.03723742812871933, - "rewards/rejected": -0.22278980910778046, - "step": 2800 - }, - { - "epoch": 2.9, - "learning_rate": 1.8539755351681956e-08, - "logits/chosen": -2.2919793128967285, - "logits/rejected": -2.5266997814178467, - "logps/chosen": -407.080810546875, - "logps/rejected": -314.6758117675781, - "loss": 0.5106, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.05720346048474312, - "rewards/margins": 0.6664168238639832, - "rewards/rejected": -0.6092134714126587, - "step": 2810 - }, - { - "epoch": 2.91, - "learning_rate": 1.6628440366972478e-08, - "logits/chosen": -2.0835564136505127, - "logits/rejected": -2.263801097869873, - "logps/chosen": -319.478271484375, - "logps/rejected": -161.7843017578125, - "loss": 0.5252, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.046999458223581314, - "rewards/margins": 1.0421249866485596, - "rewards/rejected": -1.0891244411468506, - "step": 2820 - }, - { - "epoch": 2.92, - "learning_rate": 1.4717125382262997e-08, - "logits/chosen": -2.5174002647399902, - "logits/rejected": -2.6593270301818848, - "logps/chosen": -317.7415771484375, - "logps/rejected": -299.45513916015625, - "loss": 0.5221, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.22343751788139343, - "rewards/margins": 0.47214531898498535, - "rewards/rejected": -0.695582926273346, - "step": 2830 - }, - { - "epoch": 2.93, - "learning_rate": 1.2805810397553517e-08, - "logits/chosen": -2.3480212688446045, - "logits/rejected": -2.281808376312256, - "logps/chosen": -368.12774658203125, - "logps/rejected": -359.60064697265625, - "loss": 0.5712, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.30808025598526, - "rewards/margins": 0.5734547972679138, - "rewards/rejected": -0.8815349340438843, - "step": 2840 - }, - { - "epoch": 2.94, - "learning_rate": 1.0894495412844038e-08, - "logits/chosen": -2.2814998626708984, - "logits/rejected": -2.3192315101623535, - "logps/chosen": -251.41836547851562, - "logps/rejected": -233.8671417236328, - "loss": 0.5412, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.1413925290107727, - "rewards/margins": 0.6429818272590637, - "rewards/rejected": -0.7843743562698364, - "step": 2850 - }, - { - "epoch": 2.95, - "learning_rate": 8.983180428134555e-09, - "logits/chosen": -2.0733284950256348, - "logits/rejected": -2.226485252380371, - "logps/chosen": -212.7808380126953, - "logps/rejected": -141.1074676513672, - "loss": 0.5101, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.39890027046203613, - "rewards/margins": 0.5048045516014099, - "rewards/rejected": -0.903704822063446, - "step": 2860 - }, - { - "epoch": 2.96, - "learning_rate": 7.071865443425076e-09, - "logits/chosen": -2.551570415496826, - "logits/rejected": -2.3837685585021973, - "logps/chosen": -274.71490478515625, - "logps/rejected": -316.5244140625, - "loss": 0.5186, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.2874767482280731, - "rewards/margins": 0.5797588229179382, - "rewards/rejected": -0.867235541343689, - "step": 2870 - }, - { - "epoch": 2.97, - "learning_rate": 5.1605504587155965e-09, - "logits/chosen": -2.5565171241760254, - "logits/rejected": -2.4579882621765137, - "logps/chosen": -253.5585479736328, - "logps/rejected": -178.70553588867188, - "loss": 0.5493, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.20143918693065643, - "rewards/margins": 0.6308044195175171, - "rewards/rejected": -0.8322436213493347, - "step": 2880 - }, - { - "epoch": 2.98, - "learning_rate": 3.249235474006116e-09, - "logits/chosen": -2.354980707168579, - "logits/rejected": -2.389235496520996, - "logps/chosen": -283.6640319824219, - "logps/rejected": -232.73165893554688, - "loss": 0.532, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.4438861906528473, - "rewards/margins": 0.5923529267311096, - "rewards/rejected": -1.0362392663955688, - "step": 2890 - }, - { - "epoch": 2.99, - "learning_rate": 1.3379204892966359e-09, - "logits/chosen": -2.1370351314544678, - "logits/rejected": -2.2694876194000244, - "logps/chosen": -225.5801239013672, - "logps/rejected": -227.0513916015625, - "loss": 0.5242, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.11508389562368393, - "rewards/margins": 1.1479651927947998, - "rewards/rejected": -1.032881259918213, - "step": 2900 - }, - { - "epoch": 3.0, - "eval_logits/chosen": -2.085988998413086, - "eval_logits/rejected": -1.9190013408660889, - "eval_logps/chosen": -248.65191650390625, - "eval_logps/rejected": -198.58494567871094, - "eval_loss": 0.5269633531570435, - "eval_rewards/accuracies": 0.78125, - "eval_rewards/chosen": -0.12098389863967896, - "eval_rewards/margins": 0.8767741918563843, - "eval_rewards/rejected": -0.9977580308914185, - "eval_runtime": 50.0854, - "eval_samples_per_second": 39.932, - "eval_steps_per_second": 0.319, - "step": 2907 + "epoch": 1.0, + "learning_rate": 4.175013500196112e-12, + "logits/chosen": 0.29777097702026367, + "logits/rejected": 0.5786265134811401, + "logps/chosen": -399.68646240234375, + "logps/rejected": -434.2972106933594, + "loss": 0.5322, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2507613897323608, + "rewards/margins": 0.5890396237373352, + "rewards/rejected": -1.8398010730743408, + "step": 1910 }, { - "epoch": 3.0, - "step": 2907, + "epoch": 1.0, + "step": 1911, "total_flos": 0.0, - "train_loss": 0.5643668570057567, - "train_runtime": 8096.9375, - "train_samples_per_second": 22.959, - "train_steps_per_second": 0.359 + "train_loss": 0.5648497628454511, + "train_runtime": 7610.489, + "train_samples_per_second": 8.033, + "train_steps_per_second": 0.251 } ], "logging_steps": 10, - "max_steps": 2907, - "num_train_epochs": 3, - "save_steps": 500, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, "total_flos": 0.0, + "train_batch_size": 4, "trial_name": null, "trial_params": null }