|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994837377387713, |
|
"eval_steps": 100, |
|
"global_step": 968, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.154639175257731e-09, |
|
"logits/chosen": -2.251229763031006, |
|
"logits/rejected": -2.2295913696289062, |
|
"logps/chosen": -269.52740478515625, |
|
"logps/rejected": -240.59812927246094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.223740339279175, |
|
"logits/rejected": -2.180643081665039, |
|
"logps/chosen": -284.7340087890625, |
|
"logps/rejected": -205.98194885253906, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.0006893649115227163, |
|
"rewards/margins": 0.0007374237175099552, |
|
"rewards/rejected": -0.0014267880469560623, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.33476185798645, |
|
"logits/rejected": -2.2125375270843506, |
|
"logps/chosen": -320.8204040527344, |
|
"logps/rejected": -248.4267120361328, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0003039050498045981, |
|
"rewards/margins": 0.0023796656168997288, |
|
"rewards/rejected": -0.0020757606253027916, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.339370012283325, |
|
"logits/rejected": -2.304020404815674, |
|
"logps/chosen": -268.95074462890625, |
|
"logps/rejected": -227.067626953125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0005883350968360901, |
|
"rewards/margins": 0.002594549907371402, |
|
"rewards/rejected": -0.0020062148105353117, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.3392791748046875, |
|
"logits/rejected": -2.3300938606262207, |
|
"logps/chosen": -308.5113220214844, |
|
"logps/rejected": -253.8385467529297, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0024464379530400038, |
|
"rewards/margins": -0.00025889737298712134, |
|
"rewards/rejected": 0.0027053358498960733, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.251412868499756, |
|
"logits/rejected": -2.2359275817871094, |
|
"logps/chosen": -297.78375244140625, |
|
"logps/rejected": -227.23556518554688, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0033915191888809204, |
|
"rewards/margins": 0.0055986023508012295, |
|
"rewards/rejected": -0.0022070836275815964, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.167163848876953, |
|
"logits/rejected": -2.3376193046569824, |
|
"logps/chosen": -256.54510498046875, |
|
"logps/rejected": -229.5459747314453, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.000388039683457464, |
|
"rewards/margins": 0.007883811369538307, |
|
"rewards/rejected": -0.0074957734905183315, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.3430614471435547, |
|
"logits/rejected": -2.281782627105713, |
|
"logps/chosen": -313.92608642578125, |
|
"logps/rejected": -252.57284545898438, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0012417413527145982, |
|
"rewards/margins": 0.0001173208438558504, |
|
"rewards/rejected": 0.0011244199704378843, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.337070941925049, |
|
"logits/rejected": -2.3018112182617188, |
|
"logps/chosen": -302.9524841308594, |
|
"logps/rejected": -243.9047088623047, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0021400884725153446, |
|
"rewards/margins": -0.0002812549355439842, |
|
"rewards/rejected": 0.002421343233436346, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.259251356124878, |
|
"logits/rejected": -2.2963995933532715, |
|
"logps/chosen": -270.1668395996094, |
|
"logps/rejected": -216.64822387695312, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.009941437281668186, |
|
"rewards/margins": 0.010241752490401268, |
|
"rewards/rejected": -0.00030031436472199857, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.1677582263946533, |
|
"logits/rejected": -2.2741990089416504, |
|
"logps/chosen": -274.75836181640625, |
|
"logps/rejected": -226.3966064453125, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.006115993484854698, |
|
"rewards/margins": 0.0013887921813875437, |
|
"rewards/rejected": 0.0047272020019590855, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.271916389465332, |
|
"logits/rejected": -2.197857141494751, |
|
"logps/chosen": -274.72113037109375, |
|
"logps/rejected": -232.5464324951172, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.005831545684486628, |
|
"rewards/margins": 0.0067709460854530334, |
|
"rewards/rejected": -0.000939400284551084, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.2548232078552246, |
|
"logits/rejected": -2.322075366973877, |
|
"logps/chosen": -319.34521484375, |
|
"logps/rejected": -235.76535034179688, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.013832703232765198, |
|
"rewards/margins": 0.01176449190825224, |
|
"rewards/rejected": 0.002068211790174246, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.32174015045166, |
|
"logits/rejected": -2.3775150775909424, |
|
"logps/chosen": -296.20733642578125, |
|
"logps/rejected": -245.56655883789062, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.017552796751260757, |
|
"rewards/margins": 0.013545483350753784, |
|
"rewards/rejected": 0.004007314797490835, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.3627283573150635, |
|
"logits/rejected": -2.310948133468628, |
|
"logps/chosen": -301.9321594238281, |
|
"logps/rejected": -239.2898406982422, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.011156091466546059, |
|
"rewards/margins": 0.009668431244790554, |
|
"rewards/rejected": 0.0014876595232635736, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.2531113624572754, |
|
"logits/rejected": -2.348215341567993, |
|
"logps/chosen": -284.4292907714844, |
|
"logps/rejected": -259.6882019042969, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.017186133190989494, |
|
"rewards/margins": 0.011862866580486298, |
|
"rewards/rejected": 0.005323265679180622, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.361238956451416, |
|
"logits/rejected": -2.4430744647979736, |
|
"logps/chosen": -286.7644348144531, |
|
"logps/rejected": -221.6837158203125, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.01911218836903572, |
|
"rewards/margins": 0.014816234819591045, |
|
"rewards/rejected": 0.00429595448076725, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.32244610786438, |
|
"logits/rejected": -2.3339757919311523, |
|
"logps/chosen": -301.54693603515625, |
|
"logps/rejected": -239.26095581054688, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.023171866312623024, |
|
"rewards/margins": 0.014685508795082569, |
|
"rewards/rejected": 0.00848635844886303, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.347285032272339, |
|
"logits/rejected": -2.3244121074676514, |
|
"logps/chosen": -257.841552734375, |
|
"logps/rejected": -214.5565643310547, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.019994111731648445, |
|
"rewards/margins": 0.01520625315606594, |
|
"rewards/rejected": 0.004787858575582504, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.2657313346862793, |
|
"logits/rejected": -2.201254367828369, |
|
"logps/chosen": -253.98916625976562, |
|
"logps/rejected": -206.3340301513672, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0245305635035038, |
|
"rewards/margins": 0.017677443102002144, |
|
"rewards/rejected": 0.006853120867162943, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.284461498260498, |
|
"logits/rejected": -2.2873706817626953, |
|
"logps/chosen": -261.44427490234375, |
|
"logps/rejected": -195.59422302246094, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.03187788277864456, |
|
"rewards/margins": 0.024095263332128525, |
|
"rewards/rejected": 0.007782619446516037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.18426513671875, |
|
"logits/rejected": -2.1963071823120117, |
|
"logps/chosen": -302.31195068359375, |
|
"logps/rejected": -218.6005401611328, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.03718667849898338, |
|
"rewards/margins": 0.026892077177762985, |
|
"rewards/rejected": 0.010294605046510696, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.2150394916534424, |
|
"logits/rejected": -2.2160990238189697, |
|
"logps/chosen": -269.44769287109375, |
|
"logps/rejected": -235.6748504638672, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.038056183606386185, |
|
"rewards/margins": 0.023441683501005173, |
|
"rewards/rejected": 0.014614498242735863, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.2152469158172607, |
|
"logits/rejected": -2.1862380504608154, |
|
"logps/chosen": -271.4049377441406, |
|
"logps/rejected": -242.6397247314453, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.03268683701753616, |
|
"rewards/margins": 0.026912549510598183, |
|
"rewards/rejected": 0.0057742842473089695, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.3059380054473877, |
|
"logits/rejected": -2.2681984901428223, |
|
"logps/chosen": -309.55499267578125, |
|
"logps/rejected": -221.61703491210938, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.03509462997317314, |
|
"rewards/margins": 0.012767216190695763, |
|
"rewards/rejected": 0.02232741378247738, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.307035446166992, |
|
"logits/rejected": -2.2920923233032227, |
|
"logps/chosen": -272.9412841796875, |
|
"logps/rejected": -237.314208984375, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03535359352827072, |
|
"rewards/margins": 0.012216273695230484, |
|
"rewards/rejected": 0.023137323558330536, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.3456673622131348, |
|
"logits/rejected": -2.3194832801818848, |
|
"logps/chosen": -270.475341796875, |
|
"logps/rejected": -221.84536743164062, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.04569912329316139, |
|
"rewards/margins": 0.029975151643157005, |
|
"rewards/rejected": 0.015723969787359238, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.385854721069336, |
|
"logits/rejected": -2.3556528091430664, |
|
"logps/chosen": -284.36029052734375, |
|
"logps/rejected": -232.5426788330078, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.04998317360877991, |
|
"rewards/margins": 0.032010577619075775, |
|
"rewards/rejected": 0.017972594127058983, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.308225154876709, |
|
"logits/rejected": -2.259629726409912, |
|
"logps/chosen": -293.1715087890625, |
|
"logps/rejected": -236.4293975830078, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.057786036282777786, |
|
"rewards/margins": 0.04149205610156059, |
|
"rewards/rejected": 0.016293983906507492, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.278501033782959, |
|
"logits/rejected": -2.369293689727783, |
|
"logps/chosen": -278.4786376953125, |
|
"logps/rejected": -227.40927124023438, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0485750176012516, |
|
"rewards/margins": 0.02242155373096466, |
|
"rewards/rejected": 0.02615346387028694, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.2661235332489014, |
|
"logits/rejected": -2.205644130706787, |
|
"logps/chosen": -254.183837890625, |
|
"logps/rejected": -221.9667510986328, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05874975398182869, |
|
"rewards/margins": 0.03965791314840317, |
|
"rewards/rejected": 0.019091838970780373, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.32353138923645, |
|
"logits/rejected": -2.3743112087249756, |
|
"logps/chosen": -306.22711181640625, |
|
"logps/rejected": -257.60980224609375, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04823786020278931, |
|
"rewards/margins": 0.017192820087075233, |
|
"rewards/rejected": 0.03104504384100437, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.234679698944092, |
|
"logits/rejected": -2.211430788040161, |
|
"logps/chosen": -251.83053588867188, |
|
"logps/rejected": -193.01544189453125, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06583289802074432, |
|
"rewards/margins": 0.047706056386232376, |
|
"rewards/rejected": 0.018126841634511948, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.259127140045166, |
|
"logits/rejected": -2.287956714630127, |
|
"logps/chosen": -312.1918029785156, |
|
"logps/rejected": -239.03530883789062, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.07068151980638504, |
|
"rewards/margins": 0.051512353122234344, |
|
"rewards/rejected": 0.0191691592335701, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.197277784347534, |
|
"logits/rejected": -2.13037109375, |
|
"logps/chosen": -244.2609100341797, |
|
"logps/rejected": -238.80953979492188, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.05732797831296921, |
|
"rewards/margins": 0.030042264610528946, |
|
"rewards/rejected": 0.027285713702440262, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.365830421447754, |
|
"logits/rejected": -2.3728528022766113, |
|
"logps/chosen": -313.7022705078125, |
|
"logps/rejected": -248.090087890625, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.08016298711299896, |
|
"rewards/margins": 0.05509548634290695, |
|
"rewards/rejected": 0.025067497044801712, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.22756290435791, |
|
"logits/rejected": -2.259359121322632, |
|
"logps/chosen": -303.25250244140625, |
|
"logps/rejected": -249.8985595703125, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06414168328046799, |
|
"rewards/margins": 0.04363773763179779, |
|
"rewards/rejected": 0.020503941923379898, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.3700273036956787, |
|
"logits/rejected": -2.3231639862060547, |
|
"logps/chosen": -314.5257263183594, |
|
"logps/rejected": -270.7105712890625, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.07061124593019485, |
|
"rewards/margins": 0.03391130641102791, |
|
"rewards/rejected": 0.03669993579387665, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.3212878704071045, |
|
"logits/rejected": -2.249602794647217, |
|
"logps/chosen": -291.92474365234375, |
|
"logps/rejected": -239.6724395751953, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.07373902946710587, |
|
"rewards/margins": 0.03367278352379799, |
|
"rewards/rejected": 0.04006624594330788, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.297023057937622, |
|
"logits/rejected": -2.264172077178955, |
|
"logps/chosen": -278.0927734375, |
|
"logps/rejected": -237.13436889648438, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.0686158686876297, |
|
"rewards/margins": 0.051144860684871674, |
|
"rewards/rejected": 0.01747100241482258, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.237035036087036, |
|
"logits/rejected": -2.2392399311065674, |
|
"logps/chosen": -263.4399108886719, |
|
"logps/rejected": -213.87451171875, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.06768475472927094, |
|
"rewards/margins": 0.048441771417856216, |
|
"rewards/rejected": 0.019242987036705017, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.2776081562042236, |
|
"logits/rejected": -2.2924447059631348, |
|
"logps/chosen": -268.8953857421875, |
|
"logps/rejected": -252.852294921875, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.08111406862735748, |
|
"rewards/margins": 0.05318716913461685, |
|
"rewards/rejected": 0.027926897630095482, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.3054046630859375, |
|
"logits/rejected": -2.2502362728118896, |
|
"logps/chosen": -252.5205841064453, |
|
"logps/rejected": -204.43344116210938, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.07272285223007202, |
|
"rewards/margins": 0.04809904843568802, |
|
"rewards/rejected": 0.024623800069093704, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.3482632637023926, |
|
"logits/rejected": -2.3258707523345947, |
|
"logps/chosen": -263.67095947265625, |
|
"logps/rejected": -241.14047241210938, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07022975385189056, |
|
"rewards/margins": 0.04051927849650383, |
|
"rewards/rejected": 0.029710477218031883, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.286533832550049, |
|
"logits/rejected": -2.320568084716797, |
|
"logps/chosen": -286.72894287109375, |
|
"logps/rejected": -247.65542602539062, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.07666246592998505, |
|
"rewards/margins": 0.05972421169281006, |
|
"rewards/rejected": 0.01693824864923954, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.206477642059326, |
|
"logits/rejected": -2.315464496612549, |
|
"logps/chosen": -276.1682434082031, |
|
"logps/rejected": -230.3959197998047, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0718303695321083, |
|
"rewards/margins": 0.04074189439415932, |
|
"rewards/rejected": 0.03108847141265869, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.277815103530884, |
|
"logits/rejected": -2.342268705368042, |
|
"logps/chosen": -273.23773193359375, |
|
"logps/rejected": -222.5966796875, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0802597850561142, |
|
"rewards/margins": 0.050464123487472534, |
|
"rewards/rejected": 0.029795657843351364, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.2656216621398926, |
|
"logits/rejected": -2.2778594493865967, |
|
"logps/chosen": -248.9929656982422, |
|
"logps/rejected": -215.5894012451172, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.08564073592424393, |
|
"rewards/margins": 0.06490761041641235, |
|
"rewards/rejected": 0.020733121782541275, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.2962255477905273, |
|
"logits/rejected": -2.27239727973938, |
|
"logps/chosen": -289.5277404785156, |
|
"logps/rejected": -231.601318359375, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.08144901692867279, |
|
"rewards/margins": 0.05658548325300217, |
|
"rewards/rejected": 0.024863524362444878, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.445746660232544, |
|
"logits/rejected": -2.267007827758789, |
|
"logps/chosen": -293.1885986328125, |
|
"logps/rejected": -243.8875274658203, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.10828351974487305, |
|
"rewards/margins": 0.08175922185182571, |
|
"rewards/rejected": 0.02652430161833763, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.278276205062866, |
|
"logits/rejected": -2.295633316040039, |
|
"logps/chosen": -254.94760131835938, |
|
"logps/rejected": -221.79452514648438, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08227074891328812, |
|
"rewards/margins": 0.055896710604429245, |
|
"rewards/rejected": 0.026374032720923424, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.202611207962036, |
|
"logits/rejected": -2.2495861053466797, |
|
"logps/chosen": -310.4443664550781, |
|
"logps/rejected": -256.72406005859375, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.07021793723106384, |
|
"rewards/margins": 0.040728576481342316, |
|
"rewards/rejected": 0.02948935702443123, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.3376307487487793, |
|
"logits/rejected": -2.352074146270752, |
|
"logps/chosen": -278.10504150390625, |
|
"logps/rejected": -244.0722198486328, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0925985723733902, |
|
"rewards/margins": 0.0637633204460144, |
|
"rewards/rejected": 0.028835251927375793, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.243332624435425, |
|
"logits/rejected": -2.2513413429260254, |
|
"logps/chosen": -242.59439086914062, |
|
"logps/rejected": -224.13259887695312, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.07866770029067993, |
|
"rewards/margins": 0.057711243629455566, |
|
"rewards/rejected": 0.020956454798579216, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.300567150115967, |
|
"logits/rejected": -2.271827220916748, |
|
"logps/chosen": -288.2174377441406, |
|
"logps/rejected": -240.34439086914062, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.10411250591278076, |
|
"rewards/margins": 0.05851038545370102, |
|
"rewards/rejected": 0.04560210928320885, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.3359756469726562, |
|
"logits/rejected": -2.194058895111084, |
|
"logps/chosen": -265.052001953125, |
|
"logps/rejected": -230.23605346679688, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0775262787938118, |
|
"rewards/margins": 0.05575944110751152, |
|
"rewards/rejected": 0.021766824647784233, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.3195242881774902, |
|
"logits/rejected": -2.283975124359131, |
|
"logps/chosen": -302.0104064941406, |
|
"logps/rejected": -252.0124053955078, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.10010389983654022, |
|
"rewards/margins": 0.053703296929597855, |
|
"rewards/rejected": 0.04640059918165207, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.2481091022491455, |
|
"logits/rejected": -2.400871515274048, |
|
"logps/chosen": -268.6519775390625, |
|
"logps/rejected": -223.69882202148438, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0826568529009819, |
|
"rewards/margins": 0.05431235954165459, |
|
"rewards/rejected": 0.028344491496682167, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.299408197402954, |
|
"logits/rejected": -2.22338604927063, |
|
"logps/chosen": -299.3912353515625, |
|
"logps/rejected": -236.9815216064453, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.10458721220493317, |
|
"rewards/margins": 0.08465038239955902, |
|
"rewards/rejected": 0.019936833530664444, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.2584633827209473, |
|
"logits/rejected": -2.2311649322509766, |
|
"logps/chosen": -253.76913452148438, |
|
"logps/rejected": -218.6166534423828, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.07234074175357819, |
|
"rewards/margins": 0.04758009687066078, |
|
"rewards/rejected": 0.024760644882917404, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.318943738937378, |
|
"logits/rejected": -2.2511682510375977, |
|
"logps/chosen": -256.5652770996094, |
|
"logps/rejected": -206.35586547851562, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.07542125880718231, |
|
"rewards/margins": 0.0553053617477417, |
|
"rewards/rejected": 0.020115893334150314, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.3058714866638184, |
|
"logits/rejected": -2.304198741912842, |
|
"logps/chosen": -266.4674987792969, |
|
"logps/rejected": -223.82711791992188, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09824246913194656, |
|
"rewards/margins": 0.06738617271184921, |
|
"rewards/rejected": 0.03085630014538765, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.337787389755249, |
|
"logits/rejected": -2.2819180488586426, |
|
"logps/chosen": -313.7826232910156, |
|
"logps/rejected": -249.5704803466797, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.10966908931732178, |
|
"rewards/margins": 0.08016980439424515, |
|
"rewards/rejected": 0.029499292373657227, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.2067112922668457, |
|
"logits/rejected": -2.246953010559082, |
|
"logps/chosen": -259.2144775390625, |
|
"logps/rejected": -240.3810272216797, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09941162168979645, |
|
"rewards/margins": 0.06417630612850189, |
|
"rewards/rejected": 0.035235337913036346, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.2894420623779297, |
|
"logits/rejected": -2.2385382652282715, |
|
"logps/chosen": -266.48992919921875, |
|
"logps/rejected": -217.8952178955078, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.095299132168293, |
|
"rewards/margins": 0.07987986505031586, |
|
"rewards/rejected": 0.01541926246136427, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.33485746383667, |
|
"logits/rejected": -2.3108019828796387, |
|
"logps/chosen": -284.7020568847656, |
|
"logps/rejected": -232.82080078125, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.10341651737689972, |
|
"rewards/margins": 0.07464977353811264, |
|
"rewards/rejected": 0.028766745701432228, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.3347816467285156, |
|
"logits/rejected": -2.2758853435516357, |
|
"logps/chosen": -279.80059814453125, |
|
"logps/rejected": -233.2425994873047, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.11068934202194214, |
|
"rewards/margins": 0.07695071399211884, |
|
"rewards/rejected": 0.0337386280298233, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.2854952812194824, |
|
"logits/rejected": -2.273536205291748, |
|
"logps/chosen": -295.6964416503906, |
|
"logps/rejected": -240.4071502685547, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.1013779416680336, |
|
"rewards/margins": 0.060683172196149826, |
|
"rewards/rejected": 0.04069476202130318, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.34243106842041, |
|
"logits/rejected": -2.2720611095428467, |
|
"logps/chosen": -289.71722412109375, |
|
"logps/rejected": -230.321533203125, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.09767869859933853, |
|
"rewards/margins": 0.039280109107494354, |
|
"rewards/rejected": 0.05839858204126358, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.371598482131958, |
|
"logits/rejected": -2.362656354904175, |
|
"logps/chosen": -268.17828369140625, |
|
"logps/rejected": -229.41232299804688, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0969640463590622, |
|
"rewards/margins": 0.06369610875844955, |
|
"rewards/rejected": 0.033267926424741745, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.2588796615600586, |
|
"logits/rejected": -2.2576823234558105, |
|
"logps/chosen": -282.4342041015625, |
|
"logps/rejected": -222.56381225585938, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.10399500280618668, |
|
"rewards/margins": 0.08138440549373627, |
|
"rewards/rejected": 0.0226106159389019, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.3341283798217773, |
|
"logits/rejected": -2.2046780586242676, |
|
"logps/chosen": -272.2647399902344, |
|
"logps/rejected": -208.01364135742188, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.10669133812189102, |
|
"rewards/margins": 0.08235933631658554, |
|
"rewards/rejected": 0.02433200553059578, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.323979139328003, |
|
"logits/rejected": -2.340238094329834, |
|
"logps/chosen": -303.2074279785156, |
|
"logps/rejected": -259.44268798828125, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11533965170383453, |
|
"rewards/margins": 0.047552816569805145, |
|
"rewards/rejected": 0.06778682768344879, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.3031513690948486, |
|
"logits/rejected": -2.28584623336792, |
|
"logps/chosen": -270.1670837402344, |
|
"logps/rejected": -252.5519256591797, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.10461707413196564, |
|
"rewards/margins": 0.058367032557725906, |
|
"rewards/rejected": 0.04625004902482033, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.2157022953033447, |
|
"logits/rejected": -2.2670745849609375, |
|
"logps/chosen": -276.71240234375, |
|
"logps/rejected": -199.2496795654297, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.11176248639822006, |
|
"rewards/margins": 0.08353973925113678, |
|
"rewards/rejected": 0.02822275087237358, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.2043914794921875, |
|
"logits/rejected": -2.221619129180908, |
|
"logps/chosen": -269.0702819824219, |
|
"logps/rejected": -220.8921356201172, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.09922349452972412, |
|
"rewards/margins": 0.04318443313241005, |
|
"rewards/rejected": 0.05603905767202377, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.232959270477295, |
|
"logits/rejected": -2.2529525756835938, |
|
"logps/chosen": -267.9338684082031, |
|
"logps/rejected": -249.4876251220703, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.08004304021596909, |
|
"rewards/margins": 0.04949140548706055, |
|
"rewards/rejected": 0.030551627278327942, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.293257236480713, |
|
"logits/rejected": -2.2078585624694824, |
|
"logps/chosen": -273.19671630859375, |
|
"logps/rejected": -238.57858276367188, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11353409290313721, |
|
"rewards/margins": 0.06645722687244415, |
|
"rewards/rejected": 0.04707685858011246, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.3507869243621826, |
|
"logits/rejected": -2.325718879699707, |
|
"logps/chosen": -290.9693298339844, |
|
"logps/rejected": -236.1486358642578, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0980958342552185, |
|
"rewards/margins": 0.07181811332702637, |
|
"rewards/rejected": 0.026277724653482437, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.268038272857666, |
|
"logits/rejected": -2.286581516265869, |
|
"logps/chosen": -270.3387451171875, |
|
"logps/rejected": -221.06356811523438, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.12088136374950409, |
|
"rewards/margins": 0.080001600086689, |
|
"rewards/rejected": 0.040879763662815094, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.272735118865967, |
|
"logits/rejected": -2.2941083908081055, |
|
"logps/chosen": -284.6488952636719, |
|
"logps/rejected": -243.56796264648438, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.1113913282752037, |
|
"rewards/margins": 0.05327050760388374, |
|
"rewards/rejected": 0.05812082439661026, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.2838375568389893, |
|
"logits/rejected": -2.289247751235962, |
|
"logps/chosen": -269.5845642089844, |
|
"logps/rejected": -230.6207275390625, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.09149408340454102, |
|
"rewards/margins": 0.06341233849525452, |
|
"rewards/rejected": 0.02808173932135105, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.365980863571167, |
|
"logits/rejected": -2.3436598777770996, |
|
"logps/chosen": -302.0718688964844, |
|
"logps/rejected": -228.1407470703125, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.13062262535095215, |
|
"rewards/margins": 0.08858474344015121, |
|
"rewards/rejected": 0.04203786700963974, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.342413902282715, |
|
"logits/rejected": -2.2254080772399902, |
|
"logps/chosen": -287.4922180175781, |
|
"logps/rejected": -222.5606231689453, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.12904855608940125, |
|
"rewards/margins": 0.08615640550851822, |
|
"rewards/rejected": 0.04289213940501213, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.259397029876709, |
|
"logits/rejected": -2.227036476135254, |
|
"logps/chosen": -258.3423767089844, |
|
"logps/rejected": -216.99606323242188, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10358164459466934, |
|
"rewards/margins": 0.06773830950260162, |
|
"rewards/rejected": 0.03584333881735802, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.2834537029266357, |
|
"logits/rejected": -2.3872971534729004, |
|
"logps/chosen": -262.05084228515625, |
|
"logps/rejected": -231.11306762695312, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09495140612125397, |
|
"rewards/margins": 0.055265575647354126, |
|
"rewards/rejected": 0.03968583419919014, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.4065003395080566, |
|
"logits/rejected": -2.3337345123291016, |
|
"logps/chosen": -295.71478271484375, |
|
"logps/rejected": -270.1822814941406, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.11348612606525421, |
|
"rewards/margins": 0.07466179132461548, |
|
"rewards/rejected": 0.03882431983947754, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.2463555335998535, |
|
"logits/rejected": -2.2443947792053223, |
|
"logps/chosen": -312.9588317871094, |
|
"logps/rejected": -237.4109344482422, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.10128283500671387, |
|
"rewards/margins": 0.053178369998931885, |
|
"rewards/rejected": 0.04810447618365288, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.358501434326172, |
|
"logits/rejected": -2.313483715057373, |
|
"logps/chosen": -291.43377685546875, |
|
"logps/rejected": -240.09054565429688, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.10742716491222382, |
|
"rewards/margins": 0.07204015552997589, |
|
"rewards/rejected": 0.03538701683282852, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.313149929046631, |
|
"logits/rejected": -2.3558261394500732, |
|
"logps/chosen": -285.90643310546875, |
|
"logps/rejected": -235.43051147460938, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.12259715795516968, |
|
"rewards/margins": 0.09698096662759781, |
|
"rewards/rejected": 0.02561618760228157, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.3278651237487793, |
|
"logits/rejected": -2.195068836212158, |
|
"logps/chosen": -272.7381896972656, |
|
"logps/rejected": -211.40640258789062, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.1207551583647728, |
|
"rewards/margins": 0.09316142648458481, |
|
"rewards/rejected": 0.027593741193413734, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.290696859359741, |
|
"logits/rejected": -2.3440823554992676, |
|
"logps/chosen": -238.2651824951172, |
|
"logps/rejected": -206.77969360351562, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.09928463399410248, |
|
"rewards/margins": 0.07226204872131348, |
|
"rewards/rejected": 0.027022594586014748, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.375807762145996, |
|
"logits/rejected": -2.367743730545044, |
|
"logps/chosen": -281.56195068359375, |
|
"logps/rejected": -225.125244140625, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.1072310209274292, |
|
"rewards/margins": 0.056608647108078, |
|
"rewards/rejected": 0.050622373819351196, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.281919002532959, |
|
"logits/rejected": -2.254122734069824, |
|
"logps/chosen": -256.39105224609375, |
|
"logps/rejected": -203.3081817626953, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.11211923509836197, |
|
"rewards/margins": 0.07925260812044144, |
|
"rewards/rejected": 0.03286661207675934, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.316282272338867, |
|
"logits/rejected": -2.3123340606689453, |
|
"logps/chosen": -271.6207580566406, |
|
"logps/rejected": -231.7317352294922, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.10637687146663666, |
|
"rewards/margins": 0.06768520176410675, |
|
"rewards/rejected": 0.0386916846036911, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.3146958351135254, |
|
"logits/rejected": -2.2793381214141846, |
|
"logps/chosen": -282.83270263671875, |
|
"logps/rejected": -233.0804443359375, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.11455857753753662, |
|
"rewards/margins": 0.0838586837053299, |
|
"rewards/rejected": 0.030699897557497025, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.251638889312744, |
|
"logits/rejected": -2.234907627105713, |
|
"logps/chosen": -281.0075378417969, |
|
"logps/rejected": -239.98049926757812, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.1062885969877243, |
|
"rewards/margins": 0.06708581745624542, |
|
"rewards/rejected": 0.03920278698205948, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.4597132205963135, |
|
"eval_logits/rejected": -2.398695468902588, |
|
"eval_logps/chosen": -278.69171142578125, |
|
"eval_logps/rejected": -230.4560089111328, |
|
"eval_loss": 0.6642152070999146, |
|
"eval_rewards/accuracies": 0.6480000019073486, |
|
"eval_rewards/chosen": 0.10415761172771454, |
|
"eval_rewards/margins": 0.06405296921730042, |
|
"eval_rewards/rejected": 0.04010463133454323, |
|
"eval_runtime": 443.9432, |
|
"eval_samples_per_second": 4.505, |
|
"eval_steps_per_second": 0.282, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 968, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6728762634529555, |
|
"train_runtime": 27528.1814, |
|
"train_samples_per_second": 2.251, |
|
"train_steps_per_second": 0.035 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 968, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|