|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996020692399522, |
|
"eval_steps": 100, |
|
"global_step": 1256, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/losses": 0.23031963407993317, |
|
"debug/policy_weights": 0.3322809934616089, |
|
"debug/raw_losses": 0.6931471824645996, |
|
"epoch": 0.0007958615200955034, |
|
"grad_norm": 1.6286401468051575, |
|
"learning_rate": 3.968253968253968e-09, |
|
"logits/chosen": -2.735659122467041, |
|
"logits/rejected": -2.7581238746643066, |
|
"logps/chosen": -124.62968444824219, |
|
"logps/rejected": -168.09475708007812, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/losses": 0.23611842095851898, |
|
"debug/policy_weights": 0.34072285890579224, |
|
"debug/raw_losses": 0.6929619312286377, |
|
"epoch": 0.007958615200955034, |
|
"grad_norm": 1.6881469544577703, |
|
"learning_rate": 3.968253968253968e-08, |
|
"logits/chosen": -2.738783836364746, |
|
"logits/rejected": -2.7277822494506836, |
|
"logps/chosen": -146.68910217285156, |
|
"logps/rejected": -131.2349395751953, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.00013088027480989695, |
|
"rewards/margins": 0.00037292600609362125, |
|
"rewards/rejected": -0.00024204571673180908, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/losses": 0.2265315055847168, |
|
"debug/policy_weights": 0.3267918825149536, |
|
"debug/raw_losses": 0.693217396736145, |
|
"epoch": 0.01591723040191007, |
|
"grad_norm": 1.5615628005517825, |
|
"learning_rate": 7.936507936507936e-08, |
|
"logits/chosen": -2.706993341445923, |
|
"logits/rejected": -2.703998327255249, |
|
"logps/chosen": -129.48587036132812, |
|
"logps/rejected": -130.25735473632812, |
|
"loss": 0.2238, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00017557166574988514, |
|
"rewards/margins": -0.000137387789436616, |
|
"rewards/rejected": -3.818388358922675e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/losses": 0.2132270336151123, |
|
"debug/policy_weights": 0.30768805742263794, |
|
"debug/raw_losses": 0.6929622888565063, |
|
"epoch": 0.0238758456028651, |
|
"grad_norm": 1.5392325714344397, |
|
"learning_rate": 1.1904761904761903e-07, |
|
"logits/chosen": -2.6836955547332764, |
|
"logits/rejected": -2.680663824081421, |
|
"logps/chosen": -141.81492614746094, |
|
"logps/rejected": -155.6810760498047, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.00016820887685753405, |
|
"rewards/margins": 0.0003723005356732756, |
|
"rewards/rejected": -0.00020409165881574154, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/losses": 0.2176501303911209, |
|
"debug/policy_weights": 0.3141174018383026, |
|
"debug/raw_losses": 0.6928995847702026, |
|
"epoch": 0.03183446080382014, |
|
"grad_norm": 1.638997322742743, |
|
"learning_rate": 1.5873015873015872e-07, |
|
"logits/chosen": -2.6918673515319824, |
|
"logits/rejected": -2.6841723918914795, |
|
"logps/chosen": -154.97286987304688, |
|
"logps/rejected": -164.1558837890625, |
|
"loss": 0.221, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.001508228713646531, |
|
"rewards/margins": 0.0005008662701584399, |
|
"rewards/rejected": -0.0020090951584279537, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/losses": 0.2295212298631668, |
|
"debug/policy_weights": 0.33170244097709656, |
|
"debug/raw_losses": 0.6918987035751343, |
|
"epoch": 0.03979307600477517, |
|
"grad_norm": 1.4818468652902261, |
|
"learning_rate": 1.984126984126984e-07, |
|
"logits/chosen": -2.7066802978515625, |
|
"logits/rejected": -2.6879210472106934, |
|
"logps/chosen": -144.00912475585938, |
|
"logps/rejected": -137.74859619140625, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.002855247352272272, |
|
"rewards/margins": 0.0025138729251921177, |
|
"rewards/rejected": -0.005369120743125677, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/losses": 0.22746217250823975, |
|
"debug/policy_weights": 0.3287571668624878, |
|
"debug/raw_losses": 0.6916018724441528, |
|
"epoch": 0.0477516912057302, |
|
"grad_norm": 1.4868186691240965, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.715649127960205, |
|
"logits/rejected": -2.716421604156494, |
|
"logps/chosen": -145.9324951171875, |
|
"logps/rejected": -159.49798583984375, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.003058128524571657, |
|
"rewards/margins": 0.0031352252699434757, |
|
"rewards/rejected": -0.006193353794515133, |
|
"step": 60 |
|
}, |
|
{ |
|
"debug/losses": 0.2176016867160797, |
|
"debug/policy_weights": 0.3151531517505646, |
|
"debug/raw_losses": 0.6905783414840698, |
|
"epoch": 0.055710306406685235, |
|
"grad_norm": 1.5355880647836464, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.736604690551758, |
|
"logits/rejected": -2.727555990219116, |
|
"logps/chosen": -149.36459350585938, |
|
"logps/rejected": -143.43045043945312, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.017263194546103477, |
|
"rewards/margins": 0.005380765534937382, |
|
"rewards/rejected": -0.022643957287073135, |
|
"step": 70 |
|
}, |
|
{ |
|
"debug/losses": 0.1883382946252823, |
|
"debug/policy_weights": 0.2724798023700714, |
|
"debug/raw_losses": 0.6915446519851685, |
|
"epoch": 0.06366892160764027, |
|
"grad_norm": 1.5327858491562372, |
|
"learning_rate": 3.1746031746031743e-07, |
|
"logits/chosen": -2.7107303142547607, |
|
"logits/rejected": -2.6923205852508545, |
|
"logps/chosen": -157.98587036132812, |
|
"logps/rejected": -149.39295959472656, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04851604253053665, |
|
"rewards/margins": 0.003855167655274272, |
|
"rewards/rejected": -0.05237121507525444, |
|
"step": 80 |
|
}, |
|
{ |
|
"debug/losses": 0.18660078942775726, |
|
"debug/policy_weights": 0.2731621563434601, |
|
"debug/raw_losses": 0.6829166412353516, |
|
"epoch": 0.07162753680859531, |
|
"grad_norm": 1.6166482985332293, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -2.7208943367004395, |
|
"logits/rejected": -2.727738618850708, |
|
"logps/chosen": -152.66610717773438, |
|
"logps/rejected": -173.5172882080078, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.06819866597652435, |
|
"rewards/margins": 0.022195402532815933, |
|
"rewards/rejected": -0.09039406478404999, |
|
"step": 90 |
|
}, |
|
{ |
|
"debug/losses": 0.16780678927898407, |
|
"debug/policy_weights": 0.24758808314800262, |
|
"debug/raw_losses": 0.6784400939941406, |
|
"epoch": 0.07958615200955034, |
|
"grad_norm": 1.5200040949416276, |
|
"learning_rate": 3.968253968253968e-07, |
|
"logits/chosen": -2.6890692710876465, |
|
"logits/rejected": -2.6723039150238037, |
|
"logps/chosen": -149.3301239013672, |
|
"logps/rejected": -143.3484344482422, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11518146842718124, |
|
"rewards/margins": 0.03295627981424332, |
|
"rewards/rejected": -0.14813776314258575, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07958615200955034, |
|
"eval_debug/losses": 0.1618107706308365, |
|
"eval_debug/policy_weights": 0.23841436207294464, |
|
"eval_debug/raw_losses": 0.6786514520645142, |
|
"eval_logits/chosen": -2.711678981781006, |
|
"eval_logits/rejected": -2.703619956970215, |
|
"eval_logps/chosen": -158.4381103515625, |
|
"eval_logps/rejected": -168.3044891357422, |
|
"eval_loss": 0.1633094847202301, |
|
"eval_rewards/accuracies": 0.5979477763175964, |
|
"eval_rewards/chosen": -0.1419461965560913, |
|
"eval_rewards/margins": 0.03410893306136131, |
|
"eval_rewards/rejected": -0.17605511844158173, |
|
"eval_runtime": 152.723, |
|
"eval_samples_per_second": 55.997, |
|
"eval_steps_per_second": 0.877, |
|
"step": 100 |
|
}, |
|
{ |
|
"debug/losses": 0.15096870064735413, |
|
"debug/policy_weights": 0.2191181182861328, |
|
"debug/raw_losses": 0.6909996867179871, |
|
"epoch": 0.08754476721050537, |
|
"grad_norm": 1.5046677998036695, |
|
"learning_rate": 4.365079365079365e-07, |
|
"logits/chosen": -2.6899752616882324, |
|
"logits/rejected": -2.671255350112915, |
|
"logps/chosen": -179.54322814941406, |
|
"logps/rejected": -165.19168090820312, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.21252474188804626, |
|
"rewards/margins": 0.011284739710390568, |
|
"rewards/rejected": -0.22380945086479187, |
|
"step": 110 |
|
}, |
|
{ |
|
"debug/losses": 0.1395491510629654, |
|
"debug/policy_weights": 0.20841534435749054, |
|
"debug/raw_losses": 0.6783668398857117, |
|
"epoch": 0.0955033824114604, |
|
"grad_norm": 1.388858214651452, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.663153886795044, |
|
"logits/rejected": -2.6503779888153076, |
|
"logps/chosen": -167.6672821044922, |
|
"logps/rejected": -172.83718872070312, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2853550910949707, |
|
"rewards/margins": 0.04338715597987175, |
|
"rewards/rejected": -0.32874220609664917, |
|
"step": 120 |
|
}, |
|
{ |
|
"debug/losses": 0.11419715732336044, |
|
"debug/policy_weights": 0.17497313022613525, |
|
"debug/raw_losses": 0.6557947397232056, |
|
"epoch": 0.10346199761241544, |
|
"grad_norm": 2.1752818607208244, |
|
"learning_rate": 4.999845414634076e-07, |
|
"logits/chosen": -2.6734774112701416, |
|
"logits/rejected": -2.645984172821045, |
|
"logps/chosen": -189.3183135986328, |
|
"logps/rejected": -179.66036987304688, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.34408754110336304, |
|
"rewards/margins": 0.09557478129863739, |
|
"rewards/rejected": -0.439662367105484, |
|
"step": 130 |
|
}, |
|
{ |
|
"debug/losses": 0.0952613353729248, |
|
"debug/policy_weights": 0.14768476784229279, |
|
"debug/raw_losses": 0.6522240042686462, |
|
"epoch": 0.11142061281337047, |
|
"grad_norm": 2.442265971491927, |
|
"learning_rate": 4.998106548810311e-07, |
|
"logits/chosen": -2.6368653774261475, |
|
"logits/rejected": -2.6138510704040527, |
|
"logps/chosen": -192.3903045654297, |
|
"logps/rejected": -183.68685913085938, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.46787652373313904, |
|
"rewards/margins": 0.11621709167957306, |
|
"rewards/rejected": -0.5840936303138733, |
|
"step": 140 |
|
}, |
|
{ |
|
"debug/losses": 0.07598518580198288, |
|
"debug/policy_weights": 0.12855970859527588, |
|
"debug/raw_losses": 0.6079715490341187, |
|
"epoch": 0.1193792280143255, |
|
"grad_norm": 5.777724458868344, |
|
"learning_rate": 4.994436933879359e-07, |
|
"logits/chosen": -2.6171412467956543, |
|
"logits/rejected": -2.610034942626953, |
|
"logps/chosen": -196.5837860107422, |
|
"logps/rejected": -225.31173706054688, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5704454183578491, |
|
"rewards/margins": 0.22279813885688782, |
|
"rewards/rejected": -0.7932435870170593, |
|
"step": 150 |
|
}, |
|
{ |
|
"debug/losses": 0.10459884256124496, |
|
"debug/policy_weights": 0.1645442694425583, |
|
"debug/raw_losses": 0.6207043528556824, |
|
"epoch": 0.12733784321528055, |
|
"grad_norm": 1.6699884563484173, |
|
"learning_rate": 4.988839406031596e-07, |
|
"logits/chosen": -2.6161184310913086, |
|
"logits/rejected": -2.6225523948669434, |
|
"logps/chosen": -160.10772705078125, |
|
"logps/rejected": -207.2742156982422, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3946920931339264, |
|
"rewards/margins": 0.20463863015174866, |
|
"rewards/rejected": -0.599330723285675, |
|
"step": 160 |
|
}, |
|
{ |
|
"debug/losses": 0.099858358502388, |
|
"debug/policy_weights": 0.1572486311197281, |
|
"debug/raw_losses": 0.6404817700386047, |
|
"epoch": 0.13529645841623558, |
|
"grad_norm": 1.696036642690539, |
|
"learning_rate": 4.981318291512395e-07, |
|
"logits/chosen": -2.5810608863830566, |
|
"logits/rejected": -2.578481912612915, |
|
"logps/chosen": -174.32598876953125, |
|
"logps/rejected": -198.0808868408203, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.379153311252594, |
|
"rewards/margins": 0.15221598744392395, |
|
"rewards/rejected": -0.5313693284988403, |
|
"step": 170 |
|
}, |
|
{ |
|
"debug/losses": 0.09473618865013123, |
|
"debug/policy_weights": 0.13851876556873322, |
|
"debug/raw_losses": 0.672447681427002, |
|
"epoch": 0.14325507361719061, |
|
"grad_norm": 1.9630112175338894, |
|
"learning_rate": 4.971879403278432e-07, |
|
"logits/chosen": -2.5959112644195557, |
|
"logits/rejected": -2.5829200744628906, |
|
"logps/chosen": -202.05296325683594, |
|
"logps/rejected": -207.42648315429688, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5352639555931091, |
|
"rewards/margins": 0.0995597094297409, |
|
"rewards/rejected": -0.6348236203193665, |
|
"step": 180 |
|
}, |
|
{ |
|
"debug/losses": 0.08237513154745102, |
|
"debug/policy_weights": 0.12221841514110565, |
|
"debug/raw_losses": 0.6608506441116333, |
|
"epoch": 0.15121368881814565, |
|
"grad_norm": 1.9225034938066397, |
|
"learning_rate": 4.960530036504941e-07, |
|
"logits/chosen": -2.579829692840576, |
|
"logits/rejected": -2.5656394958496094, |
|
"logps/chosen": -222.9535369873047, |
|
"logps/rejected": -232.0922393798828, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7860406637191772, |
|
"rewards/margins": 0.1566535085439682, |
|
"rewards/rejected": -0.9426941871643066, |
|
"step": 190 |
|
}, |
|
{ |
|
"debug/losses": 0.08729609102010727, |
|
"debug/policy_weights": 0.13735465705394745, |
|
"debug/raw_losses": 0.6344841718673706, |
|
"epoch": 0.15917230401910068, |
|
"grad_norm": 1.4158943423202177, |
|
"learning_rate": 4.947278962947386e-07, |
|
"logits/chosen": -2.5332934856414795, |
|
"logits/rejected": -2.5345585346221924, |
|
"logps/chosen": -217.6802215576172, |
|
"logps/rejected": -252.038330078125, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6853083372116089, |
|
"rewards/margins": 0.20577910542488098, |
|
"rewards/rejected": -0.8910874128341675, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15917230401910068, |
|
"eval_debug/losses": 0.07361924648284912, |
|
"eval_debug/policy_weights": 0.11718404293060303, |
|
"eval_debug/raw_losses": 0.6317233443260193, |
|
"eval_logits/chosen": -2.550771474838257, |
|
"eval_logits/rejected": -2.540393590927124, |
|
"eval_logps/chosen": -223.08139038085938, |
|
"eval_logps/rejected": -250.74151611328125, |
|
"eval_loss": 0.07500571012496948, |
|
"eval_rewards/accuracies": 0.6361940503120422, |
|
"eval_rewards/chosen": -0.7883791327476501, |
|
"eval_rewards/margins": 0.21204641461372375, |
|
"eval_rewards/rejected": -1.0004255771636963, |
|
"eval_runtime": 153.1417, |
|
"eval_samples_per_second": 55.844, |
|
"eval_steps_per_second": 0.875, |
|
"step": 200 |
|
}, |
|
{ |
|
"debug/losses": 0.06145762279629707, |
|
"debug/policy_weights": 0.10109380632638931, |
|
"debug/raw_losses": 0.6302995681762695, |
|
"epoch": 0.1671309192200557, |
|
"grad_norm": 1.435571888650618, |
|
"learning_rate": 4.932136424161899e-07, |
|
"logits/chosen": -2.521623134613037, |
|
"logits/rejected": -2.50742769241333, |
|
"logps/chosen": -217.82498168945312, |
|
"logps/rejected": -248.6222381591797, |
|
"loss": 0.0666, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.922324538230896, |
|
"rewards/margins": 0.24683670699596405, |
|
"rewards/rejected": -1.169161319732666, |
|
"step": 210 |
|
}, |
|
{ |
|
"debug/losses": 0.04205631464719772, |
|
"debug/policy_weights": 0.07819141447544098, |
|
"debug/raw_losses": 0.5699876546859741, |
|
"epoch": 0.17508953442101075, |
|
"grad_norm": 1.336610780534668, |
|
"learning_rate": 4.915114123589732e-07, |
|
"logits/chosen": -2.550821304321289, |
|
"logits/rejected": -2.526845932006836, |
|
"logps/chosen": -245.73306274414062, |
|
"logps/rejected": -283.7450256347656, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0883342027664185, |
|
"rewards/margins": 0.3927660584449768, |
|
"rewards/rejected": -1.48110032081604, |
|
"step": 220 |
|
}, |
|
{ |
|
"debug/losses": 0.04529764503240585, |
|
"debug/policy_weights": 0.07175824046134949, |
|
"debug/raw_losses": 0.6204754114151001, |
|
"epoch": 0.18304814962196578, |
|
"grad_norm": 1.4784857016304622, |
|
"learning_rate": 4.896225217511849e-07, |
|
"logits/chosen": -2.553422212600708, |
|
"logits/rejected": -2.5453922748565674, |
|
"logps/chosen": -263.7728576660156, |
|
"logps/rejected": -298.7444152832031, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2447619438171387, |
|
"rewards/margins": 0.2553327679634094, |
|
"rewards/rejected": -1.5000946521759033, |
|
"step": 230 |
|
}, |
|
{ |
|
"debug/losses": 0.06539560854434967, |
|
"debug/policy_weights": 0.10245949029922485, |
|
"debug/raw_losses": 0.6345073580741882, |
|
"epoch": 0.1910067648229208, |
|
"grad_norm": 1.9058271009788008, |
|
"learning_rate": 4.875484304880629e-07, |
|
"logits/chosen": -2.576491594314575, |
|
"logits/rejected": -2.558601140975952, |
|
"logps/chosen": -257.90118408203125, |
|
"logps/rejected": -279.42266845703125, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9614318609237671, |
|
"rewards/margins": 0.2736155390739441, |
|
"rewards/rejected": -1.2350473403930664, |
|
"step": 240 |
|
}, |
|
{ |
|
"debug/losses": 0.06740079820156097, |
|
"debug/policy_weights": 0.11094751209020615, |
|
"debug/raw_losses": 0.6248602271080017, |
|
"epoch": 0.19896538002387584, |
|
"grad_norm": 1.5384599137611568, |
|
"learning_rate": 4.852907416036558e-07, |
|
"logits/chosen": -2.521726131439209, |
|
"logits/rejected": -2.512889862060547, |
|
"logps/chosen": -222.5371856689453, |
|
"logps/rejected": -262.0302429199219, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8324035406112671, |
|
"rewards/margins": 0.2716120183467865, |
|
"rewards/rejected": -1.1040157079696655, |
|
"step": 250 |
|
}, |
|
{ |
|
"debug/losses": 0.06672520935535431, |
|
"debug/policy_weights": 0.11189812421798706, |
|
"debug/raw_losses": 0.5824211239814758, |
|
"epoch": 0.20692399522483088, |
|
"grad_norm": 1.4195949507142585, |
|
"learning_rate": 4.828512000318616e-07, |
|
"logits/chosen": -2.535505533218384, |
|
"logits/rejected": -2.4970450401306152, |
|
"logps/chosen": -254.94741821289062, |
|
"logps/rejected": -278.82440185546875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9024659395217896, |
|
"rewards/margins": 0.3342443108558655, |
|
"rewards/rejected": -1.2367103099822998, |
|
"step": 260 |
|
}, |
|
{ |
|
"debug/losses": 0.05524461343884468, |
|
"debug/policy_weights": 0.09467937052249908, |
|
"debug/raw_losses": 0.600980281829834, |
|
"epoch": 0.2148826104257859, |
|
"grad_norm": 2.09985567586005, |
|
"learning_rate": 4.802316912577946e-07, |
|
"logits/chosen": -2.4608120918273926, |
|
"logits/rejected": -2.4260432720184326, |
|
"logps/chosen": -242.8441925048828, |
|
"logps/rejected": -261.7018127441406, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9519554972648621, |
|
"rewards/margins": 0.32279661297798157, |
|
"rewards/rejected": -1.2747520208358765, |
|
"step": 270 |
|
}, |
|
{ |
|
"debug/losses": 0.07073532044887543, |
|
"debug/policy_weights": 0.11124340444803238, |
|
"debug/raw_losses": 0.6386277079582214, |
|
"epoch": 0.22284122562674094, |
|
"grad_norm": 2.53125193502766, |
|
"learning_rate": 4.774342398605221e-07, |
|
"logits/chosen": -2.4330108165740967, |
|
"logits/rejected": -2.4124581813812256, |
|
"logps/chosen": -238.8041229248047, |
|
"logps/rejected": -258.5404357910156, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9319969415664673, |
|
"rewards/margins": 0.2547939419746399, |
|
"rewards/rejected": -1.186790943145752, |
|
"step": 280 |
|
}, |
|
{ |
|
"debug/losses": 0.06495990604162216, |
|
"debug/policy_weights": 0.10757051408290863, |
|
"debug/raw_losses": 0.5930755138397217, |
|
"epoch": 0.23079984082769597, |
|
"grad_norm": 2.544321863170394, |
|
"learning_rate": 4.744610079482978e-07, |
|
"logits/chosen": -2.4562830924987793, |
|
"logits/rejected": -2.4185662269592285, |
|
"logps/chosen": -274.9333190917969, |
|
"logps/rejected": -300.23583984375, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0925931930541992, |
|
"rewards/margins": 0.3425787389278412, |
|
"rewards/rejected": -1.4351718425750732, |
|
"step": 290 |
|
}, |
|
{ |
|
"debug/losses": 0.04091046005487442, |
|
"debug/policy_weights": 0.07422160357236862, |
|
"debug/raw_losses": 0.5923314094543457, |
|
"epoch": 0.238758456028651, |
|
"grad_norm": 2.8319076712554905, |
|
"learning_rate": 4.713142934875005e-07, |
|
"logits/chosen": -2.3712522983551025, |
|
"logits/rejected": -2.329404592514038, |
|
"logps/chosen": -278.4283752441406, |
|
"logps/rejected": -303.9710388183594, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.30873441696167, |
|
"rewards/margins": 0.4222927689552307, |
|
"rewards/rejected": -1.7310272455215454, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.238758456028651, |
|
"eval_debug/losses": 0.0557919405400753, |
|
"eval_debug/policy_weights": 0.09022855013608932, |
|
"eval_debug/raw_losses": 0.6134340167045593, |
|
"eval_logits/chosen": -2.3552868366241455, |
|
"eval_logits/rejected": -2.339716911315918, |
|
"eval_logps/chosen": -268.7242736816406, |
|
"eval_logps/rejected": -311.3038635253906, |
|
"eval_loss": 0.05731642618775368, |
|
"eval_rewards/accuracies": 0.6567164063453674, |
|
"eval_rewards/chosen": -1.2448077201843262, |
|
"eval_rewards/margins": 0.36124110221862793, |
|
"eval_rewards/rejected": -1.6060487031936646, |
|
"eval_runtime": 153.2008, |
|
"eval_samples_per_second": 55.822, |
|
"eval_steps_per_second": 0.875, |
|
"step": 300 |
|
}, |
|
{ |
|
"debug/losses": 0.0425337590277195, |
|
"debug/policy_weights": 0.06980495154857635, |
|
"debug/raw_losses": 0.588487982749939, |
|
"epoch": 0.24671707122960604, |
|
"grad_norm": 1.7020687918550936, |
|
"learning_rate": 4.679965285265706e-07, |
|
"logits/chosen": -2.2830398082733154, |
|
"logits/rejected": -2.270655632019043, |
|
"logps/chosen": -256.02520751953125, |
|
"logps/rejected": -309.3921813964844, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.3847135305404663, |
|
"rewards/margins": 0.4131029546260834, |
|
"rewards/rejected": -1.7978166341781616, |
|
"step": 310 |
|
}, |
|
{ |
|
"debug/losses": 0.030957188457250595, |
|
"debug/policy_weights": 0.05928220599889755, |
|
"debug/raw_losses": 0.5176903009414673, |
|
"epoch": 0.2546756864305611, |
|
"grad_norm": 1.8070710565204129, |
|
"learning_rate": 4.64510277316316e-07, |
|
"logits/chosen": -2.213348150253296, |
|
"logits/rejected": -2.1686458587646484, |
|
"logps/chosen": -307.92364501953125, |
|
"logps/rejected": -366.7274169921875, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6229156255722046, |
|
"rewards/margins": 0.6521427631378174, |
|
"rewards/rejected": -2.2750582695007324, |
|
"step": 320 |
|
}, |
|
{ |
|
"debug/losses": 0.03477818891406059, |
|
"debug/policy_weights": 0.05532154440879822, |
|
"debug/raw_losses": 0.6181614398956299, |
|
"epoch": 0.26263430163151613, |
|
"grad_norm": 1.6888122864648538, |
|
"learning_rate": 4.6085823432804137e-07, |
|
"logits/chosen": -2.2881710529327393, |
|
"logits/rejected": -2.259243965148926, |
|
"logps/chosen": -319.549560546875, |
|
"logps/rejected": -354.44024658203125, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.757887840270996, |
|
"rewards/margins": 0.3513242304325104, |
|
"rewards/rejected": -2.1092123985290527, |
|
"step": 330 |
|
}, |
|
{ |
|
"debug/losses": 0.036134570837020874, |
|
"debug/policy_weights": 0.0601133331656456, |
|
"debug/raw_losses": 0.6012001633644104, |
|
"epoch": 0.27059291683247116, |
|
"grad_norm": 0.9082619220908468, |
|
"learning_rate": 4.570432221710314e-07, |
|
"logits/chosen": -2.345127820968628, |
|
"logits/rejected": -2.328043222427368, |
|
"logps/chosen": -311.11541748046875, |
|
"logps/rejected": -360.7956237792969, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6493396759033203, |
|
"rewards/margins": 0.4032120704650879, |
|
"rewards/rejected": -2.052551746368408, |
|
"step": 340 |
|
}, |
|
{ |
|
"debug/losses": 0.038389407098293304, |
|
"debug/policy_weights": 0.0665694996714592, |
|
"debug/raw_losses": 0.5971581339836121, |
|
"epoch": 0.2785515320334262, |
|
"grad_norm": 1.7673927299473366, |
|
"learning_rate": 4.5306818941099866e-07, |
|
"logits/chosen": -2.3454818725585938, |
|
"logits/rejected": -2.2886033058166504, |
|
"logps/chosen": -309.58526611328125, |
|
"logps/rejected": -334.3452453613281, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5936106443405151, |
|
"rewards/margins": 0.41180863976478577, |
|
"rewards/rejected": -2.0054192543029785, |
|
"step": 350 |
|
}, |
|
{ |
|
"debug/losses": 0.03970758244395256, |
|
"debug/policy_weights": 0.06552158296108246, |
|
"debug/raw_losses": 0.5963835716247559, |
|
"epoch": 0.28651014723438123, |
|
"grad_norm": 1.165368953491439, |
|
"learning_rate": 4.4893620829118124e-07, |
|
"logits/chosen": -2.3107028007507324, |
|
"logits/rejected": -2.2786478996276855, |
|
"logps/chosen": -296.3944091796875, |
|
"logps/rejected": -329.440185546875, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4465725421905518, |
|
"rewards/margins": 0.39386042952537537, |
|
"rewards/rejected": -1.840433120727539, |
|
"step": 360 |
|
}, |
|
{ |
|
"debug/losses": 0.031909000128507614, |
|
"debug/policy_weights": 0.05640440434217453, |
|
"debug/raw_losses": 0.632055401802063, |
|
"epoch": 0.29446876243533626, |
|
"grad_norm": 1.4365386513442289, |
|
"learning_rate": 4.4465047235785185e-07, |
|
"logits/chosen": -2.3353095054626465, |
|
"logits/rejected": -2.299114942550659, |
|
"logps/chosen": -316.80322265625, |
|
"logps/rejected": -334.6367492675781, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.719588041305542, |
|
"rewards/margins": 0.34410953521728516, |
|
"rewards/rejected": -2.063697576522827, |
|
"step": 370 |
|
}, |
|
{ |
|
"debug/losses": 0.03546866774559021, |
|
"debug/policy_weights": 0.05660950019955635, |
|
"debug/raw_losses": 0.6386741399765015, |
|
"epoch": 0.3024273776362913, |
|
"grad_norm": 1.6981468490942673, |
|
"learning_rate": 4.40214293992074e-07, |
|
"logits/chosen": -2.302638530731201, |
|
"logits/rejected": -2.2782890796661377, |
|
"logps/chosen": -302.3027038574219, |
|
"logps/rejected": -331.7867736816406, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.649095892906189, |
|
"rewards/margins": 0.3226591646671295, |
|
"rewards/rejected": -1.971754789352417, |
|
"step": 380 |
|
}, |
|
{ |
|
"debug/losses": 0.04937135428190231, |
|
"debug/policy_weights": 0.07896497845649719, |
|
"debug/raw_losses": 0.6217660903930664, |
|
"epoch": 0.3103859928372463, |
|
"grad_norm": 1.7063718152736185, |
|
"learning_rate": 4.3563110184961234e-07, |
|
"logits/chosen": -2.327444076538086, |
|
"logits/rejected": -2.305551052093506, |
|
"logps/chosen": -294.81793212890625, |
|
"logps/rejected": -332.69305419921875, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4561359882354736, |
|
"rewards/margins": 0.34932222962379456, |
|
"rewards/rejected": -1.8054578304290771, |
|
"step": 390 |
|
}, |
|
{ |
|
"debug/losses": 0.03210610896348953, |
|
"debug/policy_weights": 0.06029694527387619, |
|
"debug/raw_losses": 0.5555212497711182, |
|
"epoch": 0.31834460803820136, |
|
"grad_norm": 1.053077631697114, |
|
"learning_rate": 4.3090443821097566e-07, |
|
"logits/chosen": -2.3126392364501953, |
|
"logits/rejected": -2.290933847427368, |
|
"logps/chosen": -292.9631042480469, |
|
"logps/rejected": -358.627685546875, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5689239501953125, |
|
"rewards/margins": 0.5151135325431824, |
|
"rewards/rejected": -2.0840373039245605, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31834460803820136, |
|
"eval_debug/losses": 0.028384286910295486, |
|
"eval_debug/policy_weights": 0.04820017144083977, |
|
"eval_debug/raw_losses": 0.59935063123703, |
|
"eval_logits/chosen": -2.3006632328033447, |
|
"eval_logits/rejected": -2.2854816913604736, |
|
"eval_logps/chosen": -321.4913330078125, |
|
"eval_logps/rejected": -364.0837097167969, |
|
"eval_loss": 0.030249595642089844, |
|
"eval_rewards/accuracies": 0.6623134613037109, |
|
"eval_rewards/chosen": -1.7724782228469849, |
|
"eval_rewards/margins": 0.3613690733909607, |
|
"eval_rewards/rejected": -2.133847236633301, |
|
"eval_runtime": 153.4178, |
|
"eval_samples_per_second": 55.743, |
|
"eval_steps_per_second": 0.873, |
|
"step": 400 |
|
}, |
|
{ |
|
"debug/losses": 0.029884925112128258, |
|
"debug/policy_weights": 0.05188853666186333, |
|
"debug/raw_losses": 0.5859032869338989, |
|
"epoch": 0.3263032232391564, |
|
"grad_norm": 1.1682793896386887, |
|
"learning_rate": 4.2603795624364195e-07, |
|
"logits/chosen": -2.279048204421997, |
|
"logits/rejected": -2.2422895431518555, |
|
"logps/chosen": -302.4944152832031, |
|
"logps/rejected": -329.99310302734375, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6612056493759155, |
|
"rewards/margins": 0.3638765215873718, |
|
"rewards/rejected": -2.0250821113586426, |
|
"step": 410 |
|
}, |
|
{ |
|
"debug/losses": 0.039774149656295776, |
|
"debug/policy_weights": 0.06452381610870361, |
|
"debug/raw_losses": 0.5788149833679199, |
|
"epoch": 0.3342618384401114, |
|
"grad_norm": 1.1186339494698643, |
|
"learning_rate": 4.210354171785795e-07, |
|
"logits/chosen": -2.3332016468048096, |
|
"logits/rejected": -2.331998109817505, |
|
"logps/chosen": -291.0079040527344, |
|
"logps/rejected": -344.42462158203125, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.488434910774231, |
|
"rewards/margins": 0.40906715393066406, |
|
"rewards/rejected": -1.8975019454956055, |
|
"step": 420 |
|
}, |
|
{ |
|
"debug/losses": 0.03884187713265419, |
|
"debug/policy_weights": 0.06631166487932205, |
|
"debug/raw_losses": 0.5891371369361877, |
|
"epoch": 0.34222045364106646, |
|
"grad_norm": 1.8153925533673183, |
|
"learning_rate": 4.15900687403248e-07, |
|
"logits/chosen": -2.3279812335968018, |
|
"logits/rejected": -2.3141000270843506, |
|
"logps/chosen": -287.6651916503906, |
|
"logps/rejected": -336.90863037109375, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4973087310791016, |
|
"rewards/margins": 0.46901196241378784, |
|
"rewards/rejected": -1.9663206338882446, |
|
"step": 430 |
|
}, |
|
{ |
|
"debug/losses": 0.03900137543678284, |
|
"debug/policy_weights": 0.06590630114078522, |
|
"debug/raw_losses": 0.6019418239593506, |
|
"epoch": 0.3501790688420215, |
|
"grad_norm": 1.3373117259952034, |
|
"learning_rate": 4.1063773547332584e-07, |
|
"logits/chosen": -2.332784652709961, |
|
"logits/rejected": -2.311373233795166, |
|
"logps/chosen": -290.93402099609375, |
|
"logps/rejected": -337.70306396484375, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4399511814117432, |
|
"rewards/margins": 0.4576480984687805, |
|
"rewards/rejected": -1.897599458694458, |
|
"step": 440 |
|
}, |
|
{ |
|
"debug/losses": 0.034195203334093094, |
|
"debug/policy_weights": 0.06440727412700653, |
|
"debug/raw_losses": 0.5752583742141724, |
|
"epoch": 0.3581376840429765, |
|
"grad_norm": 1.4642214637111448, |
|
"learning_rate": 4.0525062904547276e-07, |
|
"logits/chosen": -2.274158000946045, |
|
"logits/rejected": -2.241867780685425, |
|
"logps/chosen": -274.84783935546875, |
|
"logps/rejected": -318.5943908691406, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4228484630584717, |
|
"rewards/margins": 0.5137365460395813, |
|
"rewards/rejected": -1.9365848302841187, |
|
"step": 450 |
|
}, |
|
{ |
|
"debug/losses": 0.042845163494348526, |
|
"debug/policy_weights": 0.07054711878299713, |
|
"debug/raw_losses": 0.5637282729148865, |
|
"epoch": 0.36609629924393156, |
|
"grad_norm": 1.493914915308785, |
|
"learning_rate": 3.997435317334988e-07, |
|
"logits/chosen": -2.3144097328186035, |
|
"logits/rejected": -2.2954559326171875, |
|
"logps/chosen": -297.01629638671875, |
|
"logps/rejected": -354.07861328125, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5000154972076416, |
|
"rewards/margins": 0.5090978145599365, |
|
"rewards/rejected": -2.009113311767578, |
|
"step": 460 |
|
}, |
|
{ |
|
"debug/losses": 0.03523220494389534, |
|
"debug/policy_weights": 0.060066692531108856, |
|
"debug/raw_losses": 0.5853220820426941, |
|
"epoch": 0.3740549144448866, |
|
"grad_norm": 1.5606293995092184, |
|
"learning_rate": 3.941206998903701e-07, |
|
"logits/chosen": -2.290365219116211, |
|
"logits/rejected": -2.2592685222625732, |
|
"logps/chosen": -331.3341369628906, |
|
"logps/rejected": -367.52630615234375, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.7656469345092773, |
|
"rewards/margins": 0.38830217719078064, |
|
"rewards/rejected": -2.153949022293091, |
|
"step": 470 |
|
}, |
|
{ |
|
"debug/losses": 0.042910732328891754, |
|
"debug/policy_weights": 0.06740695238113403, |
|
"debug/raw_losses": 0.5751327276229858, |
|
"epoch": 0.3820135296458416, |
|
"grad_norm": 2.0400801936994357, |
|
"learning_rate": 3.8838647931853684e-07, |
|
"logits/chosen": -2.2397708892822266, |
|
"logits/rejected": -2.2155957221984863, |
|
"logps/chosen": -265.8719177246094, |
|
"logps/rejected": -317.6902770996094, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4397175312042236, |
|
"rewards/margins": 0.4852531850337982, |
|
"rewards/rejected": -1.9249706268310547, |
|
"step": 480 |
|
}, |
|
{ |
|
"debug/losses": 0.040277738124132156, |
|
"debug/policy_weights": 0.07625563442707062, |
|
"debug/raw_losses": 0.5467380881309509, |
|
"epoch": 0.38997214484679665, |
|
"grad_norm": 1.5717280943877756, |
|
"learning_rate": 3.825453019111281e-07, |
|
"logits/chosen": -2.246934175491333, |
|
"logits/rejected": -2.2185163497924805, |
|
"logps/chosen": -281.0770568847656, |
|
"logps/rejected": -348.90753173828125, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3660808801651, |
|
"rewards/margins": 0.5780023336410522, |
|
"rewards/rejected": -1.9440832138061523, |
|
"step": 490 |
|
}, |
|
{ |
|
"debug/losses": 0.0474771223962307, |
|
"debug/policy_weights": 0.08785078674554825, |
|
"debug/raw_losses": 0.5391517281532288, |
|
"epoch": 0.3979307600477517, |
|
"grad_norm": 1.89593573498899, |
|
"learning_rate": 3.7660168222660824e-07, |
|
"logits/chosen": -2.312988519668579, |
|
"logits/rejected": -2.2645742893218994, |
|
"logps/chosen": -305.178466796875, |
|
"logps/rejected": -341.88360595703125, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3672921657562256, |
|
"rewards/margins": 0.5291160941123962, |
|
"rewards/rejected": -1.8964084386825562, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3979307600477517, |
|
"eval_debug/losses": 0.0406743586063385, |
|
"eval_debug/policy_weights": 0.07022719830274582, |
|
"eval_debug/raw_losses": 0.5892212986946106, |
|
"eval_logits/chosen": -2.264326333999634, |
|
"eval_logits/rejected": -2.240631580352783, |
|
"eval_logps/chosen": -294.8950500488281, |
|
"eval_logps/rejected": -349.04681396484375, |
|
"eval_loss": 0.043212514370679855, |
|
"eval_rewards/accuracies": 0.6800373196601868, |
|
"eval_rewards/chosen": -1.5065159797668457, |
|
"eval_rewards/margins": 0.4769621789455414, |
|
"eval_rewards/rejected": -1.9834781885147095, |
|
"eval_runtime": 153.3333, |
|
"eval_samples_per_second": 55.774, |
|
"eval_steps_per_second": 0.874, |
|
"step": 500 |
|
}, |
|
{ |
|
"debug/losses": 0.04252048209309578, |
|
"debug/policy_weights": 0.07039301842451096, |
|
"debug/raw_losses": 0.6011983156204224, |
|
"epoch": 0.4058893752487067, |
|
"grad_norm": 2.2077694886776533, |
|
"learning_rate": 3.705602139995416e-07, |
|
"logits/chosen": -2.2369987964630127, |
|
"logits/rejected": -2.203916072845459, |
|
"logps/chosen": -297.1964111328125, |
|
"logps/rejected": -339.70843505859375, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4493403434753418, |
|
"rewards/margins": 0.44409775733947754, |
|
"rewards/rejected": -1.8934379816055298, |
|
"step": 510 |
|
}, |
|
{ |
|
"debug/losses": 0.045193079859018326, |
|
"debug/policy_weights": 0.07069429010152817, |
|
"debug/raw_losses": 0.6190410256385803, |
|
"epoch": 0.41384799044966175, |
|
"grad_norm": 1.3854536697917073, |
|
"learning_rate": 3.6442556659016475e-07, |
|
"logits/chosen": -2.264357328414917, |
|
"logits/rejected": -2.24722957611084, |
|
"logps/chosen": -291.65087890625, |
|
"logps/rejected": -329.7967224121094, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4414405822753906, |
|
"rewards/margins": 0.3577377200126648, |
|
"rewards/rejected": -1.7991783618927002, |
|
"step": 520 |
|
}, |
|
{ |
|
"debug/losses": 0.03731568530201912, |
|
"debug/policy_weights": 0.06065317243337631, |
|
"debug/raw_losses": 0.6242992281913757, |
|
"epoch": 0.4218066056506168, |
|
"grad_norm": 1.0635014002404581, |
|
"learning_rate": 3.582024813755076e-07, |
|
"logits/chosen": -2.1829025745391846, |
|
"logits/rejected": -2.159717082977295, |
|
"logps/chosen": -333.71392822265625, |
|
"logps/rejected": -360.9361877441406, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.8308143615722656, |
|
"rewards/margins": 0.3124064803123474, |
|
"rewards/rejected": -2.143220901489258, |
|
"step": 530 |
|
}, |
|
{ |
|
"debug/losses": 0.02375711500644684, |
|
"debug/policy_weights": 0.03976626321673393, |
|
"debug/raw_losses": 0.5894423127174377, |
|
"epoch": 0.4297652208515718, |
|
"grad_norm": 0.8777673540587257, |
|
"learning_rate": 3.5189576808485404e-07, |
|
"logits/chosen": -2.1102588176727295, |
|
"logits/rejected": -2.0886712074279785, |
|
"logps/chosen": -356.2108154296875, |
|
"logps/rejected": -402.5682373046875, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0562400817871094, |
|
"rewards/margins": 0.4145965576171875, |
|
"rewards/rejected": -2.470837116241455, |
|
"step": 540 |
|
}, |
|
{ |
|
"debug/losses": 0.026336893439292908, |
|
"debug/policy_weights": 0.04056663066148758, |
|
"debug/raw_losses": 0.6279866099357605, |
|
"epoch": 0.43772383605252685, |
|
"grad_norm": 1.3575157514235345, |
|
"learning_rate": 3.4551030108237433e-07, |
|
"logits/chosen": -2.0719475746154785, |
|
"logits/rejected": -2.0144124031066895, |
|
"logps/chosen": -366.66064453125, |
|
"logps/rejected": -384.7783508300781, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.140892505645752, |
|
"rewards/margins": 0.3489706516265869, |
|
"rewards/rejected": -2.4898629188537598, |
|
"step": 550 |
|
}, |
|
{ |
|
"debug/losses": 0.023122292011976242, |
|
"debug/policy_weights": 0.045903079211711884, |
|
"debug/raw_losses": 0.567331850528717, |
|
"epoch": 0.4456824512534819, |
|
"grad_norm": 1.197996404456996, |
|
"learning_rate": 3.390510155998023e-07, |
|
"logits/chosen": -2.1358299255371094, |
|
"logits/rejected": -2.1018567085266113, |
|
"logps/chosen": -359.2723693847656, |
|
"logps/rejected": -411.61187744140625, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.050138473510742, |
|
"rewards/margins": 0.5038273334503174, |
|
"rewards/rejected": -2.5539660453796387, |
|
"step": 560 |
|
}, |
|
{ |
|
"debug/losses": 0.029903370887041092, |
|
"debug/policy_weights": 0.050540171563625336, |
|
"debug/raw_losses": 0.5872690081596375, |
|
"epoch": 0.4536410664544369, |
|
"grad_norm": 1.5301014116065141, |
|
"learning_rate": 3.325229039220684e-07, |
|
"logits/chosen": -2.0992767810821533, |
|
"logits/rejected": -2.076805830001831, |
|
"logps/chosen": -361.57342529296875, |
|
"logps/rejected": -405.8957214355469, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0752933025360107, |
|
"rewards/margins": 0.4622616171836853, |
|
"rewards/rejected": -2.537554979324341, |
|
"step": 570 |
|
}, |
|
{ |
|
"debug/losses": 0.03479158133268356, |
|
"debug/policy_weights": 0.053991250693798065, |
|
"debug/raw_losses": 0.604568600654602, |
|
"epoch": 0.46159968165539195, |
|
"grad_norm": 1.41615105697594, |
|
"learning_rate": 3.2593101152883795e-07, |
|
"logits/chosen": -2.110628366470337, |
|
"logits/rejected": -2.0749945640563965, |
|
"logps/chosen": -349.9741516113281, |
|
"logps/rejected": -388.0547180175781, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.010434150695801, |
|
"rewards/margins": 0.40317314863204956, |
|
"rewards/rejected": -2.413607120513916, |
|
"step": 580 |
|
}, |
|
{ |
|
"debug/losses": 0.03422771021723747, |
|
"debug/policy_weights": 0.05810137465596199, |
|
"debug/raw_losses": 0.5813102126121521, |
|
"epoch": 0.469558296856347, |
|
"grad_norm": 1.4269890028062786, |
|
"learning_rate": 3.192804331949349e-07, |
|
"logits/chosen": -2.1150734424591064, |
|
"logits/rejected": -2.0873661041259766, |
|
"logps/chosen": -322.30841064453125, |
|
"logps/rejected": -366.505859375, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7967876195907593, |
|
"rewards/margins": 0.4749871790409088, |
|
"rewards/rejected": -2.2717747688293457, |
|
"step": 590 |
|
}, |
|
{ |
|
"debug/losses": 0.03016613982617855, |
|
"debug/policy_weights": 0.05547459051012993, |
|
"debug/raw_losses": 0.5693393349647522, |
|
"epoch": 0.477516912057302, |
|
"grad_norm": 1.2863972010725921, |
|
"learning_rate": 3.125763090526674e-07, |
|
"logits/chosen": -2.1585888862609863, |
|
"logits/rejected": -2.11460542678833, |
|
"logps/chosen": -328.352783203125, |
|
"logps/rejected": -376.36846923828125, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.735060691833496, |
|
"rewards/margins": 0.5392208099365234, |
|
"rewards/rejected": -2.2742817401885986, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.477516912057302, |
|
"eval_debug/losses": 0.030185788869857788, |
|
"eval_debug/policy_weights": 0.05269056186079979, |
|
"eval_debug/raw_losses": 0.5812317132949829, |
|
"eval_logits/chosen": -2.13511323928833, |
|
"eval_logits/rejected": -2.11336612701416, |
|
"eval_logps/chosen": -327.0503234863281, |
|
"eval_logps/rejected": -381.1920471191406, |
|
"eval_loss": 0.032057907432317734, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -1.8280682563781738, |
|
"eval_rewards/margins": 0.4768621623516083, |
|
"eval_rewards/rejected": -2.3049304485321045, |
|
"eval_runtime": 153.2709, |
|
"eval_samples_per_second": 55.797, |
|
"eval_steps_per_second": 0.874, |
|
"step": 600 |
|
}, |
|
{ |
|
"debug/losses": 0.02869519032537937, |
|
"debug/policy_weights": 0.045216310769319534, |
|
"debug/raw_losses": 0.6201798319816589, |
|
"epoch": 0.48547552725825704, |
|
"grad_norm": 1.0413008003733695, |
|
"learning_rate": 3.0582382061909623e-07, |
|
"logits/chosen": -2.144442081451416, |
|
"logits/rejected": -2.114197015762329, |
|
"logps/chosen": -334.3606262207031, |
|
"logps/rejected": -370.7452087402344, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9323441982269287, |
|
"rewards/margins": 0.3452734053134918, |
|
"rewards/rejected": -2.277617931365967, |
|
"step": 610 |
|
}, |
|
{ |
|
"debug/losses": 0.02686266042292118, |
|
"debug/policy_weights": 0.04530490189790726, |
|
"debug/raw_losses": 0.5958613753318787, |
|
"epoch": 0.4934341424592121, |
|
"grad_norm": 1.20614262286929, |
|
"learning_rate": 2.9902818679131775e-07, |
|
"logits/chosen": -2.1490912437438965, |
|
"logits/rejected": -2.1047191619873047, |
|
"logps/chosen": -345.37994384765625, |
|
"logps/rejected": -382.47808837890625, |
|
"loss": 0.0274, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9065704345703125, |
|
"rewards/margins": 0.42660021781921387, |
|
"rewards/rejected": -2.3331706523895264, |
|
"step": 620 |
|
}, |
|
{ |
|
"debug/losses": 0.02654409408569336, |
|
"debug/policy_weights": 0.04730083793401718, |
|
"debug/raw_losses": 0.5461606979370117, |
|
"epoch": 0.5013927576601671, |
|
"grad_norm": 1.8099237202122547, |
|
"learning_rate": 2.921946598128571e-07, |
|
"logits/chosen": -2.12278413772583, |
|
"logits/rejected": -2.0931880474090576, |
|
"logps/chosen": -322.9065246582031, |
|
"logps/rejected": -354.53228759765625, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7012741565704346, |
|
"rewards/margins": 0.5234018564224243, |
|
"rewards/rejected": -2.2246758937835693, |
|
"step": 630 |
|
}, |
|
{ |
|
"debug/losses": 0.037343092262744904, |
|
"debug/policy_weights": 0.053458474576473236, |
|
"debug/raw_losses": 0.6420666575431824, |
|
"epoch": 0.5093513728611222, |
|
"grad_norm": 1.6865475152716571, |
|
"learning_rate": 2.8532852121428733e-07, |
|
"logits/chosen": -2.1078145503997803, |
|
"logits/rejected": -2.0716660022735596, |
|
"logps/chosen": -296.3316345214844, |
|
"logps/rejected": -330.1187744140625, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.6080780029296875, |
|
"rewards/margins": 0.36063456535339355, |
|
"rewards/rejected": -1.968712568283081, |
|
"step": 640 |
|
}, |
|
{ |
|
"debug/losses": 0.035708196461200714, |
|
"debug/policy_weights": 0.05902569368481636, |
|
"debug/raw_losses": 0.5638469457626343, |
|
"epoch": 0.5173099880620772, |
|
"grad_norm": 1.7034498752676623, |
|
"learning_rate": 2.7843507773121414e-07, |
|
"logits/chosen": -2.1405997276306152, |
|
"logits/rejected": -2.108238697052002, |
|
"logps/chosen": -292.4433288574219, |
|
"logps/rejected": -354.64459228515625, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.5522750616073608, |
|
"rewards/margins": 0.5501760840415955, |
|
"rewards/rejected": -2.1024510860443115, |
|
"step": 650 |
|
}, |
|
{ |
|
"debug/losses": 0.029026631265878677, |
|
"debug/policy_weights": 0.05990219861268997, |
|
"debug/raw_losses": 0.531947135925293, |
|
"epoch": 0.5252686032630323, |
|
"grad_norm": 1.5938360839379533, |
|
"learning_rate": 2.715196572027789e-07, |
|
"logits/chosen": -2.1009621620178223, |
|
"logits/rejected": -2.0724639892578125, |
|
"logps/chosen": -312.18450927734375, |
|
"logps/rejected": -384.11865234375, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6771888732910156, |
|
"rewards/margins": 0.6227995753288269, |
|
"rewards/rejected": -2.2999885082244873, |
|
"step": 660 |
|
}, |
|
{ |
|
"debug/losses": 0.029919123277068138, |
|
"debug/policy_weights": 0.05170232057571411, |
|
"debug/raw_losses": 0.6014626026153564, |
|
"epoch": 0.5332272184639872, |
|
"grad_norm": 1.5125072558657653, |
|
"learning_rate": 2.645876044538521e-07, |
|
"logits/chosen": -2.1004645824432373, |
|
"logits/rejected": -2.0667197704315186, |
|
"logps/chosen": -330.8302307128906, |
|
"logps/rejected": -369.1127624511719, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8169825077056885, |
|
"rewards/margins": 0.46573132276535034, |
|
"rewards/rejected": -2.2827141284942627, |
|
"step": 670 |
|
}, |
|
{ |
|
"debug/losses": 0.038384515792131424, |
|
"debug/policy_weights": 0.06169766187667847, |
|
"debug/raw_losses": 0.5807624459266663, |
|
"epoch": 0.5411858336649423, |
|
"grad_norm": 1.2825455723938728, |
|
"learning_rate": 2.5764427716409815e-07, |
|
"logits/chosen": -2.126770496368408, |
|
"logits/rejected": -2.0941073894500732, |
|
"logps/chosen": -320.5771484375, |
|
"logps/rejected": -363.2606506347656, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6580756902694702, |
|
"rewards/margins": 0.4379847049713135, |
|
"rewards/rejected": -2.096060276031494, |
|
"step": 680 |
|
}, |
|
{ |
|
"debug/losses": 0.03115171566605568, |
|
"debug/policy_weights": 0.051781851798295975, |
|
"debug/raw_losses": 0.6048017144203186, |
|
"epoch": 0.5491444488658973, |
|
"grad_norm": 1.312607359472945, |
|
"learning_rate": 2.5069504172710494e-07, |
|
"logits/chosen": -2.095470905303955, |
|
"logits/rejected": -2.08461332321167, |
|
"logps/chosen": -335.7181091308594, |
|
"logps/rejected": -388.94171142578125, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.873830795288086, |
|
"rewards/margins": 0.3542255759239197, |
|
"rewards/rejected": -2.2280564308166504, |
|
"step": 690 |
|
}, |
|
{ |
|
"debug/losses": 0.032391004264354706, |
|
"debug/policy_weights": 0.04837799817323685, |
|
"debug/raw_losses": 0.63223797082901, |
|
"epoch": 0.5571030640668524, |
|
"grad_norm": 1.2365349964087866, |
|
"learning_rate": 2.4374526910277886e-07, |
|
"logits/chosen": -2.010230541229248, |
|
"logits/rejected": -1.984021782875061, |
|
"logps/chosen": -325.3149108886719, |
|
"logps/rejected": -352.573974609375, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8190257549285889, |
|
"rewards/margins": 0.3168783187866211, |
|
"rewards/rejected": -2.13590407371521, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5571030640668524, |
|
"eval_debug/losses": 0.026820143684744835, |
|
"eval_debug/policy_weights": 0.04667918384075165, |
|
"eval_debug/raw_losses": 0.5765854716300964, |
|
"eval_logits/chosen": -1.9899719953536987, |
|
"eval_logits/rejected": -1.9676791429519653, |
|
"eval_logps/chosen": -328.6565856933594, |
|
"eval_logps/rejected": -378.7768859863281, |
|
"eval_loss": 0.028307339176535606, |
|
"eval_rewards/accuracies": 0.6940298676490784, |
|
"eval_rewards/chosen": -1.8441307544708252, |
|
"eval_rewards/margins": 0.4366479218006134, |
|
"eval_rewards/rejected": -2.280778408050537, |
|
"eval_runtime": 153.3492, |
|
"eval_samples_per_second": 55.768, |
|
"eval_steps_per_second": 0.874, |
|
"step": 700 |
|
}, |
|
{ |
|
"debug/losses": 0.026440713554620743, |
|
"debug/policy_weights": 0.04469674825668335, |
|
"debug/raw_losses": 0.5913997292518616, |
|
"epoch": 0.5650616792678074, |
|
"grad_norm": 1.7865897282724608, |
|
"learning_rate": 2.368003306662104e-07, |
|
"logits/chosen": -1.9654159545898438, |
|
"logits/rejected": -1.9266160726547241, |
|
"logps/chosen": -352.88787841796875, |
|
"logps/rejected": -381.1947326660156, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9729446172714233, |
|
"rewards/margins": 0.3949921727180481, |
|
"rewards/rejected": -2.367936611175537, |
|
"step": 710 |
|
}, |
|
{ |
|
"debug/losses": 0.028649378567934036, |
|
"debug/policy_weights": 0.05744954198598862, |
|
"debug/raw_losses": 0.5734940767288208, |
|
"epoch": 0.5730202944687625, |
|
"grad_norm": 1.4165928688447773, |
|
"learning_rate": 2.2986559405621886e-07, |
|
"logits/chosen": -1.9602625370025635, |
|
"logits/rejected": -1.916290283203125, |
|
"logps/chosen": -343.48406982421875, |
|
"logps/rejected": -386.95989990234375, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8104501962661743, |
|
"rewards/margins": 0.4908137917518616, |
|
"rewards/rejected": -2.3012638092041016, |
|
"step": 720 |
|
}, |
|
{ |
|
"debug/losses": 0.022339364513754845, |
|
"debug/policy_weights": 0.03942544013261795, |
|
"debug/raw_losses": 0.5562313795089722, |
|
"epoch": 0.5809789096697174, |
|
"grad_norm": 1.382543972122978, |
|
"learning_rate": 2.2294641902678443e-07, |
|
"logits/chosen": -1.9571382999420166, |
|
"logits/rejected": -1.9248653650283813, |
|
"logps/chosen": -327.402587890625, |
|
"logps/rejected": -379.947998046875, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9223827123641968, |
|
"rewards/margins": 0.558407723903656, |
|
"rewards/rejected": -2.480790615081787, |
|
"step": 730 |
|
}, |
|
{ |
|
"debug/losses": 0.01860680803656578, |
|
"debug/policy_weights": 0.03574604541063309, |
|
"debug/raw_losses": 0.5566806793212891, |
|
"epoch": 0.5889375248706725, |
|
"grad_norm": 1.468728949042428, |
|
"learning_rate": 2.160481533045751e-07, |
|
"logits/chosen": -1.921987533569336, |
|
"logits/rejected": -1.873268485069275, |
|
"logps/chosen": -362.5789489746094, |
|
"logps/rejected": -399.66748046875, |
|
"loss": 0.0225, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1244444847106934, |
|
"rewards/margins": 0.46507740020751953, |
|
"rewards/rejected": -2.589521884918213, |
|
"step": 740 |
|
}, |
|
{ |
|
"debug/losses": 0.021815910935401917, |
|
"debug/policy_weights": 0.039903424680233, |
|
"debug/raw_losses": 0.5822666883468628, |
|
"epoch": 0.5968961400716275, |
|
"grad_norm": 1.4132116728769042, |
|
"learning_rate": 2.0917612845576882e-07, |
|
"logits/chosen": -1.9699146747589111, |
|
"logits/rejected": -1.9025242328643799, |
|
"logps/chosen": -354.04644775390625, |
|
"logps/rejected": -385.09075927734375, |
|
"loss": 0.024, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.975638747215271, |
|
"rewards/margins": 0.5243066549301147, |
|
"rewards/rejected": -2.4999454021453857, |
|
"step": 750 |
|
}, |
|
{ |
|
"debug/losses": 0.02490827441215515, |
|
"debug/policy_weights": 0.04055667296051979, |
|
"debug/raw_losses": 0.6043840050697327, |
|
"epoch": 0.6048547552725826, |
|
"grad_norm": 1.5603724313512064, |
|
"learning_rate": 2.0233565576536564e-07, |
|
"logits/chosen": -1.9306650161743164, |
|
"logits/rejected": -1.9156144857406616, |
|
"logps/chosen": -329.97711181640625, |
|
"logps/rejected": -378.48834228515625, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8800745010375977, |
|
"rewards/margins": 0.4346494674682617, |
|
"rewards/rejected": -2.3147239685058594, |
|
"step": 760 |
|
}, |
|
{ |
|
"debug/losses": 0.02524595521390438, |
|
"debug/policy_weights": 0.04561594873666763, |
|
"debug/raw_losses": 0.5465348958969116, |
|
"epoch": 0.6128133704735376, |
|
"grad_norm": 1.4684425781915056, |
|
"learning_rate": 1.9553202213217537e-07, |
|
"logits/chosen": -1.9351495504379272, |
|
"logits/rejected": -1.901551604270935, |
|
"logps/chosen": -314.9850769042969, |
|
"logps/rejected": -377.1813049316406, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8458607196807861, |
|
"rewards/margins": 0.5721122026443481, |
|
"rewards/rejected": -2.417973041534424, |
|
"step": 770 |
|
}, |
|
{ |
|
"debug/losses": 0.024128446355462074, |
|
"debug/policy_weights": 0.04677557945251465, |
|
"debug/raw_losses": 0.5592825412750244, |
|
"epoch": 0.6207719856744927, |
|
"grad_norm": 1.055203900824062, |
|
"learning_rate": 1.887704859826528e-07, |
|
"logits/chosen": -1.976380705833435, |
|
"logits/rejected": -1.9334523677825928, |
|
"logps/chosen": -364.484375, |
|
"logps/rejected": -424.99957275390625, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.032318353652954, |
|
"rewards/margins": 0.5795284509658813, |
|
"rewards/rejected": -2.611846923828125, |
|
"step": 780 |
|
}, |
|
{ |
|
"debug/losses": 0.024484191089868546, |
|
"debug/policy_weights": 0.04067380353808403, |
|
"debug/raw_losses": 0.5770185589790344, |
|
"epoch": 0.6287306008754476, |
|
"grad_norm": 1.216416574340141, |
|
"learning_rate": 1.8205627320673836e-07, |
|
"logits/chosen": -1.9579511880874634, |
|
"logits/rejected": -1.9054477214813232, |
|
"logps/chosen": -366.54754638671875, |
|
"logps/rejected": -416.3515625, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1298985481262207, |
|
"rewards/margins": 0.5691512823104858, |
|
"rewards/rejected": -2.699049472808838, |
|
"step": 790 |
|
}, |
|
{ |
|
"debug/losses": 0.02110939845442772, |
|
"debug/policy_weights": 0.03945222496986389, |
|
"debug/raw_losses": 0.6036224365234375, |
|
"epoch": 0.6366892160764027, |
|
"grad_norm": 1.1130648846777194, |
|
"learning_rate": 1.7539457311884675e-07, |
|
"logits/chosen": -1.9825928211212158, |
|
"logits/rejected": -1.9254627227783203, |
|
"logps/chosen": -373.7814025878906, |
|
"logps/rejected": -404.980712890625, |
|
"loss": 0.023, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.1609199047088623, |
|
"rewards/margins": 0.4238300919532776, |
|
"rewards/rejected": -2.584749698638916, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6366892160764027, |
|
"eval_debug/losses": 0.02278236113488674, |
|
"eval_debug/policy_weights": 0.04001852124929428, |
|
"eval_debug/raw_losses": 0.5787393450737, |
|
"eval_logits/chosen": -1.9514880180358887, |
|
"eval_logits/rejected": -1.9267523288726807, |
|
"eval_logps/chosen": -350.94134521484375, |
|
"eval_logps/rejected": -407.4722595214844, |
|
"eval_loss": 0.02435474470257759, |
|
"eval_rewards/accuracies": 0.6884328126907349, |
|
"eval_rewards/chosen": -2.0669784545898438, |
|
"eval_rewards/margins": 0.5007539987564087, |
|
"eval_rewards/rejected": -2.567732334136963, |
|
"eval_runtime": 153.2113, |
|
"eval_samples_per_second": 55.818, |
|
"eval_steps_per_second": 0.875, |
|
"step": 800 |
|
}, |
|
{ |
|
"debug/losses": 0.031348057091236115, |
|
"debug/policy_weights": 0.05042605847120285, |
|
"debug/raw_losses": 0.617703914642334, |
|
"epoch": 0.6446478312773577, |
|
"grad_norm": 1.2829017385682178, |
|
"learning_rate": 1.687905344471226e-07, |
|
"logits/chosen": -1.9726717472076416, |
|
"logits/rejected": -1.9506819248199463, |
|
"logps/chosen": -356.71759033203125, |
|
"logps/rejected": -399.9237060546875, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9306268692016602, |
|
"rewards/margins": 0.41608279943466187, |
|
"rewards/rejected": -2.346709966659546, |
|
"step": 810 |
|
}, |
|
{ |
|
"debug/losses": 0.031708650290966034, |
|
"debug/policy_weights": 0.04735485464334488, |
|
"debug/raw_losses": 0.6054385900497437, |
|
"epoch": 0.6526064464783128, |
|
"grad_norm": 1.2622583124342848, |
|
"learning_rate": 1.6224926135406693e-07, |
|
"logits/chosen": -1.9768617153167725, |
|
"logits/rejected": -1.9380643367767334, |
|
"logps/chosen": -340.0035095214844, |
|
"logps/rejected": -370.344970703125, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.954310655593872, |
|
"rewards/margins": 0.4057098925113678, |
|
"rewards/rejected": -2.360020399093628, |
|
"step": 820 |
|
}, |
|
{ |
|
"debug/losses": 0.028013870120048523, |
|
"debug/policy_weights": 0.04989269748330116, |
|
"debug/raw_losses": 0.5807904005050659, |
|
"epoch": 0.6605650616792678, |
|
"grad_norm": 1.2623708525947415, |
|
"learning_rate": 1.557758094916053e-07, |
|
"logits/chosen": -1.9997406005859375, |
|
"logits/rejected": -1.9534574747085571, |
|
"logps/chosen": -351.2829895019531, |
|
"logps/rejected": -399.71087646484375, |
|
"loss": 0.0271, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9434627294540405, |
|
"rewards/margins": 0.4895332455635071, |
|
"rewards/rejected": -2.4329960346221924, |
|
"step": 830 |
|
}, |
|
{ |
|
"debug/losses": 0.031273625791072845, |
|
"debug/policy_weights": 0.05692868307232857, |
|
"debug/raw_losses": 0.5609691739082336, |
|
"epoch": 0.6685236768802229, |
|
"grad_norm": 1.0866287225682587, |
|
"learning_rate": 1.4937518209365108e-07, |
|
"logits/chosen": -2.0409183502197266, |
|
"logits/rejected": -1.9729896783828735, |
|
"logps/chosen": -362.9765319824219, |
|
"logps/rejected": -389.9468994140625, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8453567028045654, |
|
"rewards/margins": 0.5211337804794312, |
|
"rewards/rejected": -2.366490602493286, |
|
"step": 840 |
|
}, |
|
{ |
|
"debug/losses": 0.029767373576760292, |
|
"debug/policy_weights": 0.052564360201358795, |
|
"debug/raw_losses": 0.6102725863456726, |
|
"epoch": 0.6764822920811778, |
|
"grad_norm": 1.394943527609451, |
|
"learning_rate": 1.4305232610918045e-07, |
|
"logits/chosen": -2.0147321224212646, |
|
"logits/rejected": -1.9801139831542969, |
|
"logps/chosen": -339.0684509277344, |
|
"logps/rejected": -375.1840515136719, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.9301865100860596, |
|
"rewards/margins": 0.3946223855018616, |
|
"rewards/rejected": -2.3248085975646973, |
|
"step": 850 |
|
}, |
|
{ |
|
"debug/losses": 0.03315902501344681, |
|
"debug/policy_weights": 0.05502952262759209, |
|
"debug/raw_losses": 0.5980508923530579, |
|
"epoch": 0.6844409072821329, |
|
"grad_norm": 1.1275097393699727, |
|
"learning_rate": 1.3681212837880977e-07, |
|
"logits/chosen": -2.002835512161255, |
|
"logits/rejected": -1.995160698890686, |
|
"logps/chosen": -307.8199157714844, |
|
"logps/rejected": -369.13763427734375, |
|
"loss": 0.0318, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7375946044921875, |
|
"rewards/margins": 0.43073099851608276, |
|
"rewards/rejected": -2.168325901031494, |
|
"step": 860 |
|
}, |
|
{ |
|
"debug/losses": 0.029089778661727905, |
|
"debug/policy_weights": 0.050317078828811646, |
|
"debug/raw_losses": 0.5906243324279785, |
|
"epoch": 0.6923995224830879, |
|
"grad_norm": 1.4878858135274058, |
|
"learning_rate": 1.3065941185782977e-07, |
|
"logits/chosen": -1.9642223119735718, |
|
"logits/rejected": -1.927920937538147, |
|
"logps/chosen": -334.91162109375, |
|
"logps/rejected": -361.5536193847656, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8268492221832275, |
|
"rewards/margins": 0.42832955718040466, |
|
"rewards/rejected": -2.255178928375244, |
|
"step": 870 |
|
}, |
|
{ |
|
"debug/losses": 0.029998168349266052, |
|
"debug/policy_weights": 0.0653611272573471, |
|
"debug/raw_losses": 0.47929373383522034, |
|
"epoch": 0.700358137684043, |
|
"grad_norm": 1.29187319176411, |
|
"learning_rate": 1.2459893188861613e-07, |
|
"logits/chosen": -2.0278587341308594, |
|
"logits/rejected": -1.9883801937103271, |
|
"logps/chosen": -307.253662109375, |
|
"logps/rejected": -392.42962646484375, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5876758098602295, |
|
"rewards/margins": 0.7120193839073181, |
|
"rewards/rejected": -2.2996952533721924, |
|
"step": 880 |
|
}, |
|
{ |
|
"debug/losses": 0.031164586544036865, |
|
"debug/policy_weights": 0.05676507204771042, |
|
"debug/raw_losses": 0.5348513722419739, |
|
"epoch": 0.708316752884998, |
|
"grad_norm": 1.2972584564770755, |
|
"learning_rate": 1.1863537252529548e-07, |
|
"logits/chosen": -1.9931986331939697, |
|
"logits/rejected": -1.9309186935424805, |
|
"logps/chosen": -337.57659912109375, |
|
"logps/rejected": -380.300048828125, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.793096899986267, |
|
"rewards/margins": 0.5044206380844116, |
|
"rewards/rejected": -2.297517776489258, |
|
"step": 890 |
|
}, |
|
{ |
|
"debug/losses": 0.0308552123606205, |
|
"debug/policy_weights": 0.05725027993321419, |
|
"debug/raw_losses": 0.5641220808029175, |
|
"epoch": 0.716275368085953, |
|
"grad_norm": 1.406582703436064, |
|
"learning_rate": 1.1277334291351145e-07, |
|
"logits/chosen": -1.92514169216156, |
|
"logits/rejected": -1.8894935846328735, |
|
"logps/chosen": -308.63641357421875, |
|
"logps/rejected": -367.8720397949219, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7129472494125366, |
|
"rewards/margins": 0.5332741737365723, |
|
"rewards/rejected": -2.2462215423583984, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.716275368085953, |
|
"eval_debug/losses": 0.03159477189183235, |
|
"eval_debug/policy_weights": 0.055877406150102615, |
|
"eval_debug/raw_losses": 0.5719749927520752, |
|
"eval_logits/chosen": -1.952079176902771, |
|
"eval_logits/rejected": -1.9262397289276123, |
|
"eval_logps/chosen": -318.9172668457031, |
|
"eval_logps/rejected": -378.01251220703125, |
|
"eval_loss": 0.03346230462193489, |
|
"eval_rewards/accuracies": 0.6847015023231506, |
|
"eval_rewards/chosen": -1.7467377185821533, |
|
"eval_rewards/margins": 0.5263976454734802, |
|
"eval_rewards/rejected": -2.2731354236602783, |
|
"eval_runtime": 153.3496, |
|
"eval_samples_per_second": 55.768, |
|
"eval_steps_per_second": 0.874, |
|
"step": 900 |
|
}, |
|
{ |
|
"debug/losses": 0.031236503273248672, |
|
"debug/policy_weights": 0.06273458153009415, |
|
"debug/raw_losses": 0.5026707053184509, |
|
"epoch": 0.724233983286908, |
|
"grad_norm": 1.4167921520949656, |
|
"learning_rate": 1.0701737372808431e-07, |
|
"logits/chosen": -1.9520496129989624, |
|
"logits/rejected": -1.926009178161621, |
|
"logps/chosen": -301.79803466796875, |
|
"logps/rejected": -384.8517150878906, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5805127620697021, |
|
"rewards/margins": 0.7013577818870544, |
|
"rewards/rejected": -2.2818703651428223, |
|
"step": 910 |
|
}, |
|
{ |
|
"debug/losses": 0.028870219364762306, |
|
"debug/policy_weights": 0.05118779093027115, |
|
"debug/raw_losses": 0.6152342557907104, |
|
"epoch": 0.7321925984878631, |
|
"grad_norm": 1.3852811285813345, |
|
"learning_rate": 1.0137191367132078e-07, |
|
"logits/chosen": -1.9522676467895508, |
|
"logits/rejected": -1.9203464984893799, |
|
"logps/chosen": -363.1385192871094, |
|
"logps/rejected": -410.9398498535156, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.070452928543091, |
|
"rewards/margins": 0.43279123306274414, |
|
"rewards/rejected": -2.503243923187256, |
|
"step": 920 |
|
}, |
|
{ |
|
"debug/losses": 0.02983178198337555, |
|
"debug/policy_weights": 0.04729953408241272, |
|
"debug/raw_losses": 0.6190310716629028, |
|
"epoch": 0.7401512136888182, |
|
"grad_norm": 1.1110786407319169, |
|
"learning_rate": 9.584132603467827e-08, |
|
"logits/chosen": -1.9343547821044922, |
|
"logits/rejected": -1.8795759677886963, |
|
"logps/chosen": -377.31103515625, |
|
"logps/rejected": -408.9967346191406, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1014113426208496, |
|
"rewards/margins": 0.44571709632873535, |
|
"rewards/rejected": -2.547128438949585, |
|
"step": 930 |
|
}, |
|
{ |
|
"debug/losses": 0.025911280885338783, |
|
"debug/policy_weights": 0.04492679983377457, |
|
"debug/raw_losses": 0.5681049227714539, |
|
"epoch": 0.7481098288897732, |
|
"grad_norm": 1.282258686519084, |
|
"learning_rate": 9.042988532644249e-08, |
|
"logits/chosen": -1.9340381622314453, |
|
"logits/rejected": -1.903543472290039, |
|
"logps/chosen": -344.40826416015625, |
|
"logps/rejected": -411.30517578125, |
|
"loss": 0.0266, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0105528831481934, |
|
"rewards/margins": 0.5551122426986694, |
|
"rewards/rejected": -2.5656652450561523, |
|
"step": 940 |
|
}, |
|
{ |
|
"debug/losses": 0.029090652242302895, |
|
"debug/policy_weights": 0.04689662158489227, |
|
"debug/raw_losses": 0.5897936820983887, |
|
"epoch": 0.7560684440907283, |
|
"grad_norm": 1.1204653206310022, |
|
"learning_rate": 8.514177396802428e-08, |
|
"logits/chosen": -1.953285574913025, |
|
"logits/rejected": -1.9266679286956787, |
|
"logps/chosen": -352.0096130371094, |
|
"logps/rejected": -408.1578063964844, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1213886737823486, |
|
"rewards/margins": 0.49127835035324097, |
|
"rewards/rejected": -2.612666606903076, |
|
"step": 950 |
|
}, |
|
{ |
|
"debug/losses": 0.026548391208052635, |
|
"debug/policy_weights": 0.04522048681974411, |
|
"debug/raw_losses": 0.5894621014595032, |
|
"epoch": 0.7640270592916832, |
|
"grad_norm": 0.898387217227339, |
|
"learning_rate": 7.998107906142839e-08, |
|
"logits/chosen": -1.9560728073120117, |
|
"logits/rejected": -1.9274494647979736, |
|
"logps/chosen": -343.21002197265625, |
|
"logps/rejected": -379.6694030761719, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0018842220306396, |
|
"rewards/margins": 0.4287974238395691, |
|
"rewards/rejected": -2.4306817054748535, |
|
"step": 960 |
|
}, |
|
{ |
|
"debug/losses": 0.02163025364279747, |
|
"debug/policy_weights": 0.04079737886786461, |
|
"debug/raw_losses": 0.5705414414405823, |
|
"epoch": 0.7719856744926383, |
|
"grad_norm": 1.3955639157776591, |
|
"learning_rate": 7.495178923039396e-08, |
|
"logits/chosen": -1.9478362798690796, |
|
"logits/rejected": -1.9617118835449219, |
|
"logps/chosen": -331.72784423828125, |
|
"logps/rejected": -410.7064514160156, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.030539035797119, |
|
"rewards/margins": 0.5192916989326477, |
|
"rewards/rejected": -2.549830913543701, |
|
"step": 970 |
|
}, |
|
{ |
|
"debug/losses": 0.023149430751800537, |
|
"debug/policy_weights": 0.041884638369083405, |
|
"debug/raw_losses": 0.5447368025779724, |
|
"epoch": 0.7799442896935933, |
|
"grad_norm": 1.5585746856477063, |
|
"learning_rate": 7.005779153764682e-08, |
|
"logits/chosen": -1.9719524383544922, |
|
"logits/rejected": -1.9254512786865234, |
|
"logps/chosen": -337.332275390625, |
|
"logps/rejected": -387.82318115234375, |
|
"loss": 0.028, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9454883337020874, |
|
"rewards/margins": 0.5578633546829224, |
|
"rewards/rejected": -2.5033516883850098, |
|
"step": 980 |
|
}, |
|
{ |
|
"debug/losses": 0.027449458837509155, |
|
"debug/policy_weights": 0.04992467164993286, |
|
"debug/raw_losses": 0.551071286201477, |
|
"epoch": 0.7879029048945484, |
|
"grad_norm": 1.6695068228207648, |
|
"learning_rate": 6.530286848064698e-08, |
|
"logits/chosen": -1.9756189584732056, |
|
"logits/rejected": -1.950194001197815, |
|
"logps/chosen": -336.4566955566406, |
|
"logps/rejected": -397.8472595214844, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.8330726623535156, |
|
"rewards/margins": 0.5761424899101257, |
|
"rewards/rejected": -2.4092154502868652, |
|
"step": 990 |
|
}, |
|
{ |
|
"debug/losses": 0.026408571749925613, |
|
"debug/policy_weights": 0.04767809808254242, |
|
"debug/raw_losses": 0.5441979169845581, |
|
"epoch": 0.7958615200955034, |
|
"grad_norm": 1.6016926469447665, |
|
"learning_rate": 6.069069506815325e-08, |
|
"logits/chosen": -1.964962363243103, |
|
"logits/rejected": -1.9171581268310547, |
|
"logps/chosen": -334.23931884765625, |
|
"logps/rejected": -392.36944580078125, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8932774066925049, |
|
"rewards/margins": 0.6260205507278442, |
|
"rewards/rejected": -2.5192978382110596, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7958615200955034, |
|
"eval_debug/losses": 0.02711501345038414, |
|
"eval_debug/policy_weights": 0.048435505479574203, |
|
"eval_debug/raw_losses": 0.5694997906684875, |
|
"eval_logits/chosen": -1.9579750299453735, |
|
"eval_logits/rejected": -1.9318426847457886, |
|
"eval_logps/chosen": -338.3062438964844, |
|
"eval_logps/rejected": -398.16033935546875, |
|
"eval_loss": 0.02892610803246498, |
|
"eval_rewards/accuracies": 0.6865671873092651, |
|
"eval_rewards/chosen": -1.9406273365020752, |
|
"eval_rewards/margins": 0.5339860916137695, |
|
"eval_rewards/rejected": -2.474613666534424, |
|
"eval_runtime": 153.1415, |
|
"eval_samples_per_second": 55.844, |
|
"eval_steps_per_second": 0.875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"debug/losses": 0.03183472901582718, |
|
"debug/policy_weights": 0.049464497715234756, |
|
"debug/raw_losses": 0.6195154786109924, |
|
"epoch": 0.8038201352964585, |
|
"grad_norm": 1.4391778362577514, |
|
"learning_rate": 5.6224835979863714e-08, |
|
"logits/chosen": -1.9442205429077148, |
|
"logits/rejected": -1.9024133682250977, |
|
"logps/chosen": -348.94781494140625, |
|
"logps/rejected": -381.45281982421875, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9863436222076416, |
|
"rewards/margins": 0.40313720703125, |
|
"rewards/rejected": -2.3894805908203125, |
|
"step": 1010 |
|
}, |
|
{ |
|
"debug/losses": 0.022611474618315697, |
|
"debug/policy_weights": 0.044095687568187714, |
|
"debug/raw_losses": 0.5569943785667419, |
|
"epoch": 0.8117787504974134, |
|
"grad_norm": 1.8837439228048556, |
|
"learning_rate": 5.190874281132851e-08, |
|
"logits/chosen": -1.944157600402832, |
|
"logits/rejected": -1.92160165309906, |
|
"logps/chosen": -325.661865234375, |
|
"logps/rejected": -386.25885009765625, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8595483303070068, |
|
"rewards/margins": 0.5777700543403625, |
|
"rewards/rejected": -2.4373183250427246, |
|
"step": 1020 |
|
}, |
|
{ |
|
"debug/losses": 0.0247525442391634, |
|
"debug/policy_weights": 0.045843105763196945, |
|
"debug/raw_losses": 0.5282943844795227, |
|
"epoch": 0.8197373656983685, |
|
"grad_norm": 1.3079251608813307, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -1.9093300104141235, |
|
"logits/rejected": -1.858944296836853, |
|
"logps/chosen": -322.7106018066406, |
|
"logps/rejected": -379.23748779296875, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.826472282409668, |
|
"rewards/margins": 0.6417751312255859, |
|
"rewards/rejected": -2.468247413635254, |
|
"step": 1030 |
|
}, |
|
{ |
|
"debug/losses": 0.029871661216020584, |
|
"debug/policy_weights": 0.05007849261164665, |
|
"debug/raw_losses": 0.6156904101371765, |
|
"epoch": 0.8276959808993235, |
|
"grad_norm": 1.4623406105385353, |
|
"learning_rate": 4.373907927832513e-08, |
|
"logits/chosen": -1.9410374164581299, |
|
"logits/rejected": -1.9204730987548828, |
|
"logps/chosen": -313.7291564941406, |
|
"logps/rejected": -367.6692199707031, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8602615594863892, |
|
"rewards/margins": 0.42712441086769104, |
|
"rewards/rejected": -2.287385940551758, |
|
"step": 1040 |
|
}, |
|
{ |
|
"debug/losses": 0.029429305344820023, |
|
"debug/policy_weights": 0.054835814982652664, |
|
"debug/raw_losses": 0.5309990048408508, |
|
"epoch": 0.8356545961002786, |
|
"grad_norm": 1.3217568428330395, |
|
"learning_rate": 3.9891823124345665e-08, |
|
"logits/chosen": -1.975529670715332, |
|
"logits/rejected": -1.9376213550567627, |
|
"logps/chosen": -324.4565124511719, |
|
"logps/rejected": -385.99835205078125, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7343988418579102, |
|
"rewards/margins": 0.6463450193405151, |
|
"rewards/rejected": -2.3807437419891357, |
|
"step": 1050 |
|
}, |
|
{ |
|
"debug/losses": 0.026505377143621445, |
|
"debug/policy_weights": 0.050394363701343536, |
|
"debug/raw_losses": 0.5719414949417114, |
|
"epoch": 0.8436132113012336, |
|
"grad_norm": 1.3080390048525132, |
|
"learning_rate": 3.620695643093924e-08, |
|
"logits/chosen": -1.9717638492584229, |
|
"logits/rejected": -1.9583126306533813, |
|
"logps/chosen": -307.21038818359375, |
|
"logps/rejected": -371.9794616699219, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7042350769042969, |
|
"rewards/margins": 0.5117149353027344, |
|
"rewards/rejected": -2.2159502506256104, |
|
"step": 1060 |
|
}, |
|
{ |
|
"debug/losses": 0.03151816874742508, |
|
"debug/policy_weights": 0.051329899579286575, |
|
"debug/raw_losses": 0.5482734441757202, |
|
"epoch": 0.8515718265021887, |
|
"grad_norm": 1.9249185694718636, |
|
"learning_rate": 3.268732717634032e-08, |
|
"logits/chosen": -1.942237138748169, |
|
"logits/rejected": -1.908158540725708, |
|
"logps/chosen": -311.0814514160156, |
|
"logps/rejected": -363.3550720214844, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.746870756149292, |
|
"rewards/margins": 0.545746386051178, |
|
"rewards/rejected": -2.292617082595825, |
|
"step": 1070 |
|
}, |
|
{ |
|
"debug/losses": 0.0361117348074913, |
|
"debug/policy_weights": 0.056406814604997635, |
|
"debug/raw_losses": 0.6171997785568237, |
|
"epoch": 0.8595304417031436, |
|
"grad_norm": 1.2978569483513427, |
|
"learning_rate": 2.9335655629243645e-08, |
|
"logits/chosen": -1.9778077602386475, |
|
"logits/rejected": -1.9576594829559326, |
|
"logps/chosen": -338.1204833984375, |
|
"logps/rejected": -390.57769775390625, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8773784637451172, |
|
"rewards/margins": 0.41199570894241333, |
|
"rewards/rejected": -2.2893738746643066, |
|
"step": 1080 |
|
}, |
|
{ |
|
"debug/losses": 0.02704630419611931, |
|
"debug/policy_weights": 0.04527381435036659, |
|
"debug/raw_losses": 0.6039454340934753, |
|
"epoch": 0.8674890569040987, |
|
"grad_norm": 1.4432056512216735, |
|
"learning_rate": 2.6154532246349476e-08, |
|
"logits/chosen": -1.9391145706176758, |
|
"logits/rejected": -1.882846474647522, |
|
"logps/chosen": -329.9732360839844, |
|
"logps/rejected": -354.43182373046875, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8840316534042358, |
|
"rewards/margins": 0.42414146661758423, |
|
"rewards/rejected": -2.3081729412078857, |
|
"step": 1090 |
|
}, |
|
{ |
|
"debug/losses": 0.03607811778783798, |
|
"debug/policy_weights": 0.060448456555604935, |
|
"debug/raw_losses": 0.6365524530410767, |
|
"epoch": 0.8754476721050537, |
|
"grad_norm": 2.2478985377156913, |
|
"learning_rate": 2.31464156702382e-08, |
|
"logits/chosen": -1.9797563552856445, |
|
"logits/rejected": -1.9243662357330322, |
|
"logps/chosen": -340.63897705078125, |
|
"logps/rejected": -371.7919006347656, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.8404861688613892, |
|
"rewards/margins": 0.3901959955692291, |
|
"rewards/rejected": -2.230681896209717, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8754476721050537, |
|
"eval_debug/losses": 0.029148757457733154, |
|
"eval_debug/policy_weights": 0.052018992602825165, |
|
"eval_debug/raw_losses": 0.5656781196594238, |
|
"eval_logits/chosen": -1.9813780784606934, |
|
"eval_logits/rejected": -1.9553614854812622, |
|
"eval_logps/chosen": -325.3560485839844, |
|
"eval_logps/rejected": -384.33758544921875, |
|
"eval_loss": 0.031092733144760132, |
|
"eval_rewards/accuracies": 0.700559675693512, |
|
"eval_rewards/chosen": -1.81112539768219, |
|
"eval_rewards/margins": 0.5252605676651001, |
|
"eval_rewards/rejected": -2.336386203765869, |
|
"eval_runtime": 153.3772, |
|
"eval_samples_per_second": 55.758, |
|
"eval_steps_per_second": 0.874, |
|
"step": 1100 |
|
}, |
|
{ |
|
"debug/losses": 0.028289441019296646, |
|
"debug/policy_weights": 0.050502192229032516, |
|
"debug/raw_losses": 0.5873227119445801, |
|
"epoch": 0.8834062873060088, |
|
"grad_norm": 1.2493044367365553, |
|
"learning_rate": 2.031363082912252e-08, |
|
"logits/chosen": -1.9731247425079346, |
|
"logits/rejected": -1.9583030939102173, |
|
"logps/chosen": -312.7198181152344, |
|
"logps/rejected": -365.84539794921875, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.811852216720581, |
|
"rewards/margins": 0.43998369574546814, |
|
"rewards/rejected": -2.2518362998962402, |
|
"step": 1110 |
|
}, |
|
{ |
|
"debug/losses": 0.028473522514104843, |
|
"debug/policy_weights": 0.049161095172166824, |
|
"debug/raw_losses": 0.613974392414093, |
|
"epoch": 0.8913649025069638, |
|
"grad_norm": 1.5694190986130379, |
|
"learning_rate": 1.7658367139945228e-08, |
|
"logits/chosen": -1.9810594320297241, |
|
"logits/rejected": -1.9477195739746094, |
|
"logps/chosen": -323.91619873046875, |
|
"logps/rejected": -368.2818298339844, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7960774898529053, |
|
"rewards/margins": 0.45412111282348633, |
|
"rewards/rejected": -2.2501983642578125, |
|
"step": 1120 |
|
}, |
|
{ |
|
"debug/losses": 0.03077780269086361, |
|
"debug/policy_weights": 0.055907707661390305, |
|
"debug/raw_losses": 0.5727918148040771, |
|
"epoch": 0.8993235177079189, |
|
"grad_norm": 1.736668937190508, |
|
"learning_rate": 1.5182676816211632e-08, |
|
"logits/chosen": -1.9711143970489502, |
|
"logits/rejected": -1.9426500797271729, |
|
"logps/chosen": -326.6500549316406, |
|
"logps/rejected": -389.47027587890625, |
|
"loss": 0.0322, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8423681259155273, |
|
"rewards/margins": 0.4901112914085388, |
|
"rewards/rejected": -2.332479476928711, |
|
"step": 1130 |
|
}, |
|
{ |
|
"debug/losses": 0.04067676141858101, |
|
"debug/policy_weights": 0.06354663521051407, |
|
"debug/raw_losses": 0.6167309880256653, |
|
"epoch": 0.9072821329088738, |
|
"grad_norm": 1.311295756466762, |
|
"learning_rate": 1.2888473281864597e-08, |
|
"logits/chosen": -2.0066215991973877, |
|
"logits/rejected": -1.978035569190979, |
|
"logps/chosen": -334.54376220703125, |
|
"logps/rejected": -368.7451171875, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7672863006591797, |
|
"rewards/margins": 0.3908054232597351, |
|
"rewards/rejected": -2.1580917835235596, |
|
"step": 1140 |
|
}, |
|
{ |
|
"debug/losses": 0.029732922092080116, |
|
"debug/policy_weights": 0.05216806009411812, |
|
"debug/raw_losses": 0.5487850904464722, |
|
"epoch": 0.9152407481098289, |
|
"grad_norm": 1.2474371694735222, |
|
"learning_rate": 1.0777529692427679e-08, |
|
"logits/chosen": -1.9626529216766357, |
|
"logits/rejected": -1.924966812133789, |
|
"logps/chosen": -318.5897521972656, |
|
"logps/rejected": -362.83251953125, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6781198978424072, |
|
"rewards/margins": 0.5509058833122253, |
|
"rewards/rejected": -2.2290260791778564, |
|
"step": 1150 |
|
}, |
|
{ |
|
"debug/losses": 0.034698087722063065, |
|
"debug/policy_weights": 0.054052066057920456, |
|
"debug/raw_losses": 0.599198579788208, |
|
"epoch": 0.9231993633107839, |
|
"grad_norm": 1.4791914719616432, |
|
"learning_rate": 8.851477564560061e-09, |
|
"logits/chosen": -1.9579505920410156, |
|
"logits/rejected": -1.9253816604614258, |
|
"logps/chosen": -311.4327392578125, |
|
"logps/rejected": -374.3013610839844, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.692578911781311, |
|
"rewards/margins": 0.5192922353744507, |
|
"rewards/rejected": -2.2118711471557617, |
|
"step": 1160 |
|
}, |
|
{ |
|
"debug/losses": 0.030673867091536522, |
|
"debug/policy_weights": 0.05762176960706711, |
|
"debug/raw_losses": 0.5920447111129761, |
|
"epoch": 0.931157978511739, |
|
"grad_norm": 1.2297627601023153, |
|
"learning_rate": 7.111805515081531e-09, |
|
"logits/chosen": -1.9873937368392944, |
|
"logits/rejected": -1.928577184677124, |
|
"logps/chosen": -349.22552490234375, |
|
"logps/rejected": -393.4672546386719, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9474347829818726, |
|
"rewards/margins": 0.4757717251777649, |
|
"rewards/rejected": -2.423206329345703, |
|
"step": 1170 |
|
}, |
|
{ |
|
"debug/losses": 0.03575451672077179, |
|
"debug/policy_weights": 0.0605660155415535, |
|
"debug/raw_losses": 0.5854865312576294, |
|
"epoch": 0.939116593712694, |
|
"grad_norm": 1.1864649900689144, |
|
"learning_rate": 5.559858110443016e-09, |
|
"logits/chosen": -2.000704288482666, |
|
"logits/rejected": -1.9662271738052368, |
|
"logps/chosen": -329.9343566894531, |
|
"logps/rejected": -382.48822021484375, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.740262746810913, |
|
"rewards/margins": 0.504480242729187, |
|
"rewards/rejected": -2.2447433471679688, |
|
"step": 1180 |
|
}, |
|
{ |
|
"debug/losses": 0.024325475096702576, |
|
"debug/policy_weights": 0.04755675792694092, |
|
"debug/raw_losses": 0.549831211566925, |
|
"epoch": 0.947075208913649, |
|
"grad_norm": 1.4675887378312884, |
|
"learning_rate": 4.196834827531276e-09, |
|
"logits/chosen": -1.9613635540008545, |
|
"logits/rejected": -1.926835060119629, |
|
"logps/chosen": -328.3395690917969, |
|
"logps/rejected": -393.16650390625, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8523941040039062, |
|
"rewards/margins": 0.6523604393005371, |
|
"rewards/rejected": -2.5047545433044434, |
|
"step": 1190 |
|
}, |
|
{ |
|
"debug/losses": 0.024997171014547348, |
|
"debug/policy_weights": 0.04472526162862778, |
|
"debug/raw_losses": 0.5522318482398987, |
|
"epoch": 0.955033824114604, |
|
"grad_norm": 1.2840605704052697, |
|
"learning_rate": 3.023789126611137e-09, |
|
"logits/chosen": -1.9729950428009033, |
|
"logits/rejected": -1.9184396266937256, |
|
"logps/chosen": -324.0898742675781, |
|
"logps/rejected": -372.49151611328125, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7675983905792236, |
|
"rewards/margins": 0.5561206936836243, |
|
"rewards/rejected": -2.323719024658203, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.955033824114604, |
|
"eval_debug/losses": 0.029483934864401817, |
|
"eval_debug/policy_weights": 0.05266140401363373, |
|
"eval_debug/raw_losses": 0.5652690529823303, |
|
"eval_logits/chosen": -1.9727920293807983, |
|
"eval_logits/rejected": -1.946227788925171, |
|
"eval_logps/chosen": -324.4056091308594, |
|
"eval_logps/rejected": -384.5557556152344, |
|
"eval_loss": 0.03144846484065056, |
|
"eval_rewards/accuracies": 0.6996268630027771, |
|
"eval_rewards/chosen": -1.8016209602355957, |
|
"eval_rewards/margins": 0.536946713924408, |
|
"eval_rewards/rejected": -2.3385674953460693, |
|
"eval_runtime": 153.1595, |
|
"eval_samples_per_second": 55.837, |
|
"eval_steps_per_second": 0.875, |
|
"step": 1200 |
|
}, |
|
{ |
|
"debug/losses": 0.027663961052894592, |
|
"debug/policy_weights": 0.04848761484026909, |
|
"debug/raw_losses": 0.5848130583763123, |
|
"epoch": 0.9629924393155591, |
|
"grad_norm": 1.2701407512231324, |
|
"learning_rate": 2.041627637121929e-09, |
|
"logits/chosen": -1.950147032737732, |
|
"logits/rejected": -1.9268693923950195, |
|
"logps/chosen": -327.3438415527344, |
|
"logps/rejected": -394.26251220703125, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.858901023864746, |
|
"rewards/margins": 0.4966161251068115, |
|
"rewards/rejected": -2.3555169105529785, |
|
"step": 1210 |
|
}, |
|
{ |
|
"debug/losses": 0.03097696043550968, |
|
"debug/policy_weights": 0.05295131728053093, |
|
"debug/raw_losses": 0.5842943787574768, |
|
"epoch": 0.9709510545165141, |
|
"grad_norm": 1.7444108884419736, |
|
"learning_rate": 1.2511094569571668e-09, |
|
"logits/chosen": -1.9334291219711304, |
|
"logits/rejected": -1.87801194190979, |
|
"logps/chosen": -323.7477722167969, |
|
"logps/rejected": -347.8150634765625, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.7532447576522827, |
|
"rewards/margins": 0.47265228629112244, |
|
"rewards/rejected": -2.2258970737457275, |
|
"step": 1220 |
|
}, |
|
{ |
|
"debug/losses": 0.02778841182589531, |
|
"debug/policy_weights": 0.04463866725564003, |
|
"debug/raw_losses": 0.5899677276611328, |
|
"epoch": 0.9789096697174692, |
|
"grad_norm": 1.2986395101895525, |
|
"learning_rate": 6.528455657691112e-10, |
|
"logits/chosen": -1.9225807189941406, |
|
"logits/rejected": -1.9189189672470093, |
|
"logps/chosen": -331.2619934082031, |
|
"logps/rejected": -391.13671875, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.9252440929412842, |
|
"rewards/margins": 0.48622721433639526, |
|
"rewards/rejected": -2.411471128463745, |
|
"step": 1230 |
|
}, |
|
{ |
|
"debug/losses": 0.032337166368961334, |
|
"debug/policy_weights": 0.051962029188871384, |
|
"debug/raw_losses": 0.5668946504592896, |
|
"epoch": 0.9868682849184242, |
|
"grad_norm": 1.4772091582386668, |
|
"learning_rate": 2.4729835275189016e-10, |
|
"logits/chosen": -1.9424164295196533, |
|
"logits/rejected": -1.9091637134552002, |
|
"logps/chosen": -328.1160583496094, |
|
"logps/rejected": -392.4992370605469, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8741003274917603, |
|
"rewards/margins": 0.5802657604217529, |
|
"rewards/rejected": -2.4543659687042236, |
|
"step": 1240 |
|
}, |
|
{ |
|
"debug/losses": 0.027117431163787842, |
|
"debug/policy_weights": 0.04854784160852432, |
|
"debug/raw_losses": 0.5413271188735962, |
|
"epoch": 0.9948269001193792, |
|
"grad_norm": 1.622815424856795, |
|
"learning_rate": 3.478125926756337e-11, |
|
"logits/chosen": -1.9423835277557373, |
|
"logits/rejected": -1.9216960668563843, |
|
"logps/chosen": -327.0545349121094, |
|
"logps/rejected": -397.98040771484375, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8533722162246704, |
|
"rewards/margins": 0.5928901433944702, |
|
"rewards/rejected": -2.4462623596191406, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9996020692399522, |
|
"step": 1256, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05419013021620595, |
|
"train_runtime": 10439.8283, |
|
"train_samples_per_second": 15.403, |
|
"train_steps_per_second": 0.12 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1256, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|