|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4387.344432836715, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -1.689455509185791, |
|
"logits/rejected": -1.4794573783874512, |
|
"logps/chosen": -126.21005249023438, |
|
"logps/rejected": -98.13133239746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2997.186248490448, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.7082680463790894, |
|
"logits/rejected": -1.610369324684143, |
|
"logps/chosen": -139.5821990966797, |
|
"logps/rejected": -91.33868408203125, |
|
"loss": 0.7371, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.14069372415542603, |
|
"rewards/margins": 0.1832776665687561, |
|
"rewards/rejected": -0.04258394241333008, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 978.7690038539965, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.640048623085022, |
|
"logits/rejected": -1.6500003337860107, |
|
"logps/chosen": -130.82679748535156, |
|
"logps/rejected": -93.84379577636719, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.8478235006332397, |
|
"rewards/margins": 2.4079792499542236, |
|
"rewards/rejected": -0.5601558685302734, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 882.5432957594307, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.7016046047210693, |
|
"logits/rejected": -1.6314153671264648, |
|
"logps/chosen": -132.51332092285156, |
|
"logps/rejected": -104.1015625, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 5.684301853179932, |
|
"rewards/margins": 8.036184310913086, |
|
"rewards/rejected": -2.3518824577331543, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 948.7428618668862, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.6547832489013672, |
|
"logits/rejected": -1.5681570768356323, |
|
"logps/chosen": -143.4683074951172, |
|
"logps/rejected": -105.14913177490234, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 8.252656936645508, |
|
"rewards/margins": 14.83061695098877, |
|
"rewards/rejected": -6.577960968017578, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1493.2315961148001, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -1.6189439296722412, |
|
"logits/rejected": -1.639786958694458, |
|
"logps/chosen": -126.5509262084961, |
|
"logps/rejected": -110.4835433959961, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 8.523491859436035, |
|
"rewards/margins": 19.890926361083984, |
|
"rewards/rejected": -11.367437362670898, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 697.7628654861039, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -1.726836919784546, |
|
"logits/rejected": -1.664571762084961, |
|
"logps/chosen": -140.87637329101562, |
|
"logps/rejected": -116.43977355957031, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 10.12385082244873, |
|
"rewards/margins": 24.805103302001953, |
|
"rewards/rejected": -14.681253433227539, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 851.3584358950797, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -1.670240044593811, |
|
"logits/rejected": -1.6913667917251587, |
|
"logps/chosen": -134.24986267089844, |
|
"logps/rejected": -115.22517395019531, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 10.848904609680176, |
|
"rewards/margins": 28.108707427978516, |
|
"rewards/rejected": -17.25979995727539, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 521.7442029758571, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -1.6372220516204834, |
|
"logits/rejected": -1.618297815322876, |
|
"logps/chosen": -128.79197692871094, |
|
"logps/rejected": -109.78861999511719, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 11.66787052154541, |
|
"rewards/margins": 27.729543685913086, |
|
"rewards/rejected": -16.061674118041992, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1335.0772057896347, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -1.6502501964569092, |
|
"logits/rejected": -1.6660646200180054, |
|
"logps/chosen": -136.35589599609375, |
|
"logps/rejected": -121.8341064453125, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 12.854347229003906, |
|
"rewards/margins": 32.79566955566406, |
|
"rewards/rejected": -19.941320419311523, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 597.1667877282997, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -1.6900157928466797, |
|
"logits/rejected": -1.6581432819366455, |
|
"logps/chosen": -116.9823989868164, |
|
"logps/rejected": -109.75309753417969, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 10.995382308959961, |
|
"rewards/margins": 30.093975067138672, |
|
"rewards/rejected": -19.098596572875977, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.730061650276184, |
|
"eval_logits/rejected": -1.7088191509246826, |
|
"eval_logps/chosen": -125.00153350830078, |
|
"eval_logps/rejected": -113.56846618652344, |
|
"eval_loss": 0.14742514491081238, |
|
"eval_rewards/accuracies": 0.91796875, |
|
"eval_rewards/chosen": 12.770716667175293, |
|
"eval_rewards/margins": 33.09844970703125, |
|
"eval_rewards/rejected": -20.32773208618164, |
|
"eval_runtime": 97.6127, |
|
"eval_samples_per_second": 20.489, |
|
"eval_steps_per_second": 0.328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 748.8293985115087, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -1.6079628467559814, |
|
"logits/rejected": -1.6635444164276123, |
|
"logps/chosen": -121.47686767578125, |
|
"logps/rejected": -118.15767669677734, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 10.569832801818848, |
|
"rewards/margins": 26.553226470947266, |
|
"rewards/rejected": -15.983392715454102, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 685.6845900004148, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -1.67098069190979, |
|
"logits/rejected": -1.6562950611114502, |
|
"logps/chosen": -120.14444732666016, |
|
"logps/rejected": -119.04862976074219, |
|
"loss": 0.2, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 12.69383716583252, |
|
"rewards/margins": 30.399723052978516, |
|
"rewards/rejected": -17.705890655517578, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 365.3303217685943, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -1.6567928791046143, |
|
"logits/rejected": -1.6678619384765625, |
|
"logps/chosen": -126.27610778808594, |
|
"logps/rejected": -113.00162506103516, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 14.438420295715332, |
|
"rewards/margins": 34.15327835083008, |
|
"rewards/rejected": -19.714855194091797, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 927.4815100211999, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -1.5695436000823975, |
|
"logits/rejected": -1.5368653535842896, |
|
"logps/chosen": -123.9181900024414, |
|
"logps/rejected": -111.4970474243164, |
|
"loss": 0.2485, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 13.650964736938477, |
|
"rewards/margins": 32.08143997192383, |
|
"rewards/rejected": -18.43047523498535, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 884.1656325190633, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -1.6969578266143799, |
|
"logits/rejected": -1.722955346107483, |
|
"logps/chosen": -121.66493225097656, |
|
"logps/rejected": -109.62093353271484, |
|
"loss": 0.1799, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.053888320922852, |
|
"rewards/margins": 33.25988006591797, |
|
"rewards/rejected": -19.20599365234375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1383.2317309989403, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -1.5813496112823486, |
|
"logits/rejected": -1.5495407581329346, |
|
"logps/chosen": -129.09225463867188, |
|
"logps/rejected": -117.093994140625, |
|
"loss": 0.2686, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 12.873576164245605, |
|
"rewards/margins": 31.02213478088379, |
|
"rewards/rejected": -18.148557662963867, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1121.824339354007, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -1.7786369323730469, |
|
"logits/rejected": -1.7519384622573853, |
|
"logps/chosen": -124.59269714355469, |
|
"logps/rejected": -116.8897933959961, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 14.548820495605469, |
|
"rewards/margins": 35.767005920410156, |
|
"rewards/rejected": -21.218185424804688, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1198.59830465361, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -1.7289392948150635, |
|
"logits/rejected": -1.7189258337020874, |
|
"logps/chosen": -127.54146575927734, |
|
"logps/rejected": -112.2616195678711, |
|
"loss": 0.2876, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 15.61170482635498, |
|
"rewards/margins": 37.00361251831055, |
|
"rewards/rejected": -21.391910552978516, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 926.4816546126996, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -1.7749055624008179, |
|
"logits/rejected": -1.7469419240951538, |
|
"logps/chosen": -116.5710678100586, |
|
"logps/rejected": -125.67762756347656, |
|
"loss": 0.2836, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 15.228490829467773, |
|
"rewards/margins": 38.81087112426758, |
|
"rewards/rejected": -23.582382202148438, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 963.2845441111758, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -1.7404663562774658, |
|
"logits/rejected": -1.7393602132797241, |
|
"logps/chosen": -122.38069152832031, |
|
"logps/rejected": -109.40885925292969, |
|
"loss": 0.2958, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 16.75569725036621, |
|
"rewards/margins": 38.156455993652344, |
|
"rewards/rejected": -21.400760650634766, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.8384383916854858, |
|
"eval_logits/rejected": -1.8349756002426147, |
|
"eval_logps/chosen": -122.29769134521484, |
|
"eval_logps/rejected": -116.40873718261719, |
|
"eval_loss": 0.22235894203186035, |
|
"eval_rewards/accuracies": 0.92578125, |
|
"eval_rewards/chosen": 15.474552154541016, |
|
"eval_rewards/margins": 38.64255142211914, |
|
"eval_rewards/rejected": -23.167999267578125, |
|
"eval_runtime": 97.6373, |
|
"eval_samples_per_second": 20.484, |
|
"eval_steps_per_second": 0.328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 707.2538341296229, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -1.6704992055892944, |
|
"logits/rejected": -1.605607271194458, |
|
"logps/chosen": -120.807373046875, |
|
"logps/rejected": -107.77888488769531, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 11.783378601074219, |
|
"rewards/margins": 29.960418701171875, |
|
"rewards/rejected": -18.17704200744629, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 601.6467819271398, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -1.8005359172821045, |
|
"logits/rejected": -1.845910668373108, |
|
"logps/chosen": -122.75736236572266, |
|
"logps/rejected": -108.84068298339844, |
|
"loss": 0.2678, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 13.717930793762207, |
|
"rewards/margins": 35.02998733520508, |
|
"rewards/rejected": -21.31205177307129, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 474.6819088542097, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -1.7543551921844482, |
|
"logits/rejected": -1.703619360923767, |
|
"logps/chosen": -134.04598999023438, |
|
"logps/rejected": -127.0660400390625, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 13.662841796875, |
|
"rewards/margins": 40.915740966796875, |
|
"rewards/rejected": -27.252899169921875, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1030.0648597846362, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -1.7851312160491943, |
|
"logits/rejected": -1.8008880615234375, |
|
"logps/chosen": -120.84779357910156, |
|
"logps/rejected": -118.9937515258789, |
|
"loss": 0.343, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 11.07734203338623, |
|
"rewards/margins": 32.441627502441406, |
|
"rewards/rejected": -21.364286422729492, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 984.6682547225865, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -1.7966454029083252, |
|
"logits/rejected": -1.8319499492645264, |
|
"logps/chosen": -124.015869140625, |
|
"logps/rejected": -120.4018783569336, |
|
"loss": 0.3481, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 13.399293899536133, |
|
"rewards/margins": 36.81779098510742, |
|
"rewards/rejected": -23.41849708557129, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 466.5498060764853, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -1.8401811122894287, |
|
"logits/rejected": -1.8560435771942139, |
|
"logps/chosen": -128.3160858154297, |
|
"logps/rejected": -107.73912048339844, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 15.51091480255127, |
|
"rewards/margins": 38.373741149902344, |
|
"rewards/rejected": -22.862829208374023, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 878.144034078672, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -1.8353513479232788, |
|
"logits/rejected": -1.7788803577423096, |
|
"logps/chosen": -118.90830993652344, |
|
"logps/rejected": -113.4903335571289, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 13.150169372558594, |
|
"rewards/margins": 35.406681060791016, |
|
"rewards/rejected": -22.256511688232422, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1244.2545962296354, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -1.6921329498291016, |
|
"logits/rejected": -1.778611183166504, |
|
"logps/chosen": -124.7625732421875, |
|
"logps/rejected": -126.3423080444336, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 12.294075012207031, |
|
"rewards/margins": 37.28684997558594, |
|
"rewards/rejected": -24.992773056030273, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 970.0623194716495, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -1.788028359413147, |
|
"logits/rejected": -1.7638485431671143, |
|
"logps/chosen": -115.53651428222656, |
|
"logps/rejected": -114.30744934082031, |
|
"loss": 0.3137, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 12.6263427734375, |
|
"rewards/margins": 37.81734848022461, |
|
"rewards/rejected": -25.19100570678711, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 817.9238785214287, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -1.7443387508392334, |
|
"logits/rejected": -1.6937278509140015, |
|
"logps/chosen": -133.70358276367188, |
|
"logps/rejected": -131.15541076660156, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 12.736581802368164, |
|
"rewards/margins": 37.65189743041992, |
|
"rewards/rejected": -24.91531753540039, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.8495898246765137, |
|
"eval_logits/rejected": -1.8524861335754395, |
|
"eval_logps/chosen": -123.59905242919922, |
|
"eval_logps/rejected": -120.27072143554688, |
|
"eval_loss": 0.2672339975833893, |
|
"eval_rewards/accuracies": 0.92578125, |
|
"eval_rewards/chosen": 14.173208236694336, |
|
"eval_rewards/margins": 41.203189849853516, |
|
"eval_rewards/rejected": -27.02998161315918, |
|
"eval_runtime": 97.7326, |
|
"eval_samples_per_second": 20.464, |
|
"eval_steps_per_second": 0.327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1032.5800107949206, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -1.7456867694854736, |
|
"logits/rejected": -1.781978964805603, |
|
"logps/chosen": -124.9278335571289, |
|
"logps/rejected": -121.29362487792969, |
|
"loss": 0.3116, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 14.409858703613281, |
|
"rewards/margins": 37.164405822753906, |
|
"rewards/rejected": -22.75455093383789, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 852.7418560203273, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -1.773827314376831, |
|
"logits/rejected": -1.742255449295044, |
|
"logps/chosen": -114.78021240234375, |
|
"logps/rejected": -120.72232818603516, |
|
"loss": 0.307, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 16.016681671142578, |
|
"rewards/margins": 40.483619689941406, |
|
"rewards/rejected": -24.466938018798828, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 530.8053526489199, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -1.7004032135009766, |
|
"logits/rejected": -1.7918256521224976, |
|
"logps/chosen": -125.2526626586914, |
|
"logps/rejected": -116.03642272949219, |
|
"loss": 0.3323, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 12.930267333984375, |
|
"rewards/margins": 35.218624114990234, |
|
"rewards/rejected": -22.28835678100586, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 574.055283608638, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -1.6694965362548828, |
|
"logits/rejected": -1.7466586828231812, |
|
"logps/chosen": -119.57568359375, |
|
"logps/rejected": -108.32354736328125, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 10.914609909057617, |
|
"rewards/margins": 31.60161781311035, |
|
"rewards/rejected": -20.687007904052734, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1566.6302259536992, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -1.7077308893203735, |
|
"logits/rejected": -1.7222753763198853, |
|
"logps/chosen": -119.43708801269531, |
|
"logps/rejected": -127.2352523803711, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 10.647879600524902, |
|
"rewards/margins": 36.324337005615234, |
|
"rewards/rejected": -25.67645835876465, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 891.7532408538094, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -1.6924266815185547, |
|
"logits/rejected": -1.7151873111724854, |
|
"logps/chosen": -125.8857421875, |
|
"logps/rejected": -123.62667083740234, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 12.26460075378418, |
|
"rewards/margins": 36.57375717163086, |
|
"rewards/rejected": -24.309158325195312, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1484.9700115288094, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -1.7990468740463257, |
|
"logits/rejected": -1.7503010034561157, |
|
"logps/chosen": -115.22508239746094, |
|
"logps/rejected": -112.91387939453125, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 11.164658546447754, |
|
"rewards/margins": 31.5644588470459, |
|
"rewards/rejected": -20.399805068969727, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 567.3331476500654, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -1.8380801677703857, |
|
"logits/rejected": -1.860713005065918, |
|
"logps/chosen": -123.00642395019531, |
|
"logps/rejected": -125.6397933959961, |
|
"loss": 0.2616, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 13.647308349609375, |
|
"rewards/margins": 40.07811737060547, |
|
"rewards/rejected": -26.430805206298828, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1013.5375269262001, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -1.717441201210022, |
|
"logits/rejected": -1.7240034341812134, |
|
"logps/chosen": -118.6935806274414, |
|
"logps/rejected": -114.69548034667969, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 13.80224609375, |
|
"rewards/margins": 38.19357681274414, |
|
"rewards/rejected": -24.391324996948242, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1086.7904609993896, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -1.6992905139923096, |
|
"logits/rejected": -1.7782999277114868, |
|
"logps/chosen": -121.951171875, |
|
"logps/rejected": -115.44358825683594, |
|
"loss": 0.3576, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 10.810310363769531, |
|
"rewards/margins": 34.673805236816406, |
|
"rewards/rejected": -23.86349868774414, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.869842529296875, |
|
"eval_logits/rejected": -1.8741588592529297, |
|
"eval_logps/chosen": -123.62108612060547, |
|
"eval_logps/rejected": -120.2706298828125, |
|
"eval_loss": 0.2510662376880646, |
|
"eval_rewards/accuracies": 0.9296875, |
|
"eval_rewards/chosen": 14.151167869567871, |
|
"eval_rewards/margins": 41.181060791015625, |
|
"eval_rewards/rejected": -27.029890060424805, |
|
"eval_runtime": 97.6822, |
|
"eval_samples_per_second": 20.475, |
|
"eval_steps_per_second": 0.328, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 953.3240690024764, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -1.792865514755249, |
|
"logits/rejected": -1.8273773193359375, |
|
"logps/chosen": -132.37612915039062, |
|
"logps/rejected": -118.2215805053711, |
|
"loss": 0.3428, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.9389009475708, |
|
"rewards/margins": 41.05461502075195, |
|
"rewards/rejected": -26.115713119506836, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 497.3107054185315, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -1.7695974111557007, |
|
"logits/rejected": -1.768599271774292, |
|
"logps/chosen": -116.97047424316406, |
|
"logps/rejected": -114.58748626708984, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 11.339197158813477, |
|
"rewards/margins": 35.26213455200195, |
|
"rewards/rejected": -23.92293357849121, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 978.5564078023309, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -1.7172062397003174, |
|
"logits/rejected": -1.747591257095337, |
|
"logps/chosen": -133.02786254882812, |
|
"logps/rejected": -114.73051452636719, |
|
"loss": 0.282, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 13.081387519836426, |
|
"rewards/margins": 34.326393127441406, |
|
"rewards/rejected": -21.245006561279297, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 837.4554645359331, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -1.8500797748565674, |
|
"logits/rejected": -1.8606961965560913, |
|
"logps/chosen": -124.95164489746094, |
|
"logps/rejected": -119.78662109375, |
|
"loss": 0.2435, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 14.02147388458252, |
|
"rewards/margins": 40.40496063232422, |
|
"rewards/rejected": -26.38348388671875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 928.2897315645127, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -1.8660519123077393, |
|
"logits/rejected": -1.83090078830719, |
|
"logps/chosen": -120.15888977050781, |
|
"logps/rejected": -115.3041000366211, |
|
"loss": 0.2863, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 15.148035049438477, |
|
"rewards/margins": 42.080291748046875, |
|
"rewards/rejected": -26.9322566986084, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 623.0626054843979, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -1.7563292980194092, |
|
"logits/rejected": -1.7755203247070312, |
|
"logps/chosen": -124.97782897949219, |
|
"logps/rejected": -119.69969177246094, |
|
"loss": 0.2375, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 14.375999450683594, |
|
"rewards/margins": 38.44708251953125, |
|
"rewards/rejected": -24.071086883544922, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 652.9309582790959, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -1.7278000116348267, |
|
"logits/rejected": -1.7499659061431885, |
|
"logps/chosen": -118.06231689453125, |
|
"logps/rejected": -129.90151977539062, |
|
"loss": 0.2885, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 11.649320602416992, |
|
"rewards/margins": 37.77583312988281, |
|
"rewards/rejected": -26.126514434814453, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2831452648509995, |
|
"train_runtime": 7636.09, |
|
"train_samples_per_second": 8.006, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|