|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994242947610823, |
|
"eval_steps": 100, |
|
"global_step": 868, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 23.51828299790517, |
|
"learning_rate": 5.747126436781609e-09, |
|
"logits/chosen": -1.865264654159546, |
|
"logits/rejected": -1.587956428527832, |
|
"logps/chosen": -204.58331298828125, |
|
"logps/rejected": -154.1517333984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 24.149515970375678, |
|
"learning_rate": 5.747126436781609e-08, |
|
"logits/chosen": -1.90481698513031, |
|
"logits/rejected": -1.8536584377288818, |
|
"logps/chosen": -213.41416931152344, |
|
"logps/rejected": -191.33694458007812, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": -1.9929786503780633e-05, |
|
"rewards/margins": 0.00017105697770603, |
|
"rewards/rejected": -0.00019098672783002257, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 23.563731768256098, |
|
"learning_rate": 1.1494252873563217e-07, |
|
"logits/chosen": -1.9680726528167725, |
|
"logits/rejected": -1.798654317855835, |
|
"logps/chosen": -255.55111694335938, |
|
"logps/rejected": -189.6189727783203, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0016116431215777993, |
|
"rewards/margins": 0.002336590550839901, |
|
"rewards/rejected": -0.0007249473710544407, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 23.038450073297746, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"logits/chosen": -1.8938862085342407, |
|
"logits/rejected": -1.8228662014007568, |
|
"logps/chosen": -212.65322875976562, |
|
"logps/rejected": -194.4668426513672, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.011502735316753387, |
|
"rewards/margins": 0.014704583212733269, |
|
"rewards/rejected": -0.003201847430318594, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 22.339093495440075, |
|
"learning_rate": 2.2988505747126435e-07, |
|
"logits/chosen": -1.8691730499267578, |
|
"logits/rejected": -1.810280442237854, |
|
"logps/chosen": -212.04031372070312, |
|
"logps/rejected": -189.72427368164062, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.027534600347280502, |
|
"rewards/margins": 0.037894655019044876, |
|
"rewards/rejected": -0.010360054671764374, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 21.83120331543706, |
|
"learning_rate": 2.873563218390804e-07, |
|
"logits/chosen": -1.9792773723602295, |
|
"logits/rejected": -1.8856391906738281, |
|
"logps/chosen": -199.00392150878906, |
|
"logps/rejected": -184.42074584960938, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02775971218943596, |
|
"rewards/margins": 0.08295276015996933, |
|
"rewards/rejected": -0.05519305542111397, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 21.94313336281609, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"logits/chosen": -1.978032112121582, |
|
"logits/rejected": -1.8626216650009155, |
|
"logps/chosen": -263.13702392578125, |
|
"logps/rejected": -227.51931762695312, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.024905700236558914, |
|
"rewards/margins": 0.1394185870885849, |
|
"rewards/rejected": -0.1643243134021759, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 21.93834951114425, |
|
"learning_rate": 4.0229885057471266e-07, |
|
"logits/chosen": -1.923208236694336, |
|
"logits/rejected": -1.9092395305633545, |
|
"logps/chosen": -211.4084930419922, |
|
"logps/rejected": -216.09439086914062, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.16204482316970825, |
|
"rewards/margins": 0.21120235323905945, |
|
"rewards/rejected": -0.3732471466064453, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 26.27963832031748, |
|
"learning_rate": 4.597701149425287e-07, |
|
"logits/chosen": -1.7020299434661865, |
|
"logits/rejected": -1.635000467300415, |
|
"logps/chosen": -229.10562133789062, |
|
"logps/rejected": -228.198486328125, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3274237811565399, |
|
"rewards/margins": 0.26525241136550903, |
|
"rewards/rejected": -0.5926762819290161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 35.47456739543052, |
|
"learning_rate": 4.999817969178237e-07, |
|
"logits/chosen": -1.768843412399292, |
|
"logits/rejected": -1.73134446144104, |
|
"logps/chosen": -271.71563720703125, |
|
"logps/rejected": -283.0465393066406, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.538571298122406, |
|
"rewards/margins": 0.47389060258865356, |
|
"rewards/rejected": -1.0124619007110596, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 38.67050237438448, |
|
"learning_rate": 4.996582603056428e-07, |
|
"logits/chosen": -1.7260372638702393, |
|
"logits/rejected": -1.6588356494903564, |
|
"logps/chosen": -285.2041320800781, |
|
"logps/rejected": -323.65692138671875, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6892239451408386, |
|
"rewards/margins": 0.5662633180618286, |
|
"rewards/rejected": -1.2554872035980225, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/chosen": -1.746153473854065, |
|
"eval_logits/rejected": -1.6546903848648071, |
|
"eval_logps/chosen": -421.5047912597656, |
|
"eval_logps/rejected": -451.7755432128906, |
|
"eval_loss": 0.6086099743843079, |
|
"eval_rewards/accuracies": 0.6953125, |
|
"eval_rewards/chosen": -0.8599321246147156, |
|
"eval_rewards/margins": 0.3267643451690674, |
|
"eval_rewards/rejected": -1.1866965293884277, |
|
"eval_runtime": 98.2501, |
|
"eval_samples_per_second": 20.356, |
|
"eval_steps_per_second": 0.326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 56.77623681367674, |
|
"learning_rate": 4.989308132738126e-07, |
|
"logits/chosen": -1.8324391841888428, |
|
"logits/rejected": -1.7346527576446533, |
|
"logps/chosen": -289.9622802734375, |
|
"logps/rejected": -307.9504699707031, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7402961850166321, |
|
"rewards/margins": 0.6292544007301331, |
|
"rewards/rejected": -1.3695508241653442, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 54.65739090602792, |
|
"learning_rate": 4.978006327248536e-07, |
|
"logits/chosen": -1.91842520236969, |
|
"logits/rejected": -1.849988579750061, |
|
"logps/chosen": -323.345703125, |
|
"logps/rejected": -366.32415771484375, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.107177972793579, |
|
"rewards/margins": 0.7300722599029541, |
|
"rewards/rejected": -1.8372503519058228, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 40.66462467188264, |
|
"learning_rate": 4.962695471250032e-07, |
|
"logits/chosen": -1.7266982793807983, |
|
"logits/rejected": -1.6543283462524414, |
|
"logps/chosen": -320.31195068359375, |
|
"logps/rejected": -359.983154296875, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0283275842666626, |
|
"rewards/margins": 0.7512324452400208, |
|
"rewards/rejected": -1.7795600891113281, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 45.88018498600559, |
|
"learning_rate": 4.94340033546025e-07, |
|
"logits/chosen": -1.4110041856765747, |
|
"logits/rejected": -1.3973127603530884, |
|
"logps/chosen": -312.18145751953125, |
|
"logps/rejected": -390.5517578125, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.310011863708496, |
|
"rewards/margins": 0.8049423098564148, |
|
"rewards/rejected": -2.1149544715881348, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 79.78754356153908, |
|
"learning_rate": 4.920152136576705e-07, |
|
"logits/chosen": -1.2265546321868896, |
|
"logits/rejected": -1.1716219186782837, |
|
"logps/chosen": -357.737060546875, |
|
"logps/rejected": -431.76806640625, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4461175203323364, |
|
"rewards/margins": 0.9848885536193848, |
|
"rewards/rejected": -2.4310059547424316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 40.08268655919122, |
|
"learning_rate": 4.892988486772756e-07, |
|
"logits/chosen": -1.2588635683059692, |
|
"logits/rejected": -1.1425318717956543, |
|
"logps/chosen": -354.57867431640625, |
|
"logps/rejected": -432.987060546875, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4544165134429932, |
|
"rewards/margins": 0.9601584672927856, |
|
"rewards/rejected": -2.4145748615264893, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 37.08844280081501, |
|
"learning_rate": 4.861953332846629e-07, |
|
"logits/chosen": -1.0948612689971924, |
|
"logits/rejected": -0.9797511100769043, |
|
"logps/chosen": -370.5609436035156, |
|
"logps/rejected": -417.10418701171875, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4464932680130005, |
|
"rewards/margins": 0.8114526867866516, |
|
"rewards/rejected": -2.257946014404297, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 51.03369267010431, |
|
"learning_rate": 4.827096885121953e-07, |
|
"logits/chosen": -0.9882611036300659, |
|
"logits/rejected": -0.786241888999939, |
|
"logps/chosen": -403.01361083984375, |
|
"logps/rejected": -465.450439453125, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6873054504394531, |
|
"rewards/margins": 0.8884965181350708, |
|
"rewards/rejected": -2.5758020877838135, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 40.75117386512369, |
|
"learning_rate": 4.788475536214821e-07, |
|
"logits/chosen": -0.6994659900665283, |
|
"logits/rejected": -0.57302325963974, |
|
"logps/chosen": -345.23858642578125, |
|
"logps/rejected": -434.90069580078125, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.5364990234375, |
|
"rewards/margins": 1.0722037553787231, |
|
"rewards/rejected": -2.6087028980255127, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 50.385160508667006, |
|
"learning_rate": 4.746151769798818e-07, |
|
"logits/chosen": -0.46505388617515564, |
|
"logits/rejected": -0.32105451822280884, |
|
"logps/chosen": -395.0636901855469, |
|
"logps/rejected": -491.369873046875, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5998367071151733, |
|
"rewards/margins": 1.3637341260910034, |
|
"rewards/rejected": -2.9635708332061768, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -0.8866692185401917, |
|
"eval_logits/rejected": -0.715141236782074, |
|
"eval_logps/chosen": -537.5919799804688, |
|
"eval_logps/rejected": -591.529052734375, |
|
"eval_loss": 0.5454351305961609, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -2.0208044052124023, |
|
"eval_rewards/margins": 0.5634276270866394, |
|
"eval_rewards/rejected": -2.5842318534851074, |
|
"eval_runtime": 98.1521, |
|
"eval_samples_per_second": 20.377, |
|
"eval_steps_per_second": 0.326, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 44.17462139523744, |
|
"learning_rate": 4.7001940595156055e-07, |
|
"logits/chosen": -0.5879951119422913, |
|
"logits/rejected": -0.31766843795776367, |
|
"logps/chosen": -347.45184326171875, |
|
"logps/rejected": -442.23291015625, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.621807336807251, |
|
"rewards/margins": 1.1228187084197998, |
|
"rewards/rejected": -2.7446258068084717, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 46.80720748583798, |
|
"learning_rate": 4.650676758194623e-07, |
|
"logits/chosen": -0.5494168996810913, |
|
"logits/rejected": -0.3329974114894867, |
|
"logps/chosen": -386.22528076171875, |
|
"logps/rejected": -472.072998046875, |
|
"loss": 0.419, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6599994897842407, |
|
"rewards/margins": 1.2505383491516113, |
|
"rewards/rejected": -2.9105377197265625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 43.28959440159286, |
|
"learning_rate": 4.5976799775611215e-07, |
|
"logits/chosen": -0.6910772919654846, |
|
"logits/rejected": -0.4287993013858795, |
|
"logps/chosen": -385.10784912109375, |
|
"logps/rejected": -484.22314453125, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.7417128086090088, |
|
"rewards/margins": 1.4360835552215576, |
|
"rewards/rejected": -3.1777961254119873, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 48.21494711877692, |
|
"learning_rate": 4.5412894586271543e-07, |
|
"logits/chosen": -0.3966357111930847, |
|
"logits/rejected": -0.13579869270324707, |
|
"logps/chosen": -405.3009338378906, |
|
"logps/rejected": -484.6737365722656, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8974357843399048, |
|
"rewards/margins": 1.3567252159118652, |
|
"rewards/rejected": -3.2541611194610596, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 42.352515667816355, |
|
"learning_rate": 4.481596432975201e-07, |
|
"logits/chosen": -0.6702763438224792, |
|
"logits/rejected": -0.49778255820274353, |
|
"logps/chosen": -340.3480224609375, |
|
"logps/rejected": -434.61376953125, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6621681451797485, |
|
"rewards/margins": 1.0998741388320923, |
|
"rewards/rejected": -2.762042284011841, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 51.54256095538614, |
|
"learning_rate": 4.41869747515886e-07, |
|
"logits/chosen": -0.6597603559494019, |
|
"logits/rejected": -0.5498248338699341, |
|
"logps/chosen": -365.7995910644531, |
|
"logps/rejected": -490.1622009277344, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4162827730178833, |
|
"rewards/margins": 1.2882452011108398, |
|
"rewards/rejected": -2.7045278549194336, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 48.71803198385668, |
|
"learning_rate": 4.352694346459396e-07, |
|
"logits/chosen": 0.04401933029294014, |
|
"logits/rejected": 0.16322588920593262, |
|
"logps/chosen": -363.21539306640625, |
|
"logps/rejected": -463.6495056152344, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.5739765167236328, |
|
"rewards/margins": 1.1849424839019775, |
|
"rewards/rejected": -2.7589190006256104, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 38.68223370724194, |
|
"learning_rate": 4.2836938302509256e-07, |
|
"logits/chosen": -0.13973233103752136, |
|
"logits/rejected": 0.19283699989318848, |
|
"logps/chosen": -328.5007019042969, |
|
"logps/rejected": -440.18365478515625, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.279756784439087, |
|
"rewards/margins": 1.4430491924285889, |
|
"rewards/rejected": -2.7228057384490967, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 45.704934038680605, |
|
"learning_rate": 4.2118075592405874e-07, |
|
"logits/chosen": 0.20580144226551056, |
|
"logits/rejected": 0.34621715545654297, |
|
"logps/chosen": -407.57373046875, |
|
"logps/rejected": -517.0430908203125, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8687858581542969, |
|
"rewards/margins": 1.2867904901504517, |
|
"rewards/rejected": -3.155576229095459, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 48.006993514366904, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": 0.6578917503356934, |
|
"logits/rejected": 0.7554408311843872, |
|
"logps/chosen": -349.4103088378906, |
|
"logps/rejected": -480.834228515625, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.828386664390564, |
|
"rewards/margins": 1.3594980239868164, |
|
"rewards/rejected": -3.18788480758667, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -0.5939264297485352, |
|
"eval_logits/rejected": -0.34991100430488586, |
|
"eval_logps/chosen": -545.4883422851562, |
|
"eval_logps/rejected": -617.2100830078125, |
|
"eval_loss": 0.5011798739433289, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -2.0997684001922607, |
|
"eval_rewards/margins": 0.7412738800048828, |
|
"eval_rewards/rejected": -2.8410420417785645, |
|
"eval_runtime": 98.127, |
|
"eval_samples_per_second": 20.382, |
|
"eval_steps_per_second": 0.326, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 43.38987414729455, |
|
"learning_rate": 4.059847439122671e-07, |
|
"logits/chosen": 0.5874438285827637, |
|
"logits/rejected": 0.8824877738952637, |
|
"logps/chosen": -419.9178771972656, |
|
"logps/rejected": -517.2019653320312, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.0750081539154053, |
|
"rewards/margins": 1.2572228908538818, |
|
"rewards/rejected": -3.332231044769287, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 56.605050092804255, |
|
"learning_rate": 3.98001943918432e-07, |
|
"logits/chosen": 0.6735237836837769, |
|
"logits/rejected": 1.019078254699707, |
|
"logps/chosen": -373.03009033203125, |
|
"logps/rejected": -483.0083923339844, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.7667083740234375, |
|
"rewards/margins": 1.1942052841186523, |
|
"rewards/rejected": -2.960913896560669, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 57.81664075376147, |
|
"learning_rate": 3.8977969850346866e-07, |
|
"logits/chosen": 0.4839138090610504, |
|
"logits/rejected": 0.8274878263473511, |
|
"logps/chosen": -387.33673095703125, |
|
"logps/rejected": -499.78094482421875, |
|
"loss": 0.4004, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7052650451660156, |
|
"rewards/margins": 1.477137565612793, |
|
"rewards/rejected": -3.1824028491973877, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 50.66567087546677, |
|
"learning_rate": 3.8133131005357465e-07, |
|
"logits/chosen": 0.23904335498809814, |
|
"logits/rejected": 0.6436888575553894, |
|
"logps/chosen": -374.50750732421875, |
|
"logps/rejected": -534.21435546875, |
|
"loss": 0.3943, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.705120325088501, |
|
"rewards/margins": 1.7923282384872437, |
|
"rewards/rejected": -3.497448444366455, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 41.43510772615216, |
|
"learning_rate": 3.7267044682118435e-07, |
|
"logits/chosen": 0.3483354449272156, |
|
"logits/rejected": 0.6899020075798035, |
|
"logps/chosen": -369.47418212890625, |
|
"logps/rejected": -496.38262939453125, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8410135507583618, |
|
"rewards/margins": 1.4833061695098877, |
|
"rewards/rejected": -3.324319362640381, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 46.89248795203356, |
|
"learning_rate": 3.638111208117425e-07, |
|
"logits/chosen": 0.22267869114875793, |
|
"logits/rejected": 0.4508979916572571, |
|
"logps/chosen": -409.98974609375, |
|
"logps/rejected": -508.88055419921875, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0787599086761475, |
|
"rewards/margins": 1.0934727191925049, |
|
"rewards/rejected": -3.1722328662872314, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 43.02323311612351, |
|
"learning_rate": 3.5476766511433605e-07, |
|
"logits/chosen": 0.1800430715084076, |
|
"logits/rejected": 0.6425480842590332, |
|
"logps/chosen": -431.10736083984375, |
|
"logps/rejected": -516.4458618164062, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9750921726226807, |
|
"rewards/margins": 1.3207170963287354, |
|
"rewards/rejected": -3.295809268951416, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 43.154999607698095, |
|
"learning_rate": 3.455547107128602e-07, |
|
"logits/chosen": 0.3740110993385315, |
|
"logits/rejected": 0.8220480680465698, |
|
"logps/chosen": -410.6556701660156, |
|
"logps/rejected": -515.9549560546875, |
|
"loss": 0.3767, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.596968412399292, |
|
"rewards/margins": 1.6267616748809814, |
|
"rewards/rejected": -3.2237300872802734, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 56.90068596534485, |
|
"learning_rate": 3.361871628152338e-07, |
|
"logits/chosen": 0.6576219797134399, |
|
"logits/rejected": 1.0373657941818237, |
|
"logps/chosen": -398.47906494140625, |
|
"logps/rejected": -556.7415771484375, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.9783694744110107, |
|
"rewards/margins": 1.5746887922286987, |
|
"rewards/rejected": -3.55305814743042, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 41.49097538770333, |
|
"learning_rate": 3.2668017673896077e-07, |
|
"logits/chosen": 0.6066378355026245, |
|
"logits/rejected": 1.0441324710845947, |
|
"logps/chosen": -376.2064514160156, |
|
"logps/rejected": -497.462890625, |
|
"loss": 0.3733, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.7407310009002686, |
|
"rewards/margins": 1.581956148147583, |
|
"rewards/rejected": -3.3226871490478516, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_logits/chosen": -0.5456388592720032, |
|
"eval_logits/rejected": -0.2280205935239792, |
|
"eval_logps/chosen": -550.5716552734375, |
|
"eval_logps/rejected": -626.190185546875, |
|
"eval_loss": 0.47210657596588135, |
|
"eval_rewards/accuracies": 0.77734375, |
|
"eval_rewards/chosen": -2.1506011486053467, |
|
"eval_rewards/margins": 0.7802413105964661, |
|
"eval_rewards/rejected": -2.930842399597168, |
|
"eval_runtime": 98.1161, |
|
"eval_samples_per_second": 20.384, |
|
"eval_steps_per_second": 0.326, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 47.55353494901972, |
|
"learning_rate": 3.1704913339205103e-07, |
|
"logits/chosen": 0.5084329843521118, |
|
"logits/rejected": 0.796318531036377, |
|
"logps/chosen": -409.43585205078125, |
|
"logps/rejected": -561.5556030273438, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.992550253868103, |
|
"rewards/margins": 1.6422802209854126, |
|
"rewards/rejected": -3.6348299980163574, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 41.646877730648264, |
|
"learning_rate": 3.0730961438896885e-07, |
|
"logits/chosen": 0.4776241183280945, |
|
"logits/rejected": 0.7627217769622803, |
|
"logps/chosen": -482.1835021972656, |
|
"logps/rejected": -587.5792236328125, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.093543767929077, |
|
"rewards/margins": 1.4904192686080933, |
|
"rewards/rejected": -3.583962917327881, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 68.32669660083764, |
|
"learning_rate": 2.9747737684186795e-07, |
|
"logits/chosen": 0.7197389602661133, |
|
"logits/rejected": 0.8317638635635376, |
|
"logps/chosen": -388.28656005859375, |
|
"logps/rejected": -509.2151794433594, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7809364795684814, |
|
"rewards/margins": 1.5095723867416382, |
|
"rewards/rejected": -3.290508985519409, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 46.78192200543751, |
|
"learning_rate": 2.8756832786789663e-07, |
|
"logits/chosen": 0.3376988172531128, |
|
"logits/rejected": 0.8295138478279114, |
|
"logps/chosen": -403.0928649902344, |
|
"logps/rejected": -518.611083984375, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.744091272354126, |
|
"rewards/margins": 1.5630067586898804, |
|
"rewards/rejected": -3.307097911834717, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 49.72034219777285, |
|
"learning_rate": 2.7759849885381747e-07, |
|
"logits/chosen": 0.3917238414287567, |
|
"logits/rejected": 0.9007431268692017, |
|
"logps/chosen": -451.806884765625, |
|
"logps/rejected": -584.4218139648438, |
|
"loss": 0.3785, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.0996882915496826, |
|
"rewards/margins": 1.9295704364776611, |
|
"rewards/rejected": -4.029258728027344, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 38.3046078852496, |
|
"learning_rate": 2.675840195195762e-07, |
|
"logits/chosen": 0.1938302218914032, |
|
"logits/rejected": 0.7046247720718384, |
|
"logps/chosen": -375.27606201171875, |
|
"logps/rejected": -523.9801025390625, |
|
"loss": 0.3934, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.7863012552261353, |
|
"rewards/margins": 1.6471843719482422, |
|
"rewards/rejected": -3.433485507965088, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 39.056692194028, |
|
"learning_rate": 2.575410918227829e-07, |
|
"logits/chosen": 0.09105312079191208, |
|
"logits/rejected": 0.5196784138679504, |
|
"logps/chosen": -413.9867248535156, |
|
"logps/rejected": -532.4803466796875, |
|
"loss": 0.3755, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.720029592514038, |
|
"rewards/margins": 1.528271198272705, |
|
"rewards/rejected": -3.2483010292053223, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 46.6868254294557, |
|
"learning_rate": 2.474859637463226e-07, |
|
"logits/chosen": 0.21693472564220428, |
|
"logits/rejected": 0.8155421018600464, |
|
"logps/chosen": -418.37652587890625, |
|
"logps/rejected": -540.866455078125, |
|
"loss": 0.3846, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9778916835784912, |
|
"rewards/margins": 1.7564996480941772, |
|
"rewards/rejected": -3.734391450881958, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 45.512117273870444, |
|
"learning_rate": 2.3743490301150355e-07, |
|
"logits/chosen": 0.2570355236530304, |
|
"logits/rejected": 0.8997817039489746, |
|
"logps/chosen": -381.27801513671875, |
|
"logps/rejected": -525.5377807617188, |
|
"loss": 0.4012, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.6076080799102783, |
|
"rewards/margins": 1.8304884433746338, |
|
"rewards/rejected": -3.438096523284912, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 46.09704078060399, |
|
"learning_rate": 2.274041707592724e-07, |
|
"logits/chosen": 0.7786660194396973, |
|
"logits/rejected": 1.2057403326034546, |
|
"logps/chosen": -416.14068603515625, |
|
"logps/rejected": -602.9859008789062, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2852025032043457, |
|
"rewards/margins": 1.9095999002456665, |
|
"rewards/rejected": -4.194802284240723, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -0.4774431586265564, |
|
"eval_logits/rejected": -0.1090613454580307, |
|
"eval_logps/chosen": -540.1826171875, |
|
"eval_logps/rejected": -627.9595336914062, |
|
"eval_loss": 0.448412150144577, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -2.046710968017578, |
|
"eval_rewards/margins": 0.9018256068229675, |
|
"eval_rewards/rejected": -2.9485368728637695, |
|
"eval_runtime": 98.1848, |
|
"eval_samples_per_second": 20.37, |
|
"eval_steps_per_second": 0.326, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 42.744213876119844, |
|
"learning_rate": 2.17409995242075e-07, |
|
"logits/chosen": 0.6994825005531311, |
|
"logits/rejected": 1.289393663406372, |
|
"logps/chosen": -405.2342224121094, |
|
"logps/rejected": -555.2643432617188, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.062455415725708, |
|
"rewards/margins": 1.8831449747085571, |
|
"rewards/rejected": -3.9456000328063965, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 44.25862131066792, |
|
"learning_rate": 2.0746854556892544e-07, |
|
"logits/chosen": 0.7421714067459106, |
|
"logits/rejected": 0.9166728258132935, |
|
"logps/chosen": -363.72222900390625, |
|
"logps/rejected": -499.4908752441406, |
|
"loss": 0.4102, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.795539140701294, |
|
"rewards/margins": 1.4331713914871216, |
|
"rewards/rejected": -3.228710889816284, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 40.42456029676201, |
|
"learning_rate": 1.9759590554616173e-07, |
|
"logits/chosen": 0.2788628935813904, |
|
"logits/rejected": 0.5978427529335022, |
|
"logps/chosen": -387.8989562988281, |
|
"logps/rejected": -499.9576110839844, |
|
"loss": 0.4053, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6805702447891235, |
|
"rewards/margins": 1.3731516599655151, |
|
"rewards/rejected": -3.0537219047546387, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 43.79592437572997, |
|
"learning_rate": 1.8780804765620746e-07, |
|
"logits/chosen": 0.37570881843566895, |
|
"logits/rejected": 0.5200439691543579, |
|
"logps/chosen": -394.23284912109375, |
|
"logps/rejected": -548.2333374023438, |
|
"loss": 0.384, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5941615104675293, |
|
"rewards/margins": 1.527552843093872, |
|
"rewards/rejected": -3.1217141151428223, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 44.94669101797897, |
|
"learning_rate": 1.7812080721643973e-07, |
|
"logits/chosen": 0.6379637122154236, |
|
"logits/rejected": 1.1335102319717407, |
|
"logps/chosen": -422.62200927734375, |
|
"logps/rejected": -535.2354736328125, |
|
"loss": 0.3932, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.0112552642822266, |
|
"rewards/margins": 1.6570736169815063, |
|
"rewards/rejected": -3.6683287620544434, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 48.51576878403802, |
|
"learning_rate": 1.6854985675997063e-07, |
|
"logits/chosen": 0.5151522755622864, |
|
"logits/rejected": 0.9227844476699829, |
|
"logps/chosen": -410.75244140625, |
|
"logps/rejected": -543.8304443359375, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.9362386465072632, |
|
"rewards/margins": 1.548099398612976, |
|
"rewards/rejected": -3.4843380451202393, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 42.77055197730572, |
|
"learning_rate": 1.5911068067978818e-07, |
|
"logits/chosen": 0.7765737771987915, |
|
"logits/rejected": 0.9592781066894531, |
|
"logps/chosen": -391.6842041015625, |
|
"logps/rejected": -575.3435668945312, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0532357692718506, |
|
"rewards/margins": 1.811832070350647, |
|
"rewards/rejected": -3.865067720413208, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 51.09604434640814, |
|
"learning_rate": 1.4981855017728197e-07, |
|
"logits/chosen": 0.596177875995636, |
|
"logits/rejected": 0.7803729772567749, |
|
"logps/chosen": -459.51422119140625, |
|
"logps/rejected": -612.7260131835938, |
|
"loss": 0.388, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.421908140182495, |
|
"rewards/margins": 1.5485522747039795, |
|
"rewards/rejected": -3.9704601764678955, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 51.69715596466598, |
|
"learning_rate": 1.406884985556804e-07, |
|
"logits/chosen": 0.6335197687149048, |
|
"logits/rejected": 1.1092630624771118, |
|
"logps/chosen": -429.76690673828125, |
|
"logps/rejected": -580.2468872070312, |
|
"loss": 0.3807, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2301127910614014, |
|
"rewards/margins": 1.8223087787628174, |
|
"rewards/rejected": -4.052420616149902, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 48.435911535292384, |
|
"learning_rate": 1.3173529689837354e-07, |
|
"logits/chosen": 0.5912660956382751, |
|
"logits/rejected": 1.1899088621139526, |
|
"logps/chosen": -393.476318359375, |
|
"logps/rejected": -521.782958984375, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8528053760528564, |
|
"rewards/margins": 1.6730989217758179, |
|
"rewards/rejected": -3.5259041786193848, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -0.509851336479187, |
|
"eval_logits/rejected": -0.14121857285499573, |
|
"eval_logps/chosen": -538.1624145507812, |
|
"eval_logps/rejected": -623.8541259765625, |
|
"eval_loss": 0.44193577766418457, |
|
"eval_rewards/accuracies": 0.80859375, |
|
"eval_rewards/chosen": -2.0265088081359863, |
|
"eval_rewards/margins": 0.8809735774993896, |
|
"eval_rewards/rejected": -2.907482147216797, |
|
"eval_runtime": 98.167, |
|
"eval_samples_per_second": 20.373, |
|
"eval_steps_per_second": 0.326, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 48.985755457205066, |
|
"learning_rate": 1.2297343017146726e-07, |
|
"logits/chosen": 0.7694305181503296, |
|
"logits/rejected": 1.232879877090454, |
|
"logps/chosen": -402.1836853027344, |
|
"logps/rejected": -533.408447265625, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.905542016029358, |
|
"rewards/margins": 1.6221548318862915, |
|
"rewards/rejected": -3.5276970863342285, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 45.28513242475784, |
|
"learning_rate": 1.1441707378923474e-07, |
|
"logits/chosen": 0.5253760814666748, |
|
"logits/rejected": 1.0413273572921753, |
|
"logps/chosen": -359.5643615722656, |
|
"logps/rejected": -514.2081909179688, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.579487919807434, |
|
"rewards/margins": 1.767327070236206, |
|
"rewards/rejected": -3.3468146324157715, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 47.72652227607087, |
|
"learning_rate": 1.06080070680377e-07, |
|
"logits/chosen": 0.4920094907283783, |
|
"logits/rejected": 1.009433627128601, |
|
"logps/chosen": -399.2576599121094, |
|
"logps/rejected": -537.9578247070312, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7137682437896729, |
|
"rewards/margins": 1.7276941537857056, |
|
"rewards/rejected": -3.441462755203247, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 42.1168430015071, |
|
"learning_rate": 9.797590889219587e-08, |
|
"logits/chosen": 0.3111940026283264, |
|
"logits/rejected": 0.8665814399719238, |
|
"logps/chosen": -396.842529296875, |
|
"logps/rejected": -543.9876098632812, |
|
"loss": 0.3843, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7231413125991821, |
|
"rewards/margins": 1.8444896936416626, |
|
"rewards/rejected": -3.567631244659424, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 47.41933670532933, |
|
"learning_rate": 9.011769976891367e-08, |
|
"logits/chosen": 0.4944031834602356, |
|
"logits/rejected": 0.8744715452194214, |
|
"logps/chosen": -398.05615234375, |
|
"logps/rejected": -543.6096801757812, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8718990087509155, |
|
"rewards/margins": 1.6193087100982666, |
|
"rewards/rejected": -3.4912078380584717, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 52.75260796298546, |
|
"learning_rate": 8.251815673944218e-08, |
|
"logits/chosen": 0.5813334584236145, |
|
"logits/rejected": 0.9786221385002136, |
|
"logps/chosen": -443.66070556640625, |
|
"logps/rejected": -576.3490600585938, |
|
"loss": 0.3822, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.220869779586792, |
|
"rewards/margins": 1.742889404296875, |
|
"rewards/rejected": -3.963758945465088, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 46.64520061062158, |
|
"learning_rate": 7.518957474892148e-08, |
|
"logits/chosen": 0.6128578186035156, |
|
"logits/rejected": 1.1231881380081177, |
|
"logps/chosen": -427.1106872558594, |
|
"logps/rejected": -589.3102416992188, |
|
"loss": 0.3662, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.0986740589141846, |
|
"rewards/margins": 1.9974746704101562, |
|
"rewards/rejected": -4.096148490905762, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 44.32719204523107, |
|
"learning_rate": 6.814381036730274e-08, |
|
"logits/chosen": 0.44363918900489807, |
|
"logits/rejected": 0.8115978240966797, |
|
"logps/chosen": -397.6707763671875, |
|
"logps/rejected": -538.56591796875, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9131567478179932, |
|
"rewards/margins": 1.6610631942749023, |
|
"rewards/rejected": -3.5742194652557373, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 43.744460103075866, |
|
"learning_rate": 6.139226260715872e-08, |
|
"logits/chosen": 0.34574732184410095, |
|
"logits/rejected": 0.7309020161628723, |
|
"logps/chosen": -390.32464599609375, |
|
"logps/rejected": -550.9197998046875, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.7989364862442017, |
|
"rewards/margins": 1.8234875202178955, |
|
"rewards/rejected": -3.622424364089966, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 48.21671181557863, |
|
"learning_rate": 5.4945854481754734e-08, |
|
"logits/chosen": 0.4160235822200775, |
|
"logits/rejected": 1.0240848064422607, |
|
"logps/chosen": -393.590576171875, |
|
"logps/rejected": -540.9241333007812, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.9948371648788452, |
|
"rewards/margins": 1.689173936843872, |
|
"rewards/rejected": -3.6840109825134277, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -0.6920372247695923, |
|
"eval_logits/rejected": -0.3223564326763153, |
|
"eval_logps/chosen": -527.349609375, |
|
"eval_logps/rejected": -613.8932495117188, |
|
"eval_loss": 0.43294557929039, |
|
"eval_rewards/accuracies": 0.82421875, |
|
"eval_rewards/chosen": -1.9183804988861084, |
|
"eval_rewards/margins": 0.8894931077957153, |
|
"eval_rewards/rejected": -2.8078737258911133, |
|
"eval_runtime": 98.1374, |
|
"eval_samples_per_second": 20.38, |
|
"eval_steps_per_second": 0.326, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 42.53084626680963, |
|
"learning_rate": 4.881501533321605e-08, |
|
"logits/chosen": 0.6980074048042297, |
|
"logits/rejected": 1.0298550128936768, |
|
"logps/chosen": -367.0564880371094, |
|
"logps/rejected": -539.99560546875, |
|
"loss": 0.3547, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.8503217697143555, |
|
"rewards/margins": 1.9031312465667725, |
|
"rewards/rejected": -3.753452777862549, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 43.590506229310456, |
|
"learning_rate": 4.300966395938377e-08, |
|
"logits/chosen": 0.35197392106056213, |
|
"logits/rejected": 0.8350766897201538, |
|
"logps/chosen": -427.9037170410156, |
|
"logps/rejected": -580.8751831054688, |
|
"loss": 0.3788, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0454230308532715, |
|
"rewards/margins": 1.8579833507537842, |
|
"rewards/rejected": -3.9034061431884766, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 47.15415328548373, |
|
"learning_rate": 3.7539192566655246e-08, |
|
"logits/chosen": 0.3688026964664459, |
|
"logits/rejected": 0.7924972772598267, |
|
"logps/chosen": -387.2108459472656, |
|
"logps/rejected": -532.4842529296875, |
|
"loss": 0.3762, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.6555856466293335, |
|
"rewards/margins": 1.8027565479278564, |
|
"rewards/rejected": -3.4583423137664795, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 41.72651096064494, |
|
"learning_rate": 3.24124515747731e-08, |
|
"logits/chosen": 0.4526204466819763, |
|
"logits/rejected": 0.7684503793716431, |
|
"logps/chosen": -406.00042724609375, |
|
"logps/rejected": -571.0294189453125, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0768213272094727, |
|
"rewards/margins": 1.7967207431793213, |
|
"rewards/rejected": -3.8735415935516357, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 47.004010938683734, |
|
"learning_rate": 2.763773529814506e-08, |
|
"logits/chosen": 0.24592173099517822, |
|
"logits/rejected": 0.5948923826217651, |
|
"logps/chosen": -437.3650817871094, |
|
"logps/rejected": -581.8604125976562, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.974538803100586, |
|
"rewards/margins": 1.9196981191635132, |
|
"rewards/rejected": -3.8942363262176514, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 56.33205281532714, |
|
"learning_rate": 2.3222768526860698e-08, |
|
"logits/chosen": 0.2990577220916748, |
|
"logits/rejected": 0.7854124903678894, |
|
"logps/chosen": -404.5032653808594, |
|
"logps/rejected": -561.688720703125, |
|
"loss": 0.3938, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9624149799346924, |
|
"rewards/margins": 1.855332374572754, |
|
"rewards/rejected": -3.8177475929260254, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 43.51724396608159, |
|
"learning_rate": 1.9174694029115146e-08, |
|
"logits/chosen": 0.18542930483818054, |
|
"logits/rejected": 0.5257433652877808, |
|
"logps/chosen": -424.1546325683594, |
|
"logps/rejected": -532.9678344726562, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8116706609725952, |
|
"rewards/margins": 1.6489944458007812, |
|
"rewards/rejected": -3.460665225982666, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 51.298202533295, |
|
"learning_rate": 1.5500060995258134e-08, |
|
"logits/chosen": 0.3892073333263397, |
|
"logits/rejected": 0.8499504327774048, |
|
"logps/chosen": -402.9557189941406, |
|
"logps/rejected": -541.4577026367188, |
|
"loss": 0.349, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.8981506824493408, |
|
"rewards/margins": 1.7807424068450928, |
|
"rewards/rejected": -3.6788933277130127, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 56.6017962844276, |
|
"learning_rate": 1.2204814442165812e-08, |
|
"logits/chosen": 0.3551040589809418, |
|
"logits/rejected": 0.8326929807662964, |
|
"logps/chosen": -402.6451416015625, |
|
"logps/rejected": -552.5445556640625, |
|
"loss": 0.386, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.9424476623535156, |
|
"rewards/margins": 1.9533637762069702, |
|
"rewards/rejected": -3.8958117961883545, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 49.36333315496645, |
|
"learning_rate": 9.294285595075669e-09, |
|
"logits/chosen": 0.06378497928380966, |
|
"logits/rejected": 0.5464959144592285, |
|
"logps/chosen": -430.5462951660156, |
|
"logps/rejected": -562.2453002929688, |
|
"loss": 0.4052, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9655787944793701, |
|
"rewards/margins": 1.7598493099212646, |
|
"rewards/rejected": -3.7254281044006348, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -0.8024855852127075, |
|
"eval_logits/rejected": -0.4436371624469757, |
|
"eval_logps/chosen": -524.2042236328125, |
|
"eval_logps/rejected": -612.2493286132812, |
|
"eval_loss": 0.42916327714920044, |
|
"eval_rewards/accuracies": 0.82421875, |
|
"eval_rewards/chosen": -1.8869271278381348, |
|
"eval_rewards/margins": 0.9045072793960571, |
|
"eval_rewards/rejected": -2.7914342880249023, |
|
"eval_runtime": 98.1154, |
|
"eval_samples_per_second": 20.384, |
|
"eval_steps_per_second": 0.326, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 48.45659164140374, |
|
"learning_rate": 6.773183262446914e-09, |
|
"logits/chosen": 0.2793930172920227, |
|
"logits/rejected": 0.8751212954521179, |
|
"logps/chosen": -400.6767883300781, |
|
"logps/rejected": -544.5294799804688, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9480419158935547, |
|
"rewards/margins": 1.711806297302246, |
|
"rewards/rejected": -3.65984845161438, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 48.21463789648397, |
|
"learning_rate": 4.645586217799452e-09, |
|
"logits/chosen": 0.24326184391975403, |
|
"logits/rejected": 0.6566700339317322, |
|
"logps/chosen": -410.050537109375, |
|
"logps/rejected": -576.2342529296875, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.893699049949646, |
|
"rewards/margins": 1.8373210430145264, |
|
"rewards/rejected": -3.731020450592041, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 52.40196558130504, |
|
"learning_rate": 2.9149366008568987e-09, |
|
"logits/chosen": 0.2516610622406006, |
|
"logits/rejected": 0.6028949022293091, |
|
"logps/chosen": -397.42755126953125, |
|
"logps/rejected": -558.4515380859375, |
|
"loss": 0.3856, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.790833830833435, |
|
"rewards/margins": 1.8916391134262085, |
|
"rewards/rejected": -3.6824734210968018, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 45.18885600860689, |
|
"learning_rate": 1.5840343486700215e-09, |
|
"logits/chosen": 0.011555513367056847, |
|
"logits/rejected": 0.5860650539398193, |
|
"logps/chosen": -406.7879638671875, |
|
"logps/rejected": -555.0967407226562, |
|
"loss": 0.3728, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.6412866115570068, |
|
"rewards/margins": 1.9584299325942993, |
|
"rewards/rejected": -3.5997166633605957, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 45.90265978309936, |
|
"learning_rate": 6.550326657293881e-10, |
|
"logits/chosen": 0.08577422052621841, |
|
"logits/rejected": 0.549113929271698, |
|
"logps/chosen": -403.1221618652344, |
|
"logps/rejected": -571.7515869140625, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8727197647094727, |
|
"rewards/margins": 2.0556139945983887, |
|
"rewards/rejected": -3.9283337593078613, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 48.04876217861222, |
|
"learning_rate": 1.2943454039654467e-10, |
|
"logits/chosen": 0.5522348284721375, |
|
"logits/rejected": 0.82818204164505, |
|
"logps/chosen": -399.8492126464844, |
|
"logps/rejected": -529.6903076171875, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0089919567108154, |
|
"rewards/margins": 1.499420404434204, |
|
"rewards/rejected": -3.5084125995635986, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 868, |
|
"total_flos": 0.0, |
|
"train_loss": 0.42912535238925215, |
|
"train_runtime": 13911.1927, |
|
"train_samples_per_second": 7.989, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 868, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|