|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998037291462217, |
|
"eval_steps": 500, |
|
"global_step": 2547, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9607843137254902e-08, |
|
"logits/chosen": 0.46488896012306213, |
|
"logits/rejected": 0.6080908179283142, |
|
"logps/chosen": -248.03536987304688, |
|
"logps/rejected": -250.7172393798828, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"logits/chosen": 0.5732576847076416, |
|
"logits/rejected": 0.5053917765617371, |
|
"logps/chosen": -283.8868103027344, |
|
"logps/rejected": -267.1470947265625, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.42592594027519226, |
|
"rewards/chosen": 6.855401807115413e-06, |
|
"rewards/margins": -2.866806426027324e-06, |
|
"rewards/rejected": 9.722218237584457e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.921568627450981e-07, |
|
"logits/chosen": 0.6395555138587952, |
|
"logits/rejected": 0.5183161497116089, |
|
"logps/chosen": -259.010498046875, |
|
"logps/rejected": -230.4682159423828, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.49166664481163025, |
|
"rewards/chosen": 4.217164314468391e-05, |
|
"rewards/margins": 4.323801113059744e-05, |
|
"rewards/rejected": -1.0663676448530168e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.882352941176471e-07, |
|
"logits/chosen": 0.6133766770362854, |
|
"logits/rejected": 0.5772495269775391, |
|
"logps/chosen": -268.6424255371094, |
|
"logps/rejected": -238.4060516357422, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.429166704416275, |
|
"rewards/chosen": -2.178473187086638e-05, |
|
"rewards/margins": -3.491970710456371e-05, |
|
"rewards/rejected": 1.3134970686223824e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.843137254901962e-07, |
|
"logits/chosen": 0.5451570749282837, |
|
"logits/rejected": 0.6063296794891357, |
|
"logps/chosen": -270.48089599609375, |
|
"logps/rejected": -240.4629669189453, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.429166704416275, |
|
"rewards/chosen": -5.210627568885684e-05, |
|
"rewards/margins": -1.3891922208131291e-05, |
|
"rewards/rejected": -3.821435529971495e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.80392156862745e-07, |
|
"logits/chosen": 0.5579402446746826, |
|
"logits/rejected": 0.6521514058113098, |
|
"logps/chosen": -308.3138732910156, |
|
"logps/rejected": -270.6910095214844, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.49166664481163025, |
|
"rewards/chosen": -6.496746323136904e-07, |
|
"rewards/margins": 5.1773578888969496e-05, |
|
"rewards/rejected": -5.242325642029755e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"logits/chosen": 0.5895996689796448, |
|
"logits/rejected": 0.5530626177787781, |
|
"logps/chosen": -271.9648742675781, |
|
"logps/rejected": -238.8866729736328, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 4.7397597882081755e-06, |
|
"rewards/margins": 8.998825069284067e-05, |
|
"rewards/rejected": -8.5248495452106e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"logits/chosen": 0.5712030529975891, |
|
"logits/rejected": 0.5446642637252808, |
|
"logps/chosen": -269.52349853515625, |
|
"logps/rejected": -244.25918579101562, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": -0.00014829839346930385, |
|
"rewards/margins": 3.822638973360881e-05, |
|
"rewards/rejected": -0.00018652477592695504, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"logits/chosen": 0.5794577598571777, |
|
"logits/rejected": 0.6224395632743835, |
|
"logps/chosen": -262.1968688964844, |
|
"logps/rejected": -252.41336059570312, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.4833333492279053, |
|
"rewards/chosen": -0.00011436897329986095, |
|
"rewards/margins": 8.153673115884885e-05, |
|
"rewards/rejected": -0.00019590572628658265, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"logits/chosen": 0.607342541217804, |
|
"logits/rejected": 0.6603871583938599, |
|
"logps/chosen": -282.23468017578125, |
|
"logps/rejected": -247.54745483398438, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": -7.813036063453183e-05, |
|
"rewards/margins": 0.00024410069454461336, |
|
"rewards/rejected": -0.0003222310624551028, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.96078431372549e-06, |
|
"logits/chosen": 0.5872822999954224, |
|
"logits/rejected": 0.512497067451477, |
|
"logps/chosen": -297.87933349609375, |
|
"logps/rejected": -265.9036560058594, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": -0.0002379983925493434, |
|
"rewards/margins": 0.0001731793163344264, |
|
"rewards/rejected": -0.0004111776943318546, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"logits/chosen": 0.5177062749862671, |
|
"logits/rejected": 0.5935253500938416, |
|
"logps/chosen": -284.13128662109375, |
|
"logps/rejected": -252.48641967773438, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": -0.0005169311771169305, |
|
"rewards/margins": 0.00025343807647004724, |
|
"rewards/rejected": -0.0007703691953793168, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"logits/chosen": 0.5826085209846497, |
|
"logits/rejected": 0.5734565258026123, |
|
"logps/chosen": -279.1020202636719, |
|
"logps/rejected": -242.3232879638672, |
|
"loss": 0.0615, |
|
"rewards/accuracies": 0.5208333730697632, |
|
"rewards/chosen": -0.0006812514620833099, |
|
"rewards/margins": 0.0002726772800087929, |
|
"rewards/rejected": -0.0009539287420921028, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.549019607843137e-06, |
|
"logits/chosen": 0.5514948964118958, |
|
"logits/rejected": 0.6129211783409119, |
|
"logps/chosen": -297.36810302734375, |
|
"logps/rejected": -250.8700408935547, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.5458333492279053, |
|
"rewards/chosen": -0.0008248643134720623, |
|
"rewards/margins": 0.0005141490837559104, |
|
"rewards/rejected": -0.0013390134554356337, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7450980392156867e-06, |
|
"logits/chosen": 0.5837856531143188, |
|
"logits/rejected": 0.5866830348968506, |
|
"logps/chosen": -278.97528076171875, |
|
"logps/rejected": -263.4961242675781, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": -0.0009940828895196319, |
|
"rewards/margins": 0.000614291406236589, |
|
"rewards/rejected": -0.001608374179340899, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"logits/chosen": 0.5553902983665466, |
|
"logits/rejected": 0.6875311732292175, |
|
"logps/chosen": -302.75372314453125, |
|
"logps/rejected": -287.02630615234375, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": -0.0014637492131441832, |
|
"rewards/margins": 0.00041431220597587526, |
|
"rewards/rejected": -0.001878061331808567, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"logits/chosen": 0.5743287205696106, |
|
"logits/rejected": 0.6214945912361145, |
|
"logps/chosen": -273.1415710449219, |
|
"logps/rejected": -248.86343383789062, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.0014291107654571533, |
|
"rewards/margins": 0.0008312534773722291, |
|
"rewards/rejected": -0.0022603641264140606, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": 0.4786551892757416, |
|
"logits/rejected": 0.5745611190795898, |
|
"logps/chosen": -275.05743408203125, |
|
"logps/rejected": -235.23709106445312, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.001848508371040225, |
|
"rewards/margins": 0.0009307243162766099, |
|
"rewards/rejected": -0.0027792328037321568, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.529411764705883e-06, |
|
"logits/chosen": 0.4652465879917145, |
|
"logits/rejected": 0.5753687620162964, |
|
"logps/chosen": -267.81134033203125, |
|
"logps/rejected": -247.77261352539062, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.0021076188422739506, |
|
"rewards/margins": 0.0012388969771564007, |
|
"rewards/rejected": -0.0033465158194303513, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"logits/chosen": 0.4822580814361572, |
|
"logits/rejected": 0.5309303998947144, |
|
"logps/chosen": -272.3936767578125, |
|
"logps/rejected": -253.17068481445312, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.5708333849906921, |
|
"rewards/chosen": -0.0028272040653973818, |
|
"rewards/margins": 0.0016173187177628279, |
|
"rewards/rejected": -0.004444523248821497, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.92156862745098e-06, |
|
"logits/chosen": 0.5600818395614624, |
|
"logits/rejected": 0.5540789365768433, |
|
"logps/chosen": -288.64727783203125, |
|
"logps/rejected": -252.9949951171875, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.00358955143019557, |
|
"rewards/margins": 0.001761708059348166, |
|
"rewards/rejected": -0.005351259373128414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.11764705882353e-06, |
|
"logits/chosen": 0.4279872477054596, |
|
"logits/rejected": 0.542730450630188, |
|
"logps/chosen": -290.51312255859375, |
|
"logps/rejected": -259.2461853027344, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.6041667461395264, |
|
"rewards/chosen": -0.004497007466852665, |
|
"rewards/margins": 0.0022050284314900637, |
|
"rewards/rejected": -0.006702035665512085, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.313725490196079e-06, |
|
"logits/chosen": 0.4644894599914551, |
|
"logits/rejected": 0.5338133573532104, |
|
"logps/chosen": -289.86248779296875, |
|
"logps/rejected": -260.25262451171875, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": -0.005599470343440771, |
|
"rewards/margins": 0.0019665162544697523, |
|
"rewards/rejected": -0.007565985433757305, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.509803921568628e-06, |
|
"logits/chosen": 0.4923954904079437, |
|
"logits/rejected": 0.5235131978988647, |
|
"logps/chosen": -298.415771484375, |
|
"logps/rejected": -281.25933837890625, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.005643138196319342, |
|
"rewards/margins": 0.002189520513638854, |
|
"rewards/rejected": -0.007832659408450127, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.705882352941177e-06, |
|
"logits/chosen": 0.4870142936706543, |
|
"logits/rejected": 0.4811806082725525, |
|
"logps/chosen": -316.99267578125, |
|
"logps/rejected": -287.41534423828125, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.0062836273573338985, |
|
"rewards/margins": 0.0031557553447782993, |
|
"rewards/rejected": -0.009439383633434772, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.901960784313726e-06, |
|
"logits/chosen": 0.5249590873718262, |
|
"logits/rejected": 0.516154408454895, |
|
"logps/chosen": -310.1789855957031, |
|
"logps/rejected": -269.47296142578125, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.00644350191578269, |
|
"rewards/margins": 0.00401445385068655, |
|
"rewards/rejected": -0.010457955300807953, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999941289086112e-06, |
|
"logits/chosen": 0.42474403977394104, |
|
"logits/rejected": 0.5383282899856567, |
|
"logps/chosen": -313.0384521484375, |
|
"logps/rejected": -276.7310485839844, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -0.006402502302080393, |
|
"rewards/margins": 0.005376026965677738, |
|
"rewards/rejected": -0.011778528802096844, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999471618320339e-06, |
|
"logits/chosen": 0.4861672818660736, |
|
"logits/rejected": 0.4541027545928955, |
|
"logps/chosen": -308.2907409667969, |
|
"logps/rejected": -281.0884704589844, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.009800055995583534, |
|
"rewards/margins": 0.005087652709335089, |
|
"rewards/rejected": -0.014887707307934761, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998532365027117e-06, |
|
"logits/chosen": 0.47271719574928284, |
|
"logits/rejected": 0.5171914100646973, |
|
"logps/chosen": -307.691650390625, |
|
"logps/rejected": -277.66546630859375, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.010870738886296749, |
|
"rewards/margins": 0.004916337318718433, |
|
"rewards/rejected": -0.015787076205015182, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997123705666514e-06, |
|
"logits/chosen": 0.37636083364486694, |
|
"logits/rejected": 0.45900458097457886, |
|
"logps/chosen": -302.5892639160156, |
|
"logps/rejected": -288.510498046875, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.013394233770668507, |
|
"rewards/margins": 0.005068282596766949, |
|
"rewards/rejected": -0.018462518230080605, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995245904887195e-06, |
|
"logits/chosen": 0.3868557810783386, |
|
"logits/rejected": 0.5042775273323059, |
|
"logps/chosen": -296.7281799316406, |
|
"logps/rejected": -283.5146484375, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": -0.018061377108097076, |
|
"rewards/margins": 0.004579090513288975, |
|
"rewards/rejected": -0.022640468552708626, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992899315476696e-06, |
|
"logits/chosen": 0.45571646094322205, |
|
"logits/rejected": 0.552790641784668, |
|
"logps/chosen": -299.4598388671875, |
|
"logps/rejected": -281.2596740722656, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.5958333611488342, |
|
"rewards/chosen": -0.015838582068681717, |
|
"rewards/margins": 0.007758588995784521, |
|
"rewards/rejected": -0.023597171530127525, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990084378295148e-06, |
|
"logits/chosen": 0.3956819474697113, |
|
"logits/rejected": 0.4794086515903473, |
|
"logps/chosen": -290.8548278808594, |
|
"logps/rejected": -266.06280517578125, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.5875000357627869, |
|
"rewards/chosen": -0.017532404512166977, |
|
"rewards/margins": 0.008752369321882725, |
|
"rewards/rejected": -0.026284774765372276, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986801622192453e-06, |
|
"logits/chosen": 0.3517320156097412, |
|
"logits/rejected": 0.40463319420814514, |
|
"logps/chosen": -299.07025146484375, |
|
"logps/rejected": -276.71099853515625, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": -0.021012626588344574, |
|
"rewards/margins": 0.00989647675305605, |
|
"rewards/rejected": -0.03090910241007805, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9830516639089226e-06, |
|
"logits/chosen": 0.33298957347869873, |
|
"logits/rejected": 0.4724348485469818, |
|
"logps/chosen": -321.5062561035156, |
|
"logps/rejected": -284.62286376953125, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.02476133033633232, |
|
"rewards/margins": 0.010583627037703991, |
|
"rewards/rejected": -0.03534495830535889, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978835207959414e-06, |
|
"logits/chosen": 0.3737282454967499, |
|
"logits/rejected": 0.34736761450767517, |
|
"logps/chosen": -336.1554260253906, |
|
"logps/rejected": -318.351318359375, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.03460691124200821, |
|
"rewards/margins": 0.013388733379542828, |
|
"rewards/rejected": -0.047995638102293015, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9741530465009665e-06, |
|
"logits/chosen": 0.19469328224658966, |
|
"logits/rejected": 0.26699501276016235, |
|
"logps/chosen": -305.50714111328125, |
|
"logps/rejected": -293.6611022949219, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.6041666269302368, |
|
"rewards/chosen": -0.03571179881691933, |
|
"rewards/margins": 0.020327303558588028, |
|
"rewards/rejected": -0.05603910610079765, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.969006059183984e-06, |
|
"logits/chosen": 0.22031648457050323, |
|
"logits/rejected": 0.17535282671451569, |
|
"logps/chosen": -339.7189025878906, |
|
"logps/rejected": -320.98211669921875, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04812643676996231, |
|
"rewards/margins": 0.02219363860785961, |
|
"rewards/rejected": -0.07032007724046707, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963395212986964e-06, |
|
"logits/chosen": 0.17484304308891296, |
|
"logits/rejected": 0.343574583530426, |
|
"logps/chosen": -330.0755310058594, |
|
"logps/rejected": -316.24786376953125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.6208333969116211, |
|
"rewards/chosen": -0.04701613634824753, |
|
"rewards/margins": 0.023009616881608963, |
|
"rewards/rejected": -0.07002574950456619, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.957321562034833e-06, |
|
"logits/chosen": 0.20497021079063416, |
|
"logits/rejected": 0.14503000676631927, |
|
"logps/chosen": -324.77587890625, |
|
"logps/rejected": -312.4165954589844, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.056038498878479004, |
|
"rewards/margins": 0.020049121230840683, |
|
"rewards/rejected": -0.07608762383460999, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950786247400908e-06, |
|
"logits/chosen": 0.14520034193992615, |
|
"logits/rejected": 0.17736613750457764, |
|
"logps/chosen": -342.2245178222656, |
|
"logps/rejected": -356.9132995605469, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.5708333849906921, |
|
"rewards/chosen": -0.07601647078990936, |
|
"rewards/margins": 0.01825210079550743, |
|
"rewards/rejected": -0.09426857531070709, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.943790496892513e-06, |
|
"logits/chosen": 0.05470971390604973, |
|
"logits/rejected": 0.11131185293197632, |
|
"logps/chosen": -342.9344482421875, |
|
"logps/rejected": -324.174560546875, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": -0.07838026434183121, |
|
"rewards/margins": 0.02048862725496292, |
|
"rewards/rejected": -0.09886889159679413, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936335624820313e-06, |
|
"logits/chosen": 0.003329972270876169, |
|
"logits/rejected": 0.11333286762237549, |
|
"logps/chosen": -394.786865234375, |
|
"logps/rejected": -365.4358215332031, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.5916667580604553, |
|
"rewards/chosen": -0.0723833367228508, |
|
"rewards/margins": 0.019510764628648758, |
|
"rewards/rejected": -0.09189411252737045, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9284230317513906e-06, |
|
"logits/chosen": 0.0464923158288002, |
|
"logits/rejected": 0.10706806182861328, |
|
"logps/chosen": -389.9523620605469, |
|
"logps/rejected": -353.79107666015625, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.08335821330547333, |
|
"rewards/margins": 0.021867142990231514, |
|
"rewards/rejected": -0.10522536188364029, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920054204246116e-06, |
|
"logits/chosen": 0.033209413290023804, |
|
"logits/rejected": 0.10846443474292755, |
|
"logps/chosen": -370.37774658203125, |
|
"logps/rejected": -356.85919189453125, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": -0.08408842980861664, |
|
"rewards/margins": 0.023422975093126297, |
|
"rewards/rejected": -0.10751141607761383, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.911230714578858e-06, |
|
"logits/chosen": 0.06528304517269135, |
|
"logits/rejected": 0.03408993408083916, |
|
"logps/chosen": -376.26800537109375, |
|
"logps/rejected": -403.13470458984375, |
|
"loss": 0.0451, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10300055891275406, |
|
"rewards/margins": 0.028705894947052002, |
|
"rewards/rejected": -0.13170644640922546, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.90195422044261e-06, |
|
"logits/chosen": 0.009843332692980766, |
|
"logits/rejected": 0.08538283407688141, |
|
"logps/chosen": -380.8959045410156, |
|
"logps/rejected": -362.66143798828125, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.08409236371517181, |
|
"rewards/margins": 0.024981295689940453, |
|
"rewards/rejected": -0.10907366126775742, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89222646463754e-06, |
|
"logits/chosen": -0.025348594412207603, |
|
"logits/rejected": 0.07642585784196854, |
|
"logps/chosen": -367.01043701171875, |
|
"logps/rejected": -378.41986083984375, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09914330393075943, |
|
"rewards/margins": 0.0271566454321146, |
|
"rewards/rejected": -0.12629994750022888, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882049274743578e-06, |
|
"logits/chosen": 0.08742884546518326, |
|
"logits/rejected": -0.006139741744846106, |
|
"logps/chosen": -395.2368469238281, |
|
"logps/rejected": -397.25640869140625, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.09513958543539047, |
|
"rewards/margins": 0.032758679240942, |
|
"rewards/rejected": -0.12789827585220337, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8714245627770515e-06, |
|
"logits/chosen": 0.0017001063097268343, |
|
"logits/rejected": 0.0077015915885567665, |
|
"logps/chosen": -409.57733154296875, |
|
"logps/rejected": -395.6904296875, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 0.6208332777023315, |
|
"rewards/chosen": -0.11873996257781982, |
|
"rewards/margins": 0.029652219265699387, |
|
"rewards/rejected": -0.1483922004699707, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860354324831482e-06, |
|
"logits/chosen": -0.04007488489151001, |
|
"logits/rejected": -0.007833145558834076, |
|
"logps/chosen": -427.4029235839844, |
|
"logps/rejected": -418.568359375, |
|
"loss": 0.0494, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.13312438130378723, |
|
"rewards/margins": 0.033408962190151215, |
|
"rewards/rejected": -0.16653335094451904, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.848840640702565e-06, |
|
"logits/chosen": -0.07128571718931198, |
|
"logits/rejected": 0.001640728092752397, |
|
"logps/chosen": -411.7811584472656, |
|
"logps/rejected": -417.4090270996094, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.6291667222976685, |
|
"rewards/chosen": -0.14686502516269684, |
|
"rewards/margins": 0.0317012295126915, |
|
"rewards/rejected": -0.17856626212596893, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.836885673497435e-06, |
|
"logits/chosen": -0.002718993928283453, |
|
"logits/rejected": 0.0009809813927859068, |
|
"logps/chosen": -444.13201904296875, |
|
"logps/rejected": -433.43511962890625, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1496659815311432, |
|
"rewards/margins": 0.03328583389520645, |
|
"rewards/rejected": -0.18295182287693024, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824491669228279e-06, |
|
"logits/chosen": 0.019337791949510574, |
|
"logits/rejected": 0.09313885867595673, |
|
"logps/chosen": -413.6084899902344, |
|
"logps/rejected": -422.8143005371094, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.15184900164604187, |
|
"rewards/margins": 0.027125831693410873, |
|
"rewards/rejected": -0.17897482216358185, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.811660956390372e-06, |
|
"logits/chosen": 0.047072622925043106, |
|
"logits/rejected": -0.007110513746738434, |
|
"logps/chosen": -463.8948669433594, |
|
"logps/rejected": -487.029541015625, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.6708333492279053, |
|
"rewards/chosen": -0.1728728711605072, |
|
"rewards/margins": 0.03820256516337395, |
|
"rewards/rejected": -0.21107542514801025, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.798395945524615e-06, |
|
"logits/chosen": 0.038352273404598236, |
|
"logits/rejected": 0.03103743866086006, |
|
"logps/chosen": -426.7752380371094, |
|
"logps/rejected": -435.8916931152344, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.17635078728199005, |
|
"rewards/margins": 0.03284124284982681, |
|
"rewards/rejected": -0.20919200778007507, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784699128764654e-06, |
|
"logits/chosen": 0.03960705175995827, |
|
"logits/rejected": 0.08261826634407043, |
|
"logps/chosen": -458.94873046875, |
|
"logps/rejected": -475.46539306640625, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.18301726877689362, |
|
"rewards/margins": 0.03872082382440567, |
|
"rewards/rejected": -0.2217380702495575, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770573079368691e-06, |
|
"logits/chosen": 0.034317515790462494, |
|
"logits/rejected": 0.12880605459213257, |
|
"logps/chosen": -512.765625, |
|
"logps/rejected": -510.3446350097656, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.20771527290344238, |
|
"rewards/margins": 0.03588930517435074, |
|
"rewards/rejected": -0.24360458552837372, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.756020451236025e-06, |
|
"logits/chosen": -0.017928753048181534, |
|
"logits/rejected": 0.12967847287654877, |
|
"logps/chosen": -548.6043090820312, |
|
"logps/rejected": -566.4415283203125, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.21577982604503632, |
|
"rewards/margins": 0.04475008323788643, |
|
"rewards/rejected": -0.26052993535995483, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.741043978408463e-06, |
|
"logits/chosen": 0.0831088125705719, |
|
"logits/rejected": 0.1524641066789627, |
|
"logps/chosen": -465.2798767089844, |
|
"logps/rejected": -489.66558837890625, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.18829752504825592, |
|
"rewards/margins": 0.055931150913238525, |
|
"rewards/rejected": -0.24422867596149445, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725646474556666e-06, |
|
"logits/chosen": 0.13520978391170502, |
|
"logits/rejected": 0.03108205832540989, |
|
"logps/chosen": -504.00933837890625, |
|
"logps/rejected": -523.7846069335938, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.2136494219303131, |
|
"rewards/margins": 0.0509483627974987, |
|
"rewards/rejected": -0.26459774374961853, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.709830832451538e-06, |
|
"logits/chosen": 0.08839813619852066, |
|
"logits/rejected": 0.1621231734752655, |
|
"logps/chosen": -543.0064086914062, |
|
"logps/rejected": -552.148681640625, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.5750001072883606, |
|
"rewards/chosen": -0.21853478252887726, |
|
"rewards/margins": 0.02961021102964878, |
|
"rewards/rejected": -0.24814501404762268, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693600023420758e-06, |
|
"logits/chosen": 0.12001373618841171, |
|
"logits/rejected": 0.15668334066867828, |
|
"logps/chosen": -525.3939208984375, |
|
"logps/rejected": -518.4581298828125, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.20465150475502014, |
|
"rewards/margins": 0.050372164696455, |
|
"rewards/rejected": -0.25502365827560425, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.676957096790536e-06, |
|
"logits/chosen": 0.03348086029291153, |
|
"logits/rejected": 0.053055208176374435, |
|
"logps/chosen": -540.022216796875, |
|
"logps/rejected": -549.9537963867188, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.25113242864608765, |
|
"rewards/margins": 0.0484883114695549, |
|
"rewards/rejected": -0.29962077736854553, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.659905179312743e-06, |
|
"logits/chosen": -0.022173499688506126, |
|
"logits/rejected": -0.0015158101450651884, |
|
"logps/chosen": -499.20263671875, |
|
"logps/rejected": -524.68115234375, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.5958333611488342, |
|
"rewards/chosen": -0.20415392518043518, |
|
"rewards/margins": 0.0474516786634922, |
|
"rewards/rejected": -0.2516055703163147, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.642447474577466e-06, |
|
"logits/chosen": 0.0385822094976902, |
|
"logits/rejected": 0.08059495687484741, |
|
"logps/chosen": -467.4093322753906, |
|
"logps/rejected": -448.34063720703125, |
|
"loss": 0.04, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.18831539154052734, |
|
"rewards/margins": 0.029135623946785927, |
|
"rewards/rejected": -0.21745100617408752, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6245872624111535e-06, |
|
"logits/chosen": 0.07350125908851624, |
|
"logits/rejected": 0.07948148250579834, |
|
"logps/chosen": -486.54443359375, |
|
"logps/rejected": -482.19207763671875, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": -0.21189257502555847, |
|
"rewards/margins": 0.02745920978486538, |
|
"rewards/rejected": -0.2393517792224884, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.606327898260413e-06, |
|
"logits/chosen": 0.08668817579746246, |
|
"logits/rejected": 0.10542847216129303, |
|
"logps/chosen": -524.11181640625, |
|
"logps/rejected": -522.0614013671875, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": -0.23068180680274963, |
|
"rewards/margins": 0.027634575963020325, |
|
"rewards/rejected": -0.25831639766693115, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.587672812561626e-06, |
|
"logits/chosen": 0.11421390622854233, |
|
"logits/rejected": 0.12782898545265198, |
|
"logps/chosen": -481.06671142578125, |
|
"logps/rejected": -506.188232421875, |
|
"loss": 0.037, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2014632523059845, |
|
"rewards/margins": 0.04268348962068558, |
|
"rewards/rejected": -0.24414674937725067, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5686255100964535e-06, |
|
"logits/chosen": 0.18164952099323273, |
|
"logits/rejected": 0.17569103837013245, |
|
"logps/chosen": -491.39691162109375, |
|
"logps/rejected": -484.49127197265625, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": -0.2082110345363617, |
|
"rewards/margins": 0.022325094789266586, |
|
"rewards/rejected": -0.23053613305091858, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549189569333387e-06, |
|
"logits/chosen": 0.1008072942495346, |
|
"logits/rejected": 0.10313601791858673, |
|
"logps/chosen": -464.47589111328125, |
|
"logps/rejected": -479.8761291503906, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.6041666269302368, |
|
"rewards/chosen": -0.207403302192688, |
|
"rewards/margins": 0.03284333646297455, |
|
"rewards/rejected": -0.24024665355682373, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.529368641755453e-06, |
|
"logits/chosen": 0.17117619514465332, |
|
"logits/rejected": 0.13185498118400574, |
|
"logps/chosen": -490.5037536621094, |
|
"logps/rejected": -495.96728515625, |
|
"loss": 0.037, |
|
"rewards/accuracies": 0.60833340883255, |
|
"rewards/chosen": -0.1993740200996399, |
|
"rewards/margins": 0.03559664636850357, |
|
"rewards/rejected": -0.23497064411640167, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.509166451174194e-06, |
|
"logits/chosen": 0.0657685250043869, |
|
"logits/rejected": 0.2077389508485794, |
|
"logps/chosen": -550.5696411132812, |
|
"logps/rejected": -528.6010131835938, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23079976439476013, |
|
"rewards/margins": 0.03827609866857529, |
|
"rewards/rejected": -0.269075870513916, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.488586793030075e-06, |
|
"logits/chosen": 0.06361114978790283, |
|
"logits/rejected": 0.1869657188653946, |
|
"logps/chosen": -494.99249267578125, |
|
"logps/rejected": -481.3677673339844, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.6041666269302368, |
|
"rewards/chosen": -0.2145577371120453, |
|
"rewards/margins": 0.040170665830373764, |
|
"rewards/rejected": -0.25472837686538696, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4676335336794125e-06, |
|
"logits/chosen": 0.05644664913415909, |
|
"logits/rejected": 0.24188189208507538, |
|
"logps/chosen": -548.88037109375, |
|
"logps/rejected": -532.0226440429688, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": -0.25053781270980835, |
|
"rewards/margins": 0.0365094318985939, |
|
"rewards/rejected": -0.28704723715782166, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.446310609668001e-06, |
|
"logits/chosen": 0.11400438845157623, |
|
"logits/rejected": 0.18557283282279968, |
|
"logps/chosen": -490.58465576171875, |
|
"logps/rejected": -531.9818725585938, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.23414695262908936, |
|
"rewards/margins": 0.035373009741306305, |
|
"rewards/rejected": -0.2695199251174927, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.424622026991536e-06, |
|
"logits/chosen": 0.11939887702465057, |
|
"logits/rejected": 0.1422724574804306, |
|
"logps/chosen": -542.72900390625, |
|
"logps/rejected": -549.841796875, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 0.6208333969116211, |
|
"rewards/chosen": -0.24516990780830383, |
|
"rewards/margins": 0.0396624319255352, |
|
"rewards/rejected": -0.28483232855796814, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.402571860343006e-06, |
|
"logits/chosen": 0.1107536181807518, |
|
"logits/rejected": 0.09486749023199081, |
|
"logps/chosen": -526.3046875, |
|
"logps/rejected": -545.7291870117188, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.23441223800182343, |
|
"rewards/margins": 0.04228886589407921, |
|
"rewards/rejected": -0.27670109272003174, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3801642523471585e-06, |
|
"logits/chosen": 0.11268027126789093, |
|
"logits/rejected": 0.14215223491191864, |
|
"logps/chosen": -493.4173889160156, |
|
"logps/rejected": -503.95245361328125, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.21670031547546387, |
|
"rewards/margins": 0.042834293097257614, |
|
"rewards/rejected": -0.259534627199173, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.35740341278222e-06, |
|
"logits/chosen": 0.1416054219007492, |
|
"logits/rejected": 0.10916705429553986, |
|
"logps/chosen": -567.3914794921875, |
|
"logps/rejected": -554.6038818359375, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.24395592510700226, |
|
"rewards/margins": 0.042997851967811584, |
|
"rewards/rejected": -0.28695374727249146, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.334293617788992e-06, |
|
"logits/chosen": 0.05959437042474747, |
|
"logits/rejected": 0.09841112047433853, |
|
"logps/chosen": -537.2469482421875, |
|
"logps/rejected": -533.2855224609375, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.25295352935791016, |
|
"rewards/margins": 0.05385099723935127, |
|
"rewards/rejected": -0.30680450797080994, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.310839209067482e-06, |
|
"logits/chosen": 0.16151997447013855, |
|
"logits/rejected": 0.02669985592365265, |
|
"logps/chosen": -547.8688354492188, |
|
"logps/rejected": -549.8050537109375, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2529495060443878, |
|
"rewards/margins": 0.04387221857905388, |
|
"rewards/rejected": -0.2968217432498932, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2870445930612135e-06, |
|
"logits/chosen": 0.09316407889127731, |
|
"logits/rejected": 0.13477447628974915, |
|
"logps/chosen": -542.4185180664062, |
|
"logps/rejected": -559.9661254882812, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.6416666507720947, |
|
"rewards/chosen": -0.25684836506843567, |
|
"rewards/margins": 0.05672596022486687, |
|
"rewards/rejected": -0.31357431411743164, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.262914240129379e-06, |
|
"logits/chosen": 0.12965205311775208, |
|
"logits/rejected": 0.1867331862449646, |
|
"logps/chosen": -539.5619506835938, |
|
"logps/rejected": -548.6708984375, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 0.6208332777023315, |
|
"rewards/chosen": -0.23366475105285645, |
|
"rewards/margins": 0.05336092785000801, |
|
"rewards/rejected": -0.28702566027641296, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.238452683706979e-06, |
|
"logits/chosen": 0.1362159699201584, |
|
"logits/rejected": 0.1769772171974182, |
|
"logps/chosen": -512.2302856445312, |
|
"logps/rejected": -537.4754028320312, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.6624999642372131, |
|
"rewards/chosen": -0.24229979515075684, |
|
"rewards/margins": 0.051566917449235916, |
|
"rewards/rejected": -0.29386672377586365, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.213664519453115e-06, |
|
"logits/chosen": 0.18831387162208557, |
|
"logits/rejected": 0.23091156780719757, |
|
"logps/chosen": -525.028564453125, |
|
"logps/rejected": -547.172607421875, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.23653945326805115, |
|
"rewards/margins": 0.0342828668653965, |
|
"rewards/rejected": -0.27082234621047974, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.188554404387588e-06, |
|
"logits/chosen": 0.18981604278087616, |
|
"logits/rejected": 0.24236874282360077, |
|
"logps/chosen": -510.7457580566406, |
|
"logps/rejected": -513.2806396484375, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 0.5541666746139526, |
|
"rewards/chosen": -0.23030021786689758, |
|
"rewards/margins": 0.021967504173517227, |
|
"rewards/rejected": -0.2522676885128021, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.163127056015975e-06, |
|
"logits/chosen": 0.1602737456560135, |
|
"logits/rejected": 0.16516181826591492, |
|
"logps/chosen": -540.715576171875, |
|
"logps/rejected": -552.7288208007812, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.6375001072883606, |
|
"rewards/chosen": -0.24130482971668243, |
|
"rewards/margins": 0.037548404186964035, |
|
"rewards/rejected": -0.27885323762893677, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.137387251443335e-06, |
|
"logits/chosen": 0.13992497324943542, |
|
"logits/rejected": 0.2219216376543045, |
|
"logps/chosen": -486.76177978515625, |
|
"logps/rejected": -504.03802490234375, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.6750000715255737, |
|
"rewards/chosen": -0.2241494357585907, |
|
"rewards/margins": 0.049367643892765045, |
|
"rewards/rejected": -0.27351707220077515, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111339826476725e-06, |
|
"logits/chosen": 0.11946271359920502, |
|
"logits/rejected": 0.25534653663635254, |
|
"logps/chosen": -515.7346801757812, |
|
"logps/rejected": -519.0991821289062, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.2389453649520874, |
|
"rewards/margins": 0.034484557807445526, |
|
"rewards/rejected": -0.2734299600124359, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.084989674716679e-06, |
|
"logits/chosen": 0.21193349361419678, |
|
"logits/rejected": 0.1593044102191925, |
|
"logps/chosen": -566.4176025390625, |
|
"logps/rejected": -586.424072265625, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.6208333969116211, |
|
"rewards/chosen": -0.2513963282108307, |
|
"rewards/margins": 0.043524421751499176, |
|
"rewards/rejected": -0.29492074251174927, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05834174663784e-06, |
|
"logits/chosen": 0.12491929531097412, |
|
"logits/rejected": 0.21425482630729675, |
|
"logps/chosen": -551.5991821289062, |
|
"logps/rejected": -550.2804565429688, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.5958333015441895, |
|
"rewards/chosen": -0.259804904460907, |
|
"rewards/margins": 0.03690405935049057, |
|
"rewards/rejected": -0.29670897126197815, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.031401048658892e-06, |
|
"logits/chosen": 0.13918252289295197, |
|
"logits/rejected": 0.1187935471534729, |
|
"logps/chosen": -546.859130859375, |
|
"logps/rejected": -551.1666870117188, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.24925978481769562, |
|
"rewards/margins": 0.04101654142141342, |
|
"rewards/rejected": -0.29027634859085083, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.004172642202002e-06, |
|
"logits/chosen": 0.1344153732061386, |
|
"logits/rejected": 0.193574458360672, |
|
"logps/chosen": -535.381103515625, |
|
"logps/rejected": -538.9880981445312, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.60833340883255, |
|
"rewards/chosen": -0.2537057399749756, |
|
"rewards/margins": 0.041815683245658875, |
|
"rewards/rejected": -0.29552140831947327, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.976661642741908e-06, |
|
"logits/chosen": 0.13408444821834564, |
|
"logits/rejected": 0.13395357131958008, |
|
"logps/chosen": -571.904052734375, |
|
"logps/rejected": -573.4716796875, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.27147650718688965, |
|
"rewards/margins": 0.036444105207920074, |
|
"rewards/rejected": -0.3079206347465515, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.948873218844863e-06, |
|
"logits/chosen": 0.07277832925319672, |
|
"logits/rejected": 0.1558527946472168, |
|
"logps/chosen": -547.19580078125, |
|
"logps/rejected": -566.0330200195312, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.27663344144821167, |
|
"rewards/margins": 0.02979731187224388, |
|
"rewards/rejected": -0.30643078684806824, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920812591197604e-06, |
|
"logits/chosen": 0.17433296144008636, |
|
"logits/rejected": 0.22166447341442108, |
|
"logps/chosen": -513.27783203125, |
|
"logps/rejected": -523.3780517578125, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.25121426582336426, |
|
"rewards/margins": 0.043769314885139465, |
|
"rewards/rejected": -0.2949835956096649, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.892485031626527e-06, |
|
"logits/chosen": 0.10151199996471405, |
|
"logits/rejected": 0.2275511473417282, |
|
"logps/chosen": -542.8049926757812, |
|
"logps/rejected": -553.17724609375, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.26481813192367554, |
|
"rewards/margins": 0.041800495237112045, |
|
"rewards/rejected": -0.3066186308860779, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.863895862107255e-06, |
|
"logits/chosen": 0.0613768994808197, |
|
"logits/rejected": 0.17055214941501617, |
|
"logps/chosen": -565.1832885742188, |
|
"logps/rejected": -586.520751953125, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.26884034276008606, |
|
"rewards/margins": 0.048948947340250015, |
|
"rewards/rejected": -0.31778931617736816, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.835050453764779e-06, |
|
"logits/chosen": 0.05239003151655197, |
|
"logits/rejected": 0.084544338285923, |
|
"logps/chosen": -555.6310424804688, |
|
"logps/rejected": -569.8839721679688, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -0.27361077070236206, |
|
"rewards/margins": 0.05519675463438034, |
|
"rewards/rejected": -0.3288075625896454, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.80595422586438e-06, |
|
"logits/chosen": 0.04233277589082718, |
|
"logits/rejected": 0.19852963089942932, |
|
"logps/chosen": -601.9495239257812, |
|
"logps/rejected": -589.6444091796875, |
|
"loss": 0.0337, |
|
"rewards/accuracies": 0.6125000715255737, |
|
"rewards/chosen": -0.2874451279640198, |
|
"rewards/margins": 0.038885898888111115, |
|
"rewards/rejected": -0.3263310492038727, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7766126447934857e-06, |
|
"logits/chosen": 0.055559318512678146, |
|
"logits/rejected": 0.08241190761327744, |
|
"logps/chosen": -535.3719482421875, |
|
"logps/rejected": -538.2369995117188, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": -0.27672910690307617, |
|
"rewards/margins": 0.02185177244246006, |
|
"rewards/rejected": -0.2985808849334717, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7470312230346955e-06, |
|
"logits/chosen": 0.13304325938224792, |
|
"logits/rejected": 0.17154903709888458, |
|
"logps/chosen": -644.9939575195312, |
|
"logps/rejected": -635.8195190429688, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6291667222976685, |
|
"rewards/chosen": -0.31732743978500366, |
|
"rewards/margins": 0.04391757771372795, |
|
"rewards/rejected": -0.3612450361251831, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.717215518130127e-06, |
|
"logits/chosen": 0.05652935430407524, |
|
"logits/rejected": 0.14982140064239502, |
|
"logps/chosen": -623.8362426757812, |
|
"logps/rejected": -625.2267456054688, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.5458332896232605, |
|
"rewards/chosen": -0.3348296284675598, |
|
"rewards/margins": 0.03417374938726425, |
|
"rewards/rejected": -0.3690033555030823, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.687171131637314e-06, |
|
"logits/chosen": 0.1260356307029724, |
|
"logits/rejected": 0.22198334336280823, |
|
"logps/chosen": -650.363525390625, |
|
"logps/rejected": -636.438232421875, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3333287835121155, |
|
"rewards/margins": 0.04869426414370537, |
|
"rewards/rejected": -0.38202303647994995, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6569037080768153e-06, |
|
"logits/chosen": 0.08464168012142181, |
|
"logits/rejected": 0.1217307597398758, |
|
"logps/chosen": -594.9779663085938, |
|
"logps/rejected": -604.1603393554688, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -0.3139379620552063, |
|
"rewards/margins": 0.04148873686790466, |
|
"rewards/rejected": -0.35542672872543335, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6264189338717766e-06, |
|
"logits/chosen": 0.12213291972875595, |
|
"logits/rejected": 0.16966882348060608, |
|
"logps/chosen": -577.3529052734375, |
|
"logps/rejected": -577.2765502929688, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 0.6125000715255737, |
|
"rewards/chosen": -0.2956945598125458, |
|
"rewards/margins": 0.03605617582798004, |
|
"rewards/rejected": -0.3317507207393646, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.595722536279595e-06, |
|
"logits/chosen": 0.11031585931777954, |
|
"logits/rejected": 0.15994997322559357, |
|
"logps/chosen": -584.4676513671875, |
|
"logps/rejected": -583.6405639648438, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 0.6041666269302368, |
|
"rewards/chosen": -0.298969566822052, |
|
"rewards/margins": 0.04104112833738327, |
|
"rewards/rejected": -0.34001070261001587, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5648202823159317e-06, |
|
"logits/chosen": 0.08247041702270508, |
|
"logits/rejected": 0.11911840736865997, |
|
"logps/chosen": -548.5514526367188, |
|
"logps/rejected": -581.8680419921875, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.27981576323509216, |
|
"rewards/margins": 0.04667786508798599, |
|
"rewards/rejected": -0.32649365067481995, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5337179776712427e-06, |
|
"logits/chosen": 0.05764445662498474, |
|
"logits/rejected": 0.15337909758090973, |
|
"logps/chosen": -600.7415771484375, |
|
"logps/rejected": -617.4767456054688, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.29948121309280396, |
|
"rewards/margins": 0.03527333587408066, |
|
"rewards/rejected": -0.3347545266151428, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5024214656200497e-06, |
|
"logits/chosen": 0.07227407395839691, |
|
"logits/rejected": 0.06218891218304634, |
|
"logps/chosen": -563.1790771484375, |
|
"logps/rejected": -560.1734619140625, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.2846832275390625, |
|
"rewards/margins": 0.041954539716243744, |
|
"rewards/rejected": -0.32663780450820923, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4709366259231468e-06, |
|
"logits/chosen": 0.0697668045759201, |
|
"logits/rejected": 0.24307604134082794, |
|
"logps/chosen": -628.77001953125, |
|
"logps/rejected": -621.4201049804688, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.6041667461395264, |
|
"rewards/chosen": -0.30719098448753357, |
|
"rewards/margins": 0.05207127332687378, |
|
"rewards/rejected": -0.3592623174190521, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.439269373722957e-06, |
|
"logits/chosen": 0.0506308451294899, |
|
"logits/rejected": 0.15233644843101501, |
|
"logps/chosen": -559.6640014648438, |
|
"logps/rejected": -583.3889770507812, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.2970438003540039, |
|
"rewards/margins": 0.048429377377033234, |
|
"rewards/rejected": -0.34547320008277893, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4074256584322336e-06, |
|
"logits/chosen": 0.1312958151102066, |
|
"logits/rejected": 0.09558086097240448, |
|
"logps/chosen": -608.6600341796875, |
|
"logps/rejected": -633.423095703125, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.3313175439834595, |
|
"rewards/margins": 0.04614431411027908, |
|
"rewards/rejected": -0.3774617910385132, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.375411462616332e-06, |
|
"logits/chosen": 0.050155311822891235, |
|
"logits/rejected": 0.05635789781808853, |
|
"logps/chosen": -635.3673095703125, |
|
"logps/rejected": -645.3787231445312, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": -0.3184329867362976, |
|
"rewards/margins": 0.049739837646484375, |
|
"rewards/rejected": -0.368172824382782, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.343232800869247e-06, |
|
"logits/chosen": 0.06382627040147781, |
|
"logits/rejected": 0.16157305240631104, |
|
"logps/chosen": -633.6954345703125, |
|
"logps/rejected": -649.1434326171875, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6916666626930237, |
|
"rewards/chosen": -0.32204508781433105, |
|
"rewards/margins": 0.053830452263355255, |
|
"rewards/rejected": -0.3758755326271057, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.310895718683635e-06, |
|
"logits/chosen": 0.14830520749092102, |
|
"logits/rejected": 0.155593141913414, |
|
"logps/chosen": -621.237548828125, |
|
"logps/rejected": -618.225341796875, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.30767199397087097, |
|
"rewards/margins": 0.04294462502002716, |
|
"rewards/rejected": -0.3506166338920593, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.27840629131503e-06, |
|
"logits/chosen": 0.07861081510782242, |
|
"logits/rejected": 0.10706281661987305, |
|
"logps/chosen": -573.5535888671875, |
|
"logps/rejected": -588.395263671875, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2752663493156433, |
|
"rewards/margins": 0.04959608614444733, |
|
"rewards/rejected": -0.32486245036125183, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2457706226404715e-06, |
|
"logits/chosen": 0.07944418489933014, |
|
"logits/rejected": 0.1640586107969284, |
|
"logps/chosen": -587.9627685546875, |
|
"logps/rejected": -604.2811889648438, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.5875000357627869, |
|
"rewards/chosen": -0.29255762696266174, |
|
"rewards/margins": 0.04707466810941696, |
|
"rewards/rejected": -0.3396322429180145, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2129948440117487e-06, |
|
"logits/chosen": 0.10159436613321304, |
|
"logits/rejected": 0.15032300353050232, |
|
"logps/chosen": -637.3546142578125, |
|
"logps/rejected": -640.6038818359375, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3146159052848816, |
|
"rewards/margins": 0.04759891331195831, |
|
"rewards/rejected": -0.3622148633003235, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1800851131034904e-06, |
|
"logits/chosen": 0.11651058495044708, |
|
"logits/rejected": 0.10065825283527374, |
|
"logps/chosen": -591.7140502929688, |
|
"logps/rejected": -598.9595947265625, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.31949153542518616, |
|
"rewards/margins": 0.048882994800806046, |
|
"rewards/rejected": -0.3683745265007019, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.147047612756302e-06, |
|
"logits/chosen": 0.11179051548242569, |
|
"logits/rejected": 0.1353040635585785, |
|
"logps/chosen": -612.1154174804688, |
|
"logps/rejected": -651.6345825195312, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.3106861412525177, |
|
"rewards/margins": 0.052404046058654785, |
|
"rewards/rejected": -0.3630901873111725, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1138885498151843e-06, |
|
"logits/chosen": 0.1667296588420868, |
|
"logits/rejected": 0.13556842505931854, |
|
"logps/chosen": -565.1221923828125, |
|
"logps/rejected": -589.7633056640625, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.6708333492279053, |
|
"rewards/chosen": -0.2937767505645752, |
|
"rewards/margins": 0.06078268960118294, |
|
"rewards/rejected": -0.35455942153930664, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0806141539634294e-06, |
|
"logits/chosen": 0.10828538239002228, |
|
"logits/rejected": 0.15916824340820312, |
|
"logps/chosen": -575.6380004882812, |
|
"logps/rejected": -566.2510986328125, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2859388291835785, |
|
"rewards/margins": 0.05137751251459122, |
|
"rewards/rejected": -0.3373163640499115, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0472306765522393e-06, |
|
"logits/chosen": 0.11922699213027954, |
|
"logits/rejected": 0.1259177178144455, |
|
"logps/chosen": -606.871826171875, |
|
"logps/rejected": -612.5156860351562, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.30839142203330994, |
|
"rewards/margins": 0.03700650483369827, |
|
"rewards/rejected": -0.3453979790210724, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0137443894262634e-06, |
|
"logits/chosen": 0.14493581652641296, |
|
"logits/rejected": 0.16140879690647125, |
|
"logps/chosen": -594.2196044921875, |
|
"logps/rejected": -594.2235717773438, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.30472633242607117, |
|
"rewards/margins": 0.052565477788448334, |
|
"rewards/rejected": -0.3572917878627777, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.980161583745294e-06, |
|
"logits/chosen": 0.10105246305465698, |
|
"logits/rejected": 0.15723805129528046, |
|
"logps/chosen": -620.9005126953125, |
|
"logps/rejected": -618.7329711914062, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.6375000476837158, |
|
"rewards/chosen": -0.3132612705230713, |
|
"rewards/margins": 0.04581070318818092, |
|
"rewards/rejected": -0.3590719997882843, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.946488568802324e-06, |
|
"logits/chosen": 0.14716312289237976, |
|
"logits/rejected": 0.1699884533882141, |
|
"logps/chosen": -577.747314453125, |
|
"logps/rejected": -588.8140869140625, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2906225323677063, |
|
"rewards/margins": 0.04303520917892456, |
|
"rewards/rejected": -0.33365774154663086, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.912731670838207e-06, |
|
"logits/chosen": 0.12559036910533905, |
|
"logits/rejected": 0.14857833087444305, |
|
"logps/chosen": -573.2417602539062, |
|
"logps/rejected": -594.3810424804688, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.6291666626930237, |
|
"rewards/chosen": -0.2716652452945709, |
|
"rewards/margins": 0.04548722505569458, |
|
"rewards/rejected": -0.3171524703502655, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8788972318531272e-06, |
|
"logits/chosen": 0.13978327810764313, |
|
"logits/rejected": 0.15119092166423798, |
|
"logps/chosen": -543.723388671875, |
|
"logps/rejected": -543.7661743164062, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -0.2711487412452698, |
|
"rewards/margins": 0.03938648849725723, |
|
"rewards/rejected": -0.3105352520942688, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.844991608415113e-06, |
|
"logits/chosen": 0.07532133162021637, |
|
"logits/rejected": 0.12284326553344727, |
|
"logps/chosen": -540.3726806640625, |
|
"logps/rejected": -558.138671875, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2621740996837616, |
|
"rewards/margins": 0.039031222462654114, |
|
"rewards/rejected": -0.3012053370475769, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8110211704658073e-06, |
|
"logits/chosen": 0.06420323997735977, |
|
"logits/rejected": 0.11035875231027603, |
|
"logps/chosen": -504.5506286621094, |
|
"logps/rejected": -545.575439453125, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.2541031241416931, |
|
"rewards/margins": 0.0474877692759037, |
|
"rewards/rejected": -0.3015909194946289, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.776992300123732e-06, |
|
"logits/chosen": 0.02886870503425598, |
|
"logits/rejected": 0.08844368159770966, |
|
"logps/chosen": -561.7490234375, |
|
"logps/rejected": -580.228759765625, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.27741044759750366, |
|
"rewards/margins": 0.040386758744716644, |
|
"rewards/rejected": -0.3177972435951233, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.742911390485262e-06, |
|
"logits/chosen": 0.07097556442022324, |
|
"logits/rejected": 0.12344779819250107, |
|
"logps/chosen": -502.0489807128906, |
|
"logps/rejected": -517.57861328125, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2432735413312912, |
|
"rewards/margins": 0.028061959892511368, |
|
"rewards/rejected": -0.27133551239967346, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7087848444235354e-06, |
|
"logits/chosen": 0.0665472000837326, |
|
"logits/rejected": 0.12706132233142853, |
|
"logps/chosen": -494.16497802734375, |
|
"logps/rejected": -502.8155212402344, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.21378079056739807, |
|
"rewards/margins": 0.050316452980041504, |
|
"rewards/rejected": -0.2640972435474396, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.674619073385531e-06, |
|
"logits/chosen": 0.16853651404380798, |
|
"logits/rejected": 0.19799327850341797, |
|
"logps/chosen": -517.6676635742188, |
|
"logps/rejected": -565.3977661132812, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2295691967010498, |
|
"rewards/margins": 0.06171298772096634, |
|
"rewards/rejected": -0.29128220677375793, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.640420496187528e-06, |
|
"logits/chosen": 0.08946164697408676, |
|
"logits/rejected": 0.15218599140644073, |
|
"logps/chosen": -524.2635498046875, |
|
"logps/rejected": -535.5513916015625, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.23513486981391907, |
|
"rewards/margins": 0.049268923699855804, |
|
"rewards/rejected": -0.2844037711620331, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6061955378091896e-06, |
|
"logits/chosen": 0.057071197777986526, |
|
"logits/rejected": 0.09025406837463379, |
|
"logps/chosen": -501.2740173339844, |
|
"logps/rejected": -527.2225341796875, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.6375001072883606, |
|
"rewards/chosen": -0.22606611251831055, |
|
"rewards/margins": 0.05956178158521652, |
|
"rewards/rejected": -0.28562790155410767, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5719506281864838e-06, |
|
"logits/chosen": 0.09907601028680801, |
|
"logits/rejected": 0.11537568271160126, |
|
"logps/chosen": -517.09033203125, |
|
"logps/rejected": -532.591796875, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.23992018401622772, |
|
"rewards/margins": 0.0427953377366066, |
|
"rewards/rejected": -0.2827155292034149, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.537692201003671e-06, |
|
"logits/chosen": 0.0913078784942627, |
|
"logits/rejected": 0.0602453276515007, |
|
"logps/chosen": -498.9737243652344, |
|
"logps/rejected": -548.3724975585938, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2386229932308197, |
|
"rewards/margins": 0.045166097581386566, |
|
"rewards/rejected": -0.2837890386581421, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.503426692484594e-06, |
|
"logits/chosen": 0.08517016470432281, |
|
"logits/rejected": 0.07997085154056549, |
|
"logps/chosen": -555.7423095703125, |
|
"logps/rejected": -571.2945556640625, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.27391716837882996, |
|
"rewards/margins": 0.04417312890291214, |
|
"rewards/rejected": -0.3180902600288391, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4691605401834843e-06, |
|
"logits/chosen": 0.15011510252952576, |
|
"logits/rejected": 0.2677749693393707, |
|
"logps/chosen": -542.283935546875, |
|
"logps/rejected": -584.0543212890625, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -0.2576281428337097, |
|
"rewards/margins": 0.05184938758611679, |
|
"rewards/rejected": -0.3094775080680847, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.434900181775524e-06, |
|
"logits/chosen": 0.07792448252439499, |
|
"logits/rejected": 0.06877502799034119, |
|
"logps/chosen": -571.9642944335938, |
|
"logps/rejected": -567.0635375976562, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.6541666388511658, |
|
"rewards/chosen": -0.24740977585315704, |
|
"rewards/margins": 0.047786761075258255, |
|
"rewards/rejected": -0.2951965034008026, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.40065205384738e-06, |
|
"logits/chosen": 0.1122296079993248, |
|
"logits/rejected": 0.15267851948738098, |
|
"logps/chosen": -507.0908203125, |
|
"logps/rejected": -531.3499755859375, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.23170237243175507, |
|
"rewards/margins": 0.04316466301679611, |
|
"rewards/rejected": -0.2748670279979706, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3664225906879452e-06, |
|
"logits/chosen": 0.13994117081165314, |
|
"logits/rejected": 0.2027665674686432, |
|
"logps/chosen": -490.51116943359375, |
|
"logps/rejected": -501.07916259765625, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": -0.2336377650499344, |
|
"rewards/margins": 0.03639250993728638, |
|
"rewards/rejected": -0.27003028988838196, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3322182230795127e-06, |
|
"logits/chosen": 0.13082632422447205, |
|
"logits/rejected": 0.10433439910411835, |
|
"logps/chosen": -514.8374633789062, |
|
"logps/rejected": -540.4562377929688, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 0.6125000715255737, |
|
"rewards/chosen": -0.23975805938243866, |
|
"rewards/margins": 0.04342503473162651, |
|
"rewards/rejected": -0.28318309783935547, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.298045377089604e-06, |
|
"logits/chosen": 0.10040481388568878, |
|
"logits/rejected": 0.2444113940000534, |
|
"logps/chosen": -543.1686401367188, |
|
"logps/rejected": -562.5289916992188, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.6541666984558105, |
|
"rewards/chosen": -0.25354671478271484, |
|
"rewards/margins": 0.06049323081970215, |
|
"rewards/rejected": -0.3140399158000946, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2639104728636915e-06, |
|
"logits/chosen": 0.17033420503139496, |
|
"logits/rejected": 0.07545267045497894, |
|
"logps/chosen": -586.39599609375, |
|
"logps/rejected": -605.8544921875, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 0.5791666507720947, |
|
"rewards/chosen": -0.2881447374820709, |
|
"rewards/margins": 0.046030301600694656, |
|
"rewards/rejected": -0.3341750502586365, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2298199234190236e-06, |
|
"logits/chosen": 0.11797505617141724, |
|
"logits/rejected": 0.13803385198116302, |
|
"logps/chosen": -584.68701171875, |
|
"logps/rejected": -591.5270385742188, |
|
"loss": 0.04, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.2901507019996643, |
|
"rewards/margins": 0.05170051008462906, |
|
"rewards/rejected": -0.3418511748313904, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.195780133439794e-06, |
|
"logits/chosen": 0.16074219346046448, |
|
"logits/rejected": 0.1192971020936966, |
|
"logps/chosen": -555.1654052734375, |
|
"logps/rejected": -564.3656005859375, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.26911431550979614, |
|
"rewards/margins": 0.048468612134456635, |
|
"rewards/rejected": -0.317582905292511, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1617974980738814e-06, |
|
"logits/chosen": 0.1529623568058014, |
|
"logits/rejected": 0.1636894941329956, |
|
"logps/chosen": -540.774169921875, |
|
"logps/rejected": -556.2821655273438, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.2688151001930237, |
|
"rewards/margins": 0.046817291527986526, |
|
"rewards/rejected": -0.3156324028968811, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1278784017313688e-06, |
|
"logits/chosen": 0.14876937866210938, |
|
"logits/rejected": 0.17081575095653534, |
|
"logps/chosen": -536.3919067382812, |
|
"logps/rejected": -525.305419921875, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.25508958101272583, |
|
"rewards/margins": 0.0361848846077919, |
|
"rewards/rejected": -0.29127445816993713, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0940292168850913e-06, |
|
"logits/chosen": 0.11073969304561615, |
|
"logits/rejected": 0.1865236610174179, |
|
"logps/chosen": -536.0477905273438, |
|
"logps/rejected": -549.0618896484375, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.5624999403953552, |
|
"rewards/chosen": -0.27074000239372253, |
|
"rewards/margins": 0.03495349735021591, |
|
"rewards/rejected": -0.30569347739219666, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.060256302873421e-06, |
|
"logits/chosen": 0.11048384755849838, |
|
"logits/rejected": 0.13183829188346863, |
|
"logps/chosen": -535.2600708007812, |
|
"logps/rejected": -593.9940795898438, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.26926225423812866, |
|
"rewards/margins": 0.049711745232343674, |
|
"rewards/rejected": -0.31897395849227905, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.02656600470552e-06, |
|
"logits/chosen": 0.12331001460552216, |
|
"logits/rejected": 0.19340971112251282, |
|
"logps/chosen": -577.4415283203125, |
|
"logps/rejected": -596.8298950195312, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.6166667342185974, |
|
"rewards/chosen": -0.28572091460227966, |
|
"rewards/margins": 0.04659656435251236, |
|
"rewards/rejected": -0.3323175013065338, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.99296465186929e-06, |
|
"logits/chosen": 0.10413268953561783, |
|
"logits/rejected": 0.23993226885795593, |
|
"logps/chosen": -587.4747314453125, |
|
"logps/rejected": -588.8648071289062, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": -0.29135701060295105, |
|
"rewards/margins": 0.03427191823720932, |
|
"rewards/rejected": -0.32562893629074097, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.959458557142228e-06, |
|
"logits/chosen": 0.12116161733865738, |
|
"logits/rejected": 0.14634718000888824, |
|
"logps/chosen": -562.8546142578125, |
|
"logps/rejected": -598.8267822265625, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.2859250605106354, |
|
"rewards/margins": 0.04271562397480011, |
|
"rewards/rejected": -0.3286406993865967, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9260540154054317e-06, |
|
"logits/chosen": 0.10976073890924454, |
|
"logits/rejected": 0.23895160853862762, |
|
"logps/chosen": -553.7778930664062, |
|
"logps/rejected": -583.1566772460938, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.6708333492279053, |
|
"rewards/chosen": -0.28700724244117737, |
|
"rewards/margins": 0.04391475021839142, |
|
"rewards/rejected": -0.3309219777584076, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8927573024609666e-06, |
|
"logits/chosen": 0.18281932175159454, |
|
"logits/rejected": 0.12242082506418228, |
|
"logps/chosen": -515.3033447265625, |
|
"logps/rejected": -552.0582885742188, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": -0.2664439380168915, |
|
"rewards/margins": 0.04643597453832626, |
|
"rewards/rejected": -0.3128799498081207, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8595746738528045e-06, |
|
"logits/chosen": 0.08008557558059692, |
|
"logits/rejected": 0.21306416392326355, |
|
"logps/chosen": -580.7192993164062, |
|
"logps/rejected": -610.1173095703125, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28699517250061035, |
|
"rewards/margins": 0.048240162432193756, |
|
"rewards/rejected": -0.3352353870868683, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.826512363691586e-06, |
|
"logits/chosen": 0.0883159190416336, |
|
"logits/rejected": 0.19013457000255585, |
|
"logps/chosen": -533.0752563476562, |
|
"logps/rejected": -550.2881469726562, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.27697938680648804, |
|
"rewards/margins": 0.04447559267282486, |
|
"rewards/rejected": -0.3214550018310547, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7935765834833966e-06, |
|
"logits/chosen": 0.11177249252796173, |
|
"logits/rejected": 0.15451344847679138, |
|
"logps/chosen": -579.9695434570312, |
|
"logps/rejected": -609.8052368164062, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.298335462808609, |
|
"rewards/margins": 0.053702156990766525, |
|
"rewards/rejected": -0.35203760862350464, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7607735209627953e-06, |
|
"logits/chosen": 0.07564587891101837, |
|
"logits/rejected": 0.1478845179080963, |
|
"logps/chosen": -583.5760498046875, |
|
"logps/rejected": -615.6829223632812, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3110070824623108, |
|
"rewards/margins": 0.060508888214826584, |
|
"rewards/rejected": -0.3715159296989441, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7281093389303105e-06, |
|
"logits/chosen": 0.08597441017627716, |
|
"logits/rejected": 0.24169404804706573, |
|
"logps/chosen": -596.5226440429688, |
|
"logps/rejected": -639.04052734375, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.3174855411052704, |
|
"rewards/margins": 0.05773097276687622, |
|
"rewards/rejected": -0.375216543674469, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6955901740946136e-06, |
|
"logits/chosen": 0.12801986932754517, |
|
"logits/rejected": 0.15590648353099823, |
|
"logps/chosen": -595.8865966796875, |
|
"logps/rejected": -616.6199951171875, |
|
"loss": 0.0265, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.30426111817359924, |
|
"rewards/margins": 0.04693468287587166, |
|
"rewards/rejected": -0.3511958420276642, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.663222135919601e-06, |
|
"logits/chosen": 0.13937242329120636, |
|
"logits/rejected": 0.1421765387058258, |
|
"logps/chosen": -597.5296020507812, |
|
"logps/rejected": -603.4146728515625, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.6583333611488342, |
|
"rewards/chosen": -0.29278069734573364, |
|
"rewards/margins": 0.052191488444805145, |
|
"rewards/rejected": -0.3449721336364746, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6310113054765947e-06, |
|
"logits/chosen": 0.09593503177165985, |
|
"logits/rejected": 0.22683358192443848, |
|
"logps/chosen": -555.0337524414062, |
|
"logps/rejected": -579.9691772460938, |
|
"loss": 0.0433, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.28781309723854065, |
|
"rewards/margins": 0.05301021412014961, |
|
"rewards/rejected": -0.3408232629299164, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5989637343018705e-06, |
|
"logits/chosen": 0.1495979279279709, |
|
"logits/rejected": 0.20952686667442322, |
|
"logps/chosen": -569.4849853515625, |
|
"logps/rejected": -616.1697387695312, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.30492356419563293, |
|
"rewards/margins": 0.059173502027988434, |
|
"rewards/rejected": -0.36409705877304077, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5670854432597433e-06, |
|
"logits/chosen": 0.1326146423816681, |
|
"logits/rejected": 0.20724856853485107, |
|
"logps/chosen": -584.0203247070312, |
|
"logps/rejected": -590.6256103515625, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.28019410371780396, |
|
"rewards/margins": 0.04888462647795677, |
|
"rewards/rejected": -0.329078733921051, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5353824214114075e-06, |
|
"logits/chosen": 0.1035301685333252, |
|
"logits/rejected": 0.16259366273880005, |
|
"logps/chosen": -548.8323364257812, |
|
"logps/rejected": -565.8499755859375, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2619378864765167, |
|
"rewards/margins": 0.03961848467588425, |
|
"rewards/rejected": -0.3015563189983368, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5038606248897586e-06, |
|
"logits/chosen": 0.06201974302530289, |
|
"logits/rejected": 0.09014655649662018, |
|
"logps/chosen": -521.3331909179688, |
|
"logps/rejected": -544.2221069335938, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.2565329074859619, |
|
"rewards/margins": 0.04992014169692993, |
|
"rewards/rejected": -0.30645304918289185, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4725259757803983e-06, |
|
"logits/chosen": 0.16263772547245026, |
|
"logits/rejected": 0.13033676147460938, |
|
"logps/chosen": -553.0350341796875, |
|
"logps/rejected": -569.7715454101562, |
|
"loss": 0.0452, |
|
"rewards/accuracies": 0.6125000715255737, |
|
"rewards/chosen": -0.2759607434272766, |
|
"rewards/margins": 0.05112532526254654, |
|
"rewards/rejected": -0.32708609104156494, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4413843610090342e-06, |
|
"logits/chosen": 0.1796967089176178, |
|
"logits/rejected": 0.1423921287059784, |
|
"logps/chosen": -598.36181640625, |
|
"logps/rejected": -588.3529052734375, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.28392156958580017, |
|
"rewards/margins": 0.06371378898620605, |
|
"rewards/rejected": -0.347635418176651, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.410441631235487e-06, |
|
"logits/chosen": 0.12067948281764984, |
|
"logits/rejected": 0.18646660447120667, |
|
"logps/chosen": -594.8400268554688, |
|
"logps/rejected": -634.2889404296875, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -0.30511975288391113, |
|
"rewards/margins": 0.045869968831539154, |
|
"rewards/rejected": -0.3509897291660309, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3797035997545144e-06, |
|
"logits/chosen": 0.044562358409166336, |
|
"logits/rejected": 0.04120717570185661, |
|
"logps/chosen": -540.0699462890625, |
|
"logps/rejected": -543.9365234375, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.2733212113380432, |
|
"rewards/margins": 0.04933062568306923, |
|
"rewards/rejected": -0.32265186309814453, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3491760414036478e-06, |
|
"logits/chosen": 0.11400707066059113, |
|
"logits/rejected": 0.09495958685874939, |
|
"logps/chosen": -565.5853271484375, |
|
"logps/rejected": -576.4033203125, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.2805452346801758, |
|
"rewards/margins": 0.04857431724667549, |
|
"rewards/rejected": -0.32911956310272217, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3188646914782616e-06, |
|
"logits/chosen": 0.11315472424030304, |
|
"logits/rejected": 0.208398699760437, |
|
"logps/chosen": -598.4920043945312, |
|
"logps/rejected": -589.9212036132812, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -0.2833411991596222, |
|
"rewards/margins": 0.04889676719903946, |
|
"rewards/rejected": -0.33223795890808105, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.288775244654062e-06, |
|
"logits/chosen": 0.08946464955806732, |
|
"logits/rejected": 0.154561847448349, |
|
"logps/chosen": -599.8006591796875, |
|
"logps/rejected": -594.125732421875, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.28097003698349, |
|
"rewards/margins": 0.04311807453632355, |
|
"rewards/rejected": -0.32408809661865234, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2589133539172193e-06, |
|
"logits/chosen": 0.12887665629386902, |
|
"logits/rejected": 0.2141556292772293, |
|
"logps/chosen": -576.6539916992188, |
|
"logps/rejected": -609.6483764648438, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2800661623477936, |
|
"rewards/margins": 0.057719629257917404, |
|
"rewards/rejected": -0.3377857804298401, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2292846295023222e-06, |
|
"logits/chosen": 0.10555066913366318, |
|
"logits/rejected": 0.24661192297935486, |
|
"logps/chosen": -565.2025146484375, |
|
"logps/rejected": -572.82666015625, |
|
"loss": 0.03, |
|
"rewards/accuracies": 0.5291666984558105, |
|
"rewards/chosen": -0.2735806107521057, |
|
"rewards/margins": 0.03343730419874191, |
|
"rewards/rejected": -0.30701789259910583, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.19989463783837e-06, |
|
"logits/chosen": 0.07563383877277374, |
|
"logits/rejected": 0.14022833108901978, |
|
"logps/chosen": -563.0352783203125, |
|
"logps/rejected": -582.5489501953125, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.26767465472221375, |
|
"rewards/margins": 0.049182355403900146, |
|
"rewards/rejected": -0.3168570399284363, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1707489005029877e-06, |
|
"logits/chosen": 0.10331223905086517, |
|
"logits/rejected": 0.10989202558994293, |
|
"logps/chosen": -566.6705932617188, |
|
"logps/rejected": -583.3214111328125, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.279958575963974, |
|
"rewards/margins": 0.05120917409658432, |
|
"rewards/rejected": -0.3311677575111389, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1418528931850781e-06, |
|
"logits/chosen": 0.13162486255168915, |
|
"logits/rejected": 0.09169518947601318, |
|
"logps/chosen": -571.5591430664062, |
|
"logps/rejected": -575.8829956054688, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.28504106402397156, |
|
"rewards/margins": 0.039923541247844696, |
|
"rewards/rejected": -0.32496461272239685, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.113212044656087e-06, |
|
"logits/chosen": 0.11077318340539932, |
|
"logits/rejected": 0.14336992800235748, |
|
"logps/chosen": -573.6709594726562, |
|
"logps/rejected": -595.0328979492188, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.2933489680290222, |
|
"rewards/margins": 0.0513363853096962, |
|
"rewards/rejected": -0.3446853756904602, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0848317357500854e-06, |
|
"logits/chosen": 0.10393325984477997, |
|
"logits/rejected": 0.1483684927225113, |
|
"logps/chosen": -568.5980834960938, |
|
"logps/rejected": -586.1634521484375, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.28037458658218384, |
|
"rewards/margins": 0.05212429165840149, |
|
"rewards/rejected": -0.33249884843826294, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0567172983528534e-06, |
|
"logits/chosen": 0.026229266077280045, |
|
"logits/rejected": 0.13465450704097748, |
|
"logps/chosen": -508.6505432128906, |
|
"logps/rejected": -546.9326782226562, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": -0.27017685770988464, |
|
"rewards/margins": 0.041460223495960236, |
|
"rewards/rejected": -0.31163710355758667, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0288740144001722e-06, |
|
"logits/chosen": 0.09365380555391312, |
|
"logits/rejected": 0.0902700275182724, |
|
"logps/chosen": -572.3986206054688, |
|
"logps/rejected": -586.7833251953125, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.26681095361709595, |
|
"rewards/margins": 0.04616251215338707, |
|
"rewards/rejected": -0.3129734396934509, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0013071148854861e-06, |
|
"logits/chosen": 0.10842391103506088, |
|
"logits/rejected": 0.10454890877008438, |
|
"logps/chosen": -538.0030517578125, |
|
"logps/rejected": -558.1130981445312, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.26716285943984985, |
|
"rewards/margins": 0.05413733050227165, |
|
"rewards/rejected": -0.321300208568573, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.740217788771453e-07, |
|
"logits/chosen": 0.08302603662014008, |
|
"logits/rejected": 0.18150724470615387, |
|
"logps/chosen": -511.07769775390625, |
|
"logps/rejected": -545.5850219726562, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.24858680367469788, |
|
"rewards/margins": 0.04247165471315384, |
|
"rewards/rejected": -0.2910584807395935, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.470231325453958e-07, |
|
"logits/chosen": 0.12578806281089783, |
|
"logits/rejected": 0.231987863779068, |
|
"logps/chosen": -514.3307495117188, |
|
"logps/rejected": -531.3946533203125, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.24261601269245148, |
|
"rewards/margins": 0.05155906826257706, |
|
"rewards/rejected": -0.29417508840560913, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.203162481993175e-07, |
|
"logits/chosen": 0.16151490807533264, |
|
"logits/rejected": 0.17980621755123138, |
|
"logps/chosen": -496.478515625, |
|
"logps/rejected": -538.2314453125, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": -0.2318914234638214, |
|
"rewards/margins": 0.05293119698762894, |
|
"rewards/rejected": -0.28482261300086975, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.939061433338722e-07, |
|
"logits/chosen": 0.09906987845897675, |
|
"logits/rejected": 0.2187938690185547, |
|
"logps/chosen": -526.57275390625, |
|
"logps/rejected": -556.8406372070312, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.25222307443618774, |
|
"rewards/margins": 0.050073761492967606, |
|
"rewards/rejected": -0.30229681730270386, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.677977796872541e-07, |
|
"logits/chosen": 0.15915422141551971, |
|
"logits/rejected": 0.11833087354898453, |
|
"logps/chosen": -529.197021484375, |
|
"logps/rejected": -534.5230712890625, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.25595852732658386, |
|
"rewards/margins": 0.04330848529934883, |
|
"rewards/rejected": -0.299267053604126, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.419960623087129e-07, |
|
"logits/chosen": 0.20990967750549316, |
|
"logits/rejected": 0.13641339540481567, |
|
"logps/chosen": -507.27008056640625, |
|
"logps/rejected": -555.5889892578125, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -0.24782676994800568, |
|
"rewards/margins": 0.04908429831266403, |
|
"rewards/rejected": -0.2969110608100891, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.165058386370314e-07, |
|
"logits/chosen": 0.04395260289311409, |
|
"logits/rejected": 0.13358573615550995, |
|
"logps/chosen": -534.5526733398438, |
|
"logps/rejected": -559.8387451171875, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25756314396858215, |
|
"rewards/margins": 0.04230283945798874, |
|
"rewards/rejected": -0.2998659908771515, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.913318975898238e-07, |
|
"logits/chosen": 0.11071997880935669, |
|
"logits/rejected": 0.12993398308753967, |
|
"logps/chosen": -565.89794921875, |
|
"logps/rejected": -564.6920166015625, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": -0.2614702880382538, |
|
"rewards/margins": 0.04234743118286133, |
|
"rewards/rejected": -0.3038177192211151, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.664789686638272e-07, |
|
"logits/chosen": 0.09845000505447388, |
|
"logits/rejected": 0.16275520622730255, |
|
"logps/chosen": -514.97119140625, |
|
"logps/rejected": -554.59423828125, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2425486147403717, |
|
"rewards/margins": 0.05070207640528679, |
|
"rewards/rejected": -0.29325070977211, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.41951721046357e-07, |
|
"logits/chosen": 0.09489642083644867, |
|
"logits/rejected": 0.11213739961385727, |
|
"logps/chosen": -586.5117797851562, |
|
"logps/rejected": -600.290283203125, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.6083332896232605, |
|
"rewards/chosen": -0.27559903264045715, |
|
"rewards/margins": 0.047925811260938644, |
|
"rewards/rejected": -0.3235248625278473, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.177547627380987e-07, |
|
"logits/chosen": 0.08067229390144348, |
|
"logits/rejected": 0.1412743777036667, |
|
"logps/chosen": -555.6590576171875, |
|
"logps/rejected": -576.60791015625, |
|
"loss": 0.0507, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.27716270089149475, |
|
"rewards/margins": 0.04384743049740791, |
|
"rewards/rejected": -0.32101011276245117, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.93892639687386e-07, |
|
"logits/chosen": 0.09003408253192902, |
|
"logits/rejected": 0.1542806327342987, |
|
"logps/chosen": -576.6299438476562, |
|
"logps/rejected": -576.0133666992188, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": -0.27841562032699585, |
|
"rewards/margins": 0.045767974108457565, |
|
"rewards/rejected": -0.3241836130619049, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.703698349361437e-07, |
|
"logits/chosen": 0.10635080188512802, |
|
"logits/rejected": 0.05730734393000603, |
|
"logps/chosen": -577.123779296875, |
|
"logps/rejected": -601.00390625, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28797826170921326, |
|
"rewards/margins": 0.05532165244221687, |
|
"rewards/rejected": -0.34329989552497864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.471907677776426e-07, |
|
"logits/chosen": 0.15324482321739197, |
|
"logits/rejected": 0.06719908863306046, |
|
"logps/chosen": -561.4310302734375, |
|
"logps/rejected": -571.0897216796875, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2713034451007843, |
|
"rewards/margins": 0.058576516807079315, |
|
"rewards/rejected": -0.3298799395561218, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.243597929262404e-07, |
|
"logits/chosen": 0.06141387298703194, |
|
"logits/rejected": 0.14357991516590118, |
|
"logps/chosen": -573.6912841796875, |
|
"logps/rejected": -616.2233276367188, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.28281545639038086, |
|
"rewards/margins": 0.05899351090192795, |
|
"rewards/rejected": -0.3418089747428894, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.018811996992455e-07, |
|
"logits/chosen": 0.07172363996505737, |
|
"logits/rejected": 0.12844975292682648, |
|
"logps/chosen": -600.66455078125, |
|
"logps/rejected": -590.8489990234375, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -0.27809762954711914, |
|
"rewards/margins": 0.05610889941453934, |
|
"rewards/rejected": -0.3342065215110779, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.797592112110734e-07, |
|
"logits/chosen": 0.06431909650564194, |
|
"logits/rejected": 0.09170184284448624, |
|
"logps/chosen": -547.0779418945312, |
|
"logps/rejected": -577.0574340820312, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.2835572361946106, |
|
"rewards/margins": 0.051959216594696045, |
|
"rewards/rejected": -0.33551645278930664, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.579979835798361e-07, |
|
"logits/chosen": 0.0938582718372345, |
|
"logits/rejected": 0.1470281332731247, |
|
"logps/chosen": -540.1471557617188, |
|
"logps/rejected": -592.1015014648438, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -0.2827063202857971, |
|
"rewards/margins": 0.060177069157361984, |
|
"rewards/rejected": -0.3428834080696106, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.366016051465245e-07, |
|
"logits/chosen": 0.0799831822514534, |
|
"logits/rejected": 0.12903760373592377, |
|
"logps/chosen": -581.998779296875, |
|
"logps/rejected": -606.8611450195312, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2922040522098541, |
|
"rewards/margins": 0.05147022008895874, |
|
"rewards/rejected": -0.3436742424964905, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.155740957069186e-07, |
|
"logits/chosen": 0.10282020270824432, |
|
"logits/rejected": 0.14179909229278564, |
|
"logps/chosen": -559.0867919921875, |
|
"logps/rejected": -587.3806762695312, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.5791667103767395, |
|
"rewards/chosen": -0.27100348472595215, |
|
"rewards/margins": 0.05496737360954285, |
|
"rewards/rejected": -0.325970858335495, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.949194057563783e-07, |
|
"logits/chosen": 0.10007326304912567, |
|
"logits/rejected": 0.15941056609153748, |
|
"logps/chosen": -560.44140625, |
|
"logps/rejected": -567.3951416015625, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -0.26652681827545166, |
|
"rewards/margins": 0.050540946424007416, |
|
"rewards/rejected": -0.3170677125453949, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.746414157476506e-07, |
|
"logits/chosen": 0.1249409168958664, |
|
"logits/rejected": 0.054917313158512115, |
|
"logps/chosen": -530.5921630859375, |
|
"logps/rejected": -524.3117065429688, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.2425779104232788, |
|
"rewards/margins": 0.05763505771756172, |
|
"rewards/rejected": -0.30021294951438904, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5474393536184214e-07, |
|
"logits/chosen": 0.07765215635299683, |
|
"logits/rejected": 0.10989616811275482, |
|
"logps/chosen": -548.0401611328125, |
|
"logps/rejected": -552.0130004882812, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.26287007331848145, |
|
"rewards/margins": 0.05279888957738876, |
|
"rewards/rejected": -0.31566891074180603, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.352307027926828e-07, |
|
"logits/chosen": 0.06283166259527206, |
|
"logits/rejected": 0.21911051869392395, |
|
"logps/chosen": -560.373779296875, |
|
"logps/rejected": -582.3746337890625, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.27789947390556335, |
|
"rewards/margins": 0.05053368955850601, |
|
"rewards/rejected": -0.32843321561813354, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1610538404421837e-07, |
|
"logits/chosen": 0.021615978330373764, |
|
"logits/rejected": 0.13444559276103973, |
|
"logps/chosen": -549.7747802734375, |
|
"logps/rejected": -571.6242065429688, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.27358126640319824, |
|
"rewards/margins": 0.04225178807973862, |
|
"rewards/rejected": -0.31583306193351746, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9737157224207265e-07, |
|
"logits/chosen": 0.12500056624412537, |
|
"logits/rejected": 0.20162144303321838, |
|
"logps/chosen": -572.5682983398438, |
|
"logps/rejected": -572.8493041992188, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.27676117420196533, |
|
"rewards/margins": 0.04713458567857742, |
|
"rewards/rejected": -0.32389578223228455, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7903278695839456e-07, |
|
"logits/chosen": 0.06253985315561295, |
|
"logits/rejected": 0.14370861649513245, |
|
"logps/chosen": -567.3818359375, |
|
"logps/rejected": -567.6959838867188, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.5541666746139526, |
|
"rewards/chosen": -0.2864064574241638, |
|
"rewards/margins": 0.04097602888941765, |
|
"rewards/rejected": -0.32738247513771057, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.610924735506274e-07, |
|
"logits/chosen": 0.10091836750507355, |
|
"logits/rejected": 0.1071823462843895, |
|
"logps/chosen": -560.47705078125, |
|
"logps/rejected": -581.1371459960938, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.2785682678222656, |
|
"rewards/margins": 0.050090573728084564, |
|
"rewards/rejected": -0.3286588191986084, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4355400251421977e-07, |
|
"logits/chosen": 0.04096272587776184, |
|
"logits/rejected": 0.25352293252944946, |
|
"logps/chosen": -542.7060546875, |
|
"logps/rejected": -555.5399169921875, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.5625000596046448, |
|
"rewards/chosen": -0.273470014333725, |
|
"rewards/margins": 0.03956315666437149, |
|
"rewards/rejected": -0.31303319334983826, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2642066884940064e-07, |
|
"logits/chosen": 0.11667946726083755, |
|
"logits/rejected": 0.10625074058771133, |
|
"logps/chosen": -543.03564453125, |
|
"logps/rejected": -578.9127197265625, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.6083332896232605, |
|
"rewards/chosen": -0.2704079747200012, |
|
"rewards/margins": 0.05524685978889465, |
|
"rewards/rejected": -0.32565486431121826, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0969569144214147e-07, |
|
"logits/chosen": 0.11611845344305038, |
|
"logits/rejected": 0.13405680656433105, |
|
"logps/chosen": -521.346435546875, |
|
"logps/rejected": -532.4287719726562, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.5708333849906921, |
|
"rewards/chosen": -0.26295241713523865, |
|
"rewards/margins": 0.04004523903131485, |
|
"rewards/rejected": -0.3029976785182953, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.933822124594124e-07, |
|
"logits/chosen": 0.13651199638843536, |
|
"logits/rejected": 0.14987996220588684, |
|
"logps/chosen": -579.0687866210938, |
|
"logps/rejected": -603.724609375, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.6458333730697632, |
|
"rewards/chosen": -0.2872801423072815, |
|
"rewards/margins": 0.05356990173459053, |
|
"rewards/rejected": -0.34085002541542053, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.774832967588556e-07, |
|
"logits/chosen": 0.10344930738210678, |
|
"logits/rejected": 0.1647767573595047, |
|
"logps/chosen": -556.7415771484375, |
|
"logps/rejected": -550.4581298828125, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": -0.26868194341659546, |
|
"rewards/margins": 0.05047239735722542, |
|
"rewards/rejected": -0.3191543221473694, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6200193131298376e-07, |
|
"logits/chosen": 0.12514041364192963, |
|
"logits/rejected": 0.12365315854549408, |
|
"logps/chosen": -553.3133544921875, |
|
"logps/rejected": -573.3492431640625, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.2692439556121826, |
|
"rewards/margins": 0.05325891822576523, |
|
"rewards/rejected": -0.32250285148620605, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.469410246480067e-07, |
|
"logits/chosen": 0.07550040632486343, |
|
"logits/rejected": 0.16558191180229187, |
|
"logps/chosen": -587.4112548828125, |
|
"logps/rejected": -619.96826171875, |
|
"loss": 0.0337, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.29542380571365356, |
|
"rewards/margins": 0.06875937432050705, |
|
"rewards/rejected": -0.3641831576824188, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.3230340629740166e-07, |
|
"logits/chosen": 0.14227424561977386, |
|
"logits/rejected": 0.07906799018383026, |
|
"logps/chosen": -546.7794189453125, |
|
"logps/rejected": -545.5219116210938, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.5458333492279053, |
|
"rewards/chosen": -0.2760690450668335, |
|
"rewards/margins": 0.025999590754508972, |
|
"rewards/rejected": -0.3020685911178589, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1809182627031883e-07, |
|
"logits/chosen": 0.13371774554252625, |
|
"logits/rejected": 0.1723383665084839, |
|
"logps/chosen": -586.3175048828125, |
|
"logps/rejected": -594.6130981445312, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.2775975465774536, |
|
"rewards/margins": 0.04781870171427727, |
|
"rewards/rejected": -0.3254162669181824, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0430895453492944e-07, |
|
"logits/chosen": 0.08875533938407898, |
|
"logits/rejected": 0.16999275982379913, |
|
"logps/chosen": -580.5711669921875, |
|
"logps/rejected": -608.8607788085938, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.6291667222976685, |
|
"rewards/chosen": -0.2822466790676117, |
|
"rewards/margins": 0.047433655709028244, |
|
"rewards/rejected": -0.32968032360076904, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9095738051681412e-07, |
|
"logits/chosen": 0.19837184250354767, |
|
"logits/rejected": 0.16092568635940552, |
|
"logps/chosen": -592.68701171875, |
|
"logps/rejected": -623.7216796875, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.6791666746139526, |
|
"rewards/chosen": -0.29589009284973145, |
|
"rewards/margins": 0.05754246562719345, |
|
"rewards/rejected": -0.3534325063228607, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7803961261247864e-07, |
|
"logits/chosen": 0.16064941883087158, |
|
"logits/rejected": 0.13991779088974, |
|
"logps/chosen": -573.6605834960938, |
|
"logps/rejected": -612.8917846679688, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": -0.2881428599357605, |
|
"rewards/margins": 0.06800989806652069, |
|
"rewards/rejected": -0.3561527132987976, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6555807771809375e-07, |
|
"logits/chosen": 0.10939987748861313, |
|
"logits/rejected": 0.08681745082139969, |
|
"logps/chosen": -572.9388427734375, |
|
"logps/rejected": -569.3970947265625, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2807319760322571, |
|
"rewards/margins": 0.047644276171922684, |
|
"rewards/rejected": -0.32837623357772827, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5351512077355024e-07, |
|
"logits/chosen": 0.0837242603302002, |
|
"logits/rejected": 0.1572417914867401, |
|
"logps/chosen": -587.7030029296875, |
|
"logps/rejected": -579.8245239257812, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2788943350315094, |
|
"rewards/margins": 0.04011840373277664, |
|
"rewards/rejected": -0.31901273131370544, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4191300432190634e-07, |
|
"logits/chosen": 0.04972783476114273, |
|
"logits/rejected": 0.2529766261577606, |
|
"logps/chosen": -563.1866455078125, |
|
"logps/rejected": -596.42578125, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.29752233624458313, |
|
"rewards/margins": 0.0639050304889679, |
|
"rewards/rejected": -0.36142733693122864, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3075390808431897e-07, |
|
"logits/chosen": 0.11287392675876617, |
|
"logits/rejected": 0.11052433401346207, |
|
"logps/chosen": -533.6840209960938, |
|
"logps/rejected": -540.5704956054688, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.27624043822288513, |
|
"rewards/margins": 0.0406770259141922, |
|
"rewards/rejected": -0.3169174790382385, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2003992855053326e-07, |
|
"logits/chosen": 0.13198883831501007, |
|
"logits/rejected": 0.1386340856552124, |
|
"logps/chosen": -547.6902465820312, |
|
"logps/rejected": -601.7785034179688, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.28226011991500854, |
|
"rewards/margins": 0.05512434244155884, |
|
"rewards/rejected": -0.337384432554245, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0977307858500818e-07, |
|
"logits/chosen": 0.1405051052570343, |
|
"logits/rejected": 0.12027152627706528, |
|
"logps/chosen": -545.7700805664062, |
|
"logps/rejected": -588.6355590820312, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2875562608242035, |
|
"rewards/margins": 0.052831798791885376, |
|
"rewards/rejected": -0.34038805961608887, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.995528704875635e-08, |
|
"logits/chosen": 0.12197569757699966, |
|
"logits/rejected": 0.025906020775437355, |
|
"logps/chosen": -571.2064819335938, |
|
"logps/rejected": -597.6893310546875, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.6375000476837158, |
|
"rewards/chosen": -0.28911882638931274, |
|
"rewards/margins": 0.054231010377407074, |
|
"rewards/rejected": -0.3433498442173004, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.058839843696237e-08, |
|
"logits/chosen": 0.09433220326900482, |
|
"logits/rejected": 0.20620909333229065, |
|
"logps/chosen": -555.5418701171875, |
|
"logps/rejected": -581.8546142578125, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": -0.2735230326652527, |
|
"rewards/margins": 0.05654352903366089, |
|
"rewards/rejected": -0.33006659150123596, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.167417253245213e-08, |
|
"logits/chosen": 0.08379559963941574, |
|
"logits/rejected": 0.15960299968719482, |
|
"logps/chosen": -575.5586547851562, |
|
"logps/rejected": -596.3479614257812, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.29097455739974976, |
|
"rewards/margins": 0.043259985744953156, |
|
"rewards/rejected": -0.3342345356941223, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.32142840750788e-08, |
|
"logits/chosen": 0.15721924602985382, |
|
"logits/rejected": 0.11612733453512192, |
|
"logps/chosen": -598.3759155273438, |
|
"logps/rejected": -604.84228515625, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.28792813420295715, |
|
"rewards/margins": 0.046954017132520676, |
|
"rewards/rejected": -0.3348821699619293, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.521032244708375e-08, |
|
"logits/chosen": 0.1785806119441986, |
|
"logits/rejected": 0.12633617222309113, |
|
"logps/chosen": -576.2505493164062, |
|
"logps/rejected": -601.0803833007812, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.2926011085510254, |
|
"rewards/margins": 0.04950517788529396, |
|
"rewards/rejected": -0.34210631251335144, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.766379137449624e-08, |
|
"logits/chosen": 0.09354978054761887, |
|
"logits/rejected": 0.20856766402721405, |
|
"logps/chosen": -558.0820922851562, |
|
"logps/rejected": -587.3670043945312, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.6291667222976685, |
|
"rewards/chosen": -0.2841167449951172, |
|
"rewards/margins": 0.054156333208084106, |
|
"rewards/rejected": -0.3382730782032013, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0576108644623536e-08, |
|
"logits/chosen": 0.13919702172279358, |
|
"logits/rejected": 0.19555795192718506, |
|
"logps/chosen": -620.8248901367188, |
|
"logps/rejected": -609.3353271484375, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2929002642631531, |
|
"rewards/margins": 0.04485376924276352, |
|
"rewards/rejected": -0.3377540707588196, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.394860583968624e-08, |
|
"logits/chosen": 0.1323954313993454, |
|
"logits/rejected": 0.18047359585762024, |
|
"logps/chosen": -545.4381103515625, |
|
"logps/rejected": -573.6661376953125, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": -0.2781949043273926, |
|
"rewards/margins": 0.049311086535453796, |
|
"rewards/rejected": -0.3275059759616852, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.778252808665284e-08, |
|
"logits/chosen": 0.13182419538497925, |
|
"logits/rejected": 0.13190338015556335, |
|
"logps/chosen": -615.33935546875, |
|
"logps/rejected": -599.4812622070312, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 0.6291667222976685, |
|
"rewards/chosen": -0.28227499127388, |
|
"rewards/margins": 0.04690806567668915, |
|
"rewards/rejected": -0.32918307185173035, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.207903382331262e-08, |
|
"logits/chosen": 0.1339016705751419, |
|
"logits/rejected": 0.1503659188747406, |
|
"logps/chosen": -600.3336181640625, |
|
"logps/rejected": -609.2835693359375, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.29118743538856506, |
|
"rewards/margins": 0.05089714378118515, |
|
"rewards/rejected": -0.3420846164226532, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.683919458063705e-08, |
|
"logits/chosen": 0.11768970638513565, |
|
"logits/rejected": 0.08185064047574997, |
|
"logps/chosen": -548.324462890625, |
|
"logps/rejected": -555.4493408203125, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.27761414647102356, |
|
"rewards/margins": 0.048674218356609344, |
|
"rewards/rejected": -0.3262883424758911, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2063994781468256e-08, |
|
"logits/chosen": 0.1320245862007141, |
|
"logits/rejected": 0.11001825332641602, |
|
"logps/chosen": -556.1405029296875, |
|
"logps/rejected": -561.6322631835938, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.5791666507720947, |
|
"rewards/chosen": -0.2681787610054016, |
|
"rewards/margins": 0.052210353314876556, |
|
"rewards/rejected": -0.3203890919685364, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7754331555573656e-08, |
|
"logits/chosen": 0.12108851969242096, |
|
"logits/rejected": 0.1010405421257019, |
|
"logps/chosen": -575.8355712890625, |
|
"logps/rejected": -592.566162109375, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.595833420753479, |
|
"rewards/chosen": -0.2866538465023041, |
|
"rewards/margins": 0.038824331015348434, |
|
"rewards/rejected": -0.325478196144104, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3911014571098835e-08, |
|
"logits/chosen": 0.12808892130851746, |
|
"logits/rejected": 0.16563333570957184, |
|
"logps/chosen": -607.0869140625, |
|
"logps/rejected": -590.3297119140625, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.29146137833595276, |
|
"rewards/margins": 0.04118754714727402, |
|
"rewards/rejected": -0.332648903131485, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0534765882453113e-08, |
|
"logits/chosen": 0.13690322637557983, |
|
"logits/rejected": 0.13680371642112732, |
|
"logps/chosen": -574.5492553710938, |
|
"logps/rejected": -587.0123291015625, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": -0.27521491050720215, |
|
"rewards/margins": 0.052486516535282135, |
|
"rewards/rejected": -0.3277014195919037, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.626219794655553e-09, |
|
"logits/chosen": 0.10713358223438263, |
|
"logits/rejected": 0.11065838485956192, |
|
"logps/chosen": -582.7445068359375, |
|
"logps/rejected": -612.7381591796875, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": -0.2857518494129181, |
|
"rewards/margins": 0.0499836802482605, |
|
"rewards/rejected": -0.3357354998588562, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.185922744166128e-09, |
|
"logits/chosen": 0.12372003495693207, |
|
"logits/rejected": 0.19938071072101593, |
|
"logps/chosen": -613.9265747070312, |
|
"logps/rejected": -604.8402099609375, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -0.2945869565010071, |
|
"rewards/margins": 0.04543297737836838, |
|
"rewards/rejected": -0.34001994132995605, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.2143331962256053e-09, |
|
"logits/chosen": 0.055711567401885986, |
|
"logits/rejected": 0.17630581557750702, |
|
"logps/chosen": -589.8251342773438, |
|
"logps/rejected": -600.02880859375, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.2909054160118103, |
|
"rewards/margins": 0.03341306746006012, |
|
"rewards/rejected": -0.32431846857070923, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.711821558721405e-09, |
|
"logits/chosen": 0.11054827272891998, |
|
"logits/rejected": 0.1433221995830536, |
|
"logps/chosen": -546.5811767578125, |
|
"logps/rejected": -569.87646484375, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.6375000476837158, |
|
"rewards/chosen": -0.26730093359947205, |
|
"rewards/margins": 0.05267611891031265, |
|
"rewards/rejected": -0.3199770748615265, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.786701125999218e-10, |
|
"logits/chosen": 0.12685494124889374, |
|
"logits/rejected": 0.12306642532348633, |
|
"logps/chosen": -550.8919677734375, |
|
"logps/rejected": -557.5734252929688, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.5708333253860474, |
|
"rewards/chosen": -0.27823224663734436, |
|
"rewards/margins": 0.04585646465420723, |
|
"rewards/rejected": -0.3240886926651001, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1507295883145253e-10, |
|
"logits/chosen": 0.05342329666018486, |
|
"logits/rejected": 0.14736703038215637, |
|
"logps/chosen": -567.9596557617188, |
|
"logps/rejected": -585.9571533203125, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.6208333373069763, |
|
"rewards/chosen": -0.27421218156814575, |
|
"rewards/margins": 0.046621158719062805, |
|
"rewards/rejected": -0.320833295583725, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2547, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04083743233644686, |
|
"train_runtime": 25203.2602, |
|
"train_samples_per_second": 2.426, |
|
"train_steps_per_second": 0.101 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2547, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|