|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9988002399520095, |
|
"eval_steps": 10000, |
|
"global_step": 1666, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.988023952095808e-08, |
|
"logits/chosen": 0.08723282814025879, |
|
"logits/rejected": 0.1474362313747406, |
|
"logps/chosen": -289.8438415527344, |
|
"logps/rejected": -246.7926788330078, |
|
"loss": 0.3233, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.00020217681594658643, |
|
"rewards/margins": 0.00030051826615817845, |
|
"rewards/rejected": -9.834145021159202e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1976047904191617e-07, |
|
"logits/chosen": 0.08892221748828888, |
|
"logits/rejected": 0.14665499329566956, |
|
"logps/chosen": -336.4267272949219, |
|
"logps/rejected": -287.6366271972656, |
|
"loss": 0.326, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00019070778216701, |
|
"rewards/margins": 0.0006663546664640307, |
|
"rewards/rejected": -0.0008570626378059387, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7964071856287425e-07, |
|
"logits/chosen": 0.03857799991965294, |
|
"logits/rejected": 0.16380922496318817, |
|
"logps/chosen": -342.10687255859375, |
|
"logps/rejected": -264.2757263183594, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0013111254666000605, |
|
"rewards/margins": 0.0015496534761041403, |
|
"rewards/rejected": -0.0002385281550232321, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3952095808383233e-07, |
|
"logits/chosen": 0.09239836037158966, |
|
"logits/rejected": 0.10474340617656708, |
|
"logps/chosen": -246.67648315429688, |
|
"logps/rejected": -242.5207061767578, |
|
"loss": 0.324, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0020021735690534115, |
|
"rewards/margins": 0.0020810733549296856, |
|
"rewards/rejected": -7.889991684351116e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9940119760479036e-07, |
|
"logits/chosen": 0.08982165902853012, |
|
"logits/rejected": 0.17564034461975098, |
|
"logps/chosen": -295.698974609375, |
|
"logps/rejected": -221.64443969726562, |
|
"loss": 0.3224, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.007395695894956589, |
|
"rewards/margins": 0.00678109098225832, |
|
"rewards/rejected": 0.0006146054365672171, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.592814371257485e-07, |
|
"logits/chosen": 0.14322063326835632, |
|
"logits/rejected": 0.20782272517681122, |
|
"logps/chosen": -289.552978515625, |
|
"logps/rejected": -268.31988525390625, |
|
"loss": 0.3298, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.015825165435671806, |
|
"rewards/margins": 0.008219954557716846, |
|
"rewards/rejected": 0.007605212740600109, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.191616766467065e-07, |
|
"logits/chosen": 0.06546024978160858, |
|
"logits/rejected": 0.12448060512542725, |
|
"logps/chosen": -317.5198974609375, |
|
"logps/rejected": -250.50119018554688, |
|
"loss": 0.3414, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0307091623544693, |
|
"rewards/margins": 0.03058113530278206, |
|
"rewards/rejected": 0.00012802743003703654, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.790419161676647e-07, |
|
"logits/chosen": 0.0973881259560585, |
|
"logits/rejected": 0.18290364742279053, |
|
"logps/chosen": -333.90106201171875, |
|
"logps/rejected": -289.8985290527344, |
|
"loss": 0.3516, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.024836184456944466, |
|
"rewards/margins": 0.036705560982227325, |
|
"rewards/rejected": -0.011869379319250584, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.389221556886228e-07, |
|
"logits/chosen": 0.07697711139917374, |
|
"logits/rejected": 0.13395674526691437, |
|
"logps/chosen": -350.3150939941406, |
|
"logps/rejected": -295.5214538574219, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.012771876528859138, |
|
"rewards/margins": 0.04415798559784889, |
|
"rewards/rejected": -0.031386107206344604, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.988023952095807e-07, |
|
"logits/chosen": 0.13352516293525696, |
|
"logits/rejected": 0.18881280720233917, |
|
"logps/chosen": -359.6278076171875, |
|
"logps/rejected": -266.6817321777344, |
|
"loss": 0.3554, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.03511623665690422, |
|
"rewards/margins": 0.09815546870231628, |
|
"rewards/rejected": -0.06303922832012177, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.586826347305389e-07, |
|
"logits/chosen": 0.0884179100394249, |
|
"logits/rejected": 0.21645644307136536, |
|
"logps/chosen": -361.37237548828125, |
|
"logps/rejected": -261.2263488769531, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.05635715276002884, |
|
"rewards/margins": 0.11102348566055298, |
|
"rewards/rejected": -0.05466633290052414, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.18562874251497e-07, |
|
"logits/chosen": 0.18063569068908691, |
|
"logits/rejected": 0.266376793384552, |
|
"logps/chosen": -325.956787109375, |
|
"logps/rejected": -271.6441345214844, |
|
"loss": 0.3595, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0006754301721230149, |
|
"rewards/margins": 0.04642491415143013, |
|
"rewards/rejected": -0.04710034281015396, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.784431137724551e-07, |
|
"logits/chosen": 0.14518184959888458, |
|
"logits/rejected": 0.22254076600074768, |
|
"logps/chosen": -319.9495544433594, |
|
"logps/rejected": -263.5156555175781, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0138475950807333, |
|
"rewards/margins": 0.12771600484848022, |
|
"rewards/rejected": -0.11386840045452118, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.38323353293413e-07, |
|
"logits/chosen": 0.12264908850193024, |
|
"logits/rejected": 0.18698899447917938, |
|
"logps/chosen": -313.93426513671875, |
|
"logps/rejected": -276.4668273925781, |
|
"loss": 0.3494, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.01783711649477482, |
|
"rewards/margins": 0.10452733933925629, |
|
"rewards/rejected": -0.08669020980596542, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.982035928143712e-07, |
|
"logits/chosen": 0.09591711312532425, |
|
"logits/rejected": 0.17884746193885803, |
|
"logps/chosen": -350.5014343261719, |
|
"logps/rejected": -266.41552734375, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0012602738570421934, |
|
"rewards/margins": 0.14207753539085388, |
|
"rewards/rejected": -0.14081726968288422, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.580838323353293e-07, |
|
"logits/chosen": 0.0989338606595993, |
|
"logits/rejected": 0.13404981791973114, |
|
"logps/chosen": -316.9004821777344, |
|
"logps/rejected": -312.2667236328125, |
|
"loss": 0.3203, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0687781348824501, |
|
"rewards/margins": 0.09935374557971954, |
|
"rewards/rejected": -0.16813188791275024, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.999901172555115e-07, |
|
"logits/chosen": 0.15517865121364594, |
|
"logits/rejected": 0.20303210616111755, |
|
"logps/chosen": -282.88330078125, |
|
"logps/rejected": -263.9994812011719, |
|
"loss": 0.3198, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11205162107944489, |
|
"rewards/margins": 0.10533325374126434, |
|
"rewards/rejected": -0.21738485991954803, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.998144348880984e-07, |
|
"logits/chosen": 0.12583962082862854, |
|
"logits/rejected": 0.18330255150794983, |
|
"logps/chosen": -332.62225341796875, |
|
"logps/rejected": -299.07421875, |
|
"loss": 0.3114, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10971450805664062, |
|
"rewards/margins": 0.15495118498802185, |
|
"rewards/rejected": -0.2646656930446625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.994192247951515e-07, |
|
"logits/chosen": 0.14090900123119354, |
|
"logits/rejected": 0.1716761291027069, |
|
"logps/chosen": -332.7867431640625, |
|
"logps/rejected": -332.4990539550781, |
|
"loss": 0.2964, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.17104236781597137, |
|
"rewards/margins": 0.13000008463859558, |
|
"rewards/rejected": -0.30104246735572815, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.988046605602389e-07, |
|
"logits/chosen": 0.1038103699684143, |
|
"logits/rejected": 0.15158522129058838, |
|
"logps/chosen": -353.9814758300781, |
|
"logps/rejected": -326.39556884765625, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.19866228103637695, |
|
"rewards/margins": 0.3053509593009949, |
|
"rewards/rejected": -0.504013180732727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.979710121113161e-07, |
|
"logits/chosen": 0.07421442121267319, |
|
"logits/rejected": 0.10481990873813629, |
|
"logps/chosen": -354.7379455566406, |
|
"logps/rejected": -324.5375061035156, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1955946683883667, |
|
"rewards/margins": 0.20845285058021545, |
|
"rewards/rejected": -0.40404754877090454, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.969186456021698e-07, |
|
"logits/chosen": 0.09808576852083206, |
|
"logits/rejected": 0.16462191939353943, |
|
"logps/chosen": -335.7714538574219, |
|
"logps/rejected": -331.1880798339844, |
|
"loss": 0.2537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.23376011848449707, |
|
"rewards/margins": 0.29557761549949646, |
|
"rewards/rejected": -0.5293377637863159, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.956480232515958e-07, |
|
"logits/chosen": 0.08053131401538849, |
|
"logits/rejected": 0.2201889455318451, |
|
"logps/chosen": -398.9234619140625, |
|
"logps/rejected": -311.8883361816406, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2779925465583801, |
|
"rewards/margins": 0.2911529839038849, |
|
"rewards/rejected": -0.5691455006599426, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.941597031403838e-07, |
|
"logits/chosen": 0.05627553537487984, |
|
"logits/rejected": 0.22260212898254395, |
|
"logps/chosen": -429.3963928222656, |
|
"logps/rejected": -313.8590393066406, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.370193213224411, |
|
"rewards/margins": 0.20042189955711365, |
|
"rewards/rejected": -0.5706151127815247, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.924543389661986e-07, |
|
"logits/chosen": 0.11179877817630768, |
|
"logits/rejected": 0.19428351521492004, |
|
"logps/chosen": -391.66510009765625, |
|
"logps/rejected": -318.9273376464844, |
|
"loss": 0.2167, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.43162259459495544, |
|
"rewards/margins": 0.27592721581459045, |
|
"rewards/rejected": -0.7075497508049011, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.905326797564637e-07, |
|
"logits/chosen": 0.11460906267166138, |
|
"logits/rejected": 0.19496436417102814, |
|
"logps/chosen": -373.25457763671875, |
|
"logps/rejected": -342.4559020996094, |
|
"loss": 0.2219, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5576636791229248, |
|
"rewards/margins": 0.3388262391090393, |
|
"rewards/rejected": -0.8964899182319641, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.883955695393743e-07, |
|
"logits/chosen": 0.07489059120416641, |
|
"logits/rejected": 0.1965363323688507, |
|
"logps/chosen": -456.4065856933594, |
|
"logps/rejected": -375.62841796875, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47448381781578064, |
|
"rewards/margins": 0.28402233123779297, |
|
"rewards/rejected": -0.758506178855896, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.860439469731857e-07, |
|
"logits/chosen": 0.08477760851383209, |
|
"logits/rejected": 0.18167419731616974, |
|
"logps/chosen": -363.19488525390625, |
|
"logps/rejected": -343.9361877441406, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5575977563858032, |
|
"rewards/margins": 0.1699770838022232, |
|
"rewards/rejected": -0.7275748252868652, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.834788449339357e-07, |
|
"logits/chosen": 0.07804753631353378, |
|
"logits/rejected": 0.10795494168996811, |
|
"logps/chosen": -339.80242919921875, |
|
"logps/rejected": -364.9732360839844, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6320484280586243, |
|
"rewards/margins": 0.27737244963645935, |
|
"rewards/rejected": -0.9094208478927612, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.807013900617874e-07, |
|
"logits/chosen": 0.04762539267539978, |
|
"logits/rejected": 0.06572198867797852, |
|
"logps/chosen": -371.88922119140625, |
|
"logps/rejected": -344.0954284667969, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5951868295669556, |
|
"rewards/margins": 0.362405002117157, |
|
"rewards/rejected": -0.9575918912887573, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.777128022661876e-07, |
|
"logits/chosen": 0.021948417648673058, |
|
"logits/rejected": 0.0974685400724411, |
|
"logps/chosen": -377.27667236328125, |
|
"logps/rejected": -320.70843505859375, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5508317947387695, |
|
"rewards/margins": 0.27007168531417847, |
|
"rewards/rejected": -0.8209035992622375, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.745143941900607e-07, |
|
"logits/chosen": -0.05026810243725777, |
|
"logits/rejected": -0.028773341327905655, |
|
"logps/chosen": -397.15936279296875, |
|
"logps/rejected": -438.9103088378906, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7560676336288452, |
|
"rewards/margins": 0.41686925292015076, |
|
"rewards/rejected": -1.172937035560608, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.711075706332709e-07, |
|
"logits/chosen": -0.04232814535498619, |
|
"logits/rejected": 0.0720619410276413, |
|
"logps/chosen": -409.37701416015625, |
|
"logps/rejected": -379.40960693359375, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7803069353103638, |
|
"rewards/margins": 0.3621472716331482, |
|
"rewards/rejected": -1.1424543857574463, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.674938279356085e-07, |
|
"logits/chosen": 0.008946272544562817, |
|
"logits/rejected": 0.05618705600500107, |
|
"logps/chosen": -401.8056335449219, |
|
"logps/rejected": -350.46331787109375, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7637485265731812, |
|
"rewards/margins": 0.30835336446762085, |
|
"rewards/rejected": -1.0721018314361572, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.636747533195696e-07, |
|
"logits/chosen": -0.005753317382186651, |
|
"logits/rejected": 0.10436991602182388, |
|
"logps/chosen": -375.9329528808594, |
|
"logps/rejected": -351.3202209472656, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6658205986022949, |
|
"rewards/margins": 0.3570438027381897, |
|
"rewards/rejected": -1.0228643417358398, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.596520241932198e-07, |
|
"logits/chosen": -0.10068678855895996, |
|
"logits/rejected": -0.0736171081662178, |
|
"logps/chosen": -413.36505126953125, |
|
"logps/rejected": -384.11761474609375, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6729551553726196, |
|
"rewards/margins": 0.567957878112793, |
|
"rewards/rejected": -1.2409130334854126, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.554274074134438e-07, |
|
"logits/chosen": -0.11362670361995697, |
|
"logits/rejected": 0.022236399352550507, |
|
"logps/chosen": -431.1710510253906, |
|
"logps/rejected": -389.5150451660156, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9115616083145142, |
|
"rewards/margins": 0.35060685873031616, |
|
"rewards/rejected": -1.262168526649475, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.510027585099106e-07, |
|
"logits/chosen": -0.11619666963815689, |
|
"logits/rejected": -0.10023369640111923, |
|
"logps/chosen": -390.26239013671875, |
|
"logps/rejected": -380.4273376464844, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8604833483695984, |
|
"rewards/margins": 0.4251517355442047, |
|
"rewards/rejected": -1.2856351137161255, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.463800208700903e-07, |
|
"logits/chosen": -0.13372397422790527, |
|
"logits/rejected": -0.04371088743209839, |
|
"logps/chosen": -406.59423828125, |
|
"logps/rejected": -367.8764953613281, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8710261583328247, |
|
"rewards/margins": 0.2594194710254669, |
|
"rewards/rejected": -1.1304455995559692, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.415612248856824e-07, |
|
"logits/chosen": -0.059288300573825836, |
|
"logits/rejected": -0.031034788116812706, |
|
"logps/chosen": -350.6271057128906, |
|
"logps/rejected": -349.37396240234375, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6463299989700317, |
|
"rewards/margins": 0.44987383484840393, |
|
"rewards/rejected": -1.0962039232254028, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.365484870608296e-07, |
|
"logits/chosen": -0.13688340783119202, |
|
"logits/rejected": -0.1447652131319046, |
|
"logps/chosen": -356.8211364746094, |
|
"logps/rejected": -380.91314697265625, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6279365420341492, |
|
"rewards/margins": 0.3849129378795624, |
|
"rewards/rejected": -1.0128495693206787, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.313440090825118e-07, |
|
"logits/chosen": -0.12175603210926056, |
|
"logits/rejected": -0.05414363741874695, |
|
"logps/chosen": -396.71197509765625, |
|
"logps/rejected": -352.76312255859375, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8293508291244507, |
|
"rewards/margins": 0.2594980001449585, |
|
"rewards/rejected": -1.0888488292694092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.259500768535226e-07, |
|
"logits/chosen": -0.1462847888469696, |
|
"logits/rejected": -0.1344456970691681, |
|
"logps/chosen": -405.64959716796875, |
|
"logps/rejected": -365.44989013671875, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7200776934623718, |
|
"rewards/margins": 0.4672032296657562, |
|
"rewards/rejected": -1.1872810125350952, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.203690594884599e-07, |
|
"logits/chosen": -0.20732097327709198, |
|
"logits/rejected": -0.10968241840600967, |
|
"logps/chosen": -454.0621643066406, |
|
"logps/rejected": -413.76629638671875, |
|
"loss": 0.1467, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8635724782943726, |
|
"rewards/margins": 0.43014320731163025, |
|
"rewards/rejected": -1.2937157154083252, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.146034082731666e-07, |
|
"logits/chosen": -0.24053998291492462, |
|
"logits/rejected": -0.10800532251596451, |
|
"logps/chosen": -424.5973205566406, |
|
"logps/rejected": -410.4818420410156, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.041223168373108, |
|
"rewards/margins": 0.5180162191390991, |
|
"rewards/rejected": -1.559239387512207, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.086556555880808e-07, |
|
"logits/chosen": -0.2650529742240906, |
|
"logits/rejected": -0.13450463116168976, |
|
"logps/chosen": -446.36468505859375, |
|
"logps/rejected": -422.0096130371094, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1759449243545532, |
|
"rewards/margins": 0.5576841235160828, |
|
"rewards/rejected": -1.7336289882659912, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.025284137959672e-07, |
|
"logits/chosen": -0.08063942193984985, |
|
"logits/rejected": -0.08374373614788055, |
|
"logps/chosen": -368.17987060546875, |
|
"logps/rejected": -371.68597412109375, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8910188674926758, |
|
"rewards/margins": 0.3901366889476776, |
|
"rewards/rejected": -1.2811555862426758, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.962243740945193e-07, |
|
"logits/chosen": -0.13440537452697754, |
|
"logits/rejected": -0.04218355566263199, |
|
"logps/chosen": -416.8321228027344, |
|
"logps/rejected": -364.4325866699219, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6111631393432617, |
|
"rewards/margins": 0.44943752884864807, |
|
"rewards/rejected": -1.0606005191802979, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.897463053343362e-07, |
|
"logits/chosen": -0.15430530905723572, |
|
"logits/rejected": -0.07277049124240875, |
|
"logps/chosen": -382.2574462890625, |
|
"logps/rejected": -377.4708557128906, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6694794297218323, |
|
"rewards/margins": 0.4132021367549896, |
|
"rewards/rejected": -1.08268141746521, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.83097052802791e-07, |
|
"logits/chosen": -0.16823723912239075, |
|
"logits/rejected": -0.05930133908987045, |
|
"logps/chosen": -431.1293029785156, |
|
"logps/rejected": -416.7881774902344, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7873836159706116, |
|
"rewards/margins": 0.39316868782043457, |
|
"rewards/rejected": -1.1805522441864014, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.762795369743302e-07, |
|
"logits/chosen": -0.11715607345104218, |
|
"logits/rejected": -0.08342987298965454, |
|
"logps/chosen": -378.78887939453125, |
|
"logps/rejected": -364.96392822265625, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.930017352104187, |
|
"rewards/margins": 0.35262879729270935, |
|
"rewards/rejected": -1.2826461791992188, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.692967522277452e-07, |
|
"logits/chosen": -0.2919595241546631, |
|
"logits/rejected": -0.13776831328868866, |
|
"logps/chosen": -471.62213134765625, |
|
"logps/rejected": -412.658935546875, |
|
"loss": 0.151, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0413435697555542, |
|
"rewards/margins": 0.5299603343009949, |
|
"rewards/rejected": -1.5713039636611938, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.621517655309871e-07, |
|
"logits/chosen": -0.22538213431835175, |
|
"logits/rejected": -0.16458283364772797, |
|
"logps/chosen": -471.0306091308594, |
|
"logps/rejected": -448.32958984375, |
|
"loss": 0.1764, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8633302450180054, |
|
"rewards/margins": 0.5896649956703186, |
|
"rewards/rejected": -1.4529950618743896, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.548477150940976e-07, |
|
"logits/chosen": -0.2914785146713257, |
|
"logits/rejected": -0.2505750358104706, |
|
"logps/chosen": -396.79302978515625, |
|
"logps/rejected": -392.1474609375, |
|
"loss": 0.1663, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8635896444320679, |
|
"rewards/margins": 0.45653051137924194, |
|
"rewards/rejected": -1.320120096206665, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.473878089908488e-07, |
|
"logits/chosen": -0.23568303883075714, |
|
"logits/rejected": -0.13943514227867126, |
|
"logps/chosen": -386.1482849121094, |
|
"logps/rejected": -365.82904052734375, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7458887696266174, |
|
"rewards/margins": 0.29060885310173035, |
|
"rewards/rejected": -1.0364975929260254, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.397753237496989e-07, |
|
"logits/chosen": -0.16948673129081726, |
|
"logits/rejected": -0.13608554005622864, |
|
"logps/chosen": -403.01434326171875, |
|
"logps/rejected": -411.62725830078125, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9865185618400574, |
|
"rewards/margins": 0.4449576735496521, |
|
"rewards/rejected": -1.431476354598999, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 8.320136029146792e-07, |
|
"logits/chosen": -0.2247391641139984, |
|
"logits/rejected": -0.1330215483903885, |
|
"logps/chosen": -418.1533203125, |
|
"logps/rejected": -402.0802307128906, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1958593130111694, |
|
"rewards/margins": 0.4591103494167328, |
|
"rewards/rejected": -1.654969573020935, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.241060555768485e-07, |
|
"logits/chosen": -0.2574161887168884, |
|
"logits/rejected": -0.2403256893157959, |
|
"logps/chosen": -499.733642578125, |
|
"logps/rejected": -511.239501953125, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4520691633224487, |
|
"rewards/margins": 0.6710880994796753, |
|
"rewards/rejected": -2.123157262802124, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.160561548769579e-07, |
|
"logits/chosen": -0.2796838879585266, |
|
"logits/rejected": -0.18458350002765656, |
|
"logps/chosen": -447.6980895996094, |
|
"logps/rejected": -411.94921875, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0911153554916382, |
|
"rewards/margins": 0.625370979309082, |
|
"rewards/rejected": -1.7164863348007202, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.078674364799822e-07, |
|
"logits/chosen": -0.2084120213985443, |
|
"logits/rejected": -0.13774996995925903, |
|
"logps/chosen": -430.189697265625, |
|
"logps/rejected": -379.07867431640625, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9517067074775696, |
|
"rewards/margins": 0.3666831851005554, |
|
"rewards/rejected": -1.318389654159546, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.995434970221915e-07, |
|
"logits/chosen": -0.20178177952766418, |
|
"logits/rejected": -0.08668573200702667, |
|
"logps/chosen": -446.00244140625, |
|
"logps/rejected": -375.57171630859375, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9166814088821411, |
|
"rewards/margins": 0.42661458253860474, |
|
"rewards/rejected": -1.3432958126068115, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.910879925314412e-07, |
|
"logits/chosen": -0.24198313057422638, |
|
"logits/rejected": -0.14964896440505981, |
|
"logps/chosen": -444.73773193359375, |
|
"logps/rejected": -413.18994140625, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.036938190460205, |
|
"rewards/margins": 0.41328781843185425, |
|
"rewards/rejected": -1.450226068496704, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.825046368213781e-07, |
|
"logits/chosen": -0.13304699957370758, |
|
"logits/rejected": -0.12154946476221085, |
|
"logps/chosen": -403.96533203125, |
|
"logps/rejected": -399.8603820800781, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8464315533638, |
|
"rewards/margins": 0.501135528087616, |
|
"rewards/rejected": -1.3475672006607056, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.737971998602646e-07, |
|
"logits/chosen": -0.12783931195735931, |
|
"logits/rejected": -0.1469259113073349, |
|
"logps/chosen": -413.7705993652344, |
|
"logps/rejected": -416.5107421875, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2587213516235352, |
|
"rewards/margins": 0.4588088095188141, |
|
"rewards/rejected": -1.7175302505493164, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.649695061151383e-07, |
|
"logits/chosen": -0.22540828585624695, |
|
"logits/rejected": -0.1876813918352127, |
|
"logps/chosen": -507.74346923828125, |
|
"logps/rejected": -520.5947265625, |
|
"loss": 0.1349, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4848111867904663, |
|
"rewards/margins": 0.5542536973953247, |
|
"rewards/rejected": -2.039064884185791, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.560254328720362e-07, |
|
"logits/chosen": -0.2085895836353302, |
|
"logits/rejected": -0.042162101715803146, |
|
"logps/chosen": -440.8785095214844, |
|
"logps/rejected": -387.02215576171875, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.233938455581665, |
|
"rewards/margins": 0.3742896020412445, |
|
"rewards/rejected": -1.6082279682159424, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.469689085330195e-07, |
|
"logits/chosen": -0.1412975937128067, |
|
"logits/rejected": -0.17901337146759033, |
|
"logps/chosen": -446.0218200683594, |
|
"logps/rejected": -433.8721618652344, |
|
"loss": 0.1612, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8437545895576477, |
|
"rewards/margins": 0.5731817483901978, |
|
"rewards/rejected": -1.4169362783432007, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.37803910890746e-07, |
|
"logits/chosen": -0.17172157764434814, |
|
"logits/rejected": -0.15589216351509094, |
|
"logps/chosen": -413.99176025390625, |
|
"logps/rejected": -443.12994384765625, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0244253873825073, |
|
"rewards/margins": 0.5451046824455261, |
|
"rewards/rejected": -1.5695301294326782, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.285344653813504e-07, |
|
"logits/chosen": -0.16723224520683289, |
|
"logits/rejected": -0.11218209564685822, |
|
"logps/chosen": -459.5580139160156, |
|
"logps/rejected": -502.71514892578125, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.443068027496338, |
|
"rewards/margins": 0.6126604676246643, |
|
"rewards/rejected": -2.0557284355163574, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.19164643316399e-07, |
|
"logits/chosen": -0.1634540855884552, |
|
"logits/rejected": -0.008752308785915375, |
|
"logps/chosen": -477.576416015625, |
|
"logps/rejected": -446.4156188964844, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.5389097929000854, |
|
"rewards/margins": 0.4564815163612366, |
|
"rewards/rejected": -1.9953914880752563, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.096985600946937e-07, |
|
"logits/chosen": -0.12237264961004257, |
|
"logits/rejected": -0.10291185230016708, |
|
"logps/chosen": -393.76837158203125, |
|
"logps/rejected": -429.14447021484375, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9208710789680481, |
|
"rewards/margins": 0.6199368238449097, |
|
"rewards/rejected": -1.5408079624176025, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.001403733947133e-07, |
|
"logits/chosen": -0.1201944500207901, |
|
"logits/rejected": -0.05582220479846001, |
|
"logps/chosen": -382.8240661621094, |
|
"logps/rejected": -375.1324462890625, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.789650559425354, |
|
"rewards/margins": 0.3769993185997009, |
|
"rewards/rejected": -1.1666499376296997, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.904942813484846e-07, |
|
"logits/chosen": -0.08779577165842056, |
|
"logits/rejected": -0.05727202445268631, |
|
"logps/chosen": -375.1112976074219, |
|
"logps/rejected": -363.4750061035156, |
|
"loss": 0.1511, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8722445368766785, |
|
"rewards/margins": 0.4427576959133148, |
|
"rewards/rejected": -1.315002202987671, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.807645206976847e-07, |
|
"logits/chosen": -0.15901021659374237, |
|
"logits/rejected": -0.046349309384822845, |
|
"logps/chosen": -454.17816162109375, |
|
"logps/rejected": -410.53814697265625, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2105097770690918, |
|
"rewards/margins": 0.5188924670219421, |
|
"rewards/rejected": -1.7294021844863892, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.709553649327864e-07, |
|
"logits/chosen": -0.10003723204135895, |
|
"logits/rejected": -0.11153779178857803, |
|
"logps/chosen": -424.2566833496094, |
|
"logps/rejected": -428.76263427734375, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9821775555610657, |
|
"rewards/margins": 0.5280328989028931, |
|
"rewards/rejected": -1.510210633277893, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.610711224160624e-07, |
|
"logits/chosen": -0.09928876161575317, |
|
"logits/rejected": 0.037872180342674255, |
|
"logps/chosen": -461.2574157714844, |
|
"logps/rejected": -414.75433349609375, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9287399053573608, |
|
"rewards/margins": 0.4935649037361145, |
|
"rewards/rejected": -1.4223048686981201, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.51116134489272e-07, |
|
"logits/chosen": -0.04129798337817192, |
|
"logits/rejected": 0.029511254280805588, |
|
"logps/chosen": -425.9366149902344, |
|
"logps/rejected": -408.80267333984375, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9357036352157593, |
|
"rewards/margins": 0.4151946008205414, |
|
"rewards/rejected": -1.3508983850479126, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.410947735668653e-07, |
|
"logits/chosen": -0.05360158160328865, |
|
"logits/rejected": -0.07298514991998672, |
|
"logps/chosen": -392.5763854980469, |
|
"logps/rejected": -427.8981018066406, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8554953336715698, |
|
"rewards/margins": 0.7457026243209839, |
|
"rewards/rejected": -1.6011979579925537, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.310114412155368e-07, |
|
"logits/chosen": -0.10374744981527328, |
|
"logits/rejected": 0.0370178297162056, |
|
"logps/chosen": -447.22381591796875, |
|
"logps/rejected": -401.534423828125, |
|
"loss": 0.1595, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0632203817367554, |
|
"rewards/margins": 0.2709798216819763, |
|
"rewards/rejected": -1.334200143814087, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.208705662209762e-07, |
|
"logits/chosen": -0.08297502994537354, |
|
"logits/rejected": -0.03830767422914505, |
|
"logps/chosen": -397.39202880859375, |
|
"logps/rejected": -369.2044372558594, |
|
"loss": 0.1663, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8531109094619751, |
|
"rewards/margins": 0.4128567576408386, |
|
"rewards/rejected": -1.265967607498169, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.106766026426648e-07, |
|
"logits/chosen": -0.07533316314220428, |
|
"logits/rejected": 0.044484030455350876, |
|
"logps/chosen": -390.5148010253906, |
|
"logps/rejected": -376.8379211425781, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9820185899734497, |
|
"rewards/margins": 0.4463561475276947, |
|
"rewards/rejected": -1.4283746480941772, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.004340278575695e-07, |
|
"logits/chosen": -0.0853220671415329, |
|
"logits/rejected": -0.05402841418981552, |
|
"logps/chosen": -432.50543212890625, |
|
"logps/rejected": -429.68017578125, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9129465222358704, |
|
"rewards/margins": 0.6217477321624756, |
|
"rewards/rejected": -1.5346943140029907, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.901473405935966e-07, |
|
"logits/chosen": -0.08009025454521179, |
|
"logits/rejected": 0.029772957786917686, |
|
"logps/chosen": -389.1134948730469, |
|
"logps/rejected": -390.2154846191406, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0638234615325928, |
|
"rewards/margins": 0.27301082015037537, |
|
"rewards/rejected": -1.336834192276001, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.798210589536672e-07, |
|
"logits/chosen": 0.038750506937503815, |
|
"logits/rejected": 0.011047865264117718, |
|
"logps/chosen": -398.5277099609375, |
|
"logps/rejected": -468.46044921875, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.989227294921875, |
|
"rewards/margins": 0.8411723971366882, |
|
"rewards/rejected": -1.830399751663208, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.694597184312832e-07, |
|
"logits/chosen": -0.08361298590898514, |
|
"logits/rejected": 0.025835633277893066, |
|
"logps/chosen": -512.9071655273438, |
|
"logps/rejected": -552.0848388671875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7927268743515015, |
|
"rewards/margins": 1.0089781284332275, |
|
"rewards/rejected": -2.8017051219940186, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.590678699184552e-07, |
|
"logits/chosen": -0.1973053216934204, |
|
"logits/rejected": -0.08661861717700958, |
|
"logps/chosen": -538.1658935546875, |
|
"logps/rejected": -642.89697265625, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.399050235748291, |
|
"rewards/margins": 1.239048719406128, |
|
"rewards/rejected": -3.638098955154419, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.486500777068659e-07, |
|
"logits/chosen": -0.09753044694662094, |
|
"logits/rejected": 0.013812586665153503, |
|
"logps/chosen": -533.619873046875, |
|
"logps/rejected": -590.58544921875, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.461451530456543, |
|
"rewards/margins": 1.150248646736145, |
|
"rewards/rejected": -3.6117000579833984, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.382109174831493e-07, |
|
"logits/chosen": -0.16656556725502014, |
|
"logits/rejected": -0.014411838725209236, |
|
"logps/chosen": -612.1375732421875, |
|
"logps/rejected": -666.2962646484375, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.350978374481201, |
|
"rewards/margins": 1.2788021564483643, |
|
"rewards/rejected": -3.6297805309295654, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.277549743191652e-07, |
|
"logits/chosen": -0.04728949815034866, |
|
"logits/rejected": 0.05177200958132744, |
|
"logps/chosen": -481.9996032714844, |
|
"logps/rejected": -563.8628540039062, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9757206439971924, |
|
"rewards/margins": 1.2286030054092407, |
|
"rewards/rejected": -3.2043235301971436, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5.172868406581501e-07, |
|
"logits/chosen": -0.10119952261447906, |
|
"logits/rejected": 0.03440069407224655, |
|
"logps/chosen": -575.5086059570312, |
|
"logps/rejected": -601.7667236328125, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.9103889465332031, |
|
"rewards/margins": 1.1987693309783936, |
|
"rewards/rejected": -3.109158515930176, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.068111142976319e-07, |
|
"logits/chosen": -0.012911921367049217, |
|
"logits/rejected": 0.05222976952791214, |
|
"logps/chosen": -544.2637939453125, |
|
"logps/rejected": -609.9290161132812, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.4613072872161865, |
|
"rewards/margins": 1.0719743967056274, |
|
"rewards/rejected": -3.5332818031311035, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.963323963699926e-07, |
|
"logits/chosen": 0.07444079965353012, |
|
"logits/rejected": 0.07786104083061218, |
|
"logps/chosen": -598.2421875, |
|
"logps/rejected": -706.32470703125, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.6978304386138916, |
|
"rewards/margins": 1.5244108438491821, |
|
"rewards/rejected": -4.222241401672363, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.858552893215655e-07, |
|
"logits/chosen": 0.01419870276004076, |
|
"logits/rejected": 0.17051884531974792, |
|
"logps/chosen": -530.5960083007812, |
|
"logps/rejected": -619.4927978515625, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6646533012390137, |
|
"rewards/margins": 1.1269919872283936, |
|
"rewards/rejected": -3.791645050048828, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.753843948911556e-07, |
|
"logits/chosen": 0.08497779071331024, |
|
"logits/rejected": 0.1790248453617096, |
|
"logps/chosen": -576.8201293945312, |
|
"logps/rejected": -674.81982421875, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.776611804962158, |
|
"rewards/margins": 1.3886988162994385, |
|
"rewards/rejected": -4.165310382843018, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.649243120888722e-07, |
|
"logits/chosen": 0.0691087394952774, |
|
"logits/rejected": 0.14361132681369781, |
|
"logps/chosen": -612.6062622070312, |
|
"logps/rejected": -648.2825317382812, |
|
"loss": 0.025, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.668504238128662, |
|
"rewards/margins": 1.0527547597885132, |
|
"rewards/rejected": -3.721259355545044, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.544796351761574e-07, |
|
"logits/chosen": 0.07248688489198685, |
|
"logits/rejected": 0.18278047442436218, |
|
"logps/chosen": -539.9449462890625, |
|
"logps/rejected": -617.9127197265625, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3308327198028564, |
|
"rewards/margins": 1.2744853496551514, |
|
"rewards/rejected": -3.605318069458008, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.440549516479022e-07, |
|
"logits/chosen": 0.14086951315402985, |
|
"logits/rejected": 0.1397445946931839, |
|
"logps/chosen": -541.9815673828125, |
|
"logps/rejected": -650.6157836914062, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4639604091644287, |
|
"rewards/margins": 1.412184476852417, |
|
"rewards/rejected": -3.876145124435425, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.336548402175345e-07, |
|
"logits/chosen": 0.08946482837200165, |
|
"logits/rejected": 0.23766390979290009, |
|
"logps/chosen": -570.1082153320312, |
|
"logps/rejected": -633.4486083984375, |
|
"loss": 0.022, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.765277862548828, |
|
"rewards/margins": 1.223432183265686, |
|
"rewards/rejected": -3.9887099266052246, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.232838688059627e-07, |
|
"logits/chosen": 0.20700442790985107, |
|
"logits/rejected": 0.21234914660453796, |
|
"logps/chosen": -642.5216064453125, |
|
"logps/rejected": -769.6670532226562, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0695090293884277, |
|
"rewards/margins": 1.9037091732025146, |
|
"rewards/rejected": -4.9732184410095215, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.129465925352618e-07, |
|
"logits/chosen": 0.16028758883476257, |
|
"logits/rejected": 0.3449386954307556, |
|
"logps/chosen": -624.1184692382812, |
|
"logps/rejected": -672.5079956054688, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.061570405960083, |
|
"rewards/margins": 1.1443489789962769, |
|
"rewards/rejected": -4.205918788909912, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.0264755172797837e-07, |
|
"logits/chosen": 0.052181851118803024, |
|
"logits/rejected": 0.18743617832660675, |
|
"logps/chosen": -607.5186767578125, |
|
"logps/rejected": -672.8460693359375, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.874335765838623, |
|
"rewards/margins": 1.2407417297363281, |
|
"rewards/rejected": -4.115077018737793, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.9239126991293775e-07, |
|
"logits/chosen": 0.24214263260364532, |
|
"logits/rejected": 0.40768828988075256, |
|
"logps/chosen": -620.0783081054688, |
|
"logps/rejected": -685.48388671875, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8905162811279297, |
|
"rewards/margins": 1.159411907196045, |
|
"rewards/rejected": -4.049928188323975, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.82182251838427e-07, |
|
"logits/chosen": 0.1082601398229599, |
|
"logits/rejected": 0.29715824127197266, |
|
"logps/chosen": -589.5526123046875, |
|
"logps/rejected": -628.5372924804688, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6682987213134766, |
|
"rewards/margins": 1.2489776611328125, |
|
"rewards/rejected": -3.9172768592834473, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.720249814936255e-07, |
|
"logits/chosen": 0.2514716684818268, |
|
"logits/rejected": 0.27680593729019165, |
|
"logps/chosen": -592.1260986328125, |
|
"logps/rejected": -714.8380737304688, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.839751720428467, |
|
"rewards/margins": 1.4019855260849, |
|
"rewards/rejected": -4.241737365722656, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.6192392013915473e-07, |
|
"logits/chosen": 0.16709819436073303, |
|
"logits/rejected": 0.29886525869369507, |
|
"logps/chosen": -598.5506591796875, |
|
"logps/rejected": -646.1890258789062, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.756328582763672, |
|
"rewards/margins": 1.287087321281433, |
|
"rewards/rejected": -4.0434160232543945, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.5188350434761025e-07, |
|
"logits/chosen": 0.20310468971729279, |
|
"logits/rejected": 0.32862892746925354, |
|
"logps/chosen": -669.3348388671875, |
|
"logps/rejected": -772.3299560546875, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.227342128753662, |
|
"rewards/margins": 1.726514220237732, |
|
"rewards/rejected": -4.953856468200684, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.419081440549368e-07, |
|
"logits/chosen": 0.12433931976556778, |
|
"logits/rejected": 0.20679005980491638, |
|
"logps/chosen": -597.89501953125, |
|
"logps/rejected": -657.15869140625, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.850302219390869, |
|
"rewards/margins": 1.2778496742248535, |
|
"rewards/rejected": -4.128152370452881, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.3200222062350324e-07, |
|
"logits/chosen": 0.20479054749011993, |
|
"logits/rejected": 0.4336840510368347, |
|
"logps/chosen": -650.9668579101562, |
|
"logps/rejected": -683.4470825195312, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9142518043518066, |
|
"rewards/margins": 1.1479203701019287, |
|
"rewards/rejected": -4.062172889709473, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.2217008491772724e-07, |
|
"logits/chosen": 0.17460568249225616, |
|
"logits/rejected": 0.32013821601867676, |
|
"logps/chosen": -649.8709716796875, |
|
"logps/rejected": -717.8926391601562, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.0518133640289307, |
|
"rewards/margins": 1.3449747562408447, |
|
"rewards/rejected": -4.396788120269775, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.124160553930953e-07, |
|
"logits/chosen": 0.23403926193714142, |
|
"logits/rejected": 0.4171646535396576, |
|
"logps/chosen": -583.6199951171875, |
|
"logps/rejected": -628.165771484375, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5831427574157715, |
|
"rewards/margins": 1.416587471961975, |
|
"rewards/rejected": -3.999729871749878, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.027444161994178e-07, |
|
"logits/chosen": 0.26827913522720337, |
|
"logits/rejected": 0.3499876856803894, |
|
"logps/chosen": -567.4503784179688, |
|
"logps/rejected": -667.9777221679688, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5715322494506836, |
|
"rewards/margins": 1.4084771871566772, |
|
"rewards/rejected": -3.980009078979492, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.9315941529915055e-07, |
|
"logits/chosen": 0.2547352910041809, |
|
"logits/rejected": 0.47194820642471313, |
|
"logps/chosen": -698.35888671875, |
|
"logps/rejected": -709.6544189453125, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.165942668914795, |
|
"rewards/margins": 1.3185456991195679, |
|
"rewards/rejected": -4.484488487243652, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8366526260161205e-07, |
|
"logits/chosen": 0.3811063766479492, |
|
"logits/rejected": 0.5796335935592651, |
|
"logps/chosen": -667.5139770507812, |
|
"logps/rejected": -759.1563720703125, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.5972373485565186, |
|
"rewards/margins": 1.5377312898635864, |
|
"rewards/rejected": -5.134967803955078, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.742661281139129e-07, |
|
"logits/chosen": 0.4097815155982971, |
|
"logits/rejected": 0.5813673734664917, |
|
"logps/chosen": -648.1516723632812, |
|
"logps/rejected": -774.3548583984375, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.3875465393066406, |
|
"rewards/margins": 1.6946359872817993, |
|
"rewards/rejected": -5.082181930541992, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6496614010941214e-07, |
|
"logits/chosen": 0.40659332275390625, |
|
"logits/rejected": 0.47693824768066406, |
|
"logps/chosen": -579.2315673828125, |
|
"logps/rejected": -690.2713623046875, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0188193321228027, |
|
"rewards/margins": 1.4409698247909546, |
|
"rewards/rejected": -4.459788799285889, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.557693833145038e-07, |
|
"logits/chosen": 0.3575282692909241, |
|
"logits/rejected": 0.393863707780838, |
|
"logps/chosen": -661.1494140625, |
|
"logps/rejected": -756.2431640625, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.1486072540283203, |
|
"rewards/margins": 1.5837332010269165, |
|
"rewards/rejected": -4.7323408126831055, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.4667989711452873e-07, |
|
"logits/chosen": 0.28119125962257385, |
|
"logits/rejected": 0.5084460973739624, |
|
"logps/chosen": -585.8961181640625, |
|
"logps/rejected": -656.994873046875, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.9292385578155518, |
|
"rewards/margins": 1.1598174571990967, |
|
"rewards/rejected": -4.089056015014648, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.3770167377960237e-07, |
|
"logits/chosen": 0.2689970135688782, |
|
"logits/rejected": 0.5148590803146362, |
|
"logps/chosen": -717.4390869140625, |
|
"logps/rejected": -785.14404296875, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.3781027793884277, |
|
"rewards/margins": 1.664996862411499, |
|
"rewards/rejected": -5.043099880218506, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.2883865671113633e-07, |
|
"logits/chosen": 0.31557050347328186, |
|
"logits/rejected": 0.411182701587677, |
|
"logps/chosen": -648.5716552734375, |
|
"logps/rejected": -728.0420532226562, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0543835163116455, |
|
"rewards/margins": 1.5502524375915527, |
|
"rewards/rejected": -4.604635715484619, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.200947387098232e-07, |
|
"logits/chosen": 0.26245275139808655, |
|
"logits/rejected": 0.37014099955558777, |
|
"logps/chosen": -645.890380859375, |
|
"logps/rejected": -696.8558959960938, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.092529773712158, |
|
"rewards/margins": 1.4243347644805908, |
|
"rewards/rejected": -4.516864776611328, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.1147376026584757e-07, |
|
"logits/chosen": 0.3540686070919037, |
|
"logits/rejected": 0.44220852851867676, |
|
"logps/chosen": -694.4542236328125, |
|
"logps/rejected": -797.4610595703125, |
|
"loss": 0.016, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1828300952911377, |
|
"rewards/margins": 1.86924147605896, |
|
"rewards/rejected": -5.052072048187256, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.0297950787207047e-07, |
|
"logits/chosen": 0.36199456453323364, |
|
"logits/rejected": 0.5422841906547546, |
|
"logps/chosen": -615.1419677734375, |
|
"logps/rejected": -665.1968383789062, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.897984027862549, |
|
"rewards/margins": 1.3999998569488525, |
|
"rewards/rejected": -4.2979841232299805, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.9461571236093288e-07, |
|
"logits/chosen": 0.2255418598651886, |
|
"logits/rejected": 0.3416988253593445, |
|
"logps/chosen": -586.51806640625, |
|
"logps/rejected": -626.4557495117188, |
|
"loss": 0.02, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6398327350616455, |
|
"rewards/margins": 1.3156733512878418, |
|
"rewards/rejected": -3.9555065631866455, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.8638604726580476e-07, |
|
"logits/chosen": 0.4877316355705261, |
|
"logits/rejected": 0.5342193245887756, |
|
"logps/chosen": -573.1345825195312, |
|
"logps/rejected": -688.9697875976562, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.821394205093384, |
|
"rewards/margins": 1.4121348857879639, |
|
"rewards/rejected": -4.233529090881348, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.782941272075017e-07, |
|
"logits/chosen": 0.3591156601905823, |
|
"logits/rejected": 0.5589128732681274, |
|
"logps/chosen": -630.3018798828125, |
|
"logps/rejected": -729.1790771484375, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9634406566619873, |
|
"rewards/margins": 1.6115903854370117, |
|
"rewards/rejected": -4.575031280517578, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.7034350630667626e-07, |
|
"logits/chosen": 0.4524804651737213, |
|
"logits/rejected": 0.6036592721939087, |
|
"logps/chosen": -576.6759033203125, |
|
"logps/rejected": -693.260986328125, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.0177433490753174, |
|
"rewards/margins": 1.490710973739624, |
|
"rewards/rejected": -4.508454322814941, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.6253767662278345e-07, |
|
"logits/chosen": 0.3014351427555084, |
|
"logits/rejected": 0.5008861422538757, |
|
"logps/chosen": -718.8760986328125, |
|
"logps/rejected": -813.004150390625, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.741528034210205, |
|
"rewards/margins": 1.4162757396697998, |
|
"rewards/rejected": -5.157803535461426, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.548800666203028e-07, |
|
"logits/chosen": 0.47732776403427124, |
|
"logits/rejected": 0.6197739839553833, |
|
"logps/chosen": -660.623291015625, |
|
"logps/rejected": -757.375244140625, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.632997989654541, |
|
"rewards/margins": 1.6059176921844482, |
|
"rewards/rejected": -5.23891544342041, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4737403966289385e-07, |
|
"logits/chosen": 0.3244774639606476, |
|
"logits/rejected": 0.5204547643661499, |
|
"logps/chosen": -726.2711181640625, |
|
"logps/rejected": -772.5819091796875, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -4.009468078613281, |
|
"rewards/margins": 1.0510145425796509, |
|
"rewards/rejected": -5.060482501983643, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.400228925361449e-07, |
|
"logits/chosen": 0.47019681334495544, |
|
"logits/rejected": 0.6322409510612488, |
|
"logps/chosen": -661.2239379882812, |
|
"logps/rejected": -696.6038818359375, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.51452898979187, |
|
"rewards/margins": 1.1311668157577515, |
|
"rewards/rejected": -4.64569616317749, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.328298539995637e-07, |
|
"logits/chosen": 0.29750996828079224, |
|
"logits/rejected": 0.3950595259666443, |
|
"logps/chosen": -645.1038208007812, |
|
"logps/rejected": -781.1411743164062, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.2570998668670654, |
|
"rewards/margins": 1.7285950183868408, |
|
"rewards/rejected": -4.985694408416748, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.257980833684471e-07, |
|
"logits/chosen": 0.47976547479629517, |
|
"logits/rejected": 0.51587975025177, |
|
"logps/chosen": -616.9915771484375, |
|
"logps/rejected": -749.0167236328125, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.158031940460205, |
|
"rewards/margins": 1.6786686182022095, |
|
"rewards/rejected": -4.836700439453125, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.1893066912625078e-07, |
|
"logits/chosen": 0.40313810110092163, |
|
"logits/rejected": 0.5334981083869934, |
|
"logps/chosen": -605.7171020507812, |
|
"logps/rejected": -690.5645141601562, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.071347951889038, |
|
"rewards/margins": 1.3815619945526123, |
|
"rewards/rejected": -4.45290994644165, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.1223062756807078e-07, |
|
"logits/chosen": 0.3190918564796448, |
|
"logits/rejected": 0.4667654037475586, |
|
"logps/chosen": -641.0958862304688, |
|
"logps/rejected": -681.6056518554688, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.0553221702575684, |
|
"rewards/margins": 1.2857555150985718, |
|
"rewards/rejected": -4.3410773277282715, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.0570090147583088e-07, |
|
"logits/chosen": 0.4918629229068756, |
|
"logits/rejected": 0.6110485792160034, |
|
"logps/chosen": -679.90185546875, |
|
"logps/rejected": -766.8203735351562, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.3473587036132812, |
|
"rewards/margins": 1.5597641468048096, |
|
"rewards/rejected": -4.90712308883667, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.934435882575848e-08, |
|
"logits/chosen": 0.32224616408348083, |
|
"logits/rejected": 0.4978526532649994, |
|
"logps/chosen": -746.5880737304688, |
|
"logps/rejected": -807.6434326171875, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.446526288986206, |
|
"rewards/margins": 1.6646654605865479, |
|
"rewards/rejected": -5.111191749572754, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.316379152871668e-08, |
|
"logits/chosen": 0.3561343550682068, |
|
"logits/rejected": 0.5769251585006714, |
|
"logps/chosen": -660.674072265625, |
|
"logps/rejected": -767.1783447265625, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.296705722808838, |
|
"rewards/margins": 1.5563912391662598, |
|
"rewards/rejected": -4.853096961975098, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.716191420394509e-08, |
|
"logits/chosen": 0.48394888639450073, |
|
"logits/rejected": 0.5622594356536865, |
|
"logps/chosen": -608.4400634765625, |
|
"logps/rejected": -728.1359252929688, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.9183599948883057, |
|
"rewards/margins": 1.6131757497787476, |
|
"rewards/rejected": -4.5315351486206055, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.134136298674931e-08, |
|
"logits/chosen": 0.4225463271141052, |
|
"logits/rejected": 0.5670620799064636, |
|
"logps/chosen": -591.5728149414062, |
|
"logps/rejected": -708.0966186523438, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0601813793182373, |
|
"rewards/margins": 1.5123151540756226, |
|
"rewards/rejected": -4.57249641418457, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.570469437066146e-08, |
|
"logits/chosen": 0.5167518854141235, |
|
"logits/rejected": 0.501775324344635, |
|
"logps/chosen": -587.5382080078125, |
|
"logps/rejected": -788.7468872070312, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.152822971343994, |
|
"rewards/margins": 1.8745685815811157, |
|
"rewards/rejected": -5.02739143371582, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.025438408458106e-08, |
|
"logits/chosen": 0.5128912925720215, |
|
"logits/rejected": 0.6313267350196838, |
|
"logps/chosen": -668.2738037109375, |
|
"logps/rejected": -725.1722412109375, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.4178764820098877, |
|
"rewards/margins": 1.2856941223144531, |
|
"rewards/rejected": -4.70357084274292, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.49928260053893e-08, |
|
"logits/chosen": 0.47072911262512207, |
|
"logits/rejected": 0.5178387761116028, |
|
"logps/chosen": -693.3670043945312, |
|
"logps/rejected": -771.8135375976562, |
|
"loss": 0.0168, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.376781940460205, |
|
"rewards/margins": 1.6709791421890259, |
|
"rewards/rejected": -5.0477614402771, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5.992233110651412e-08, |
|
"logits/chosen": 0.4310898184776306, |
|
"logits/rejected": 0.5147913694381714, |
|
"logps/chosen": -654.1636962890625, |
|
"logps/rejected": -809.3177490234375, |
|
"loss": 0.013, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.412320375442505, |
|
"rewards/margins": 1.580108880996704, |
|
"rewards/rejected": -4.992428779602051, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5.504512644290787e-08, |
|
"logits/chosen": 0.34709280729293823, |
|
"logits/rejected": 0.4276227056980133, |
|
"logps/chosen": -631.5572509765625, |
|
"logps/rejected": -723.4016723632812, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.13346791267395, |
|
"rewards/margins": 1.2867248058319092, |
|
"rewards/rejected": -4.420193195343018, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5.036335417288373e-08, |
|
"logits/chosen": 0.3249759376049042, |
|
"logits/rejected": 0.5136023163795471, |
|
"logps/chosen": -692.810791015625, |
|
"logps/rejected": -764.9146728515625, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.325841188430786, |
|
"rewards/margins": 1.6227283477783203, |
|
"rewards/rejected": -4.9485697746276855, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.587907061724033e-08, |
|
"logits/chosen": 0.4547889828681946, |
|
"logits/rejected": 0.6101347804069519, |
|
"logps/chosen": -648.64404296875, |
|
"logps/rejected": -718.5267333984375, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.184832811355591, |
|
"rewards/margins": 1.4085993766784668, |
|
"rewards/rejected": -4.593432426452637, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1594245356087467e-08, |
|
"logits/chosen": 0.3632095158100128, |
|
"logits/rejected": 0.4777224659919739, |
|
"logps/chosen": -618.84765625, |
|
"logps/rejected": -726.1567993164062, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.185579776763916, |
|
"rewards/margins": 1.4200047254562378, |
|
"rewards/rejected": -4.605584144592285, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.751076036377071e-08, |
|
"logits/chosen": 0.3495107591152191, |
|
"logits/rejected": 0.6066192388534546, |
|
"logps/chosen": -640.0892333984375, |
|
"logps/rejected": -659.4434204101562, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.1215078830718994, |
|
"rewards/margins": 1.1207990646362305, |
|
"rewards/rejected": -4.242306709289551, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.363040918227289e-08, |
|
"logits/chosen": 0.47332292795181274, |
|
"logits/rejected": 0.5948122143745422, |
|
"logps/chosen": -615.6058349609375, |
|
"logps/rejected": -682.501708984375, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.3774936199188232, |
|
"rewards/margins": 1.1772058010101318, |
|
"rewards/rejected": -4.554699420928955, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.995489613345753e-08, |
|
"logits/chosen": 0.4362161159515381, |
|
"logits/rejected": 0.6808967590332031, |
|
"logps/chosen": -674.7125244140625, |
|
"logps/rejected": -721.2472534179688, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -3.4577622413635254, |
|
"rewards/margins": 1.0942115783691406, |
|
"rewards/rejected": -4.551973342895508, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.6485835570499494e-08, |
|
"logits/chosen": 0.43375635147094727, |
|
"logits/rejected": 0.6752065420150757, |
|
"logps/chosen": -632.56396484375, |
|
"logps/rejected": -739.25439453125, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9880669116973877, |
|
"rewards/margins": 1.6647765636444092, |
|
"rewards/rejected": -4.652843475341797, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.3224751168831048e-08, |
|
"logits/chosen": 0.4767048954963684, |
|
"logits/rejected": 0.7012344598770142, |
|
"logps/chosen": -584.639892578125, |
|
"logps/rejected": -666.232666015625, |
|
"loss": 0.016, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.8932371139526367, |
|
"rewards/margins": 1.4125299453735352, |
|
"rewards/rejected": -4.305767059326172, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0173075256915418e-08, |
|
"logits/chosen": 0.5076589584350586, |
|
"logits/rejected": 0.5357536673545837, |
|
"logps/chosen": -660.1342163085938, |
|
"logps/rejected": -769.7876586914062, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.183320999145508, |
|
"rewards/margins": 1.548128366470337, |
|
"rewards/rejected": -4.731449127197266, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.7332148187142126e-08, |
|
"logits/chosen": 0.5189244151115417, |
|
"logits/rejected": 0.5688571929931641, |
|
"logps/chosen": -592.2298583984375, |
|
"logps/rejected": -701.166748046875, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.999229907989502, |
|
"rewards/margins": 1.526184320449829, |
|
"rewards/rejected": -4.525413990020752, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4703217747118746e-08, |
|
"logits/chosen": 0.47760123014450073, |
|
"logits/rejected": 0.569862961769104, |
|
"logps/chosen": -608.0677490234375, |
|
"logps/rejected": -744.98974609375, |
|
"loss": 0.0168, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.1274056434631348, |
|
"rewards/margins": 1.7376220226287842, |
|
"rewards/rejected": -4.86502742767334, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2287438611620182e-08, |
|
"logits/chosen": 0.40803298354148865, |
|
"logits/rejected": 0.47397923469543457, |
|
"logps/chosen": -605.1934814453125, |
|
"logps/rejected": -706.01416015625, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.087914228439331, |
|
"rewards/margins": 1.4495090246200562, |
|
"rewards/rejected": -4.537423133850098, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.0085871835434023e-08, |
|
"logits/chosen": 0.457242488861084, |
|
"logits/rejected": 0.5799147486686707, |
|
"logps/chosen": -618.122802734375, |
|
"logps/rejected": -734.541259765625, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0382626056671143, |
|
"rewards/margins": 1.7189610004425049, |
|
"rewards/rejected": -4.757222652435303, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.099484387325494e-09, |
|
"logits/chosen": 0.4558071494102478, |
|
"logits/rejected": 0.5063390135765076, |
|
"logps/chosen": -659.652099609375, |
|
"logps/rejected": -760.4281005859375, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.3247177600860596, |
|
"rewards/margins": 1.6427574157714844, |
|
"rewards/rejected": -4.967474937438965, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.3291487253271936e-09, |
|
"logits/chosen": 0.4046391546726227, |
|
"logits/rejected": 0.537802517414093, |
|
"logps/chosen": -669.2388916015625, |
|
"logps/rejected": -760.1110229492188, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.352482557296753, |
|
"rewards/margins": 1.2877779006958008, |
|
"rewards/rejected": -4.640260219573975, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.775642413539338e-09, |
|
"logits/chosen": 0.5053682327270508, |
|
"logits/rejected": 0.637668251991272, |
|
"logps/chosen": -612.4566650390625, |
|
"logps/rejected": -682.6697998046875, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.1416869163513184, |
|
"rewards/margins": 1.0475047826766968, |
|
"rewards/rejected": -4.189192295074463, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.4396477806090674e-09, |
|
"logits/chosen": 0.4289971888065338, |
|
"logits/rejected": 0.6064268350601196, |
|
"logps/chosen": -650.7198486328125, |
|
"logps/rejected": -691.7114868164062, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.111639976501465, |
|
"rewards/margins": 1.4212944507598877, |
|
"rewards/rejected": -4.53293514251709, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.321751620039447e-09, |
|
"logits/chosen": 0.3730877935886383, |
|
"logits/rejected": 0.5358615517616272, |
|
"logps/chosen": -634.3133544921875, |
|
"logps/rejected": -689.7657470703125, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0234384536743164, |
|
"rewards/margins": 1.1467082500457764, |
|
"rewards/rejected": -4.170146942138672, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.422444932458633e-09, |
|
"logits/chosen": 0.4879744052886963, |
|
"logits/rejected": 0.5437642335891724, |
|
"logps/chosen": -628.467529296875, |
|
"logps/rejected": -736.6702880859375, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.185553550720215, |
|
"rewards/margins": 1.3367440700531006, |
|
"rewards/rejected": -4.522297382354736, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.421227099634886e-10, |
|
"logits/chosen": 0.3235490620136261, |
|
"logits/rejected": 0.5868870615959167, |
|
"logps/chosen": -694.3482055664062, |
|
"logps/rejected": -766.0265502929688, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1539559364318848, |
|
"rewards/margins": 1.6172927618026733, |
|
"rewards/rejected": -4.771248817443848, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.8108376263175083e-10, |
|
"logits/chosen": 0.40071624517440796, |
|
"logits/rejected": 0.5358114242553711, |
|
"logps/chosen": -693.2542114257812, |
|
"logps/rejected": -787.3406982421875, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.3677265644073486, |
|
"rewards/margins": 1.6144136190414429, |
|
"rewards/rejected": -4.982139587402344, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.953058727912406e-11, |
|
"logits/chosen": 0.3938693106174469, |
|
"logits/rejected": 0.5522228479385376, |
|
"logps/chosen": -619.6367797851562, |
|
"logps/rejected": -711.4348754882812, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.093592882156372, |
|
"rewards/margins": 1.4544086456298828, |
|
"rewards/rejected": -4.548001289367676, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1666, |
|
"total_flos": 0.0, |
|
"train_loss": 0.11547961056518669, |
|
"train_runtime": 23573.0656, |
|
"train_samples_per_second": 9.051, |
|
"train_steps_per_second": 0.071 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1666, |
|
"num_train_epochs": 2, |
|
"save_steps": 10000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|