|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1346, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 23.545113700609754, |
|
"learning_rate": 3.7037037037037036e-09, |
|
"logits/chosen": -2.017277240753174, |
|
"logits/rejected": -1.9505600929260254, |
|
"logps/chosen": -342.8155212402344, |
|
"logps/rejected": -264.6424865722656, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 23.704110924178444, |
|
"learning_rate": 3.7037037037037036e-08, |
|
"logits/chosen": -1.852867603302002, |
|
"logits/rejected": -1.7641547918319702, |
|
"logps/chosen": -243.63710021972656, |
|
"logps/rejected": -215.13551330566406, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.0004846964729949832, |
|
"rewards/margins": -0.001089173136278987, |
|
"rewards/rejected": 0.0006044767214916646, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 27.48286479448467, |
|
"learning_rate": 7.407407407407407e-08, |
|
"logits/chosen": -1.9755146503448486, |
|
"logits/rejected": -1.8412548303604126, |
|
"logps/chosen": -241.4310302734375, |
|
"logps/rejected": -210.738037109375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0005561274592764676, |
|
"rewards/margins": 0.0004348217917140573, |
|
"rewards/rejected": 0.00012130556569900364, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 23.49895713678948, |
|
"learning_rate": 1.111111111111111e-07, |
|
"logits/chosen": -1.8477449417114258, |
|
"logits/rejected": -1.781266450881958, |
|
"logps/chosen": -277.84527587890625, |
|
"logps/rejected": -244.1582489013672, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005596889648586512, |
|
"rewards/margins": 0.0021990840323269367, |
|
"rewards/rejected": 0.003397804917767644, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 21.952979365752906, |
|
"learning_rate": 1.4814814814814815e-07, |
|
"logits/chosen": -1.8662084341049194, |
|
"logits/rejected": -1.8252031803131104, |
|
"logps/chosen": -279.81585693359375, |
|
"logps/rejected": -256.37322998046875, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.026755522936582565, |
|
"rewards/margins": 0.01376323588192463, |
|
"rewards/rejected": 0.01299228798598051, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 22.515894719363914, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -1.886828064918518, |
|
"logits/rejected": -1.796974539756775, |
|
"logps/chosen": -245.1302490234375, |
|
"logps/rejected": -207.6703338623047, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.05396401137113571, |
|
"rewards/margins": 0.03148679807782173, |
|
"rewards/rejected": 0.02247721515595913, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 21.11853715417876, |
|
"learning_rate": 2.222222222222222e-07, |
|
"logits/chosen": -1.8658056259155273, |
|
"logits/rejected": -1.7990939617156982, |
|
"logps/chosen": -245.4588623046875, |
|
"logps/rejected": -228.79067993164062, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0710381492972374, |
|
"rewards/margins": 0.053314320743083954, |
|
"rewards/rejected": 0.01772383041679859, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 21.639022509531838, |
|
"learning_rate": 2.5925925925925923e-07, |
|
"logits/chosen": -1.8920536041259766, |
|
"logits/rejected": -1.8345096111297607, |
|
"logps/chosen": -223.96511840820312, |
|
"logps/rejected": -196.08775329589844, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.06073574349284172, |
|
"rewards/margins": 0.08626440167427063, |
|
"rewards/rejected": -0.02552866004407406, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 22.179495576107882, |
|
"learning_rate": 2.962962962962963e-07, |
|
"logits/chosen": -1.8825687170028687, |
|
"logits/rejected": -1.847541093826294, |
|
"logps/chosen": -232.0540313720703, |
|
"logps/rejected": -240.20120239257812, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.03458085656166077, |
|
"rewards/margins": 0.1154135912656784, |
|
"rewards/rejected": -0.08083274215459824, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 21.88163995061792, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -1.9384691715240479, |
|
"logits/rejected": -1.922488808631897, |
|
"logps/chosen": -248.4744415283203, |
|
"logps/rejected": -261.0725402832031, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.029202425852417946, |
|
"rewards/margins": 0.2103302925825119, |
|
"rewards/rejected": -0.2395327389240265, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 27.693123307166786, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -1.9232885837554932, |
|
"logits/rejected": -1.9198648929595947, |
|
"logps/chosen": -245.3694610595703, |
|
"logps/rejected": -275.853515625, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.23111872375011444, |
|
"rewards/margins": 0.2522026598453522, |
|
"rewards/rejected": -0.4833213686943054, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -1.787776231765747, |
|
"eval_logits/rejected": -1.7244033813476562, |
|
"eval_logps/chosen": -325.57440185546875, |
|
"eval_logps/rejected": -351.93182373046875, |
|
"eval_loss": 0.6428781747817993, |
|
"eval_rewards/accuracies": 0.671875, |
|
"eval_rewards/chosen": -0.13797907531261444, |
|
"eval_rewards/margins": 0.2060878425836563, |
|
"eval_rewards/rejected": -0.34406691789627075, |
|
"eval_runtime": 97.6555, |
|
"eval_samples_per_second": 20.48, |
|
"eval_steps_per_second": 0.328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 33.52938589908786, |
|
"learning_rate": 4.0740740740740737e-07, |
|
"logits/chosen": -1.8354734182357788, |
|
"logits/rejected": -1.7754793167114258, |
|
"logps/chosen": -295.2403869628906, |
|
"logps/rejected": -316.46923828125, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5448485016822815, |
|
"rewards/margins": 0.3984159529209137, |
|
"rewards/rejected": -0.943264365196228, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 32.42547027840792, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/chosen": -1.7011499404907227, |
|
"logits/rejected": -1.708805799484253, |
|
"logps/chosen": -307.11334228515625, |
|
"logps/rejected": -348.78729248046875, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5989453196525574, |
|
"rewards/margins": 0.6007151007652283, |
|
"rewards/rejected": -1.1996605396270752, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 33.08064593315955, |
|
"learning_rate": 4.814814814814814e-07, |
|
"logits/chosen": -1.70786452293396, |
|
"logits/rejected": -1.6745007038116455, |
|
"logps/chosen": -290.42498779296875, |
|
"logps/rejected": -343.42510986328125, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7598094344139099, |
|
"rewards/margins": 0.6571252346038818, |
|
"rewards/rejected": -1.4169347286224365, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 33.94320124887001, |
|
"learning_rate": 4.999789692194508e-07, |
|
"logits/chosen": -1.8099472522735596, |
|
"logits/rejected": -1.754595398902893, |
|
"logps/chosen": -314.9842224121094, |
|
"logps/rejected": -356.81011962890625, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.812475860118866, |
|
"rewards/margins": 0.6942508816719055, |
|
"rewards/rejected": -1.5067269802093506, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 39.07047935152003, |
|
"learning_rate": 4.998107442045616e-07, |
|
"logits/chosen": -1.6377861499786377, |
|
"logits/rejected": -1.6226139068603516, |
|
"logps/chosen": -304.92840576171875, |
|
"logps/rejected": -393.1883239746094, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.8283722996711731, |
|
"rewards/margins": 0.8278924822807312, |
|
"rewards/rejected": -1.6562646627426147, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 42.785505208166626, |
|
"learning_rate": 4.994744073829293e-07, |
|
"logits/chosen": -1.5746722221374512, |
|
"logits/rejected": -1.4142063856124878, |
|
"logps/chosen": -343.25823974609375, |
|
"logps/rejected": -402.02691650390625, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8369730710983276, |
|
"rewards/margins": 0.8556060791015625, |
|
"rewards/rejected": -1.6925792694091797, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 48.274083606893925, |
|
"learning_rate": 4.989701850946613e-07, |
|
"logits/chosen": -1.5056556463241577, |
|
"logits/rejected": -1.3766965866088867, |
|
"logps/chosen": -335.7103271484375, |
|
"logps/rejected": -388.94097900390625, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9376843571662903, |
|
"rewards/margins": 0.8313838243484497, |
|
"rewards/rejected": -1.7690680027008057, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 46.176765511998994, |
|
"learning_rate": 4.982984166595104e-07, |
|
"logits/chosen": -1.4761296510696411, |
|
"logits/rejected": -1.3599636554718018, |
|
"logps/chosen": -408.171630859375, |
|
"logps/rejected": -472.0873107910156, |
|
"loss": 0.4577, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2097257375717163, |
|
"rewards/margins": 1.240505576133728, |
|
"rewards/rejected": -2.4502310752868652, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 43.28509926988276, |
|
"learning_rate": 4.974595541485259e-07, |
|
"logits/chosen": -1.3221380710601807, |
|
"logits/rejected": -1.204590082168579, |
|
"logps/chosen": -335.5089416503906, |
|
"logps/rejected": -428.30621337890625, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.076790452003479, |
|
"rewards/margins": 1.0969324111938477, |
|
"rewards/rejected": -2.173722743988037, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 56.09927596713516, |
|
"learning_rate": 4.964541620798307e-07, |
|
"logits/chosen": -1.2160365581512451, |
|
"logits/rejected": -1.118375539779663, |
|
"logps/chosen": -348.90753173828125, |
|
"logps/rejected": -468.21563720703125, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2727657556533813, |
|
"rewards/margins": 1.1830675601959229, |
|
"rewards/rejected": -2.4558334350585938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -1.4371435642242432, |
|
"eval_logits/rejected": -1.366525650024414, |
|
"eval_logps/chosen": -361.1814880371094, |
|
"eval_logps/rejected": -427.2509765625, |
|
"eval_loss": 0.559985339641571, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -0.4940495491027832, |
|
"eval_rewards/margins": 0.6032084226608276, |
|
"eval_rewards/rejected": -1.0972579717636108, |
|
"eval_runtime": 97.4901, |
|
"eval_samples_per_second": 20.515, |
|
"eval_steps_per_second": 0.328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 49.36366262587358, |
|
"learning_rate": 4.952829170387241e-07, |
|
"logits/chosen": -1.1800302267074585, |
|
"logits/rejected": -1.0126550197601318, |
|
"logps/chosen": -380.48828125, |
|
"logps/rejected": -450.0765075683594, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3101383447647095, |
|
"rewards/margins": 0.9806028604507446, |
|
"rewards/rejected": -2.290741443634033, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 57.25684926546983, |
|
"learning_rate": 4.939466072223697e-07, |
|
"logits/chosen": -1.2157623767852783, |
|
"logits/rejected": -1.0489680767059326, |
|
"logps/chosen": -372.591064453125, |
|
"logps/rejected": -468.7542419433594, |
|
"loss": 0.4545, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3519532680511475, |
|
"rewards/margins": 1.1502256393432617, |
|
"rewards/rejected": -2.50217866897583, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 40.98752146946231, |
|
"learning_rate": 4.924461319093725e-07, |
|
"logits/chosen": -1.1049861907958984, |
|
"logits/rejected": -1.0018864870071411, |
|
"logps/chosen": -361.7793884277344, |
|
"logps/rejected": -487.15460205078125, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1743983030319214, |
|
"rewards/margins": 1.1021788120269775, |
|
"rewards/rejected": -2.2765772342681885, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 57.39176618017778, |
|
"learning_rate": 4.907825008546038e-07, |
|
"logits/chosen": -0.7271394729614258, |
|
"logits/rejected": -0.6813848614692688, |
|
"logps/chosen": -377.90118408203125, |
|
"logps/rejected": -523.9625244140625, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4791629314422607, |
|
"rewards/margins": 1.4326350688934326, |
|
"rewards/rejected": -2.9117980003356934, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 51.26102709104704, |
|
"learning_rate": 4.889568336096795e-07, |
|
"logits/chosen": -0.5312275290489197, |
|
"logits/rejected": -0.37771934270858765, |
|
"logps/chosen": -381.1251220703125, |
|
"logps/rejected": -479.7431640625, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5479203462600708, |
|
"rewards/margins": 1.1352421045303345, |
|
"rewards/rejected": -2.6831624507904053, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 46.69946748969463, |
|
"learning_rate": 4.869703587695508e-07, |
|
"logits/chosen": -0.44748228788375854, |
|
"logits/rejected": -0.18481455743312836, |
|
"logps/chosen": -379.5589904785156, |
|
"logps/rejected": -527.2100830078125, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.412641167640686, |
|
"rewards/margins": 1.667824149131775, |
|
"rewards/rejected": -3.080465793609619, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 40.8957837906737, |
|
"learning_rate": 4.848244131457127e-07, |
|
"logits/chosen": -0.9530747532844543, |
|
"logits/rejected": -0.6137160062789917, |
|
"logps/chosen": -400.1986083984375, |
|
"logps/rejected": -499.60308837890625, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.4335994720458984, |
|
"rewards/margins": 1.4832035303115845, |
|
"rewards/rejected": -2.9168028831481934, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 45.308995144235396, |
|
"learning_rate": 4.825204408665877e-07, |
|
"logits/chosen": -1.2076747417449951, |
|
"logits/rejected": -0.9289032220840454, |
|
"logps/chosen": -426.99114990234375, |
|
"logps/rejected": -532.0573120117188, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.4818888902664185, |
|
"rewards/margins": 1.4990845918655396, |
|
"rewards/rejected": -2.980973720550537, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 57.75176826411474, |
|
"learning_rate": 4.800599924056907e-07, |
|
"logits/chosen": -0.7638604044914246, |
|
"logits/rejected": -0.7332445383071899, |
|
"logps/chosen": -383.2490539550781, |
|
"logps/rejected": -556.2003784179688, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5847924947738647, |
|
"rewards/margins": 1.5942741632461548, |
|
"rewards/rejected": -3.1790668964385986, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 45.582764097748154, |
|
"learning_rate": 4.774447235382259e-07, |
|
"logits/chosen": -0.5798165202140808, |
|
"logits/rejected": -0.5653051733970642, |
|
"logps/chosen": -411.58154296875, |
|
"logps/rejected": -582.2734375, |
|
"loss": 0.3963, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.766920804977417, |
|
"rewards/margins": 1.7389370203018188, |
|
"rewards/rejected": -3.5058579444885254, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_logits/chosen": -1.4608731269836426, |
|
"eval_logits/rejected": -1.2769949436187744, |
|
"eval_logps/chosen": -423.00341796875, |
|
"eval_logps/rejected": -521.115478515625, |
|
"eval_loss": 0.5291498303413391, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -1.1122692823410034, |
|
"eval_rewards/margins": 0.9236345291137695, |
|
"eval_rewards/rejected": -2.0359039306640625, |
|
"eval_runtime": 97.2217, |
|
"eval_samples_per_second": 20.572, |
|
"eval_steps_per_second": 0.329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 42.82644939529418, |
|
"learning_rate": 4.7467639422682426e-07, |
|
"logits/chosen": -0.6843788623809814, |
|
"logits/rejected": -0.46269315481185913, |
|
"logps/chosen": -417.7638244628906, |
|
"logps/rejected": -573.83837890625, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.8430830240249634, |
|
"rewards/margins": 1.669550895690918, |
|
"rewards/rejected": -3.512633800506592, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 55.146360598406936, |
|
"learning_rate": 4.7175686743716223e-07, |
|
"logits/chosen": -1.140579104423523, |
|
"logits/rejected": -0.8973017930984497, |
|
"logps/chosen": -419.18048095703125, |
|
"logps/rejected": -527.0257568359375, |
|
"loss": 0.405, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4635722637176514, |
|
"rewards/margins": 1.3773781061172485, |
|
"rewards/rejected": -2.8409504890441895, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 45.88101703811544, |
|
"learning_rate": 4.686881078842688e-07, |
|
"logits/chosen": -1.0653458833694458, |
|
"logits/rejected": -0.8751330375671387, |
|
"logps/chosen": -386.37335205078125, |
|
"logps/rejected": -510.29949951171875, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.47976553440094, |
|
"rewards/margins": 1.366317868232727, |
|
"rewards/rejected": -2.846083164215088, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 58.11307992254104, |
|
"learning_rate": 4.654721807103558e-07, |
|
"logits/chosen": -0.5151967406272888, |
|
"logits/rejected": -0.14977958798408508, |
|
"logps/chosen": -400.7736511230469, |
|
"logps/rejected": -529.3316650390625, |
|
"loss": 0.3938, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7082515954971313, |
|
"rewards/margins": 1.6958554983139038, |
|
"rewards/rejected": -3.404106855392456, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 48.499175539211535, |
|
"learning_rate": 4.621112500950678e-07, |
|
"logits/chosen": -0.8198322057723999, |
|
"logits/rejected": -0.5934363603591919, |
|
"logps/chosen": -429.72113037109375, |
|
"logps/rejected": -547.5772705078125, |
|
"loss": 0.3843, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.8615728616714478, |
|
"rewards/margins": 1.499329924583435, |
|
"rewards/rejected": -3.3609023094177246, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 55.599844022581365, |
|
"learning_rate": 4.5860757779908225e-07, |
|
"logits/chosen": -1.0455310344696045, |
|
"logits/rejected": -0.6826554536819458, |
|
"logps/chosen": -413.38739013671875, |
|
"logps/rejected": -542.2623291015625, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5897157192230225, |
|
"rewards/margins": 1.6853986978530884, |
|
"rewards/rejected": -3.2751145362854004, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 74.71151634556864, |
|
"learning_rate": 4.5496352164204304e-07, |
|
"logits/chosen": -0.4619407057762146, |
|
"logits/rejected": -0.23415322601795197, |
|
"logps/chosen": -426.197998046875, |
|
"logps/rejected": -620.7210693359375, |
|
"loss": 0.3997, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0138180255889893, |
|
"rewards/margins": 2.0114035606384277, |
|
"rewards/rejected": -4.025221347808838, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 46.835706945950214, |
|
"learning_rate": 4.5118153391584966e-07, |
|
"logits/chosen": -0.7893734574317932, |
|
"logits/rejected": -0.5286726951599121, |
|
"logps/chosen": -348.12554931640625, |
|
"logps/rejected": -483.89215087890625, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0020155906677246, |
|
"rewards/margins": 1.7324419021606445, |
|
"rewards/rejected": -2.734457492828369, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 51.06658825135186, |
|
"learning_rate": 4.472641597343713e-07, |
|
"logits/chosen": -0.5109713077545166, |
|
"logits/rejected": -0.07112047076225281, |
|
"logps/chosen": -389.3044738769531, |
|
"logps/rejected": -567.7926635742188, |
|
"loss": 0.3846, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.6159217357635498, |
|
"rewards/margins": 1.9207748174667358, |
|
"rewards/rejected": -3.536696672439575, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 44.181665144710905, |
|
"learning_rate": 4.4321403532069523e-07, |
|
"logits/chosen": -0.5097373127937317, |
|
"logits/rejected": -0.2719523012638092, |
|
"logps/chosen": -353.91278076171875, |
|
"logps/rejected": -517.2376708984375, |
|
"loss": 0.4012, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5704162120819092, |
|
"rewards/margins": 1.8435367345809937, |
|
"rewards/rejected": -3.4139533042907715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_logits/chosen": -1.3372514247894287, |
|
"eval_logits/rejected": -1.1222751140594482, |
|
"eval_logps/chosen": -417.65863037109375, |
|
"eval_logps/rejected": -516.7505493164062, |
|
"eval_loss": 0.5314938426017761, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -1.058821201324463, |
|
"eval_rewards/margins": 0.9334329962730408, |
|
"eval_rewards/rejected": -1.9922541379928589, |
|
"eval_runtime": 97.4658, |
|
"eval_samples_per_second": 20.52, |
|
"eval_steps_per_second": 0.328, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 50.26869622592037, |
|
"learning_rate": 4.390338862330631e-07, |
|
"logits/chosen": -0.7592865824699402, |
|
"logits/rejected": -0.4464483857154846, |
|
"logps/chosen": -401.47607421875, |
|
"logps/rejected": -523.3784790039062, |
|
"loss": 0.3803, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7566916942596436, |
|
"rewards/margins": 1.5606569051742554, |
|
"rewards/rejected": -3.3173484802246094, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 51.57934206296598, |
|
"learning_rate": 4.3472652553068835e-07, |
|
"logits/chosen": -0.6644355654716492, |
|
"logits/rejected": -0.23346371948719025, |
|
"logps/chosen": -404.8458557128906, |
|
"logps/rejected": -540.8956298828125, |
|
"loss": 0.3797, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.7600839138031006, |
|
"rewards/margins": 1.6869585514068604, |
|
"rewards/rejected": -3.4470419883728027, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 73.04228089758476, |
|
"learning_rate": 4.3029485188068895e-07, |
|
"logits/chosen": 0.10370206832885742, |
|
"logits/rejected": 0.39608412981033325, |
|
"logps/chosen": -385.42498779296875, |
|
"logps/rejected": -570.5172729492188, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.839719533920288, |
|
"rewards/margins": 1.714897871017456, |
|
"rewards/rejected": -3.5546176433563232, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 54.512857623037554, |
|
"learning_rate": 4.257418476074103e-07, |
|
"logits/chosen": -0.023069072514772415, |
|
"logits/rejected": 0.3960541784763336, |
|
"logps/chosen": -423.490478515625, |
|
"logps/rejected": -592.7897338867188, |
|
"loss": 0.3638, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7877943515777588, |
|
"rewards/margins": 2.115088701248169, |
|
"rewards/rejected": -3.9028830528259277, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 55.7162708155443, |
|
"learning_rate": 4.210705766854504e-07, |
|
"logits/chosen": 0.15324774384498596, |
|
"logits/rejected": 0.521506667137146, |
|
"logps/chosen": -456.01776123046875, |
|
"logps/rejected": -625.3338623046875, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.096989870071411, |
|
"rewards/margins": 1.874829649925232, |
|
"rewards/rejected": -3.9718196392059326, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 51.50110954656292, |
|
"learning_rate": 4.16284182677737e-07, |
|
"logits/chosen": 0.3847750127315521, |
|
"logits/rejected": 0.9687877893447876, |
|
"logps/chosen": -421.48321533203125, |
|
"logps/rejected": -571.6495361328125, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7761863470077515, |
|
"rewards/margins": 1.777931809425354, |
|
"rewards/rejected": -3.5541183948516846, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 42.17081561639591, |
|
"learning_rate": 4.113858866200466e-07, |
|
"logits/chosen": 0.5899291634559631, |
|
"logits/rejected": 0.9651363492012024, |
|
"logps/chosen": -411.4060974121094, |
|
"logps/rejected": -587.0046997070312, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.751307725906372, |
|
"rewards/margins": 1.814639687538147, |
|
"rewards/rejected": -3.5659472942352295, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 48.02610054790726, |
|
"learning_rate": 4.063789848533865e-07, |
|
"logits/chosen": 0.46232396364212036, |
|
"logits/rejected": 1.0872290134429932, |
|
"logps/chosen": -472.24139404296875, |
|
"logps/rejected": -634.9567260742188, |
|
"loss": 0.374, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.287501573562622, |
|
"rewards/margins": 1.8356859683990479, |
|
"rewards/rejected": -4.123187065124512, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 45.88835702974933, |
|
"learning_rate": 4.0126684680570074e-07, |
|
"logits/chosen": -0.3817380368709564, |
|
"logits/rejected": 0.1566486358642578, |
|
"logps/chosen": -461.13934326171875, |
|
"logps/rejected": -592.1519165039062, |
|
"loss": 0.334, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.8447940349578857, |
|
"rewards/margins": 1.7669038772583008, |
|
"rewards/rejected": -3.6116981506347656, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 53.85769217498667, |
|
"learning_rate": 3.960529127243902e-07, |
|
"logits/chosen": -0.31509625911712646, |
|
"logits/rejected": -0.04504912719130516, |
|
"logps/chosen": -477.027099609375, |
|
"logps/rejected": -654.2672119140625, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.053821086883545, |
|
"rewards/margins": 2.070889711380005, |
|
"rewards/rejected": -4.124711036682129, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.0066841840744019, |
|
"eval_logits/rejected": -0.6833571791648865, |
|
"eval_logps/chosen": -456.0086364746094, |
|
"eval_logps/rejected": -568.9822387695312, |
|
"eval_loss": 0.5275729894638062, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -1.4423211812973022, |
|
"eval_rewards/margins": 1.0722503662109375, |
|
"eval_rewards/rejected": -2.5145716667175293, |
|
"eval_runtime": 97.6519, |
|
"eval_samples_per_second": 20.481, |
|
"eval_steps_per_second": 0.328, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 53.47947486686438, |
|
"learning_rate": 3.9074069136117594e-07, |
|
"logits/chosen": -0.6587181687355042, |
|
"logits/rejected": -0.11707913875579834, |
|
"logps/chosen": -478.9352111816406, |
|
"logps/rejected": -631.669921875, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.0555968284606934, |
|
"rewards/margins": 1.9847618341445923, |
|
"rewards/rejected": -4.040358543395996, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 48.01190508303512, |
|
"learning_rate": 3.8533375761086094e-07, |
|
"logits/chosen": -0.6520954966545105, |
|
"logits/rejected": -0.19666698575019836, |
|
"logps/chosen": -399.66455078125, |
|
"logps/rejected": -589.08251953125, |
|
"loss": 0.3518, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.5765998363494873, |
|
"rewards/margins": 2.0024795532226562, |
|
"rewards/rejected": -3.5790793895721436, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 58.201909693922666, |
|
"learning_rate": 3.79835750105581e-07, |
|
"logits/chosen": -0.015231219120323658, |
|
"logits/rejected": 0.524590253829956, |
|
"logps/chosen": -425.837890625, |
|
"logps/rejected": -576.46630859375, |
|
"loss": 0.364, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.9596973657608032, |
|
"rewards/margins": 1.918087363243103, |
|
"rewards/rejected": -3.8777847290039062, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 53.67325387574443, |
|
"learning_rate": 3.742503687661627e-07, |
|
"logits/chosen": 0.3345823585987091, |
|
"logits/rejected": 0.8041492700576782, |
|
"logps/chosen": -436.06170654296875, |
|
"logps/rejected": -628.6650390625, |
|
"loss": 0.3413, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.163074016571045, |
|
"rewards/margins": 2.0728249549865723, |
|
"rewards/rejected": -4.235899925231934, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 54.5126564713129, |
|
"learning_rate": 3.685813723122372e-07, |
|
"logits/chosen": 0.6497628688812256, |
|
"logits/rejected": 1.1682524681091309, |
|
"logps/chosen": -425.30157470703125, |
|
"logps/rejected": -617.69482421875, |
|
"loss": 0.3365, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.9300180673599243, |
|
"rewards/margins": 2.057875394821167, |
|
"rewards/rejected": -3.987893581390381, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 62.74924566191948, |
|
"learning_rate": 3.6283257573278466e-07, |
|
"logits/chosen": 0.867998480796814, |
|
"logits/rejected": 1.330685019493103, |
|
"logps/chosen": -455.71124267578125, |
|
"logps/rejected": -659.052978515625, |
|
"loss": 0.3223, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.0765323638916016, |
|
"rewards/margins": 2.156247615814209, |
|
"rewards/rejected": -4.2327799797058105, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 48.6969642598068, |
|
"learning_rate": 3.5700784771881224e-07, |
|
"logits/chosen": 1.0166234970092773, |
|
"logits/rejected": 1.6870880126953125, |
|
"logps/chosen": -478.86407470703125, |
|
"logps/rejected": -635.7424926757812, |
|
"loss": 0.3382, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.4357941150665283, |
|
"rewards/margins": 1.9054218530654907, |
|
"rewards/rejected": -4.341216087341309, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 43.243072977055355, |
|
"learning_rate": 3.511111080598925e-07, |
|
"logits/chosen": 0.6339820623397827, |
|
"logits/rejected": 1.3627948760986328, |
|
"logps/chosen": -447.268798828125, |
|
"logps/rejected": -636.5888671875, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9055280685424805, |
|
"rewards/margins": 2.3114867210388184, |
|
"rewards/rejected": -4.217014312744141, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 69.40196325230258, |
|
"learning_rate": 3.451463250063146e-07, |
|
"logits/chosen": 0.8395903706550598, |
|
"logits/rejected": 1.488012671470642, |
|
"logps/chosen": -432.853271484375, |
|
"logps/rejected": -630.223876953125, |
|
"loss": 0.3378, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9797086715698242, |
|
"rewards/margins": 2.143889904022217, |
|
"rewards/rejected": -4.123598098754883, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 59.19017069860126, |
|
"learning_rate": 3.3911751259862403e-07, |
|
"logits/chosen": 0.9315579533576965, |
|
"logits/rejected": 1.3961995840072632, |
|
"logps/chosen": -493.1189880371094, |
|
"logps/rejected": -684.4100341796875, |
|
"loss": 0.3291, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.291141986846924, |
|
"rewards/margins": 2.0969302654266357, |
|
"rewards/rejected": -4.3880720138549805, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -0.2334394007921219, |
|
"eval_logits/rejected": 0.188625305891037, |
|
"eval_logps/chosen": -477.9444580078125, |
|
"eval_logps/rejected": -595.6332397460938, |
|
"eval_loss": 0.5102677941322327, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -1.6616793870925903, |
|
"eval_rewards/margins": 1.1194015741348267, |
|
"eval_rewards/rejected": -2.781080961227417, |
|
"eval_runtime": 97.2562, |
|
"eval_samples_per_second": 20.564, |
|
"eval_steps_per_second": 0.329, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 37.653590501774474, |
|
"learning_rate": 3.3302872796634754e-07, |
|
"logits/chosen": 0.9580332040786743, |
|
"logits/rejected": 1.3357497453689575, |
|
"logps/chosen": -427.964111328125, |
|
"logps/rejected": -620.7327880859375, |
|
"loss": 0.3122, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.95559823513031, |
|
"rewards/margins": 2.1169991493225098, |
|
"rewards/rejected": -4.072597503662109, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 47.96131506831022, |
|
"learning_rate": 3.2688406859772035e-07, |
|
"logits/chosen": 0.8878351449966431, |
|
"logits/rejected": 1.4351171255111694, |
|
"logps/chosen": -489.7989196777344, |
|
"logps/rejected": -665.8047485351562, |
|
"loss": 0.3224, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.195067882537842, |
|
"rewards/margins": 2.1086602210998535, |
|
"rewards/rejected": -4.3037285804748535, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 65.32009143781127, |
|
"learning_rate": 3.206876695822541e-07, |
|
"logits/chosen": 1.3710159063339233, |
|
"logits/rejected": 1.7163244485855103, |
|
"logps/chosen": -493.956298828125, |
|
"logps/rejected": -688.6646728515625, |
|
"loss": 0.3129, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.438476085662842, |
|
"rewards/margins": 2.2680106163024902, |
|
"rewards/rejected": -4.706486701965332, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 66.03238810693847, |
|
"learning_rate": 3.144437008280012e-07, |
|
"logits/chosen": 0.709919273853302, |
|
"logits/rejected": 1.0818461179733276, |
|
"logps/chosen": -468.56890869140625, |
|
"logps/rejected": -691.1434326171875, |
|
"loss": 0.3232, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.252897262573242, |
|
"rewards/margins": 2.3767807483673096, |
|
"rewards/rejected": -4.629677772521973, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 47.885060646853404, |
|
"learning_rate": 3.0815636425538665e-07, |
|
"logits/chosen": 1.0194989442825317, |
|
"logits/rejected": 1.571274995803833, |
|
"logps/chosen": -446.6681213378906, |
|
"logps/rejected": -611.84033203125, |
|
"loss": 0.3429, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.190187454223633, |
|
"rewards/margins": 2.0423951148986816, |
|
"rewards/rejected": -4.232582092285156, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 59.75526535732341, |
|
"learning_rate": 3.018298909694986e-07, |
|
"logits/chosen": 1.3580573797225952, |
|
"logits/rejected": 1.913851022720337, |
|
"logps/chosen": -489.56982421875, |
|
"logps/rejected": -673.2572021484375, |
|
"loss": 0.3288, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.595083236694336, |
|
"rewards/margins": 2.0307328701019287, |
|
"rewards/rejected": -4.6258158683776855, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 51.20761564052719, |
|
"learning_rate": 2.954685384127371e-07, |
|
"logits/chosen": 0.8674410581588745, |
|
"logits/rejected": 1.4072096347808838, |
|
"logps/chosen": -482.65789794921875, |
|
"logps/rejected": -649.311279296875, |
|
"loss": 0.301, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.194945812225342, |
|
"rewards/margins": 2.093947172164917, |
|
"rewards/rejected": -4.288893222808838, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 62.65952308868226, |
|
"learning_rate": 2.8907658749974054e-07, |
|
"logits/chosen": 0.9979363679885864, |
|
"logits/rejected": 1.4131087064743042, |
|
"logps/chosen": -457.8363342285156, |
|
"logps/rejected": -703.2235107421875, |
|
"loss": 0.2929, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.264411449432373, |
|
"rewards/margins": 2.5431039333343506, |
|
"rewards/rejected": -4.807515621185303, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 49.65473672539794, |
|
"learning_rate": 2.8265833973651503e-07, |
|
"logits/chosen": 0.6275979280471802, |
|
"logits/rejected": 1.0561200380325317, |
|
"logps/chosen": -459.69976806640625, |
|
"logps/rejected": -684.1864013671875, |
|
"loss": 0.2859, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.8421128988265991, |
|
"rewards/margins": 2.5259382724761963, |
|
"rewards/rejected": -4.368051528930664, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 48.72864396453521, |
|
"learning_rate": 2.7621811432570736e-07, |
|
"logits/chosen": 0.8585799336433411, |
|
"logits/rejected": 1.5937745571136475, |
|
"logps/chosen": -518.5455932617188, |
|
"logps/rejected": -734.5382690429688, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.441080093383789, |
|
"rewards/margins": 2.6617679595947266, |
|
"rewards/rejected": -5.102847576141357, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.18704134225845337, |
|
"eval_logits/rejected": 0.6721899509429932, |
|
"eval_logps/chosen": -541.279541015625, |
|
"eval_logps/rejected": -687.587158203125, |
|
"eval_loss": 0.5288776159286499, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -2.2950310707092285, |
|
"eval_rewards/margins": 1.40558922290802, |
|
"eval_rewards/rejected": -3.70061993598938, |
|
"eval_runtime": 97.5006, |
|
"eval_samples_per_second": 20.513, |
|
"eval_steps_per_second": 0.328, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 50.62866425523001, |
|
"learning_rate": 2.6976024525996917e-07, |
|
"logits/chosen": 1.1524347066879272, |
|
"logits/rejected": 1.7467842102050781, |
|
"logps/chosen": -503.6927795410156, |
|
"logps/rejected": -780.6187744140625, |
|
"loss": 0.286, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.7125723361968994, |
|
"rewards/margins": 2.8134512901306152, |
|
"rewards/rejected": -5.5260233879089355, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 56.03367218705217, |
|
"learning_rate": 2.6328907840536706e-07, |
|
"logits/chosen": 0.7062090039253235, |
|
"logits/rejected": 1.2199087142944336, |
|
"logps/chosen": -460.45794677734375, |
|
"logps/rejected": -685.5617065429688, |
|
"loss": 0.3244, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.43827748298645, |
|
"rewards/margins": 2.252427577972412, |
|
"rewards/rejected": -4.690704822540283, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 57.82647372234183, |
|
"learning_rate": 2.568089685768038e-07, |
|
"logits/chosen": 0.6572129130363464, |
|
"logits/rejected": 1.0754339694976807, |
|
"logps/chosen": -530.2496337890625, |
|
"logps/rejected": -698.03662109375, |
|
"loss": 0.313, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.59128475189209, |
|
"rewards/margins": 2.117705821990967, |
|
"rewards/rejected": -4.708990573883057, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 50.473574423912424, |
|
"learning_rate": 2.503242766074156e-07, |
|
"logits/chosen": 0.42826253175735474, |
|
"logits/rejected": 1.0195951461791992, |
|
"logps/chosen": -451.046142578125, |
|
"logps/rejected": -653.2913818359375, |
|
"loss": 0.2898, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.9979403018951416, |
|
"rewards/margins": 2.318507432937622, |
|
"rewards/rejected": -4.316447734832764, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 61.13648555404995, |
|
"learning_rate": 2.4383936641392136e-07, |
|
"logits/chosen": 0.6429548859596252, |
|
"logits/rejected": 1.103127360343933, |
|
"logps/chosen": -467.82049560546875, |
|
"logps/rejected": -702.5692749023438, |
|
"loss": 0.2975, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.0785393714904785, |
|
"rewards/margins": 2.386026382446289, |
|
"rewards/rejected": -4.464566230773926, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 51.760001565819636, |
|
"learning_rate": 2.3735860205989493e-07, |
|
"logits/chosen": 0.7451823353767395, |
|
"logits/rejected": 1.1489431858062744, |
|
"logps/chosen": -462.767333984375, |
|
"logps/rejected": -706.5615234375, |
|
"loss": 0.2627, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.312885284423828, |
|
"rewards/margins": 2.6091692447662354, |
|
"rewards/rejected": -4.922054767608643, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 56.13632726849474, |
|
"learning_rate": 2.308863448189402e-07, |
|
"logits/chosen": 0.5960752367973328, |
|
"logits/rejected": 1.0421712398529053, |
|
"logps/chosen": -498.1941833496094, |
|
"logps/rejected": -695.0504760742188, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.29612398147583, |
|
"rewards/margins": 2.4551825523376465, |
|
"rewards/rejected": -4.751306533813477, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 67.7549300842345, |
|
"learning_rate": 2.2442695023974246e-07, |
|
"logits/chosen": 0.6856900453567505, |
|
"logits/rejected": 1.3306076526641846, |
|
"logps/chosen": -444.3168029785156, |
|
"logps/rejected": -679.816650390625, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.0717947483062744, |
|
"rewards/margins": 2.6752490997314453, |
|
"rewards/rejected": -4.747043609619141, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 55.628538802719504, |
|
"learning_rate": 2.179847652149729e-07, |
|
"logits/chosen": 0.7401930093765259, |
|
"logits/rejected": 1.288172960281372, |
|
"logps/chosen": -496.6468811035156, |
|
"logps/rejected": -687.7960205078125, |
|
"loss": 0.295, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.4609100818634033, |
|
"rewards/margins": 2.223629951477051, |
|
"rewards/rejected": -4.684540271759033, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 63.651106043315345, |
|
"learning_rate": 2.115641250560183e-07, |
|
"logits/chosen": 0.8801604509353638, |
|
"logits/rejected": 1.5266039371490479, |
|
"logps/chosen": -473.2115173339844, |
|
"logps/rejected": -701.8800659179688, |
|
"loss": 0.2752, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.4201507568359375, |
|
"rewards/margins": 2.4442293643951416, |
|
"rewards/rejected": -4.864380836486816, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -0.16280797123908997, |
|
"eval_logits/rejected": 0.2751551866531372, |
|
"eval_logps/chosen": -533.1201782226562, |
|
"eval_logps/rejected": -668.2235717773438, |
|
"eval_loss": 0.5228938460350037, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -2.2134366035461426, |
|
"eval_rewards/margins": 1.2935477495193481, |
|
"eval_rewards/rejected": -3.506984233856201, |
|
"eval_runtime": 97.387, |
|
"eval_samples_per_second": 20.537, |
|
"eval_steps_per_second": 0.329, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 70.2608582618962, |
|
"learning_rate": 2.051693505755042e-07, |
|
"logits/chosen": 0.8354732394218445, |
|
"logits/rejected": 1.2750941514968872, |
|
"logps/chosen": -461.49786376953125, |
|
"logps/rejected": -705.8599853515625, |
|
"loss": 0.2946, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.4096267223358154, |
|
"rewards/margins": 2.483677864074707, |
|
"rewards/rejected": -4.893305778503418, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 49.246802198712466, |
|
"learning_rate": 1.9880474517957542e-07, |
|
"logits/chosen": 0.9254199862480164, |
|
"logits/rejected": 1.563522458076477, |
|
"logps/chosen": -481.2748107910156, |
|
"logps/rejected": -658.328125, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.385385036468506, |
|
"rewards/margins": 2.1492881774902344, |
|
"rewards/rejected": -4.53467321395874, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 88.28145029556197, |
|
"learning_rate": 1.9247459197189e-07, |
|
"logits/chosen": 0.8668380975723267, |
|
"logits/rejected": 1.5001232624053955, |
|
"logps/chosen": -488.27685546875, |
|
"logps/rejected": -680.9069213867188, |
|
"loss": 0.2652, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.6699295043945312, |
|
"rewards/margins": 2.2055306434631348, |
|
"rewards/rejected": -4.875459671020508, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 43.13543734061108, |
|
"learning_rate": 1.8618315087127602e-07, |
|
"logits/chosen": 0.6826521754264832, |
|
"logits/rejected": 1.2443543672561646, |
|
"logps/chosen": -499.20892333984375, |
|
"logps/rejected": -706.3511962890625, |
|
"loss": 0.2563, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.4423558712005615, |
|
"rewards/margins": 2.461874485015869, |
|
"rewards/rejected": -4.904230117797852, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 56.63843357010467, |
|
"learning_rate": 1.7993465574499102e-07, |
|
"logits/chosen": 0.5323538184165955, |
|
"logits/rejected": 1.2176125049591064, |
|
"logps/chosen": -463.47857666015625, |
|
"logps/rejected": -663.4465942382812, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.189335823059082, |
|
"rewards/margins": 2.420409679412842, |
|
"rewards/rejected": -4.609745502471924, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 56.31423994279339, |
|
"learning_rate": 1.7373331155951233e-07, |
|
"logits/chosen": 0.8688204884529114, |
|
"logits/rejected": 1.4698970317840576, |
|
"logps/chosen": -510.4227600097656, |
|
"logps/rejected": -748.5259399414062, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.550417900085449, |
|
"rewards/margins": 2.730776309967041, |
|
"rewards/rejected": -5.28119421005249, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 50.688626621321205, |
|
"learning_rate": 1.6758329155077743e-07, |
|
"logits/chosen": 1.0613950490951538, |
|
"logits/rejected": 1.5818780660629272, |
|
"logps/chosen": -495.5560607910156, |
|
"logps/rejected": -708.2391967773438, |
|
"loss": 0.2711, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.642883777618408, |
|
"rewards/margins": 2.6204209327697754, |
|
"rewards/rejected": -5.263304710388184, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 46.10359729315069, |
|
"learning_rate": 1.6148873441577662e-07, |
|
"logits/chosen": 1.0479947328567505, |
|
"logits/rejected": 1.5524357557296753, |
|
"logps/chosen": -480.2462463378906, |
|
"logps/rejected": -707.98681640625, |
|
"loss": 0.2699, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.261603355407715, |
|
"rewards/margins": 2.4961774349212646, |
|
"rewards/rejected": -4.757781028747559, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 41.346767344116245, |
|
"learning_rate": 1.5545374152738934e-07, |
|
"logits/chosen": 1.1905092000961304, |
|
"logits/rejected": 1.6182410717010498, |
|
"logps/chosen": -468.92083740234375, |
|
"logps/rejected": -689.1092529296875, |
|
"loss": 0.2722, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.264604091644287, |
|
"rewards/margins": 2.391749143600464, |
|
"rewards/rejected": -4.65635347366333, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 60.48896334839974, |
|
"learning_rate": 1.4948237417433775e-07, |
|
"logits/chosen": 1.380293369293213, |
|
"logits/rejected": 2.2697908878326416, |
|
"logps/chosen": -436.1393127441406, |
|
"logps/rejected": -673.2228393554688, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.151729106903076, |
|
"rewards/margins": 2.624401330947876, |
|
"rewards/rejected": -4.776130676269531, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": 0.5183509588241577, |
|
"eval_logits/rejected": 1.0725551843643188, |
|
"eval_logps/chosen": -518.2382202148438, |
|
"eval_logps/rejected": -652.8116455078125, |
|
"eval_loss": 0.5152209997177124, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -2.064617395401001, |
|
"eval_rewards/margins": 1.2882475852966309, |
|
"eval_rewards/rejected": -3.352864980697632, |
|
"eval_runtime": 97.3137, |
|
"eval_samples_per_second": 20.552, |
|
"eval_steps_per_second": 0.329, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 59.39383985362304, |
|
"learning_rate": 1.435786508281158e-07, |
|
"logits/chosen": 1.9009380340576172, |
|
"logits/rejected": 2.567354679107666, |
|
"logps/chosen": -482.70513916015625, |
|
"logps/rejected": -720.0316162109375, |
|
"loss": 0.2499, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.3441195487976074, |
|
"rewards/margins": 2.6516547203063965, |
|
"rewards/rejected": -4.995774269104004, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 58.953283614647454, |
|
"learning_rate": 1.3774654443873174e-07, |
|
"logits/chosen": 1.749333381652832, |
|
"logits/rejected": 2.4905173778533936, |
|
"logps/chosen": -512.65625, |
|
"logps/rejected": -763.8499145507812, |
|
"loss": 0.2542, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.783947467803955, |
|
"rewards/margins": 2.989567756652832, |
|
"rewards/rejected": -5.773515224456787, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 57.229551980352035, |
|
"learning_rate": 1.31989979761085e-07, |
|
"logits/chosen": 1.3056137561798096, |
|
"logits/rejected": 2.2303478717803955, |
|
"logps/chosen": -465.61627197265625, |
|
"logps/rejected": -746.7559814453125, |
|
"loss": 0.2416, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.5093438625335693, |
|
"rewards/margins": 3.106735944747925, |
|
"rewards/rejected": -5.616079807281494, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 53.92751444407525, |
|
"learning_rate": 1.2631283071377618e-07, |
|
"logits/chosen": 1.6224052906036377, |
|
"logits/rejected": 1.9630991220474243, |
|
"logps/chosen": -458.9669494628906, |
|
"logps/rejected": -742.6818237304688, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.4590606689453125, |
|
"rewards/margins": 2.7507693767547607, |
|
"rewards/rejected": -5.209830284118652, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 48.183067890071925, |
|
"learning_rate": 1.2071891777212744e-07, |
|
"logits/chosen": 1.061023235321045, |
|
"logits/rejected": 1.9151092767715454, |
|
"logps/chosen": -507.06744384765625, |
|
"logps/rejected": -707.039794921875, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.448425054550171, |
|
"rewards/margins": 2.3641083240509033, |
|
"rewards/rejected": -4.812533855438232, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 48.31856194766799, |
|
"learning_rate": 1.1521200539716874e-07, |
|
"logits/chosen": 1.2143045663833618, |
|
"logits/rejected": 1.9916166067123413, |
|
"logps/chosen": -500.71038818359375, |
|
"logps/rejected": -771.3677978515625, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3821799755096436, |
|
"rewards/margins": 3.1737558841705322, |
|
"rewards/rejected": -5.555935859680176, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 57.66373376149326, |
|
"learning_rate": 1.0979579950231821e-07, |
|
"logits/chosen": 1.1112618446350098, |
|
"logits/rejected": 2.246898889541626, |
|
"logps/chosen": -502.126220703125, |
|
"logps/rejected": -734.8248901367188, |
|
"loss": 0.241, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.395838737487793, |
|
"rewards/margins": 2.6420400142669678, |
|
"rewards/rejected": -5.03787899017334, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 55.20670800594472, |
|
"learning_rate": 1.0447394495946291e-07, |
|
"logits/chosen": 1.387683391571045, |
|
"logits/rejected": 2.400949478149414, |
|
"logps/chosen": -515.9779052734375, |
|
"logps/rejected": -765.4949340820312, |
|
"loss": 0.2468, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.70845365524292, |
|
"rewards/margins": 2.7117531299591064, |
|
"rewards/rejected": -5.420206546783447, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 45.9412294534277, |
|
"learning_rate": 9.925002314611841e-08, |
|
"logits/chosen": 1.8099420070648193, |
|
"logits/rejected": 2.5098319053649902, |
|
"logps/chosen": -484.7242736816406, |
|
"logps/rejected": -777.49169921875, |
|
"loss": 0.2383, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.564988374710083, |
|
"rewards/margins": 2.9337170124053955, |
|
"rewards/rejected": -5.498705863952637, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 64.863814963629, |
|
"learning_rate": 9.412754953531663e-08, |
|
"logits/chosen": 1.5222892761230469, |
|
"logits/rejected": 2.5317773818969727, |
|
"logps/chosen": -507.424072265625, |
|
"logps/rejected": -756.7098388671875, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.656026601791382, |
|
"rewards/margins": 2.7969748973846436, |
|
"rewards/rejected": -5.453001976013184, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": 0.6804571151733398, |
|
"eval_logits/rejected": 1.3123811483383179, |
|
"eval_logps/chosen": -556.8264770507812, |
|
"eval_logps/rejected": -703.1602783203125, |
|
"eval_loss": 0.5241079330444336, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -2.4504995346069336, |
|
"eval_rewards/margins": 1.405852198600769, |
|
"eval_rewards/rejected": -3.856351613998413, |
|
"eval_runtime": 97.4441, |
|
"eval_samples_per_second": 20.525, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 69.68207773392557, |
|
"learning_rate": 8.910997132984479e-08, |
|
"logits/chosen": 1.820955514907837, |
|
"logits/rejected": 2.952479839324951, |
|
"logps/chosen": -544.1399536132812, |
|
"logps/rejected": -808.0184936523438, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.861184597015381, |
|
"rewards/margins": 3.071931838989258, |
|
"rewards/rejected": -5.933116436004639, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 50.59071094029437, |
|
"learning_rate": 8.42006651424274e-08, |
|
"logits/chosen": 1.8404204845428467, |
|
"logits/rejected": 2.6863815784454346, |
|
"logps/chosen": -461.4169921875, |
|
"logps/rejected": -703.1361083984375, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.4329962730407715, |
|
"rewards/margins": 2.7300188541412354, |
|
"rewards/rejected": -5.163014888763428, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 57.22762908033313, |
|
"learning_rate": 7.940293472341217e-08, |
|
"logits/chosen": 2.013861894607544, |
|
"logits/rejected": 2.7502970695495605, |
|
"logps/chosen": -477.7572326660156, |
|
"logps/rejected": -773.4556884765625, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6210336685180664, |
|
"rewards/margins": 3.139965057373047, |
|
"rewards/rejected": -5.7609992027282715, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 55.15868922046573, |
|
"learning_rate": 7.472000873748918e-08, |
|
"logits/chosen": 2.0298519134521484, |
|
"logits/rejected": 2.990135431289673, |
|
"logps/chosen": -528.5840454101562, |
|
"logps/rejected": -781.4909057617188, |
|
"loss": 0.2487, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.6361494064331055, |
|
"rewards/margins": 2.9660372734069824, |
|
"rewards/rejected": -5.602187156677246, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 43.438291077124795, |
|
"learning_rate": 7.015503859093927e-08, |
|
"logits/chosen": 2.1326801776885986, |
|
"logits/rejected": 2.5511794090270996, |
|
"logps/chosen": -486.6455078125, |
|
"logps/rejected": -757.7630004882812, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.617185592651367, |
|
"rewards/margins": 2.795973062515259, |
|
"rewards/rejected": -5.413158893585205, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 63.14016572546011, |
|
"learning_rate": 6.571109631087451e-08, |
|
"logits/chosen": 2.417752742767334, |
|
"logits/rejected": 3.036146402359009, |
|
"logps/chosen": -494.73046875, |
|
"logps/rejected": -811.0126953125, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.588284492492676, |
|
"rewards/margins": 3.300442934036255, |
|
"rewards/rejected": -5.888727188110352, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 58.89863039830767, |
|
"learning_rate": 6.139117247789687e-08, |
|
"logits/chosen": 2.5516977310180664, |
|
"logits/rejected": 3.055995464324951, |
|
"logps/chosen": -535.7842407226562, |
|
"logps/rejected": -800.0374145507812, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.956123113632202, |
|
"rewards/margins": 2.720890998840332, |
|
"rewards/rejected": -5.677014350891113, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 41.21215573686561, |
|
"learning_rate": 5.719817421356685e-08, |
|
"logits/chosen": 1.9021530151367188, |
|
"logits/rejected": 2.7421538829803467, |
|
"logps/chosen": -549.5343017578125, |
|
"logps/rejected": -820.0753784179688, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.7265052795410156, |
|
"rewards/margins": 3.281470537185669, |
|
"rewards/rejected": -6.007976055145264, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 58.39711865385947, |
|
"learning_rate": 5.313492322403701e-08, |
|
"logits/chosen": 2.2018539905548096, |
|
"logits/rejected": 2.951138496398926, |
|
"logps/chosen": -533.9331665039062, |
|
"logps/rejected": -891.0558471679688, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.8866357803344727, |
|
"rewards/margins": 3.6149306297302246, |
|
"rewards/rejected": -6.501566410064697, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 51.18256501676837, |
|
"learning_rate": 4.9204153901165805e-08, |
|
"logits/chosen": 1.9893665313720703, |
|
"logits/rejected": 2.7781219482421875, |
|
"logps/chosen": -530.7794189453125, |
|
"logps/rejected": -824.0559692382812, |
|
"loss": 0.2299, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.8573508262634277, |
|
"rewards/margins": 3.2189173698425293, |
|
"rewards/rejected": -6.076268196105957, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": 0.8391125202178955, |
|
"eval_logits/rejected": 1.4834216833114624, |
|
"eval_logps/chosen": -588.2494506835938, |
|
"eval_logps/rejected": -741.857421875, |
|
"eval_loss": 0.5312901139259338, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -2.7647294998168945, |
|
"eval_rewards/margins": 1.4785932302474976, |
|
"eval_rewards/rejected": -4.243322849273682, |
|
"eval_runtime": 97.5423, |
|
"eval_samples_per_second": 20.504, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 68.60925195657734, |
|
"learning_rate": 4.540851148239036e-08, |
|
"logits/chosen": 1.7061752080917358, |
|
"logits/rejected": 2.698995351791382, |
|
"logps/chosen": -537.1931762695312, |
|
"logps/rejected": -848.33154296875, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.7809014320373535, |
|
"rewards/margins": 3.3348469734191895, |
|
"rewards/rejected": -6.115748405456543, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 48.80096479357628, |
|
"learning_rate": 4.1750550270596206e-08, |
|
"logits/chosen": 1.531884789466858, |
|
"logits/rejected": 2.923696994781494, |
|
"logps/chosen": -509.5885314941406, |
|
"logps/rejected": -794.9307250976562, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.630959987640381, |
|
"rewards/margins": 3.3725571632385254, |
|
"rewards/rejected": -6.003516674041748, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 68.79197398198284, |
|
"learning_rate": 3.823273191518234e-08, |
|
"logits/chosen": 1.5292671918869019, |
|
"logits/rejected": 2.3230159282684326, |
|
"logps/chosen": -568.5833740234375, |
|
"logps/rejected": -835.826171875, |
|
"loss": 0.2178, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.0106937885284424, |
|
"rewards/margins": 3.2017643451690674, |
|
"rewards/rejected": -6.212458610534668, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 59.434543375011025, |
|
"learning_rate": 3.485742375547745e-08, |
|
"logits/chosen": 1.4421080350875854, |
|
"logits/rejected": 2.442089796066284, |
|
"logps/chosen": -553.727294921875, |
|
"logps/rejected": -822.7138671875, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.1090734004974365, |
|
"rewards/margins": 2.9853668212890625, |
|
"rewards/rejected": -6.094440460205078, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 38.888275757403804, |
|
"learning_rate": 3.162689722762365e-08, |
|
"logits/chosen": 1.5811113119125366, |
|
"logits/rejected": 2.2564284801483154, |
|
"logps/chosen": -543.1163940429688, |
|
"logps/rejected": -842.681640625, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.9668571949005127, |
|
"rewards/margins": 3.10882830619812, |
|
"rewards/rejected": -6.075685024261475, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 42.47551430381964, |
|
"learning_rate": 2.8543326335997904e-08, |
|
"logits/chosen": 1.768690824508667, |
|
"logits/rejected": 2.4484939575195312, |
|
"logps/chosen": -556.0635375976562, |
|
"logps/rejected": -805.807373046875, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.865739107131958, |
|
"rewards/margins": 2.8989548683166504, |
|
"rewards/rejected": -5.764693737030029, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 59.36158165544989, |
|
"learning_rate": 2.560878619020157e-08, |
|
"logits/chosen": 1.9017894268035889, |
|
"logits/rejected": 2.7026009559631348, |
|
"logps/chosen": -521.269287109375, |
|
"logps/rejected": -813.7127685546875, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.9693474769592285, |
|
"rewards/margins": 3.1322848796844482, |
|
"rewards/rejected": -6.101632595062256, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 49.475189963130575, |
|
"learning_rate": 2.2825251608601466e-08, |
|
"logits/chosen": 1.8870357275009155, |
|
"logits/rejected": 2.8944287300109863, |
|
"logps/chosen": -558.059814453125, |
|
"logps/rejected": -868.568359375, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.1376397609710693, |
|
"rewards/margins": 3.2884891033172607, |
|
"rewards/rejected": -6.426129341125488, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 85.599165147591, |
|
"learning_rate": 2.0194595789362474e-08, |
|
"logits/chosen": 1.9095745086669922, |
|
"logits/rejected": 2.530900478363037, |
|
"logps/chosen": -577.1746826171875, |
|
"logps/rejected": -892.88623046875, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.0735995769500732, |
|
"rewards/margins": 3.377427339553833, |
|
"rewards/rejected": -6.451026916503906, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 45.52491787365754, |
|
"learning_rate": 1.7718589049866728e-08, |
|
"logits/chosen": 2.376490592956543, |
|
"logits/rejected": 3.1364424228668213, |
|
"logps/chosen": -510.269287109375, |
|
"logps/rejected": -829.1940307617188, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.9278645515441895, |
|
"rewards/margins": 3.433408737182617, |
|
"rewards/rejected": -6.361273765563965, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 0.8963963389396667, |
|
"eval_logits/rejected": 1.5457934141159058, |
|
"eval_logps/chosen": -606.617431640625, |
|
"eval_logps/rejected": -764.6512451171875, |
|
"eval_loss": 0.5366576910018921, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -2.948409080505371, |
|
"eval_rewards/margins": 1.5228519439697266, |
|
"eval_rewards/rejected": -4.471261024475098, |
|
"eval_runtime": 97.4355, |
|
"eval_samples_per_second": 20.526, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 56.7147448955845, |
|
"learning_rate": 1.539889763536645e-08, |
|
"logits/chosen": 1.9441492557525635, |
|
"logits/rejected": 3.0478804111480713, |
|
"logps/chosen": -538.355224609375, |
|
"logps/rejected": -856.01416015625, |
|
"loss": 0.2187, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.842240571975708, |
|
"rewards/margins": 3.5280959606170654, |
|
"rewards/rejected": -6.370336055755615, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 60.258963508413004, |
|
"learning_rate": 1.3237082597673172e-08, |
|
"logits/chosen": 2.1856608390808105, |
|
"logits/rejected": 2.853616237640381, |
|
"logps/chosen": -517.0845947265625, |
|
"logps/rejected": -845.6990966796875, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0185937881469727, |
|
"rewards/margins": 3.2306289672851562, |
|
"rewards/rejected": -6.249222755432129, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 71.41232139420377, |
|
"learning_rate": 1.1234598744637502e-08, |
|
"logits/chosen": 1.5448696613311768, |
|
"logits/rejected": 2.610525608062744, |
|
"logps/chosen": -545.0371704101562, |
|
"logps/rejected": -821.2421875, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.1403965950012207, |
|
"rewards/margins": 3.1843514442443848, |
|
"rewards/rejected": -6.3247480392456055, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 57.959377016977456, |
|
"learning_rate": 9.392793661126414e-09, |
|
"logits/chosen": 1.898782730102539, |
|
"logits/rejected": 2.7061781883239746, |
|
"logps/chosen": -582.9857177734375, |
|
"logps/rejected": -879.3019409179688, |
|
"loss": 0.1979, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.2453556060791016, |
|
"rewards/margins": 3.297309160232544, |
|
"rewards/rejected": -6.542665004730225, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 50.86760187147993, |
|
"learning_rate": 7.71290680215711e-09, |
|
"logits/chosen": 2.0340778827667236, |
|
"logits/rejected": 2.8080642223358154, |
|
"logps/chosen": -558.147705078125, |
|
"logps/rejected": -874.9266357421875, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.0640769004821777, |
|
"rewards/margins": 3.380338668823242, |
|
"rewards/rejected": -6.444415092468262, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 61.973766270626015, |
|
"learning_rate": 6.196068658797543e-09, |
|
"logits/chosen": 1.8814232349395752, |
|
"logits/rejected": 2.7813236713409424, |
|
"logps/chosen": -551.5777587890625, |
|
"logps/rejected": -826.7698974609375, |
|
"loss": 0.1971, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.9602150917053223, |
|
"rewards/margins": 3.0024728775024414, |
|
"rewards/rejected": -5.9626874923706055, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 67.6695850405579, |
|
"learning_rate": 4.843299997394717e-09, |
|
"logits/chosen": 1.856507658958435, |
|
"logits/rejected": 2.7601516246795654, |
|
"logps/chosen": -540.268310546875, |
|
"logps/rejected": -846.9691162109375, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.077454090118408, |
|
"rewards/margins": 3.414836883544922, |
|
"rewards/rejected": -6.492290496826172, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 68.73319089653008, |
|
"learning_rate": 3.655511172643372e-09, |
|
"logits/chosen": 1.932074785232544, |
|
"logits/rejected": 2.437225818634033, |
|
"logps/chosen": -531.4140625, |
|
"logps/rejected": -836.9505615234375, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.8276994228363037, |
|
"rewards/margins": 3.25665020942688, |
|
"rewards/rejected": -6.084350109100342, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 50.423800165908794, |
|
"learning_rate": 2.633501514956532e-09, |
|
"logits/chosen": 1.9169034957885742, |
|
"logits/rejected": 2.7369441986083984, |
|
"logps/chosen": -586.8289794921875, |
|
"logps/rejected": -896.8014526367188, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.1295228004455566, |
|
"rewards/margins": 3.5232949256896973, |
|
"rewards/rejected": -6.652817726135254, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 57.31903342529662, |
|
"learning_rate": 1.777958792550993e-09, |
|
"logits/chosen": 1.5464543104171753, |
|
"logits/rejected": 2.9688878059387207, |
|
"logps/chosen": -587.2015380859375, |
|
"logps/rejected": -853.0357666015625, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.988502025604248, |
|
"rewards/margins": 3.156489372253418, |
|
"rewards/rejected": -6.144991397857666, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": 0.9558575749397278, |
|
"eval_logits/rejected": 1.609464406967163, |
|
"eval_logps/chosen": -609.159423828125, |
|
"eval_logps/rejected": -767.4317016601562, |
|
"eval_loss": 0.5365558862686157, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -2.9738292694091797, |
|
"eval_rewards/margins": 1.5252362489700317, |
|
"eval_rewards/rejected": -4.499065399169922, |
|
"eval_runtime": 97.3239, |
|
"eval_samples_per_second": 20.55, |
|
"eval_steps_per_second": 0.329, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 66.21886288694567, |
|
"learning_rate": 1.0894587486089125e-09, |
|
"logits/chosen": 1.8931999206542969, |
|
"logits/rejected": 2.824298858642578, |
|
"logps/chosen": -563.06201171875, |
|
"logps/rejected": -834.8709716796875, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.2370285987854004, |
|
"rewards/margins": 3.035515546798706, |
|
"rewards/rejected": -6.272543430328369, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 45.779926433395936, |
|
"learning_rate": 5.684647138277098e-10, |
|
"logits/chosen": 1.7055333852767944, |
|
"logits/rejected": 2.308079719543457, |
|
"logps/chosen": -531.0139770507812, |
|
"logps/rejected": -862.2609252929688, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.956573486328125, |
|
"rewards/margins": 3.375626802444458, |
|
"rewards/rejected": -6.332200050354004, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 58.05458328657747, |
|
"learning_rate": 2.153272946184559e-10, |
|
"logits/chosen": 1.735358476638794, |
|
"logits/rejected": 2.259385585784912, |
|
"logps/chosen": -585.9295043945312, |
|
"logps/rejected": -861.4645385742188, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.10073184967041, |
|
"rewards/margins": 2.996291399002075, |
|
"rewards/rejected": -6.097023010253906, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 46.42702960995785, |
|
"learning_rate": 3.0284137163189004e-11, |
|
"logits/chosen": 2.000138759613037, |
|
"logits/rejected": 2.7859671115875244, |
|
"logps/chosen": -530.1033935546875, |
|
"logps/rejected": -878.3465576171875, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.1844658851623535, |
|
"rewards/margins": 3.3884029388427734, |
|
"rewards/rejected": -6.572869300842285, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1346, |
|
"total_flos": 0.0, |
|
"train_loss": 0.335402155391883, |
|
"train_runtime": 21644.3608, |
|
"train_samples_per_second": 7.959, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1346, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|