|
{ |
|
"best_metric": 0.6194455623626709, |
|
"best_model_checkpoint": "./checkpoints_dpo_final_2/Phi-3-mini-4k-instruct/checkpoint-1500", |
|
"epoch": 2.0, |
|
"eval_steps": 50, |
|
"global_step": 1608, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004975124378109453, |
|
"grad_norm": 7.420848846435547, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": 0.31535276770591736, |
|
"logits/rejected": 0.2069419026374817, |
|
"logps/chosen": -443.7961120605469, |
|
"logps/rejected": -403.4725341796875, |
|
"loss": 1.2553, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 7.078475475311279, |
|
"rewards/margins": 0.11215054243803024, |
|
"rewards/rejected": 6.966324329376221, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.009950248756218905, |
|
"grad_norm": 7.148873805999756, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": 0.28295886516571045, |
|
"logits/rejected": 0.3022560179233551, |
|
"logps/chosen": -366.45233154296875, |
|
"logps/rejected": -426.3655090332031, |
|
"loss": 0.9806, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 6.658623218536377, |
|
"rewards/margins": -0.2977091073989868, |
|
"rewards/rejected": 6.956332683563232, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.014925373134328358, |
|
"grad_norm": 8.879262924194336, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": 0.5261741876602173, |
|
"logits/rejected": 0.47682714462280273, |
|
"logps/chosen": -399.4539489746094, |
|
"logps/rejected": -382.5442810058594, |
|
"loss": 0.9681, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 6.928550720214844, |
|
"rewards/margins": 0.06680499017238617, |
|
"rewards/rejected": 6.861745357513428, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01990049751243781, |
|
"grad_norm": 12.120668411254883, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": 0.7348346710205078, |
|
"logits/rejected": 0.45035141706466675, |
|
"logps/chosen": -433.3005065917969, |
|
"logps/rejected": -387.620361328125, |
|
"loss": 1.1552, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 6.386252403259277, |
|
"rewards/margins": 0.23828373849391937, |
|
"rewards/rejected": 6.147968292236328, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.024875621890547265, |
|
"grad_norm": 7.843857765197754, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": 0.5343019962310791, |
|
"logits/rejected": 0.24399122595787048, |
|
"logps/chosen": -425.4538879394531, |
|
"logps/rejected": -353.02947998046875, |
|
"loss": 0.9466, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 5.57993745803833, |
|
"rewards/margins": 0.3308122158050537, |
|
"rewards/rejected": 5.249125957489014, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.029850746268656716, |
|
"grad_norm": 6.334160804748535, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": 0.27708423137664795, |
|
"logits/rejected": 0.22941020131111145, |
|
"logps/chosen": -384.0626525878906, |
|
"logps/rejected": -335.5965881347656, |
|
"loss": 0.8395, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": 4.043614864349365, |
|
"rewards/margins": -0.3541470766067505, |
|
"rewards/rejected": 4.397762298583984, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03482587064676617, |
|
"grad_norm": 6.785109519958496, |
|
"learning_rate": 5.600000000000001e-06, |
|
"logits/chosen": 0.3940538465976715, |
|
"logits/rejected": 0.13648821413516998, |
|
"logps/chosen": -459.2915954589844, |
|
"logps/rejected": -384.01031494140625, |
|
"loss": 0.7924, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 3.638526439666748, |
|
"rewards/margins": 0.16708242893218994, |
|
"rewards/rejected": 3.4714441299438477, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03980099502487562, |
|
"grad_norm": 6.778156280517578, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"logits/chosen": 0.6169087886810303, |
|
"logits/rejected": 0.4341488480567932, |
|
"logps/chosen": -476.3402404785156, |
|
"logps/rejected": -463.748779296875, |
|
"loss": 0.7885, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 3.291264533996582, |
|
"rewards/margins": 0.29383713006973267, |
|
"rewards/rejected": 2.9974277019500732, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04477611940298507, |
|
"grad_norm": 7.815089702606201, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"logits/chosen": 0.8724404573440552, |
|
"logits/rejected": 0.5648743510246277, |
|
"logps/chosen": -418.50372314453125, |
|
"logps/rejected": -364.5290222167969, |
|
"loss": 0.8768, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 2.4819023609161377, |
|
"rewards/margins": -0.11123146116733551, |
|
"rewards/rejected": 2.5931336879730225, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04975124378109453, |
|
"grad_norm": 5.426750659942627, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": 0.4279947876930237, |
|
"logits/rejected": 0.23432603478431702, |
|
"logps/chosen": -429.2681579589844, |
|
"logps/rejected": -391.44281005859375, |
|
"loss": 0.7756, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 2.358858585357666, |
|
"rewards/margins": 0.2736659348011017, |
|
"rewards/rejected": 2.0851926803588867, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05472636815920398, |
|
"grad_norm": 6.188413619995117, |
|
"learning_rate": 8.8e-06, |
|
"logits/chosen": 0.13682100176811218, |
|
"logits/rejected": 0.03174281492829323, |
|
"logps/chosen": -440.99224853515625, |
|
"logps/rejected": -466.3514404296875, |
|
"loss": 0.7254, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 2.198638439178467, |
|
"rewards/margins": 0.15260140597820282, |
|
"rewards/rejected": 2.046036958694458, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05970149253731343, |
|
"grad_norm": 6.239718914031982, |
|
"learning_rate": 9.600000000000001e-06, |
|
"logits/chosen": 0.23234650492668152, |
|
"logits/rejected": 0.25346270203590393, |
|
"logps/chosen": -512.7527465820312, |
|
"logps/rejected": -494.216552734375, |
|
"loss": 0.7121, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 2.1018052101135254, |
|
"rewards/margins": 0.08936208486557007, |
|
"rewards/rejected": 2.0124430656433105, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06218905472636816, |
|
"eval_logits/chosen": 0.33167123794555664, |
|
"eval_logits/rejected": 0.1882716715335846, |
|
"eval_logps/chosen": -432.4385070800781, |
|
"eval_logps/rejected": -392.6837158203125, |
|
"eval_loss": 0.7078412175178528, |
|
"eval_rewards/accuracies": 0.5694444179534912, |
|
"eval_rewards/chosen": 1.9858527183532715, |
|
"eval_rewards/margins": 0.0740758553147316, |
|
"eval_rewards/rejected": 1.9117769002914429, |
|
"eval_runtime": 149.9332, |
|
"eval_samples_per_second": 7.623, |
|
"eval_steps_per_second": 0.24, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06467661691542288, |
|
"grad_norm": 6.6676530838012695, |
|
"learning_rate": 1.04e-05, |
|
"logits/chosen": 0.046434201300144196, |
|
"logits/rejected": 0.02955937385559082, |
|
"logps/chosen": -452.5304260253906, |
|
"logps/rejected": -471.0543518066406, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 2.0271408557891846, |
|
"rewards/margins": 0.25260087847709656, |
|
"rewards/rejected": 1.7745399475097656, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06965174129353234, |
|
"grad_norm": 6.596746444702148, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"logits/chosen": 0.5402776598930359, |
|
"logits/rejected": 0.4161326587200165, |
|
"logps/chosen": -451.6429748535156, |
|
"logps/rejected": -434.0633239746094, |
|
"loss": 0.7366, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 1.8598072528839111, |
|
"rewards/margins": -0.006673937663435936, |
|
"rewards/rejected": 1.8664811849594116, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 5.593472957611084, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": 0.40079164505004883, |
|
"logits/rejected": 0.25033000111579895, |
|
"logps/chosen": -504.302490234375, |
|
"logps/rejected": -494.911376953125, |
|
"loss": 0.7328, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.5626219511032104, |
|
"rewards/margins": 0.10345478355884552, |
|
"rewards/rejected": 1.4591671228408813, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07960199004975124, |
|
"grad_norm": 5.58954381942749, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"logits/chosen": 0.25867709517478943, |
|
"logits/rejected": 0.14657628536224365, |
|
"logps/chosen": -431.3748474121094, |
|
"logps/rejected": -424.9607849121094, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.8101712465286255, |
|
"rewards/margins": 0.0693005919456482, |
|
"rewards/rejected": 0.7408705949783325, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0845771144278607, |
|
"grad_norm": 6.670442581176758, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"logits/chosen": 0.3967319130897522, |
|
"logits/rejected": 0.3033946752548218, |
|
"logps/chosen": -457.9029541015625, |
|
"logps/rejected": -449.4042053222656, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.5467469692230225, |
|
"rewards/margins": 0.12546321749687195, |
|
"rewards/rejected": 0.4212837517261505, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08955223880597014, |
|
"grad_norm": 5.157428741455078, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"logits/chosen": 0.5071850419044495, |
|
"logits/rejected": 0.3140091598033905, |
|
"logps/chosen": -429.99468994140625, |
|
"logps/rejected": -405.0130615234375, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.8408704996109009, |
|
"rewards/margins": 0.23394504189491272, |
|
"rewards/rejected": 0.6069254279136658, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0945273631840796, |
|
"grad_norm": 5.6009521484375, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"logits/chosen": 0.2075415998697281, |
|
"logits/rejected": 0.07978951930999756, |
|
"logps/chosen": -417.7720947265625, |
|
"logps/rejected": -385.97576904296875, |
|
"loss": 0.7021, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 1.121203064918518, |
|
"rewards/margins": 0.08919668942689896, |
|
"rewards/rejected": 1.0320063829421997, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.09950248756218906, |
|
"grad_norm": 5.1235575675964355, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": 0.4204176962375641, |
|
"logits/rejected": 0.03434094786643982, |
|
"logps/chosen": -658.8785400390625, |
|
"logps/rejected": -461.9447937011719, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.9897336363792419, |
|
"rewards/margins": 0.21804025769233704, |
|
"rewards/rejected": 0.7716932892799377, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1044776119402985, |
|
"grad_norm": 6.649357795715332, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"logits/chosen": 0.2298906296491623, |
|
"logits/rejected": 0.1789359152317047, |
|
"logps/chosen": -478.6454772949219, |
|
"logps/rejected": -463.6629638671875, |
|
"loss": 0.7429, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.5649065971374512, |
|
"rewards/margins": 0.21340136229991913, |
|
"rewards/rejected": 0.35150521993637085, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.10945273631840796, |
|
"grad_norm": 6.433568954467773, |
|
"learning_rate": 1.76e-05, |
|
"logits/chosen": 0.1520080417394638, |
|
"logits/rejected": 0.10301964730024338, |
|
"logps/chosen": -534.1607666015625, |
|
"logps/rejected": -517.918701171875, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.4300222396850586, |
|
"rewards/margins": 0.29336607456207275, |
|
"rewards/rejected": 0.13665619492530823, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.11442786069651742, |
|
"grad_norm": 5.594571590423584, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"logits/chosen": 0.18866638839244843, |
|
"logits/rejected": 0.03936055302619934, |
|
"logps/chosen": -477.4437255859375, |
|
"logps/rejected": -434.723388671875, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.16455253958702087, |
|
"rewards/margins": 0.15519294142723083, |
|
"rewards/rejected": 0.00935959443449974, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.11940298507462686, |
|
"grad_norm": 4.924789905548096, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"logits/chosen": 0.19729886949062347, |
|
"logits/rejected": 0.06473005563020706, |
|
"logps/chosen": -444.3799133300781, |
|
"logps/rejected": -394.0942687988281, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.34238868951797485, |
|
"rewards/margins": 0.23226764798164368, |
|
"rewards/rejected": 0.11012104153633118, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.12437810945273632, |
|
"grad_norm": 9.16905403137207, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": 0.1745055466890335, |
|
"logits/rejected": 0.18110498785972595, |
|
"logps/chosen": -590.113525390625, |
|
"logps/rejected": -565.5681762695312, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.5021845102310181, |
|
"rewards/margins": 0.1258457899093628, |
|
"rewards/rejected": 0.3763387203216553, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12437810945273632, |
|
"eval_logits/chosen": 0.27222201228141785, |
|
"eval_logits/rejected": 0.13300225138664246, |
|
"eval_logps/chosen": -448.08441162109375, |
|
"eval_logps/rejected": -409.7933349609375, |
|
"eval_loss": 0.6717547178268433, |
|
"eval_rewards/accuracies": 0.5972222089767456, |
|
"eval_rewards/chosen": 0.42125940322875977, |
|
"eval_rewards/margins": 0.22044435143470764, |
|
"eval_rewards/rejected": 0.20081506669521332, |
|
"eval_runtime": 150.2898, |
|
"eval_samples_per_second": 7.605, |
|
"eval_steps_per_second": 0.24, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12935323383084577, |
|
"grad_norm": 5.174230098724365, |
|
"learning_rate": 1.9999652796146877e-05, |
|
"logits/chosen": 0.4161723256111145, |
|
"logits/rejected": 0.32582810521125793, |
|
"logps/chosen": -493.2930908203125, |
|
"logps/rejected": -458.1988830566406, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.37257951498031616, |
|
"rewards/margins": 0.16849718987941742, |
|
"rewards/rejected": 0.20408231019973755, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.13432835820895522, |
|
"grad_norm": 5.811988353729248, |
|
"learning_rate": 1.9998611208697607e-05, |
|
"logits/chosen": 0.5949371457099915, |
|
"logits/rejected": 0.41842520236968994, |
|
"logps/chosen": -447.29522705078125, |
|
"logps/rejected": -407.77264404296875, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.040240589529275894, |
|
"rewards/margins": 0.27960366010665894, |
|
"rewards/rejected": -0.23936308920383453, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.13930348258706468, |
|
"grad_norm": 6.341477870941162, |
|
"learning_rate": 1.9996875309980824e-05, |
|
"logits/chosen": 0.5326985120773315, |
|
"logits/rejected": 0.3219985067844391, |
|
"logps/chosen": -592.6687622070312, |
|
"logps/rejected": -506.40093994140625, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08561475574970245, |
|
"rewards/margins": 0.18506459891796112, |
|
"rewards/rejected": -0.27067938446998596, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.14427860696517414, |
|
"grad_norm": 6.128859519958496, |
|
"learning_rate": 1.9994445220538678e-05, |
|
"logits/chosen": 0.2585601210594177, |
|
"logits/rejected": 0.06527578085660934, |
|
"logps/chosen": -442.81512451171875, |
|
"logps/rejected": -460.4501953125, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.07791093736886978, |
|
"rewards/margins": 0.12571600079536438, |
|
"rewards/rejected": -0.04780507832765579, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 5.584373950958252, |
|
"learning_rate": 1.999132110911845e-05, |
|
"logits/chosen": 0.27150627970695496, |
|
"logits/rejected": 0.1847885251045227, |
|
"logps/chosen": -469.2530517578125, |
|
"logps/rejected": -458.80413818359375, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.3338521122932434, |
|
"rewards/margins": 0.2423708438873291, |
|
"rewards/rejected": 0.09148130565881729, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15422885572139303, |
|
"grad_norm": 5.340240955352783, |
|
"learning_rate": 1.9987503192660842e-05, |
|
"logits/chosen": 0.2772689759731293, |
|
"logits/rejected": 0.20361235737800598, |
|
"logps/chosen": -403.8421630859375, |
|
"logps/rejected": -364.796630859375, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.463238924741745, |
|
"rewards/margins": 0.17842896282672882, |
|
"rewards/rejected": 0.2848099172115326, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.15920398009950248, |
|
"grad_norm": 5.592566013336182, |
|
"learning_rate": 1.9982991736284914e-05, |
|
"logits/chosen": 0.482767254114151, |
|
"logits/rejected": 0.42924097180366516, |
|
"logps/chosen": -474.4277648925781, |
|
"logps/rejected": -526.3604736328125, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6025639176368713, |
|
"rewards/margins": 0.2601732611656189, |
|
"rewards/rejected": 0.3423907160758972, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.16417910447761194, |
|
"grad_norm": 6.180532932281494, |
|
"learning_rate": 1.997778705326968e-05, |
|
"logits/chosen": 0.20447391271591187, |
|
"logits/rejected": 0.13856717944145203, |
|
"logps/chosen": -433.2802734375, |
|
"logps/rejected": -459.3676452636719, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.37419936060905457, |
|
"rewards/margins": 0.33122357726097107, |
|
"rewards/rejected": 0.04297574609518051, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1691542288557214, |
|
"grad_norm": 5.7720417976379395, |
|
"learning_rate": 1.9971889505032337e-05, |
|
"logits/chosen": 0.37103909254074097, |
|
"logits/rejected": 0.18156485259532928, |
|
"logps/chosen": -431.2093200683594, |
|
"logps/rejected": -415.803955078125, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.00045023113489151, |
|
"rewards/margins": 0.28690749406814575, |
|
"rewards/rejected": -0.28645727038383484, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.17412935323383086, |
|
"grad_norm": 5.279744625091553, |
|
"learning_rate": 1.9965299501103178e-05, |
|
"logits/chosen": 0.6684572696685791, |
|
"logits/rejected": 0.4265105724334717, |
|
"logps/chosen": -405.96636962890625, |
|
"logps/rejected": -363.99810791015625, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.31693071126937866, |
|
"rewards/margins": 0.12704896926879883, |
|
"rewards/rejected": -0.4439797103404999, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1791044776119403, |
|
"grad_norm": 5.412991046905518, |
|
"learning_rate": 1.995801749909715e-05, |
|
"logits/chosen": 0.3472476601600647, |
|
"logits/rejected": 0.1070006936788559, |
|
"logps/chosen": -525.67529296875, |
|
"logps/rejected": -470.4413146972656, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.21376881003379822, |
|
"rewards/margins": 0.07273076474666595, |
|
"rewards/rejected": -0.286499559879303, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.18407960199004975, |
|
"grad_norm": 5.56497049331665, |
|
"learning_rate": 1.995004400468209e-05, |
|
"logits/chosen": 0.23391787707805634, |
|
"logits/rejected": 0.42092186212539673, |
|
"logps/chosen": -431.3445739746094, |
|
"logps/rejected": -513.2816772460938, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.5987527966499329, |
|
"rewards/margins": 0.20438729226589203, |
|
"rewards/rejected": 0.39436548948287964, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1865671641791045, |
|
"eval_logits/chosen": 0.3917093575000763, |
|
"eval_logits/rejected": 0.2565095126628876, |
|
"eval_logps/chosen": -440.29315185546875, |
|
"eval_logps/rejected": -402.7275390625, |
|
"eval_loss": 0.6632580161094666, |
|
"eval_rewards/accuracies": 0.6215277910232544, |
|
"eval_rewards/chosen": 1.2003861665725708, |
|
"eval_rewards/margins": 0.29299187660217285, |
|
"eval_rewards/rejected": 0.907394289970398, |
|
"eval_runtime": 150.4796, |
|
"eval_samples_per_second": 7.596, |
|
"eval_steps_per_second": 0.239, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1890547263681592, |
|
"grad_norm": 6.083770751953125, |
|
"learning_rate": 1.9941379571543597e-05, |
|
"logits/chosen": 0.33355918526649475, |
|
"logits/rejected": 0.4423186779022217, |
|
"logps/chosen": -489.43389892578125, |
|
"logps/rejected": -527.8333129882812, |
|
"loss": 0.7118, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 1.1078438758850098, |
|
"rewards/margins": 0.08143356442451477, |
|
"rewards/rejected": 1.0264102220535278, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.19402985074626866, |
|
"grad_norm": 5.055530071258545, |
|
"learning_rate": 1.9932024801346583e-05, |
|
"logits/chosen": 0.37234047055244446, |
|
"logits/rejected": 0.23300248384475708, |
|
"logps/chosen": -445.1590270996094, |
|
"logps/rejected": -421.0417175292969, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 1.0396504402160645, |
|
"rewards/margins": 0.15197786688804626, |
|
"rewards/rejected": 0.8876725435256958, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.19900497512437812, |
|
"grad_norm": 4.7338433265686035, |
|
"learning_rate": 1.992198034369349e-05, |
|
"logits/chosen": 0.016373004764318466, |
|
"logits/rejected": 0.12857607007026672, |
|
"logps/chosen": -392.64678955078125, |
|
"logps/rejected": -409.271240234375, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.56157386302948, |
|
"rewards/margins": 0.11771346628665924, |
|
"rewards/rejected": 0.44386038184165955, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.20398009950248755, |
|
"grad_norm": 5.313661098480225, |
|
"learning_rate": 1.991124689607921e-05, |
|
"logits/chosen": 0.6525070667266846, |
|
"logits/rejected": 0.5595052242279053, |
|
"logps/chosen": -499.96746826171875, |
|
"logps/rejected": -459.646728515625, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.22787390649318695, |
|
"rewards/margins": 0.18890802562236786, |
|
"rewards/rejected": 0.038965899497270584, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.208955223880597, |
|
"grad_norm": 5.3913984298706055, |
|
"learning_rate": 1.9899825203842613e-05, |
|
"logits/chosen": 0.4010236859321594, |
|
"logits/rejected": 0.2576262950897217, |
|
"logps/chosen": -378.7827453613281, |
|
"logps/rejected": -365.35235595703125, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.21431918442249298, |
|
"rewards/margins": 0.11841318756341934, |
|
"rewards/rejected": 0.09590599685907364, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.21393034825870647, |
|
"grad_norm": 4.707008361816406, |
|
"learning_rate": 1.988771606011481e-05, |
|
"logits/chosen": 0.5776969790458679, |
|
"logits/rejected": 0.5886460542678833, |
|
"logps/chosen": -452.3276672363281, |
|
"logps/rejected": -492.7530822753906, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.4167579114437103, |
|
"rewards/margins": 0.38403820991516113, |
|
"rewards/rejected": 0.03271971270442009, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.21890547263681592, |
|
"grad_norm": 4.890253067016602, |
|
"learning_rate": 1.987492030576407e-05, |
|
"logits/chosen": 0.4215804934501648, |
|
"logits/rejected": 0.3395119309425354, |
|
"logps/chosen": -443.6938781738281, |
|
"logps/rejected": -448.883056640625, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.3662562072277069, |
|
"rewards/margins": 0.25069642066955566, |
|
"rewards/rejected": 0.11555974185466766, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 4.925017833709717, |
|
"learning_rate": 1.986143882933744e-05, |
|
"logits/chosen": 0.7153533697128296, |
|
"logits/rejected": 0.5962733626365662, |
|
"logps/chosen": -378.98199462890625, |
|
"logps/rejected": -362.8702697753906, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.10049009323120117, |
|
"rewards/margins": 0.24912574887275696, |
|
"rewards/rejected": -0.1486356258392334, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22885572139303484, |
|
"grad_norm": 4.816032409667969, |
|
"learning_rate": 1.9847272566999026e-05, |
|
"logits/chosen": 0.3551070988178253, |
|
"logits/rejected": 0.1886759102344513, |
|
"logps/chosen": -481.8218688964844, |
|
"logps/rejected": -461.4677429199219, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11597292125225067, |
|
"rewards/margins": 0.4469318389892578, |
|
"rewards/rejected": -0.5629047155380249, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.23383084577114427, |
|
"grad_norm": 5.482291221618652, |
|
"learning_rate": 1.9832422502465013e-05, |
|
"logits/chosen": 0.07703270018100739, |
|
"logits/rejected": 0.08134737610816956, |
|
"logps/chosen": -430.76470947265625, |
|
"logps/rejected": -479.6883239746094, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.38663041591644287, |
|
"rewards/margins": 0.16125822067260742, |
|
"rewards/rejected": -0.5478886365890503, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.23880597014925373, |
|
"grad_norm": 5.5123677253723145, |
|
"learning_rate": 1.9816889666935318e-05, |
|
"logits/chosen": 0.46063917875289917, |
|
"logits/rejected": 0.40867650508880615, |
|
"logps/chosen": -496.49615478515625, |
|
"logps/rejected": -474.017578125, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.224630206823349, |
|
"rewards/margins": 0.17337118089199066, |
|
"rewards/rejected": -0.39800137281417847, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.24378109452736318, |
|
"grad_norm": 8.160882949829102, |
|
"learning_rate": 1.9800675139022006e-05, |
|
"logits/chosen": 0.5780532956123352, |
|
"logits/rejected": 0.3103576898574829, |
|
"logps/chosen": -491.5118103027344, |
|
"logps/rejected": -422.33807373046875, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.08265501260757446, |
|
"rewards/margins": 0.24835649132728577, |
|
"rewards/rejected": -0.33101150393486023, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.24875621890547264, |
|
"grad_norm": 6.051442623138428, |
|
"learning_rate": 1.9783780044674402e-05, |
|
"logits/chosen": 0.5951110124588013, |
|
"logits/rejected": 0.5504649877548218, |
|
"logps/chosen": -438.7686767578125, |
|
"logps/rejected": -458.02325439453125, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.20991414785385132, |
|
"rewards/margins": 0.2382897138595581, |
|
"rewards/rejected": -0.4482038617134094, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24875621890547264, |
|
"eval_logits/chosen": 0.27061331272125244, |
|
"eval_logits/rejected": 0.13349506258964539, |
|
"eval_logps/chosen": -454.5816955566406, |
|
"eval_logps/rejected": -416.6123046875, |
|
"eval_loss": 0.6534828543663025, |
|
"eval_rewards/accuracies": 0.59375, |
|
"eval_rewards/chosen": -0.22846804559230804, |
|
"eval_rewards/margins": 0.25261345505714417, |
|
"eval_rewards/rejected": -0.4810815453529358, |
|
"eval_runtime": 150.5659, |
|
"eval_samples_per_second": 7.591, |
|
"eval_steps_per_second": 0.239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2537313432835821, |
|
"grad_norm": 5.35200834274292, |
|
"learning_rate": 1.976620555710087e-05, |
|
"logits/chosen": 0.2719428837299347, |
|
"logits/rejected": 0.18390944600105286, |
|
"logps/chosen": -401.3759765625, |
|
"logps/rejected": -378.04510498046875, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.2212618738412857, |
|
"rewards/margins": 0.1665419489145279, |
|
"rewards/rejected": -0.3878038227558136, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.25870646766169153, |
|
"grad_norm": 5.549587726593018, |
|
"learning_rate": 1.974795289668737e-05, |
|
"logits/chosen": 0.222773939371109, |
|
"logits/rejected": 0.27480173110961914, |
|
"logps/chosen": -450.5555419921875, |
|
"logps/rejected": -478.2663879394531, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5931901335716248, |
|
"rewards/margins": 0.2987501919269562, |
|
"rewards/rejected": 0.29443997144699097, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.263681592039801, |
|
"grad_norm": 5.261623859405518, |
|
"learning_rate": 1.972902333091271e-05, |
|
"logits/chosen": 0.41583824157714844, |
|
"logits/rejected": 0.16713739931583405, |
|
"logps/chosen": -533.6800537109375, |
|
"logps/rejected": -458.5304260253906, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.5807373523712158, |
|
"rewards/margins": 0.1561833620071411, |
|
"rewards/rejected": 0.4245539605617523, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.26865671641791045, |
|
"grad_norm": 5.055637359619141, |
|
"learning_rate": 1.9709418174260523e-05, |
|
"logits/chosen": 0.3311361074447632, |
|
"logits/rejected": 0.3872915506362915, |
|
"logps/chosen": -467.373046875, |
|
"logps/rejected": -458.4536437988281, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.7115026712417603, |
|
"rewards/margins": 0.30211564898490906, |
|
"rewards/rejected": 0.40938708186149597, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.2736318407960199, |
|
"grad_norm": 4.662365913391113, |
|
"learning_rate": 1.9689138788127994e-05, |
|
"logits/chosen": 0.43617844581604004, |
|
"logits/rejected": 0.209380641579628, |
|
"logps/chosen": -391.93701171875, |
|
"logps/rejected": -352.4445495605469, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.24579453468322754, |
|
"rewards/margins": 0.27014172077178955, |
|
"rewards/rejected": -0.0243472121655941, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.27860696517412936, |
|
"grad_norm": 5.244974136352539, |
|
"learning_rate": 1.966818658073133e-05, |
|
"logits/chosen": 0.179366797208786, |
|
"logits/rejected": 0.17232109606266022, |
|
"logps/chosen": -475.9603271484375, |
|
"logps/rejected": -503.4451904296875, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.10947009921073914, |
|
"rewards/margins": 0.0003622081130743027, |
|
"rewards/rejected": 0.10910789668560028, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.2835820895522388, |
|
"grad_norm": 4.5364179611206055, |
|
"learning_rate": 1.9646563007007952e-05, |
|
"logits/chosen": 0.11134719103574753, |
|
"logits/rejected": -0.09881246089935303, |
|
"logps/chosen": -491.548828125, |
|
"logps/rejected": -504.40496826171875, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.12891899049282074, |
|
"rewards/margins": 0.29636111855506897, |
|
"rewards/rejected": -0.16744214296340942, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.2885572139303483, |
|
"grad_norm": 4.562107086181641, |
|
"learning_rate": 1.9624269568515486e-05, |
|
"logits/chosen": 0.33666372299194336, |
|
"logits/rejected": 0.3560597896575928, |
|
"logps/chosen": -485.7892150878906, |
|
"logps/rejected": -458.96600341796875, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.3998726010322571, |
|
"rewards/margins": 0.21638146042823792, |
|
"rewards/rejected": 0.18349118530750275, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.2935323383084577, |
|
"grad_norm": 6.737706661224365, |
|
"learning_rate": 1.960130781332748e-05, |
|
"logits/chosen": 0.6583088040351868, |
|
"logits/rejected": 0.5398542284965515, |
|
"logps/chosen": -500.09442138671875, |
|
"logps/rejected": -470.6582946777344, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.579067587852478, |
|
"rewards/margins": 0.4634256958961487, |
|
"rewards/rejected": 0.11564186215400696, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 5.656876087188721, |
|
"learning_rate": 1.957767933592591e-05, |
|
"logits/chosen": 0.40276038646698, |
|
"logits/rejected": 0.3526462912559509, |
|
"logps/chosen": -452.8072509765625, |
|
"logps/rejected": -455.0268249511719, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.6671885848045349, |
|
"rewards/margins": 0.457474946975708, |
|
"rewards/rejected": 0.20971357822418213, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3034825870646766, |
|
"grad_norm": 4.534661769866943, |
|
"learning_rate": 1.955338577709046e-05, |
|
"logits/chosen": 0.11831162869930267, |
|
"logits/rejected": -0.027393575757741928, |
|
"logps/chosen": -446.3404846191406, |
|
"logps/rejected": -409.2080078125, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.14041246473789215, |
|
"rewards/margins": 0.2885099947452545, |
|
"rewards/rejected": -0.14809754490852356, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.30845771144278605, |
|
"grad_norm": 4.870954513549805, |
|
"learning_rate": 1.9528428823784567e-05, |
|
"logits/chosen": -0.06817762553691864, |
|
"logits/rejected": 0.10090361535549164, |
|
"logps/chosen": -417.2574157714844, |
|
"logps/rejected": -500.5931701660156, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2122226357460022, |
|
"rewards/margins": 0.06963346153497696, |
|
"rewards/rejected": -0.28185608983039856, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.31094527363184077, |
|
"eval_logits/chosen": 0.24545568227767944, |
|
"eval_logits/rejected": 0.10711152106523514, |
|
"eval_logps/chosen": -453.1002502441406, |
|
"eval_logps/rejected": -414.6319580078125, |
|
"eval_loss": 0.6768244504928589, |
|
"eval_rewards/accuracies": 0.6006944179534912, |
|
"eval_rewards/chosen": -0.08032441139221191, |
|
"eval_rewards/margins": 0.20272159576416016, |
|
"eval_rewards/rejected": -0.2830459773540497, |
|
"eval_runtime": 150.4022, |
|
"eval_samples_per_second": 7.6, |
|
"eval_steps_per_second": 0.239, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.31343283582089554, |
|
"grad_norm": 4.674103260040283, |
|
"learning_rate": 1.9502810209038302e-05, |
|
"logits/chosen": 0.2548333406448364, |
|
"logits/rejected": 0.23590323328971863, |
|
"logps/chosen": -441.0978698730469, |
|
"logps/rejected": -444.6314697265625, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.21375508606433868, |
|
"rewards/margins": 0.04151350259780884, |
|
"rewards/rejected": -0.25526857376098633, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.31840796019900497, |
|
"grad_norm": 15.962188720703125, |
|
"learning_rate": 1.9476531711828027e-05, |
|
"logits/chosen": 0.22388213872909546, |
|
"logits/rejected": 0.02985329180955887, |
|
"logps/chosen": -527.328369140625, |
|
"logps/rejected": -451.01165771484375, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.01938755437731743, |
|
"rewards/margins": 0.19766713678836823, |
|
"rewards/rejected": -0.17827960848808289, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.32338308457711445, |
|
"grad_norm": 4.411048412322998, |
|
"learning_rate": 1.9449595156952827e-05, |
|
"logits/chosen": 0.09123142063617706, |
|
"logits/rejected": 0.008157305419445038, |
|
"logps/chosen": -464.3664245605469, |
|
"logps/rejected": -449.2779846191406, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.09136360138654709, |
|
"rewards/margins": 0.1354491412639618, |
|
"rewards/rejected": -0.044085558503866196, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3283582089552239, |
|
"grad_norm": 4.79756498336792, |
|
"learning_rate": 1.9422002414907837e-05, |
|
"logits/chosen": 0.4070839583873749, |
|
"logits/rejected": 0.3463619649410248, |
|
"logps/chosen": -413.456298828125, |
|
"logps/rejected": -413.7463073730469, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4945845901966095, |
|
"rewards/margins": 0.20277410745620728, |
|
"rewards/rejected": 0.29181045293807983, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 4.720849990844727, |
|
"learning_rate": 1.9393755401754324e-05, |
|
"logits/chosen": 0.29830023646354675, |
|
"logits/rejected": 0.3905254602432251, |
|
"logps/chosen": -390.8925476074219, |
|
"logps/rejected": -460.8228759765625, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.34830933809280396, |
|
"rewards/margins": 0.27932432293891907, |
|
"rewards/rejected": 0.06898501515388489, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.3383084577114428, |
|
"grad_norm": 5.378329277038574, |
|
"learning_rate": 1.936485607898665e-05, |
|
"logits/chosen": 0.07186523079872131, |
|
"logits/rejected": 0.15830281376838684, |
|
"logps/chosen": -393.9452819824219, |
|
"logps/rejected": -455.62957763671875, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.17164385318756104, |
|
"rewards/margins": 0.09308388829231262, |
|
"rewards/rejected": 0.0785599797964096, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.34328358208955223, |
|
"grad_norm": 4.18524169921875, |
|
"learning_rate": 1.9335306453396066e-05, |
|
"logits/chosen": 0.056332044303417206, |
|
"logits/rejected": 0.07097341120243073, |
|
"logps/chosen": -490.37994384765625, |
|
"logps/rejected": -514.7352294921875, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.11651282012462616, |
|
"rewards/margins": 0.41814491152763367, |
|
"rewards/rejected": -0.3016320765018463, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.3482587064676617, |
|
"grad_norm": 5.413318634033203, |
|
"learning_rate": 1.9305108576931336e-05, |
|
"logits/chosen": 0.01699664443731308, |
|
"logits/rejected": -0.03439049795269966, |
|
"logps/chosen": -382.8931579589844, |
|
"logps/rejected": -419.8720703125, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.3111031949520111, |
|
"rewards/margins": 0.21040624380111694, |
|
"rewards/rejected": -0.5215094089508057, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.35323383084577115, |
|
"grad_norm": 5.107039928436279, |
|
"learning_rate": 1.927426454655627e-05, |
|
"logits/chosen": 0.30719754099845886, |
|
"logits/rejected": 0.2690942883491516, |
|
"logps/chosen": -494.9206237792969, |
|
"logps/rejected": -498.79901123046875, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.8447175025939941, |
|
"rewards/margins": 0.18089117109775543, |
|
"rewards/rejected": -1.0256086587905884, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.3582089552238806, |
|
"grad_norm": 4.356219291687012, |
|
"learning_rate": 1.924277650410412e-05, |
|
"logits/chosen": 0.13748708367347717, |
|
"logits/rejected": 0.2504044473171234, |
|
"logps/chosen": -548.0153198242188, |
|
"logps/rejected": -559.4176635742188, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.6704975366592407, |
|
"rewards/margins": -0.03709391877055168, |
|
"rewards/rejected": -0.6334035992622375, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.36318407960199006, |
|
"grad_norm": 4.30932092666626, |
|
"learning_rate": 1.9210646636128805e-05, |
|
"logits/chosen": 0.16785617172718048, |
|
"logits/rejected": 0.32375362515449524, |
|
"logps/chosen": -417.7137145996094, |
|
"logps/rejected": -482.4889221191406, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.3137147128582001, |
|
"rewards/margins": 0.08970025926828384, |
|
"rewards/rejected": -0.4034149646759033, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.3681592039800995, |
|
"grad_norm": 4.273219108581543, |
|
"learning_rate": 1.9177877173753127e-05, |
|
"logits/chosen": 0.1516554057598114, |
|
"logits/rejected": 0.0621149055659771, |
|
"logps/chosen": -439.8550109863281, |
|
"logps/rejected": -445.9311218261719, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12972819805145264, |
|
"rewards/margins": 0.31710904836654663, |
|
"rewards/rejected": -0.44683724641799927, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 4.085379123687744, |
|
"learning_rate": 1.91444703925138e-05, |
|
"logits/chosen": 0.2226869910955429, |
|
"logits/rejected": 0.2288302779197693, |
|
"logps/chosen": -402.9095458984375, |
|
"logps/rejected": -436.23846435546875, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.1526205837726593, |
|
"rewards/margins": 0.27878373861312866, |
|
"rewards/rejected": -0.12616315484046936, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"eval_logits/chosen": 0.2271442711353302, |
|
"eval_logits/rejected": 0.08639353513717651, |
|
"eval_logps/chosen": -448.8922119140625, |
|
"eval_logps/rejected": -411.57562255859375, |
|
"eval_loss": 0.6402102112770081, |
|
"eval_rewards/accuracies": 0.6145833134651184, |
|
"eval_rewards/chosen": 0.3404841423034668, |
|
"eval_rewards/margins": 0.31789708137512207, |
|
"eval_rewards/rejected": 0.022587047889828682, |
|
"eval_runtime": 149.8006, |
|
"eval_samples_per_second": 7.63, |
|
"eval_steps_per_second": 0.24, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3781094527363184, |
|
"grad_norm": 4.7133870124816895, |
|
"learning_rate": 1.9110428612203463e-05, |
|
"logits/chosen": 0.28455495834350586, |
|
"logits/rejected": 0.3236948847770691, |
|
"logps/chosen": -557.7841186523438, |
|
"logps/rejected": -595.4920654296875, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.502812922000885, |
|
"rewards/margins": 0.31084102392196655, |
|
"rewards/rejected": 0.19197186827659607, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.38308457711442784, |
|
"grad_norm": 4.747364521026611, |
|
"learning_rate": 1.9075754196709574e-05, |
|
"logits/chosen": 0.3259233832359314, |
|
"logits/rejected": 0.2481708824634552, |
|
"logps/chosen": -431.3799133300781, |
|
"logps/rejected": -437.3810729980469, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.463757187128067, |
|
"rewards/margins": 0.22637638449668884, |
|
"rewards/rejected": 0.23738083243370056, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.3880597014925373, |
|
"grad_norm": 4.707996368408203, |
|
"learning_rate": 1.904044955385026e-05, |
|
"logits/chosen": 0.2886297404766083, |
|
"logits/rejected": 0.035777147859334946, |
|
"logps/chosen": -497.3841857910156, |
|
"logps/rejected": -406.03729248046875, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6689484119415283, |
|
"rewards/margins": 0.5415085554122925, |
|
"rewards/rejected": 0.12743981182575226, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.39303482587064675, |
|
"grad_norm": 4.470433235168457, |
|
"learning_rate": 1.9004517135207127e-05, |
|
"logits/chosen": 0.22225256264209747, |
|
"logits/rejected": 0.2989833652973175, |
|
"logps/chosen": -394.5459289550781, |
|
"logps/rejected": -429.8094177246094, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.2795145511627197, |
|
"rewards/margins": 0.16655004024505615, |
|
"rewards/rejected": 0.11296449601650238, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.39800995024875624, |
|
"grad_norm": 4.836558818817139, |
|
"learning_rate": 1.8967959435955027e-05, |
|
"logits/chosen": 0.37761908769607544, |
|
"logits/rejected": 0.26500552892684937, |
|
"logps/chosen": -482.1424560546875, |
|
"logps/rejected": -428.25, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.08129014819860458, |
|
"rewards/margins": 0.13191911578178406, |
|
"rewards/rejected": -0.21320928633213043, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.40298507462686567, |
|
"grad_norm": 5.268253326416016, |
|
"learning_rate": 1.893077899468876e-05, |
|
"logits/chosen": 0.2713007926940918, |
|
"logits/rejected": 0.04821309447288513, |
|
"logps/chosen": -563.9439697265625, |
|
"logps/rejected": -503.7855529785156, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.37922781705856323, |
|
"rewards/margins": 0.29474934935569763, |
|
"rewards/rejected": -0.6739771366119385, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.4079601990049751, |
|
"grad_norm": 4.993300437927246, |
|
"learning_rate": 1.889297839324682e-05, |
|
"logits/chosen": 0.34269845485687256, |
|
"logits/rejected": 0.27501022815704346, |
|
"logps/chosen": -438.5770568847656, |
|
"logps/rejected": -437.1994934082031, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.48632100224494934, |
|
"rewards/margins": 0.22347672283649445, |
|
"rewards/rejected": -0.709797739982605, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.4129353233830846, |
|
"grad_norm": 4.8070149421691895, |
|
"learning_rate": 1.8854560256532098e-05, |
|
"logits/chosen": 0.04936538636684418, |
|
"logits/rejected": -0.0027198120951652527, |
|
"logps/chosen": -438.79168701171875, |
|
"logps/rejected": -428.35308837890625, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.2654213309288025, |
|
"rewards/margins": 0.41919708251953125, |
|
"rewards/rejected": -0.6846184134483337, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.417910447761194, |
|
"grad_norm": 4.997420787811279, |
|
"learning_rate": 1.8815527252329624e-05, |
|
"logits/chosen": 0.2193477749824524, |
|
"logits/rejected": 0.03042268194258213, |
|
"logps/chosen": -468.4323425292969, |
|
"logps/rejected": -426.1119384765625, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03436838462948799, |
|
"rewards/margins": 0.521725058555603, |
|
"rewards/rejected": -0.5560933351516724, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.4228855721393035, |
|
"grad_norm": 4.165882110595703, |
|
"learning_rate": 1.8775882091121282e-05, |
|
"logits/chosen": 0.5012113451957703, |
|
"logits/rejected": 0.35550257563591003, |
|
"logps/chosen": -505.60626220703125, |
|
"logps/rejected": -438.73095703125, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1919064074754715, |
|
"rewards/margins": 0.44131097197532654, |
|
"rewards/rejected": -0.24940457940101624, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.42786069651741293, |
|
"grad_norm": 4.5491251945495605, |
|
"learning_rate": 1.8735627525897618e-05, |
|
"logits/chosen": 0.3401688039302826, |
|
"logits/rejected": 0.10173173248767853, |
|
"logps/chosen": -449.0252990722656, |
|
"logps/rejected": -379.44598388671875, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.304412841796875, |
|
"rewards/margins": 0.298681378364563, |
|
"rewards/rejected": 0.005731441080570221, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.43283582089552236, |
|
"grad_norm": 4.290804862976074, |
|
"learning_rate": 1.8694766351966665e-05, |
|
"logits/chosen": 0.20657242834568024, |
|
"logits/rejected": 0.16187314689159393, |
|
"logps/chosen": -430.30169677734375, |
|
"logps/rejected": -508.4122314453125, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.5791266560554504, |
|
"rewards/margins": 0.42658331990242004, |
|
"rewards/rejected": 0.152543306350708, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.43532338308457713, |
|
"eval_logits/chosen": 0.27793338894844055, |
|
"eval_logits/rejected": 0.1382322907447815, |
|
"eval_logps/chosen": -444.71087646484375, |
|
"eval_logps/rejected": -407.1244201660156, |
|
"eval_loss": 0.6471754908561707, |
|
"eval_rewards/accuracies": 0.6006944179534912, |
|
"eval_rewards/chosen": 0.7586135864257812, |
|
"eval_rewards/margins": 0.29090631008148193, |
|
"eval_rewards/rejected": 0.4677073061466217, |
|
"eval_runtime": 150.2506, |
|
"eval_samples_per_second": 7.607, |
|
"eval_steps_per_second": 0.24, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43781094527363185, |
|
"grad_norm": 4.04841947555542, |
|
"learning_rate": 1.8653301406759827e-05, |
|
"logits/chosen": 0.26602596044540405, |
|
"logits/rejected": 0.26264214515686035, |
|
"logps/chosen": -414.1706848144531, |
|
"logps/rejected": -394.3015441894531, |
|
"loss": 0.7136, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.5916293263435364, |
|
"rewards/margins": -0.07215756177902222, |
|
"rewards/rejected": 0.6637868881225586, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.4427860696517413, |
|
"grad_norm": 4.154921054840088, |
|
"learning_rate": 1.8611235569634852e-05, |
|
"logits/chosen": 0.47313758730888367, |
|
"logits/rejected": 0.21173089742660522, |
|
"logps/chosen": -429.60491943359375, |
|
"logps/rejected": -392.5804138183594, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.679366946220398, |
|
"rewards/margins": 0.12296590954065323, |
|
"rewards/rejected": 0.5564010739326477, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 3.9639251232147217, |
|
"learning_rate": 1.8568571761675893e-05, |
|
"logits/chosen": 0.4981469213962555, |
|
"logits/rejected": 0.49814143776893616, |
|
"logps/chosen": -423.76898193359375, |
|
"logps/rejected": -453.06573486328125, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.6893520355224609, |
|
"rewards/margins": 0.2094535529613495, |
|
"rewards/rejected": 0.47989848256111145, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4527363184079602, |
|
"grad_norm": 4.174687385559082, |
|
"learning_rate": 1.8525312945490647e-05, |
|
"logits/chosen": 0.1745152622461319, |
|
"logits/rejected": 0.22328950464725494, |
|
"logps/chosen": -420.2294616699219, |
|
"logps/rejected": -452.5687255859375, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.4526810348033905, |
|
"rewards/margins": 0.409667432308197, |
|
"rewards/rejected": 0.043013621121644974, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.4577114427860697, |
|
"grad_norm": 5.690698146820068, |
|
"learning_rate": 1.8481462125004647e-05, |
|
"logits/chosen": 0.3042501211166382, |
|
"logits/rejected": 0.19751590490341187, |
|
"logps/chosen": -480.2320556640625, |
|
"logps/rejected": -409.99993896484375, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.034170668572187424, |
|
"rewards/margins": 0.10836675763130188, |
|
"rewards/rejected": -0.1425374299287796, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.4626865671641791, |
|
"grad_norm": 3.7740769386291504, |
|
"learning_rate": 1.8437022345252666e-05, |
|
"logits/chosen": 0.410859614610672, |
|
"logits/rejected": 0.2786995470523834, |
|
"logps/chosen": -536.8661499023438, |
|
"logps/rejected": -485.7401123046875, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021265551447868347, |
|
"rewards/margins": 0.20183995366096497, |
|
"rewards/rejected": -0.1805744171142578, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.46766169154228854, |
|
"grad_norm": 4.466541290283203, |
|
"learning_rate": 1.8391996692167242e-05, |
|
"logits/chosen": 0.36077880859375, |
|
"logits/rejected": 0.02420664392411709, |
|
"logps/chosen": -574.6773071289062, |
|
"logps/rejected": -416.6241455078125, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05402504652738571, |
|
"rewards/margins": 0.175716370344162, |
|
"rewards/rejected": -0.2297414094209671, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.472636815920398, |
|
"grad_norm": 4.715292930603027, |
|
"learning_rate": 1.8346388292364438e-05, |
|
"logits/chosen": 0.5576101541519165, |
|
"logits/rejected": 0.2390051931142807, |
|
"logps/chosen": -482.841796875, |
|
"logps/rejected": -415.119384765625, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0752110630273819, |
|
"rewards/margins": 0.1834164559841156, |
|
"rewards/rejected": -0.2586275339126587, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.47761194029850745, |
|
"grad_norm": 4.4041523933410645, |
|
"learning_rate": 1.8300200312926674e-05, |
|
"logits/chosen": 0.4594465494155884, |
|
"logits/rejected": 0.21978969871997833, |
|
"logps/chosen": -478.629638671875, |
|
"logps/rejected": -375.6353454589844, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.09999266266822815, |
|
"rewards/margins": 0.25770846009254456, |
|
"rewards/rejected": -0.3577011227607727, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.48258706467661694, |
|
"grad_norm": 4.524245738983154, |
|
"learning_rate": 1.8253435961182844e-05, |
|
"logits/chosen": 0.011010982096195221, |
|
"logits/rejected": -0.07573414593935013, |
|
"logps/chosen": -508.1129455566406, |
|
"logps/rejected": -466.13006591796875, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.21897917985916138, |
|
"rewards/margins": 0.22618308663368225, |
|
"rewards/rejected": -0.007203895598649979, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.48756218905472637, |
|
"grad_norm": 3.8508663177490234, |
|
"learning_rate": 1.8206098484485563e-05, |
|
"logits/chosen": 0.17437395453453064, |
|
"logits/rejected": 0.12683795392513275, |
|
"logps/chosen": -448.64056396484375, |
|
"logps/rejected": -439.05767822265625, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.2330358326435089, |
|
"rewards/margins": 0.20018717646598816, |
|
"rewards/rejected": 0.03284864127635956, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4925373134328358, |
|
"grad_norm": 4.620991230010986, |
|
"learning_rate": 1.8158191169985696e-05, |
|
"logits/chosen": 0.18229001760482788, |
|
"logits/rejected": 0.053403086960315704, |
|
"logps/chosen": -529.84814453125, |
|
"logps/rejected": -488.3792724609375, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.21858179569244385, |
|
"rewards/margins": 0.3658568859100342, |
|
"rewards/rejected": -0.14727509021759033, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4975124378109453, |
|
"grad_norm": 4.5922722816467285, |
|
"learning_rate": 1.810971734440408e-05, |
|
"logits/chosen": 0.30341237783432007, |
|
"logits/rejected": 0.07493434846401215, |
|
"logps/chosen": -452.9410705566406, |
|
"logps/rejected": -400.3564453125, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.029513243585824966, |
|
"rewards/margins": 0.16814345121383667, |
|
"rewards/rejected": -0.1386302411556244, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4975124378109453, |
|
"eval_logits/chosen": 0.17696020007133484, |
|
"eval_logits/rejected": 0.03260684758424759, |
|
"eval_logps/chosen": -452.606689453125, |
|
"eval_logps/rejected": -414.8606872558594, |
|
"eval_loss": 0.6501542925834656, |
|
"eval_rewards/accuracies": 0.6180555820465088, |
|
"eval_rewards/chosen": -0.030969224870204926, |
|
"eval_rewards/margins": 0.27494877576828003, |
|
"eval_rewards/rejected": -0.3059180676937103, |
|
"eval_runtime": 150.3142, |
|
"eval_samples_per_second": 7.604, |
|
"eval_steps_per_second": 0.239, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5024875621890548, |
|
"grad_norm": 3.963479518890381, |
|
"learning_rate": 1.806068037380052e-05, |
|
"logits/chosen": 0.27582094073295593, |
|
"logits/rejected": 0.19119888544082642, |
|
"logps/chosen": -423.74456787109375, |
|
"logps/rejected": -438.787841796875, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.0536828339099884, |
|
"rewards/margins": 0.13508188724517822, |
|
"rewards/rejected": -0.18876472115516663, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5074626865671642, |
|
"grad_norm": 4.1663713455200195, |
|
"learning_rate": 1.801108366334004e-05, |
|
"logits/chosen": 0.17915582656860352, |
|
"logits/rejected": 0.18883880972862244, |
|
"logps/chosen": -480.3377380371094, |
|
"logps/rejected": -529.461669921875, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4978042244911194, |
|
"rewards/margins": 0.31464439630508423, |
|
"rewards/rejected": -0.8124486207962036, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.5124378109452736, |
|
"grad_norm": 3.5810389518737793, |
|
"learning_rate": 1.796093065705644e-05, |
|
"logits/chosen": 0.3043825030326843, |
|
"logits/rejected": 0.20817437767982483, |
|
"logps/chosen": -431.47955322265625, |
|
"logps/rejected": -417.6255798339844, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6938939690589905, |
|
"rewards/margins": 0.24841205775737762, |
|
"rewards/rejected": -0.9423060417175293, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.5174129353233831, |
|
"grad_norm": 4.341555118560791, |
|
"learning_rate": 1.791022483761312e-05, |
|
"logits/chosen": 0.2805790603160858, |
|
"logits/rejected": 0.07360462844371796, |
|
"logps/chosen": -518.8629760742188, |
|
"logps/rejected": -453.9353332519531, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8408029079437256, |
|
"rewards/margins": 0.42247286438941956, |
|
"rewards/rejected": -1.2632758617401123, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 4.9308390617370605, |
|
"learning_rate": 1.7858969726061262e-05, |
|
"logits/chosen": 0.061581894755363464, |
|
"logits/rejected": 0.14411726593971252, |
|
"logps/chosen": -428.17498779296875, |
|
"logps/rejected": -457.570068359375, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.0671128034591675, |
|
"rewards/margins": 0.08442307263612747, |
|
"rewards/rejected": -1.151535987854004, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.527363184079602, |
|
"grad_norm": 4.160035133361816, |
|
"learning_rate": 1.7807168881595304e-05, |
|
"logits/chosen": -0.10161225497722626, |
|
"logits/rejected": -0.09652488678693771, |
|
"logps/chosen": -465.89825439453125, |
|
"logps/rejected": -476.0804138183594, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8282710313796997, |
|
"rewards/margins": 0.3242005407810211, |
|
"rewards/rejected": -1.152471661567688, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.5323383084577115, |
|
"grad_norm": 4.315358638763428, |
|
"learning_rate": 1.7754825901305814e-05, |
|
"logits/chosen": 0.30026042461395264, |
|
"logits/rejected": 0.15877141058444977, |
|
"logps/chosen": -469.1257019042969, |
|
"logps/rejected": -489.8163757324219, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5448905825614929, |
|
"rewards/margins": 0.34440505504608154, |
|
"rewards/rejected": -0.8892955780029297, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5373134328358209, |
|
"grad_norm": 4.043447017669678, |
|
"learning_rate": 1.7701944419929673e-05, |
|
"logits/chosen": 0.3924216628074646, |
|
"logits/rejected": 0.34802040457725525, |
|
"logps/chosen": -483.4385070800781, |
|
"logps/rejected": -494.6759033203125, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4959757328033447, |
|
"rewards/margins": 0.3944730758666992, |
|
"rewards/rejected": -0.890448808670044, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5422885572139303, |
|
"grad_norm": 4.426882266998291, |
|
"learning_rate": 1.7648528109597704e-05, |
|
"logits/chosen": 0.42673125863075256, |
|
"logits/rejected": 0.25516799092292786, |
|
"logps/chosen": -504.686279296875, |
|
"logps/rejected": -443.46954345703125, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.49745240807533264, |
|
"rewards/margins": 0.42378664016723633, |
|
"rewards/rejected": -0.9212391376495361, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5472636815920398, |
|
"grad_norm": 4.089346885681152, |
|
"learning_rate": 1.7594580679579654e-05, |
|
"logits/chosen": 0.09302594512701035, |
|
"logits/rejected": 0.11728382110595703, |
|
"logps/chosen": -459.3074951171875, |
|
"logps/rejected": -414.81268310546875, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.38374608755111694, |
|
"rewards/margins": 0.3443138897418976, |
|
"rewards/rejected": -0.7280599474906921, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5522388059701493, |
|
"grad_norm": 3.8262646198272705, |
|
"learning_rate": 1.7540105876026647e-05, |
|
"logits/chosen": 0.20306290686130524, |
|
"logits/rejected": 0.07559295743703842, |
|
"logps/chosen": -558.5977172851562, |
|
"logps/rejected": -493.43841552734375, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0867682546377182, |
|
"rewards/margins": 0.407266229391098, |
|
"rewards/rejected": -0.32049790024757385, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5572139303482587, |
|
"grad_norm": 4.2944440841674805, |
|
"learning_rate": 1.7485107481711014e-05, |
|
"logits/chosen": 0.20840412378311157, |
|
"logits/rejected": 0.08403539657592773, |
|
"logps/chosen": -517.5396728515625, |
|
"logps/rejected": -472.1680908203125, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.10591678321361542, |
|
"rewards/margins": 0.29278436303138733, |
|
"rewards/rejected": -0.18686755001544952, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5597014925373134, |
|
"eval_logits/chosen": 0.24904420971870422, |
|
"eval_logits/rejected": 0.11017153412103653, |
|
"eval_logps/chosen": -452.0428161621094, |
|
"eval_logps/rejected": -414.6964111328125, |
|
"eval_loss": 0.6415970921516418, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": 0.025422947481274605, |
|
"eval_rewards/margins": 0.3149137794971466, |
|
"eval_rewards/rejected": -0.28949081897735596, |
|
"eval_runtime": 150.2184, |
|
"eval_samples_per_second": 7.609, |
|
"eval_steps_per_second": 0.24, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5621890547263682, |
|
"grad_norm": 4.054657936096191, |
|
"learning_rate": 1.7429589315763637e-05, |
|
"logits/chosen": 0.2601884603500366, |
|
"logits/rejected": 0.022673480212688446, |
|
"logps/chosen": -499.178466796875, |
|
"logps/rejected": -424.4082946777344, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.014242544770240784, |
|
"rewards/margins": 0.29500868916511536, |
|
"rewards/rejected": -0.28076615929603577, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.5671641791044776, |
|
"grad_norm": 4.114813804626465, |
|
"learning_rate": 1.737355523340875e-05, |
|
"logits/chosen": 0.2519476115703583, |
|
"logits/rejected": 0.17674781382083893, |
|
"logps/chosen": -425.04718017578125, |
|
"logps/rejected": -395.718505859375, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.1875460147857666, |
|
"rewards/margins": 0.26022982597351074, |
|
"rewards/rejected": -0.07268380373716354, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.572139303482587, |
|
"grad_norm": 4.393073558807373, |
|
"learning_rate": 1.7317009125696208e-05, |
|
"logits/chosen": 0.3865906298160553, |
|
"logits/rejected": 0.1851556897163391, |
|
"logps/chosen": -487.2419738769531, |
|
"logps/rejected": -482.6796875, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.4721629023551941, |
|
"rewards/margins": 0.6545584797859192, |
|
"rewards/rejected": -0.18239565193653107, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5771144278606966, |
|
"grad_norm": 4.722254276275635, |
|
"learning_rate": 1.725995491923131e-05, |
|
"logits/chosen": 0.019634254276752472, |
|
"logits/rejected": -0.1314508616924286, |
|
"logps/chosen": -511.8298645019531, |
|
"logps/rejected": -418.1177062988281, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.29104384779930115, |
|
"rewards/margins": 0.20025068521499634, |
|
"rewards/rejected": -0.4912944734096527, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.582089552238806, |
|
"grad_norm": 3.6944985389709473, |
|
"learning_rate": 1.7202396575902118e-05, |
|
"logits/chosen": 0.3104863464832306, |
|
"logits/rejected": 0.17023295164108276, |
|
"logps/chosen": -443.22528076171875, |
|
"logps/rejected": -439.04559326171875, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12756507098674774, |
|
"rewards/margins": 0.5996299386024475, |
|
"rewards/rejected": -0.7271949648857117, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5870646766169154, |
|
"grad_norm": 4.278947353363037, |
|
"learning_rate": 1.714433809260435e-05, |
|
"logits/chosen": 0.2733452320098877, |
|
"logits/rejected": 0.1945551484823227, |
|
"logps/chosen": -472.4483642578125, |
|
"logps/rejected": -459.3942565917969, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2876668870449066, |
|
"rewards/margins": 0.2343754768371582, |
|
"rewards/rejected": -0.5220423340797424, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5920398009950248, |
|
"grad_norm": 5.243388652801514, |
|
"learning_rate": 1.7085783500963825e-05, |
|
"logits/chosen": 0.26794660091400146, |
|
"logits/rejected": 0.19214050471782684, |
|
"logps/chosen": -464.2667541503906, |
|
"logps/rejected": -466.7138977050781, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.38191846013069153, |
|
"rewards/margins": 0.3241249918937683, |
|
"rewards/rejected": -0.7060434818267822, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 3.732940435409546, |
|
"learning_rate": 1.702673686705651e-05, |
|
"logits/chosen": 0.4054350256919861, |
|
"logits/rejected": 0.4670087993144989, |
|
"logps/chosen": -428.61163330078125, |
|
"logps/rejected": -499.5010681152344, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.007774517871439457, |
|
"rewards/margins": 0.18405042588710785, |
|
"rewards/rejected": -0.19182495772838593, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6019900497512438, |
|
"grad_norm": 3.6961166858673096, |
|
"learning_rate": 1.6967202291126174e-05, |
|
"logits/chosen": 0.25117918848991394, |
|
"logits/rejected": 0.1439165323972702, |
|
"logps/chosen": -419.8067321777344, |
|
"logps/rejected": -385.7373352050781, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.3202853500843048, |
|
"rewards/margins": 0.39529967308044434, |
|
"rewards/rejected": -0.07501433044672012, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.6069651741293532, |
|
"grad_norm": 3.861052989959717, |
|
"learning_rate": 1.690718390729964e-05, |
|
"logits/chosen": 0.6219749450683594, |
|
"logits/rejected": 0.3956920802593231, |
|
"logps/chosen": -487.5699768066406, |
|
"logps/rejected": -440.1090087890625, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.43934863805770874, |
|
"rewards/margins": 0.5191280245780945, |
|
"rewards/rejected": -0.07977931201457977, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.6119402985074627, |
|
"grad_norm": 3.7701494693756104, |
|
"learning_rate": 1.684668588329973e-05, |
|
"logits/chosen": 0.23229114711284637, |
|
"logits/rejected": 0.18851926922798157, |
|
"logps/chosen": -467.3754577636719, |
|
"logps/rejected": -452.4528503417969, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.5834671854972839, |
|
"rewards/margins": 0.5684060454368591, |
|
"rewards/rejected": 0.015061168000102043, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.6169154228855721, |
|
"grad_norm": 4.047066688537598, |
|
"learning_rate": 1.6785712420155864e-05, |
|
"logits/chosen": 0.35120919346809387, |
|
"logits/rejected": 0.15895111858844757, |
|
"logps/chosen": -609.0511474609375, |
|
"logps/rejected": -520.7322998046875, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3787741959095001, |
|
"rewards/margins": 0.24649052321910858, |
|
"rewards/rejected": 0.13228368759155273, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.6218905472636815, |
|
"grad_norm": 4.4764604568481445, |
|
"learning_rate": 1.67242677519123e-05, |
|
"logits/chosen": 0.6815188527107239, |
|
"logits/rejected": 0.49643221497535706, |
|
"logps/chosen": -530.8171997070312, |
|
"logps/rejected": -430.7647399902344, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.17197714745998383, |
|
"rewards/margins": 0.11586709320545197, |
|
"rewards/rejected": -0.2878442406654358, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6218905472636815, |
|
"eval_logits/chosen": 0.21956767141819, |
|
"eval_logits/rejected": 0.07988239079713821, |
|
"eval_logps/chosen": -455.1015319824219, |
|
"eval_logps/rejected": -417.8031005859375, |
|
"eval_loss": 0.638308048248291, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.2804534435272217, |
|
"eval_rewards/margins": 0.31970784068107605, |
|
"eval_rewards/rejected": -0.6001612544059753, |
|
"eval_runtime": 150.1876, |
|
"eval_samples_per_second": 7.61, |
|
"eval_steps_per_second": 0.24, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6268656716417911, |
|
"grad_norm": 4.097576141357422, |
|
"learning_rate": 1.6662356145334158e-05, |
|
"logits/chosen": 0.17615841329097748, |
|
"logits/rejected": 0.03691507875919342, |
|
"logps/chosen": -502.0008544921875, |
|
"logps/rejected": -458.7081604003906, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2368411123752594, |
|
"rewards/margins": 0.48721814155578613, |
|
"rewards/rejected": -0.7240592241287231, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6318407960199005, |
|
"grad_norm": 4.976437091827393, |
|
"learning_rate": 1.6599981899611103e-05, |
|
"logits/chosen": 0.12691722810268402, |
|
"logits/rejected": 0.1578553318977356, |
|
"logps/chosen": -495.80755615234375, |
|
"logps/rejected": -519.9158935546875, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2605741322040558, |
|
"rewards/margins": 0.3820592761039734, |
|
"rewards/rejected": -0.6426333785057068, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.6368159203980099, |
|
"grad_norm": 4.70882511138916, |
|
"learning_rate": 1.653714934605883e-05, |
|
"logits/chosen": 0.09863700717687607, |
|
"logits/rejected": -0.042115092277526855, |
|
"logps/chosen": -526.08251953125, |
|
"logps/rejected": -482.67034912109375, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.35250020027160645, |
|
"rewards/margins": 0.47799065709114075, |
|
"rewards/rejected": -0.8304908275604248, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.6417910447761194, |
|
"grad_norm": 4.004364967346191, |
|
"learning_rate": 1.647386284781828e-05, |
|
"logits/chosen": 0.4434223175048828, |
|
"logits/rejected": 0.3743742108345032, |
|
"logps/chosen": -461.95660400390625, |
|
"logps/rejected": -461.05523681640625, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.1911483108997345, |
|
"rewards/margins": 0.49052226543426514, |
|
"rewards/rejected": -0.681670606136322, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.6467661691542289, |
|
"grad_norm": 4.580120086669922, |
|
"learning_rate": 1.6410126799552653e-05, |
|
"logits/chosen": 0.04173935577273369, |
|
"logits/rejected": 0.12166699767112732, |
|
"logps/chosen": -442.1742858886719, |
|
"logps/rejected": -484.1488342285156, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.40055373311042786, |
|
"rewards/margins": 0.04041279852390289, |
|
"rewards/rejected": -0.44096654653549194, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6517412935323383, |
|
"grad_norm": 4.086886882781982, |
|
"learning_rate": 1.6345945627142264e-05, |
|
"logits/chosen": 0.27961117029190063, |
|
"logits/rejected": 0.2143298089504242, |
|
"logps/chosen": -431.29827880859375, |
|
"logps/rejected": -459.7369384765625, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11182767897844315, |
|
"rewards/margins": 0.17104047536849976, |
|
"rewards/rejected": -0.2828681766986847, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6567164179104478, |
|
"grad_norm": 12.731650352478027, |
|
"learning_rate": 1.628132378737718e-05, |
|
"logits/chosen": 0.4169122278690338, |
|
"logits/rejected": 0.07197268307209015, |
|
"logps/chosen": -529.1094970703125, |
|
"logps/rejected": -411.6646423339844, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.21321269869804382, |
|
"rewards/margins": 0.4294634461402893, |
|
"rewards/rejected": -0.6426761150360107, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6616915422885572, |
|
"grad_norm": 3.989084005355835, |
|
"learning_rate": 1.6216265767647756e-05, |
|
"logits/chosen": 0.30040451884269714, |
|
"logits/rejected": 0.2668513357639313, |
|
"logps/chosen": -517.35009765625, |
|
"logps/rejected": -487.19268798828125, |
|
"loss": 0.64, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3091263175010681, |
|
"rewards/margins": 0.4227018654346466, |
|
"rewards/rejected": -0.7318282127380371, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 4.201297760009766, |
|
"learning_rate": 1.615077608563302e-05, |
|
"logits/chosen": 0.41413354873657227, |
|
"logits/rejected": 0.10457613319158554, |
|
"logps/chosen": -612.5179443359375, |
|
"logps/rejected": -496.9129333496094, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.36306214332580566, |
|
"rewards/margins": 0.50215744972229, |
|
"rewards/rejected": -0.8652196526527405, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 4.379523277282715, |
|
"learning_rate": 1.6084859288986957e-05, |
|
"logits/chosen": 0.19828909635543823, |
|
"logits/rejected": 0.02061871998012066, |
|
"logps/chosen": -455.3958435058594, |
|
"logps/rejected": -407.6889953613281, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.3266015648841858, |
|
"rewards/margins": 0.33076098561286926, |
|
"rewards/rejected": -0.6573625206947327, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6766169154228856, |
|
"grad_norm": 3.583308696746826, |
|
"learning_rate": 1.601851995502272e-05, |
|
"logits/chosen": 0.5686550736427307, |
|
"logits/rejected": 0.585844874382019, |
|
"logps/chosen": -415.34234619140625, |
|
"logps/rejected": -441.9537048339844, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.4486159682273865, |
|
"rewards/margins": 0.2918972969055176, |
|
"rewards/rejected": -0.740513265132904, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.681592039800995, |
|
"grad_norm": 4.024960517883301, |
|
"learning_rate": 1.5951762690394788e-05, |
|
"logits/chosen": 0.30994874238967896, |
|
"logits/rejected": 0.0973886027932167, |
|
"logps/chosen": -457.6329345703125, |
|
"logps/rejected": -444.02935791015625, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.29979974031448364, |
|
"rewards/margins": 0.32890307903289795, |
|
"rewards/rejected": -0.6287028789520264, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.6840796019900498, |
|
"eval_logits/chosen": 0.2462157905101776, |
|
"eval_logits/rejected": 0.10789595544338226, |
|
"eval_logps/chosen": -458.82330322265625, |
|
"eval_logps/rejected": -421.25732421875, |
|
"eval_loss": 0.6359681487083435, |
|
"eval_rewards/accuracies": 0.6006944179534912, |
|
"eval_rewards/chosen": -0.6526302695274353, |
|
"eval_rewards/margins": 0.2929559648036957, |
|
"eval_rewards/rejected": -0.9455862045288086, |
|
"eval_runtime": 150.077, |
|
"eval_samples_per_second": 7.616, |
|
"eval_steps_per_second": 0.24, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6865671641791045, |
|
"grad_norm": 4.124903202056885, |
|
"learning_rate": 1.5884592130779056e-05, |
|
"logits/chosen": 0.14517062902450562, |
|
"logits/rejected": 0.04018905386328697, |
|
"logps/chosen": -487.6488342285156, |
|
"logps/rejected": -464.5549011230469, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6954266428947449, |
|
"rewards/margins": 0.17861610651016235, |
|
"rewards/rejected": -0.8740427494049072, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6915422885572139, |
|
"grad_norm": 3.633653402328491, |
|
"learning_rate": 1.581701294055095e-05, |
|
"logits/chosen": 0.2584773302078247, |
|
"logits/rejected": -0.018043681979179382, |
|
"logps/chosen": -511.7429504394531, |
|
"logps/rejected": -504.0497741699219, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7117761373519897, |
|
"rewards/margins": 0.30023542046546936, |
|
"rewards/rejected": -1.0120115280151367, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.6965174129353234, |
|
"grad_norm": 4.155452251434326, |
|
"learning_rate": 1.5749029812461515e-05, |
|
"logits/chosen": 0.314390629529953, |
|
"logits/rejected": 0.3334752023220062, |
|
"logps/chosen": -530.2760009765625, |
|
"logps/rejected": -522.5399780273438, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6176879405975342, |
|
"rewards/margins": -0.009039867669343948, |
|
"rewards/rejected": -0.6086481213569641, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7014925373134329, |
|
"grad_norm": 4.27207088470459, |
|
"learning_rate": 1.568064746731156e-05, |
|
"logits/chosen": 0.3614248037338257, |
|
"logits/rejected": 0.06402953714132309, |
|
"logps/chosen": -542.3954467773438, |
|
"logps/rejected": -467.5965881347656, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07103107869625092, |
|
"rewards/margins": 0.46965718269348145, |
|
"rewards/rejected": -0.5406882166862488, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.7064676616915423, |
|
"grad_norm": 4.479232311248779, |
|
"learning_rate": 1.5611870653623826e-05, |
|
"logits/chosen": 0.045674506574869156, |
|
"logits/rejected": -0.21201254427433014, |
|
"logps/chosen": -595.710205078125, |
|
"logps/rejected": -465.8755187988281, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17292442917823792, |
|
"rewards/margins": 0.23611289262771606, |
|
"rewards/rejected": -0.40903735160827637, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.7114427860696517, |
|
"grad_norm": 3.8208746910095215, |
|
"learning_rate": 1.5542704147313257e-05, |
|
"logits/chosen": 0.4481641948223114, |
|
"logits/rejected": 0.399469256401062, |
|
"logps/chosen": -401.0939636230469, |
|
"logps/rejected": -375.7672424316406, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.2306845486164093, |
|
"rewards/margins": 0.27575400471687317, |
|
"rewards/rejected": -0.5064386129379272, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.7164179104477612, |
|
"grad_norm": 4.367500305175781, |
|
"learning_rate": 1.5473152751355353e-05, |
|
"logits/chosen": 0.11335344612598419, |
|
"logits/rejected": -0.00670961756259203, |
|
"logps/chosen": -442.5565185546875, |
|
"logps/rejected": -403.98382568359375, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.47417518496513367, |
|
"rewards/margins": 0.30662575364112854, |
|
"rewards/rejected": -0.7808009386062622, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.7213930348258707, |
|
"grad_norm": 3.969484567642212, |
|
"learning_rate": 1.5403221295452647e-05, |
|
"logits/chosen": 0.31861090660095215, |
|
"logits/rejected": 0.11263471841812134, |
|
"logps/chosen": -439.46630859375, |
|
"logps/rejected": -427.44677734375, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.6381343603134155, |
|
"rewards/margins": 0.34186607599258423, |
|
"rewards/rejected": -0.9800004363059998, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.7263681592039801, |
|
"grad_norm": 3.820988655090332, |
|
"learning_rate": 1.5332914635699327e-05, |
|
"logits/chosen": 0.40720105171203613, |
|
"logits/rejected": 0.1580687016248703, |
|
"logps/chosen": -461.20068359375, |
|
"logps/rejected": -403.16094970703125, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4938652217388153, |
|
"rewards/margins": 0.117790088057518, |
|
"rewards/rejected": -0.6116552948951721, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.7313432835820896, |
|
"grad_norm": 3.886521339416504, |
|
"learning_rate": 1.5262237654244026e-05, |
|
"logits/chosen": 0.5522980093955994, |
|
"logits/rejected": 0.4452764093875885, |
|
"logps/chosen": -421.0696105957031, |
|
"logps/rejected": -399.4547424316406, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.3875323235988617, |
|
"rewards/margins": 0.46860218048095703, |
|
"rewards/rejected": -0.8561345338821411, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.736318407960199, |
|
"grad_norm": 4.079859733581543, |
|
"learning_rate": 1.5191195258950804e-05, |
|
"logits/chosen": 0.40618038177490234, |
|
"logits/rejected": 0.134785994887352, |
|
"logps/chosen": -651.926025390625, |
|
"logps/rejected": -503.9865417480469, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.186576247215271, |
|
"rewards/margins": 0.5179406404495239, |
|
"rewards/rejected": -0.7045168876647949, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.7412935323383084, |
|
"grad_norm": 3.5976879596710205, |
|
"learning_rate": 1.5119792383058338e-05, |
|
"logits/chosen": 0.09992431104183197, |
|
"logits/rejected": -0.01921015977859497, |
|
"logps/chosen": -470.5938720703125, |
|
"logps/rejected": -444.0641174316406, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.31064364314079285, |
|
"rewards/margins": 0.2923417091369629, |
|
"rewards/rejected": -0.6029854416847229, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 4.070811748504639, |
|
"learning_rate": 1.5048033984837352e-05, |
|
"logits/chosen": 0.15589873492717743, |
|
"logits/rejected": -0.034729793667793274, |
|
"logps/chosen": -567.2528076171875, |
|
"logps/rejected": -548.9393310546875, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1316080242395401, |
|
"rewards/margins": 0.4812317490577698, |
|
"rewards/rejected": -0.6128398180007935, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"eval_logits/chosen": 0.18069180846214294, |
|
"eval_logits/rejected": 0.03814281150698662, |
|
"eval_logps/chosen": -453.0019836425781, |
|
"eval_logps/rejected": -415.46087646484375, |
|
"eval_loss": 0.6348658800125122, |
|
"eval_rewards/accuracies": 0.6215277910232544, |
|
"eval_rewards/chosen": -0.07049696147441864, |
|
"eval_rewards/margins": 0.29544174671173096, |
|
"eval_rewards/rejected": -0.3659386932849884, |
|
"eval_runtime": 150.1609, |
|
"eval_samples_per_second": 7.612, |
|
"eval_steps_per_second": 0.24, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7512437810945274, |
|
"grad_norm": 4.077042579650879, |
|
"learning_rate": 1.4975925047246319e-05, |
|
"logits/chosen": 0.09503468126058578, |
|
"logits/rejected": 0.14383243024349213, |
|
"logps/chosen": -511.3801574707031, |
|
"logps/rejected": -459.1648254394531, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.030200934037566185, |
|
"rewards/margins": 0.34608855843544006, |
|
"rewards/rejected": -0.376289427280426, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.7562189054726368, |
|
"grad_norm": 4.1146039962768555, |
|
"learning_rate": 1.4903470577585433e-05, |
|
"logits/chosen": 0.5371518731117249, |
|
"logits/rejected": 0.44205495715141296, |
|
"logps/chosen": -480.82513427734375, |
|
"logps/rejected": -464.04632568359375, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.07143578678369522, |
|
"rewards/margins": 0.3510420322418213, |
|
"rewards/rejected": -0.27960628271102905, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.7611940298507462, |
|
"grad_norm": 4.153296947479248, |
|
"learning_rate": 1.4830675607148899e-05, |
|
"logits/chosen": 0.2690809369087219, |
|
"logits/rejected": 0.2488354742527008, |
|
"logps/chosen": -472.01849365234375, |
|
"logps/rejected": -491.8638916015625, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1490858495235443, |
|
"rewards/margins": 0.38102632761001587, |
|
"rewards/rejected": -0.5301121473312378, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.7661691542288557, |
|
"grad_norm": 4.3426337242126465, |
|
"learning_rate": 1.475754519087557e-05, |
|
"logits/chosen": 0.4082140624523163, |
|
"logits/rejected": 0.4368078410625458, |
|
"logps/chosen": -414.7156677246094, |
|
"logps/rejected": -415.9171142578125, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.10527034103870392, |
|
"rewards/margins": 0.1847696453332901, |
|
"rewards/rejected": -0.2900400161743164, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.7711442786069652, |
|
"grad_norm": 3.6295218467712402, |
|
"learning_rate": 1.4684084406997903e-05, |
|
"logits/chosen": 0.23331183195114136, |
|
"logits/rejected": 0.01600750908255577, |
|
"logps/chosen": -577.4814453125, |
|
"logps/rejected": -505.6333923339844, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.024487711489200592, |
|
"rewards/margins": 0.35684019327163696, |
|
"rewards/rejected": -0.38132789731025696, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7761194029850746, |
|
"grad_norm": 4.441697120666504, |
|
"learning_rate": 1.4610298356689341e-05, |
|
"logits/chosen": 0.19809234142303467, |
|
"logits/rejected": 0.22685889899730682, |
|
"logps/chosen": -413.6700134277344, |
|
"logps/rejected": -467.2070007324219, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.029005911201238632, |
|
"rewards/margins": 0.28220340609550476, |
|
"rewards/rejected": -0.2531975209712982, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7810945273631841, |
|
"grad_norm": 4.630463600158691, |
|
"learning_rate": 1.453619216371008e-05, |
|
"logits/chosen": 0.42978817224502563, |
|
"logits/rejected": 0.39091044664382935, |
|
"logps/chosen": -480.048095703125, |
|
"logps/rejected": -498.24530029296875, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.5016615986824036, |
|
"rewards/margins": 0.07318463921546936, |
|
"rewards/rejected": -0.5748462080955505, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.7860696517412935, |
|
"grad_norm": 4.055500030517578, |
|
"learning_rate": 1.446177097405127e-05, |
|
"logits/chosen": 0.19197359681129456, |
|
"logits/rejected": 0.267251193523407, |
|
"logps/chosen": -554.1470336914062, |
|
"logps/rejected": -491.2269287109375, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.36392736434936523, |
|
"rewards/margins": 0.29624325037002563, |
|
"rewards/rejected": -0.6601705551147461, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.7910447761194029, |
|
"grad_norm": 4.158740520477295, |
|
"learning_rate": 1.4387039955577668e-05, |
|
"logits/chosen": 0.28597795963287354, |
|
"logits/rejected": 0.2785332202911377, |
|
"logps/chosen": -504.1370849609375, |
|
"logps/rejected": -474.9548645019531, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.12694326043128967, |
|
"rewards/margins": 0.2074156403541565, |
|
"rewards/rejected": -0.33435890078544617, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.7960199004975125, |
|
"grad_norm": 4.120989799499512, |
|
"learning_rate": 1.4312004297668791e-05, |
|
"logits/chosen": 0.17556458711624146, |
|
"logits/rejected": 0.0959894210100174, |
|
"logps/chosen": -404.9556579589844, |
|
"logps/rejected": -391.56549072265625, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.09616656601428986, |
|
"rewards/margins": 0.39131832122802734, |
|
"rewards/rejected": -0.487484872341156, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8009950248756219, |
|
"grad_norm": 4.523448944091797, |
|
"learning_rate": 1.4236669210858544e-05, |
|
"logits/chosen": 0.25030747056007385, |
|
"logits/rejected": 0.20863890647888184, |
|
"logps/chosen": -498.8720703125, |
|
"logps/rejected": -507.3069152832031, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.24966737627983093, |
|
"rewards/margins": 0.2907105088233948, |
|
"rewards/rejected": -0.5403779149055481, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.8059701492537313, |
|
"grad_norm": 4.00128173828125, |
|
"learning_rate": 1.4161039926473412e-05, |
|
"logits/chosen": 0.4552380442619324, |
|
"logits/rejected": 0.18772940337657928, |
|
"logps/chosen": -536.3428344726562, |
|
"logps/rejected": -490.09429931640625, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.040442317724227905, |
|
"rewards/margins": 0.29100677371025085, |
|
"rewards/rejected": -0.33144912123680115, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.8084577114427861, |
|
"eval_logits/chosen": 0.2775518596172333, |
|
"eval_logits/rejected": 0.14060264825820923, |
|
"eval_logps/chosen": -455.484375, |
|
"eval_logps/rejected": -418.57281494140625, |
|
"eval_loss": 0.6331359148025513, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.31874096393585205, |
|
"eval_rewards/margins": 0.3583892583847046, |
|
"eval_rewards/rejected": -0.6771301627159119, |
|
"eval_runtime": 150.1695, |
|
"eval_samples_per_second": 7.611, |
|
"eval_steps_per_second": 0.24, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8109452736318408, |
|
"grad_norm": 6.469996452331543, |
|
"learning_rate": 1.4085121696269185e-05, |
|
"logits/chosen": 0.5448468327522278, |
|
"logits/rejected": 0.19260184466838837, |
|
"logps/chosen": -587.0845947265625, |
|
"logps/rejected": -455.35992431640625, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.3791936933994293, |
|
"rewards/margins": 0.3534315824508667, |
|
"rewards/rejected": -0.7326253056526184, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.8159203980099502, |
|
"grad_norm": 4.154365539550781, |
|
"learning_rate": 1.4008919792066273e-05, |
|
"logits/chosen": 0.24580639600753784, |
|
"logits/rejected": 0.30128005146980286, |
|
"logps/chosen": -402.3567199707031, |
|
"logps/rejected": -456.4067687988281, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.35055863857269287, |
|
"rewards/margins": 0.36460378766059875, |
|
"rewards/rejected": -0.7151623964309692, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 4.000673294067383, |
|
"learning_rate": 1.3932439505383628e-05, |
|
"logits/chosen": 0.4568510055541992, |
|
"logits/rejected": 0.26491212844848633, |
|
"logps/chosen": -578.89453125, |
|
"logps/rejected": -484.39654541015625, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.5648167133331299, |
|
"rewards/margins": 0.4626457989215851, |
|
"rewards/rejected": -1.0274624824523926, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8258706467661692, |
|
"grad_norm": 4.538127899169922, |
|
"learning_rate": 1.385568614707129e-05, |
|
"logits/chosen": 0.4450688660144806, |
|
"logits/rejected": 0.1880086064338684, |
|
"logps/chosen": -522.4884033203125, |
|
"logps/rejected": -430.0087585449219, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.8698031902313232, |
|
"rewards/margins": 0.45266827940940857, |
|
"rewards/rejected": -1.3224713802337646, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.8308457711442786, |
|
"grad_norm": 3.7640268802642822, |
|
"learning_rate": 1.3778665046941616e-05, |
|
"logits/chosen": 0.3476739525794983, |
|
"logits/rejected": 0.0015247669070959091, |
|
"logps/chosen": -535.035888671875, |
|
"logps/rejected": -452.0794677734375, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.602352499961853, |
|
"rewards/margins": 0.5098788142204285, |
|
"rewards/rejected": -1.1122313737869263, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.835820895522388, |
|
"grad_norm": 3.9549944400787354, |
|
"learning_rate": 1.3701381553399147e-05, |
|
"logits/chosen": 0.6829994916915894, |
|
"logits/rejected": 0.7129935622215271, |
|
"logps/chosen": -473.60589599609375, |
|
"logps/rejected": -505.9931945800781, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7586303949356079, |
|
"rewards/margins": 0.21102982759475708, |
|
"rewards/rejected": -0.9696601629257202, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.8407960199004975, |
|
"grad_norm": 4.284757614135742, |
|
"learning_rate": 1.3623841033069232e-05, |
|
"logits/chosen": 0.40947800874710083, |
|
"logits/rejected": 0.37943294644355774, |
|
"logps/chosen": -399.6325378417969, |
|
"logps/rejected": -418.98846435546875, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6792310476303101, |
|
"rewards/margins": 0.2475418746471405, |
|
"rewards/rejected": -0.9267728924751282, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.845771144278607, |
|
"grad_norm": 4.278886795043945, |
|
"learning_rate": 1.3546048870425356e-05, |
|
"logits/chosen": 0.38366350531578064, |
|
"logits/rejected": 0.2522772252559662, |
|
"logps/chosen": -413.2381286621094, |
|
"logps/rejected": -408.53399658203125, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.6863256692886353, |
|
"rewards/margins": 0.4241764545440674, |
|
"rewards/rejected": -1.110502004623413, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.8507462686567164, |
|
"grad_norm": 3.915144920349121, |
|
"learning_rate": 1.3468010467415248e-05, |
|
"logits/chosen": 0.3846738636493683, |
|
"logits/rejected": 0.17315393686294556, |
|
"logps/chosen": -491.52935791015625, |
|
"logps/rejected": -414.61328125, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7921670079231262, |
|
"rewards/margins": 0.18218526244163513, |
|
"rewards/rejected": -0.9743523597717285, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.8557213930348259, |
|
"grad_norm": 4.119470596313477, |
|
"learning_rate": 1.3389731243085747e-05, |
|
"logits/chosen": 0.23576557636260986, |
|
"logits/rejected": 0.2507054805755615, |
|
"logps/chosen": -452.724365234375, |
|
"logps/rejected": -467.43682861328125, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6324411630630493, |
|
"rewards/margins": 0.29806220531463623, |
|
"rewards/rejected": -0.9305033683776855, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.8606965174129353, |
|
"grad_norm": 3.8940017223358154, |
|
"learning_rate": 1.3311216633206514e-05, |
|
"logits/chosen": 0.19210243225097656, |
|
"logits/rejected": 0.04936864227056503, |
|
"logps/chosen": -511.88641357421875, |
|
"logps/rejected": -425.4428405761719, |
|
"loss": 0.6167, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.5534711480140686, |
|
"rewards/margins": 0.3882919251918793, |
|
"rewards/rejected": -0.9417631030082703, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.8656716417910447, |
|
"grad_norm": 4.534005165100098, |
|
"learning_rate": 1.3232472089892567e-05, |
|
"logits/chosen": 0.4114670157432556, |
|
"logits/rejected": 0.31665346026420593, |
|
"logps/chosen": -458.63006591796875, |
|
"logps/rejected": -402.3096923828125, |
|
"loss": 0.6983, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4137086868286133, |
|
"rewards/margins": 0.13000260293483734, |
|
"rewards/rejected": -0.543711245059967, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.8706467661691543, |
|
"grad_norm": 5.47732400894165, |
|
"learning_rate": 1.315350308122567e-05, |
|
"logits/chosen": 0.38530704379081726, |
|
"logits/rejected": 0.30780690908432007, |
|
"logps/chosen": -444.169677734375, |
|
"logps/rejected": -430.9597473144531, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.41313281655311584, |
|
"rewards/margins": 0.10978913307189941, |
|
"rewards/rejected": -0.5229219794273376, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8706467661691543, |
|
"eval_logits/chosen": 0.23911841213703156, |
|
"eval_logits/rejected": 0.09861024469137192, |
|
"eval_logps/chosen": -456.55279541015625, |
|
"eval_logps/rejected": -419.2005615234375, |
|
"eval_loss": 0.6294909715652466, |
|
"eval_rewards/accuracies": 0.6111111044883728, |
|
"eval_rewards/chosen": -0.4255761504173279, |
|
"eval_rewards/margins": 0.31433236598968506, |
|
"eval_rewards/rejected": -0.7399084568023682, |
|
"eval_runtime": 150.0352, |
|
"eval_samples_per_second": 7.618, |
|
"eval_steps_per_second": 0.24, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8756218905472637, |
|
"grad_norm": 3.469086170196533, |
|
"learning_rate": 1.3074315090874652e-05, |
|
"logits/chosen": 0.09198964387178421, |
|
"logits/rejected": 0.1355361044406891, |
|
"logps/chosen": -370.5699462890625, |
|
"logps/rejected": -411.66070556640625, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.27120280265808105, |
|
"rewards/margins": 0.3581971824169159, |
|
"rewards/rejected": -0.6293999552726746, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.8805970149253731, |
|
"grad_norm": 3.4145619869232178, |
|
"learning_rate": 1.2994913617714573e-05, |
|
"logits/chosen": 0.39067643880844116, |
|
"logits/rejected": 0.1868411898612976, |
|
"logps/chosen": -448.4958801269531, |
|
"logps/rejected": -393.5880126953125, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31874287128448486, |
|
"rewards/margins": 0.48711925745010376, |
|
"rewards/rejected": -0.8058621883392334, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.8855721393034826, |
|
"grad_norm": 3.769350528717041, |
|
"learning_rate": 1.2915304175444929e-05, |
|
"logits/chosen": 0.43691831827163696, |
|
"logits/rejected": 0.3168666958808899, |
|
"logps/chosen": -450.9046630859375, |
|
"logps/rejected": -430.5725402832031, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.4702681005001068, |
|
"rewards/margins": 0.20657652616500854, |
|
"rewards/rejected": -0.6768447160720825, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.8905472636815921, |
|
"grad_norm": 4.2490763664245605, |
|
"learning_rate": 1.2835492292206735e-05, |
|
"logits/chosen": 0.5658756494522095, |
|
"logits/rejected": 0.4351132810115814, |
|
"logps/chosen": -418.0053405761719, |
|
"logps/rejected": -432.3887634277344, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.36454248428344727, |
|
"rewards/margins": 0.2455846071243286, |
|
"rewards/rejected": -0.6101270914077759, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 3.402549982070923, |
|
"learning_rate": 1.2755483510198668e-05, |
|
"logits/chosen": 0.18330873548984528, |
|
"logits/rejected": 0.06855818629264832, |
|
"logps/chosen": -470.72052001953125, |
|
"logps/rejected": -431.72314453125, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.3080929219722748, |
|
"rewards/margins": 0.3066636025905609, |
|
"rewards/rejected": -0.6147565245628357, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.900497512437811, |
|
"grad_norm": 3.684074640274048, |
|
"learning_rate": 1.2675283385292212e-05, |
|
"logits/chosen": 0.2085587978363037, |
|
"logits/rejected": 0.11812448501586914, |
|
"logps/chosen": -449.0517883300781, |
|
"logps/rejected": -440.92767333984375, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25056391954421997, |
|
"rewards/margins": 0.2954629063606262, |
|
"rewards/rejected": -0.546026885509491, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.9054726368159204, |
|
"grad_norm": 3.109182596206665, |
|
"learning_rate": 1.2594897486645836e-05, |
|
"logits/chosen": 0.21170970797538757, |
|
"logits/rejected": 0.11683456599712372, |
|
"logps/chosen": -457.32684326171875, |
|
"logps/rejected": -442.80426025390625, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32196876406669617, |
|
"rewards/margins": 0.5613601803779602, |
|
"rewards/rejected": -0.8833289742469788, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.9104477611940298, |
|
"grad_norm": 6.466948986053467, |
|
"learning_rate": 1.2514331396318298e-05, |
|
"logits/chosen": 0.16703735291957855, |
|
"logits/rejected": 0.1217992752790451, |
|
"logps/chosen": -456.64312744140625, |
|
"logps/rejected": -477.60943603515625, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.4616580009460449, |
|
"rewards/margins": 0.1874929666519165, |
|
"rewards/rejected": -0.6491509079933167, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.9154228855721394, |
|
"grad_norm": 3.79681396484375, |
|
"learning_rate": 1.2433590708880991e-05, |
|
"logits/chosen": 0.08391296863555908, |
|
"logits/rejected": -0.12564268708229065, |
|
"logps/chosen": -556.4468994140625, |
|
"logps/rejected": -446.3089294433594, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7545434236526489, |
|
"rewards/margins": 0.35477572679519653, |
|
"rewards/rejected": -1.1093190908432007, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.9203980099502488, |
|
"grad_norm": 4.004853248596191, |
|
"learning_rate": 1.2352681031029476e-05, |
|
"logits/chosen": 0.21419230103492737, |
|
"logits/rejected": 0.11660319566726685, |
|
"logps/chosen": -400.4061584472656, |
|
"logps/rejected": -386.1788330078125, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8190222978591919, |
|
"rewards/margins": 0.254682332277298, |
|
"rewards/rejected": -1.073704481124878, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.9253731343283582, |
|
"grad_norm": 3.5617058277130127, |
|
"learning_rate": 1.2271607981194132e-05, |
|
"logits/chosen": 0.23552103340625763, |
|
"logits/rejected": 0.17505709826946259, |
|
"logps/chosen": -487.88153076171875, |
|
"logps/rejected": -487.0649719238281, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8327006697654724, |
|
"rewards/margins": 0.439730167388916, |
|
"rewards/rejected": -1.2724308967590332, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.9303482587064676, |
|
"grad_norm": 3.8972809314727783, |
|
"learning_rate": 1.2190377189150016e-05, |
|
"logits/chosen": 0.1701466292142868, |
|
"logits/rejected": -0.17507055401802063, |
|
"logps/chosen": -546.63134765625, |
|
"logps/rejected": -442.184814453125, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6856258511543274, |
|
"rewards/margins": 0.2289436310529709, |
|
"rewards/rejected": -0.9145694971084595, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.9328358208955224, |
|
"eval_logits/chosen": 0.21203385293483734, |
|
"eval_logits/rejected": 0.0725901871919632, |
|
"eval_logps/chosen": -458.26568603515625, |
|
"eval_logps/rejected": -420.7373962402344, |
|
"eval_loss": 0.6389397382736206, |
|
"eval_rewards/accuracies": 0.6006944179534912, |
|
"eval_rewards/chosen": -0.5968630313873291, |
|
"eval_rewards/margins": 0.29672402143478394, |
|
"eval_rewards/rejected": -0.8935869932174683, |
|
"eval_runtime": 149.6811, |
|
"eval_samples_per_second": 7.636, |
|
"eval_steps_per_second": 0.241, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.9353233830845771, |
|
"grad_norm": 3.4292774200439453, |
|
"learning_rate": 1.2108994295625924e-05, |
|
"logits/chosen": 0.3646988868713379, |
|
"logits/rejected": 0.3169184625148773, |
|
"logps/chosen": -452.8676452636719, |
|
"logps/rejected": -477.1390075683594, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6115289926528931, |
|
"rewards/margins": 0.3036550283432007, |
|
"rewards/rejected": -0.9151840209960938, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.9402985074626866, |
|
"grad_norm": 4.111717700958252, |
|
"learning_rate": 1.2027464951912703e-05, |
|
"logits/chosen": -0.010581929236650467, |
|
"logits/rejected": -0.32105395197868347, |
|
"logps/chosen": -577.8338012695312, |
|
"logps/rejected": -460.8430480957031, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.42835140228271484, |
|
"rewards/margins": 0.29241591691970825, |
|
"rewards/rejected": -0.7207673192024231, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.945273631840796, |
|
"grad_norm": 3.9025838375091553, |
|
"learning_rate": 1.1945794819470805e-05, |
|
"logits/chosen": 0.17384302616119385, |
|
"logits/rejected": 0.1552993208169937, |
|
"logps/chosen": -457.97296142578125, |
|
"logps/rejected": -547.5758056640625, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.3758777976036072, |
|
"rewards/margins": 0.24084581434726715, |
|
"rewards/rejected": -0.6167235970497131, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.9502487562189055, |
|
"grad_norm": 3.4757816791534424, |
|
"learning_rate": 1.1863989569537165e-05, |
|
"logits/chosen": 0.0369485542178154, |
|
"logits/rejected": -0.15935146808624268, |
|
"logps/chosen": -442.7840270996094, |
|
"logps/rejected": -408.35162353515625, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.12775643169879913, |
|
"rewards/margins": 0.40465879440307617, |
|
"rewards/rejected": -0.27690234780311584, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.9552238805970149, |
|
"grad_norm": 3.849163770675659, |
|
"learning_rate": 1.1782054882731377e-05, |
|
"logits/chosen": 0.30783870816230774, |
|
"logits/rejected": 0.18860141932964325, |
|
"logps/chosen": -455.1307373046875, |
|
"logps/rejected": -429.60430908203125, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.10190219432115555, |
|
"rewards/margins": 0.3229271173477173, |
|
"rewards/rejected": -0.4248293340206146, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.9601990049751243, |
|
"grad_norm": 3.603282928466797, |
|
"learning_rate": 1.1699996448661242e-05, |
|
"logits/chosen": 0.2716388702392578, |
|
"logits/rejected": 0.048077456653118134, |
|
"logps/chosen": -479.42657470703125, |
|
"logps/rejected": -440.88507080078125, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.03539299592375755, |
|
"rewards/margins": 0.5358645915985107, |
|
"rewards/rejected": -0.5004715919494629, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.9651741293532339, |
|
"grad_norm": 3.572518825531006, |
|
"learning_rate": 1.161781996552765e-05, |
|
"logits/chosen": 0.17352545261383057, |
|
"logits/rejected": 0.060841046273708344, |
|
"logps/chosen": -446.12451171875, |
|
"logps/rejected": -426.59222412109375, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.16514363884925842, |
|
"rewards/margins": 0.26698362827301025, |
|
"rewards/rejected": -0.10184000432491302, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 4.555858135223389, |
|
"learning_rate": 1.1535531139728918e-05, |
|
"logits/chosen": 0.24533721804618835, |
|
"logits/rejected": -0.07171311974525452, |
|
"logps/chosen": -523.5499877929688, |
|
"logps/rejected": -437.7908935546875, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.12301430106163025, |
|
"rewards/margins": 0.2650811970233917, |
|
"rewards/rejected": -0.14206688106060028, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9751243781094527, |
|
"grad_norm": 3.932504892349243, |
|
"learning_rate": 1.1453135685464524e-05, |
|
"logits/chosen": 0.08851994574069977, |
|
"logits/rejected": 0.14470553398132324, |
|
"logps/chosen": -439.4774169921875, |
|
"logps/rejected": -538.0982666015625, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.26700615882873535, |
|
"rewards/margins": 0.5629878640174866, |
|
"rewards/rejected": -0.295981764793396, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.9800995024875622, |
|
"grad_norm": 3.7867138385772705, |
|
"learning_rate": 1.1370639324338313e-05, |
|
"logits/chosen": 0.26342546939849854, |
|
"logits/rejected": -0.009939752519130707, |
|
"logps/chosen": -462.4211120605469, |
|
"logps/rejected": -397.9438171386719, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.277785062789917, |
|
"rewards/margins": 0.45373010635375977, |
|
"rewards/rejected": -0.1759449988603592, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.9850746268656716, |
|
"grad_norm": 3.5211777687072754, |
|
"learning_rate": 1.1288047784961166e-05, |
|
"logits/chosen": 0.3734492063522339, |
|
"logits/rejected": 0.2930186092853546, |
|
"logps/chosen": -512.7589721679688, |
|
"logps/rejected": -474.911865234375, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.332348495721817, |
|
"rewards/margins": 0.38974326848983765, |
|
"rewards/rejected": -0.05739474669098854, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.9900497512437811, |
|
"grad_norm": 3.415151357650757, |
|
"learning_rate": 1.1205366802553231e-05, |
|
"logits/chosen": 0.2811368703842163, |
|
"logits/rejected": 0.15755276381969452, |
|
"logps/chosen": -549.73583984375, |
|
"logps/rejected": -500.3531494140625, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04190271347761154, |
|
"rewards/margins": 0.3072620630264282, |
|
"rewards/rejected": -0.26535937190055847, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.9950248756218906, |
|
"grad_norm": 3.637012481689453, |
|
"learning_rate": 1.1122602118545642e-05, |
|
"logits/chosen": 0.12841928005218506, |
|
"logits/rejected": -0.023960597813129425, |
|
"logps/chosen": -483.5614013671875, |
|
"logps/rejected": -490.4222412109375, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.10303351283073425, |
|
"rewards/margins": 0.44947099685668945, |
|
"rewards/rejected": -0.3464375436306, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9950248756218906, |
|
"eval_logits/chosen": 0.2406376451253891, |
|
"eval_logits/rejected": 0.10255695879459381, |
|
"eval_logps/chosen": -454.539794921875, |
|
"eval_logps/rejected": -417.31793212890625, |
|
"eval_loss": 0.6309738159179688, |
|
"eval_rewards/accuracies": 0.6284722089767456, |
|
"eval_rewards/chosen": -0.22427807748317719, |
|
"eval_rewards/margins": 0.3273647427558899, |
|
"eval_rewards/rejected": -0.5516427755355835, |
|
"eval_runtime": 150.3056, |
|
"eval_samples_per_second": 7.605, |
|
"eval_steps_per_second": 0.24, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.7533183097839355, |
|
"learning_rate": 1.1039759480181836e-05, |
|
"logits/chosen": 0.12052932381629944, |
|
"logits/rejected": 0.07770034670829773, |
|
"logps/chosen": -418.409912109375, |
|
"logps/rejected": -415.7170715332031, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11391990631818771, |
|
"rewards/margins": 0.34513598680496216, |
|
"rewards/rejected": -0.45905593037605286, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.0049751243781095, |
|
"grad_norm": 3.1612284183502197, |
|
"learning_rate": 1.0956844640118462e-05, |
|
"logits/chosen": 0.3718172311782837, |
|
"logits/rejected": 0.10918774455785751, |
|
"logps/chosen": -493.1455078125, |
|
"logps/rejected": -435.0345153808594, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.20489560067653656, |
|
"rewards/margins": 0.660038411617279, |
|
"rewards/rejected": -0.864933967590332, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.0099502487562189, |
|
"grad_norm": 3.007526397705078, |
|
"learning_rate": 1.0873863356025911e-05, |
|
"logits/chosen": 0.23822058737277985, |
|
"logits/rejected": 0.07988135516643524, |
|
"logps/chosen": -398.7310791015625, |
|
"logps/rejected": -416.26171875, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.2089827060699463, |
|
"rewards/margins": 0.7835352420806885, |
|
"rewards/rejected": -0.9925180077552795, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.0149253731343284, |
|
"grad_norm": 2.805800437927246, |
|
"learning_rate": 1.0790821390188493e-05, |
|
"logits/chosen": 0.32303646206855774, |
|
"logits/rejected": 0.2196110635995865, |
|
"logps/chosen": -503.0115966796875, |
|
"logps/rejected": -474.46246337890625, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5059514045715332, |
|
"rewards/margins": 0.6203033328056335, |
|
"rewards/rejected": -1.1262547969818115, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.0199004975124377, |
|
"grad_norm": 2.849635362625122, |
|
"learning_rate": 1.0707724509104318e-05, |
|
"logits/chosen": 0.12564139068126678, |
|
"logits/rejected": -0.1032138541340828, |
|
"logps/chosen": -492.5018615722656, |
|
"logps/rejected": -433.72662353515625, |
|
"loss": 0.4503, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.2990395128726959, |
|
"rewards/margins": 0.7483544945716858, |
|
"rewards/rejected": -1.047394037246704, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0248756218905473, |
|
"grad_norm": 3.1247310638427734, |
|
"learning_rate": 1.062457848308484e-05, |
|
"logits/chosen": 0.1007804125547409, |
|
"logits/rejected": -0.011237893253564835, |
|
"logps/chosen": -511.64373779296875, |
|
"logps/rejected": -443.93951416015625, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.3589535057544708, |
|
"rewards/margins": 0.5756500959396362, |
|
"rewards/rejected": -0.9346035718917847, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.0298507462686568, |
|
"grad_norm": 3.1016077995300293, |
|
"learning_rate": 1.0541389085854177e-05, |
|
"logits/chosen": 0.5417459011077881, |
|
"logits/rejected": 0.2698720395565033, |
|
"logps/chosen": -491.50115966796875, |
|
"logps/rejected": -424.73309326171875, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.044443853199481964, |
|
"rewards/margins": 0.7540363073348999, |
|
"rewards/rejected": -0.7984801530838013, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.0348258706467661, |
|
"grad_norm": 3.130073070526123, |
|
"learning_rate": 1.0458162094148185e-05, |
|
"logits/chosen": 0.5757798552513123, |
|
"logits/rejected": 0.4289059042930603, |
|
"logps/chosen": -455.25860595703125, |
|
"logps/rejected": -414.67767333984375, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.16106560826301575, |
|
"rewards/margins": 0.6384649872779846, |
|
"rewards/rejected": -0.47739943861961365, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.0398009950248757, |
|
"grad_norm": 2.843583822250366, |
|
"learning_rate": 1.0374903287313307e-05, |
|
"logits/chosen": 0.6092027425765991, |
|
"logits/rejected": 0.4161675274372101, |
|
"logps/chosen": -471.7634582519531, |
|
"logps/rejected": -432.1963195800781, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.15890881419181824, |
|
"rewards/margins": 0.7123965620994568, |
|
"rewards/rejected": -0.5534877777099609, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 3.054884433746338, |
|
"learning_rate": 1.029161844690525e-05, |
|
"logits/chosen": 0.04671328887343407, |
|
"logits/rejected": -0.0623294860124588, |
|
"logps/chosen": -432.43463134765625, |
|
"logps/rejected": -444.45330810546875, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.2890382409095764, |
|
"rewards/margins": 0.9058393836021423, |
|
"rewards/rejected": -0.6168012022972107, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0497512437810945, |
|
"grad_norm": 2.780414581298828, |
|
"learning_rate": 1.0208313356287505e-05, |
|
"logits/chosen": 0.16017179191112518, |
|
"logits/rejected": 0.3132883310317993, |
|
"logps/chosen": -367.1025390625, |
|
"logps/rejected": -421.85235595703125, |
|
"loss": 0.4423, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.15863201022148132, |
|
"rewards/margins": 0.6779903173446655, |
|
"rewards/rejected": -0.5193582773208618, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.054726368159204, |
|
"grad_norm": 2.5658442974090576, |
|
"learning_rate": 1.0124993800229774e-05, |
|
"logits/chosen": 0.5552780628204346, |
|
"logits/rejected": 0.4013071060180664, |
|
"logps/chosen": -482.0791015625, |
|
"logps/rejected": -464.7220764160156, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.09260371327400208, |
|
"rewards/margins": 0.9124071002006531, |
|
"rewards/rejected": -0.8198033571243286, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.0572139303482586, |
|
"eval_logits/chosen": 0.19921229779720306, |
|
"eval_logits/rejected": 0.06041179224848747, |
|
"eval_logps/chosen": -455.6217346191406, |
|
"eval_logps/rejected": -418.9701843261719, |
|
"eval_loss": 0.6237961649894714, |
|
"eval_rewards/accuracies": 0.6631944179534912, |
|
"eval_rewards/chosen": -0.332474023103714, |
|
"eval_rewards/margins": 0.3843950629234314, |
|
"eval_rewards/rejected": -0.7168691158294678, |
|
"eval_runtime": 150.1103, |
|
"eval_samples_per_second": 7.614, |
|
"eval_steps_per_second": 0.24, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.0597014925373134, |
|
"grad_norm": 2.8231992721557617, |
|
"learning_rate": 1.004166556450623e-05, |
|
"logits/chosen": 0.33288416266441345, |
|
"logits/rejected": 0.09707096964120865, |
|
"logps/chosen": -464.0365905761719, |
|
"logps/rejected": -426.97601318359375, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.08150234073400497, |
|
"rewards/margins": 0.8820555210113525, |
|
"rewards/rejected": -0.9635578393936157, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.064676616915423, |
|
"grad_norm": 3.675276756286621, |
|
"learning_rate": 9.958334435493776e-06, |
|
"logits/chosen": 0.19826172292232513, |
|
"logits/rejected": 0.07607944309711456, |
|
"logps/chosen": -414.4964599609375, |
|
"logps/rejected": -442.0020751953125, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.1501787155866623, |
|
"rewards/margins": 0.8547053337097168, |
|
"rewards/rejected": -1.004884123802185, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.0696517412935322, |
|
"grad_norm": 2.7473108768463135, |
|
"learning_rate": 9.87500619977023e-06, |
|
"logits/chosen": 0.11883494257926941, |
|
"logits/rejected": -0.021124478429555893, |
|
"logps/chosen": -453.2391052246094, |
|
"logps/rejected": -427.75830078125, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.10561797767877579, |
|
"rewards/margins": 0.6551963090896606, |
|
"rewards/rejected": -0.7608143091201782, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0746268656716418, |
|
"grad_norm": 3.5719878673553467, |
|
"learning_rate": 9.791686643712498e-06, |
|
"logits/chosen": 0.3640301525592804, |
|
"logits/rejected": 0.11707509309053421, |
|
"logps/chosen": -486.08709716796875, |
|
"logps/rejected": -430.9344787597656, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.09809544682502747, |
|
"rewards/margins": 0.7188686728477478, |
|
"rewards/rejected": -0.8169642090797424, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.0796019900497513, |
|
"grad_norm": 3.0903241634368896, |
|
"learning_rate": 9.708381553094754e-06, |
|
"logits/chosen": 0.26261359453201294, |
|
"logits/rejected": 0.11373281478881836, |
|
"logps/chosen": -434.330078125, |
|
"logps/rejected": -389.1520080566406, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.051621563732624054, |
|
"rewards/margins": 0.8995364904403687, |
|
"rewards/rejected": -0.84791499376297, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.0845771144278606, |
|
"grad_norm": 3.3322811126708984, |
|
"learning_rate": 9.625096712686694e-06, |
|
"logits/chosen": 0.011679138988256454, |
|
"logits/rejected": 0.09790559113025665, |
|
"logps/chosen": -424.90777587890625, |
|
"logps/rejected": -438.1689758300781, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2651669383049011, |
|
"rewards/margins": 0.9951549768447876, |
|
"rewards/rejected": -1.260321855545044, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.0895522388059702, |
|
"grad_norm": 2.9339921474456787, |
|
"learning_rate": 9.541837905851817e-06, |
|
"logits/chosen": 0.047505155205726624, |
|
"logits/rejected": 0.039440758526325226, |
|
"logps/chosen": -442.19647216796875, |
|
"logps/rejected": -480.726318359375, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.05062495172023773, |
|
"rewards/margins": 0.8886253237724304, |
|
"rewards/rejected": -0.9392504096031189, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.0945273631840795, |
|
"grad_norm": 3.0647056102752686, |
|
"learning_rate": 9.458610914145826e-06, |
|
"logits/chosen": 0.14126014709472656, |
|
"logits/rejected": 0.15012700855731964, |
|
"logps/chosen": -428.18438720703125, |
|
"logps/rejected": -441.0984191894531, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.17007280886173248, |
|
"rewards/margins": 0.8784099817276001, |
|
"rewards/rejected": -1.048482894897461, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.099502487562189, |
|
"grad_norm": 2.8182191848754883, |
|
"learning_rate": 9.375421516915165e-06, |
|
"logits/chosen": 0.26013338565826416, |
|
"logits/rejected": 0.15596720576286316, |
|
"logps/chosen": -466.556396484375, |
|
"logps/rejected": -454.1954040527344, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03595845773816109, |
|
"rewards/margins": 1.0611960887908936, |
|
"rewards/rejected": -1.0971544981002808, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.1044776119402986, |
|
"grad_norm": 2.964768171310425, |
|
"learning_rate": 9.292275490895685e-06, |
|
"logits/chosen": 0.1450473666191101, |
|
"logits/rejected": 0.006217047572135925, |
|
"logps/chosen": -560.520751953125, |
|
"logps/rejected": -509.8658142089844, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.46229514479637146, |
|
"rewards/margins": 0.9188253283500671, |
|
"rewards/rejected": -1.3811204433441162, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.109452736318408, |
|
"grad_norm": 3.2267932891845703, |
|
"learning_rate": 9.209178609811509e-06, |
|
"logits/chosen": 0.30923786759376526, |
|
"logits/rejected": 0.10800696909427643, |
|
"logps/chosen": -449.5932922363281, |
|
"logps/rejected": -435.5396423339844, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5209077596664429, |
|
"rewards/margins": 0.8729082942008972, |
|
"rewards/rejected": -1.3938158750534058, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.1144278606965174, |
|
"grad_norm": 3.159635066986084, |
|
"learning_rate": 9.126136643974094e-06, |
|
"logits/chosen": 0.03288649767637253, |
|
"logits/rejected": 0.00020163506269454956, |
|
"logps/chosen": -508.6898498535156, |
|
"logps/rejected": -505.1740417480469, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6636922359466553, |
|
"rewards/margins": 0.842619776725769, |
|
"rewards/rejected": -1.5063120126724243, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 3.421736478805542, |
|
"learning_rate": 9.043155359881538e-06, |
|
"logits/chosen": 0.192546546459198, |
|
"logits/rejected": -0.08039741218090057, |
|
"logps/chosen": -563.0585327148438, |
|
"logps/rejected": -510.62469482421875, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5328125953674316, |
|
"rewards/margins": 0.9578996300697327, |
|
"rewards/rejected": -1.4907121658325195, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"eval_logits/chosen": 0.11539439111948013, |
|
"eval_logits/rejected": -0.026901576668024063, |
|
"eval_logps/chosen": -458.8861083984375, |
|
"eval_logps/rejected": -422.9441223144531, |
|
"eval_loss": 0.6285870671272278, |
|
"eval_rewards/accuracies": 0.6597222089767456, |
|
"eval_rewards/chosen": -0.6589114665985107, |
|
"eval_rewards/margins": 0.4553508758544922, |
|
"eval_rewards/rejected": -1.114262342453003, |
|
"eval_runtime": 151.3253, |
|
"eval_samples_per_second": 7.553, |
|
"eval_steps_per_second": 0.238, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1243781094527363, |
|
"grad_norm": 3.3492910861968994, |
|
"learning_rate": 8.960240519818167e-06, |
|
"logits/chosen": 0.002382766455411911, |
|
"logits/rejected": 0.02159365639090538, |
|
"logps/chosen": -570.236328125, |
|
"logps/rejected": -555.9732666015625, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7100229859352112, |
|
"rewards/margins": 0.716564953327179, |
|
"rewards/rejected": -1.4265879392623901, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.1293532338308458, |
|
"grad_norm": 3.300537347793579, |
|
"learning_rate": 8.877397881454358e-06, |
|
"logits/chosen": 0.3528830409049988, |
|
"logits/rejected": 0.20602422952651978, |
|
"logps/chosen": -531.8645629882812, |
|
"logps/rejected": -465.4809265136719, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.6002427935600281, |
|
"rewards/margins": 0.8341575264930725, |
|
"rewards/rejected": -1.4344004392623901, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.1343283582089552, |
|
"grad_norm": 3.9822070598602295, |
|
"learning_rate": 8.79463319744677e-06, |
|
"logits/chosen": -0.01692591980099678, |
|
"logits/rejected": -0.15085071325302124, |
|
"logps/chosen": -470.937744140625, |
|
"logps/rejected": -452.7400207519531, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.19091981649398804, |
|
"rewards/margins": 0.8660258054733276, |
|
"rewards/rejected": -1.0569454431533813, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.1393034825870647, |
|
"grad_norm": 2.87620210647583, |
|
"learning_rate": 8.711952215038837e-06, |
|
"logits/chosen": 0.38492149114608765, |
|
"logits/rejected": 0.373988538980484, |
|
"logps/chosen": -454.4605407714844, |
|
"logps/rejected": -512.40966796875, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.19313852488994598, |
|
"rewards/margins": 0.8566547632217407, |
|
"rewards/rejected": -1.0497933626174927, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.144278606965174, |
|
"grad_norm": 2.891087770462036, |
|
"learning_rate": 8.629360675661693e-06, |
|
"logits/chosen": 0.4491625130176544, |
|
"logits/rejected": 0.3486018776893616, |
|
"logps/chosen": -431.761474609375, |
|
"logps/rejected": -429.3786926269531, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.27446961402893066, |
|
"rewards/margins": 0.7661145329475403, |
|
"rewards/rejected": -1.0405840873718262, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.1492537313432836, |
|
"grad_norm": 3.027414560317993, |
|
"learning_rate": 8.546864314535478e-06, |
|
"logits/chosen": 0.15871021151542664, |
|
"logits/rejected": 0.12511278688907623, |
|
"logps/chosen": -464.06573486328125, |
|
"logps/rejected": -463.8356018066406, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.32652032375335693, |
|
"rewards/margins": 0.6782144904136658, |
|
"rewards/rejected": -1.0047348737716675, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.154228855721393, |
|
"grad_norm": 3.0336227416992188, |
|
"learning_rate": 8.464468860271084e-06, |
|
"logits/chosen": 0.05021004378795624, |
|
"logits/rejected": -0.06525184959173203, |
|
"logps/chosen": -471.3458557128906, |
|
"logps/rejected": -464.1771240234375, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.007861072197556496, |
|
"rewards/margins": 1.1992193460464478, |
|
"rewards/rejected": -1.191358208656311, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.1592039800995024, |
|
"grad_norm": 3.0443830490112305, |
|
"learning_rate": 8.382180034472353e-06, |
|
"logits/chosen": 0.40720558166503906, |
|
"logits/rejected": 0.19039109349250793, |
|
"logps/chosen": -528.5640258789062, |
|
"logps/rejected": -479.5672302246094, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.19556114077568054, |
|
"rewards/margins": 0.9518004655838013, |
|
"rewards/rejected": -1.1473615169525146, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.164179104477612, |
|
"grad_norm": 3.200226068496704, |
|
"learning_rate": 8.30000355133876e-06, |
|
"logits/chosen": 0.3259899318218231, |
|
"logits/rejected": 0.12083222717046738, |
|
"logps/chosen": -434.56243896484375, |
|
"logps/rejected": -426.33038330078125, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.26636868715286255, |
|
"rewards/margins": 0.7016679048538208, |
|
"rewards/rejected": -0.9680365920066833, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.1691542288557213, |
|
"grad_norm": 3.3915629386901855, |
|
"learning_rate": 8.217945117268624e-06, |
|
"logits/chosen": -0.04203636944293976, |
|
"logits/rejected": 0.0748155415058136, |
|
"logps/chosen": -479.9148864746094, |
|
"logps/rejected": -463.1988220214844, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.46628129482269287, |
|
"rewards/margins": 0.8466652631759644, |
|
"rewards/rejected": -1.3129465579986572, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.1741293532338308, |
|
"grad_norm": 4.10882043838501, |
|
"learning_rate": 8.136010430462837e-06, |
|
"logits/chosen": 0.14211219549179077, |
|
"logits/rejected": 0.05810711905360222, |
|
"logps/chosen": -468.7088317871094, |
|
"logps/rejected": -482.8578796386719, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2802658677101135, |
|
"rewards/margins": 1.0771757364273071, |
|
"rewards/rejected": -1.3574416637420654, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.1791044776119404, |
|
"grad_norm": 2.790282726287842, |
|
"learning_rate": 8.0542051805292e-06, |
|
"logits/chosen": -0.11554953455924988, |
|
"logits/rejected": -0.208129420876503, |
|
"logps/chosen": -456.598876953125, |
|
"logps/rejected": -420.1100158691406, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.33638888597488403, |
|
"rewards/margins": 0.7983494400978088, |
|
"rewards/rejected": -1.1347384452819824, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.1815920398009951, |
|
"eval_logits/chosen": 0.14645284414291382, |
|
"eval_logits/rejected": 0.006150411441922188, |
|
"eval_logps/chosen": -458.5403747558594, |
|
"eval_logps/rejected": -422.0716552734375, |
|
"eval_loss": 0.6252362132072449, |
|
"eval_rewards/accuracies": 0.6354166865348816, |
|
"eval_rewards/chosen": -0.6243360042572021, |
|
"eval_rewards/margins": 0.4026750922203064, |
|
"eval_rewards/rejected": -1.0270111560821533, |
|
"eval_runtime": 150.2224, |
|
"eval_samples_per_second": 7.609, |
|
"eval_steps_per_second": 0.24, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.1840796019900497, |
|
"grad_norm": 2.771230697631836, |
|
"learning_rate": 7.9725350480873e-06, |
|
"logits/chosen": 0.3046668767929077, |
|
"logits/rejected": 0.17750529944896698, |
|
"logps/chosen": -487.18145751953125, |
|
"logps/rejected": -489.9942932128906, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21156734228134155, |
|
"rewards/margins": 0.8424564599990845, |
|
"rewards/rejected": -1.0540237426757812, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.1890547263681592, |
|
"grad_norm": 3.108633041381836, |
|
"learning_rate": 7.89100570437408e-06, |
|
"logits/chosen": 0.5195536017417908, |
|
"logits/rejected": 0.25093546509742737, |
|
"logps/chosen": -470.94891357421875, |
|
"logps/rejected": -412.3416748046875, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2779660224914551, |
|
"rewards/margins": 0.7905957102775574, |
|
"rewards/rejected": -1.0685617923736572, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 3.2091431617736816, |
|
"learning_rate": 7.809622810849986e-06, |
|
"logits/chosen": 0.3036578595638275, |
|
"logits/rejected": 0.09663239121437073, |
|
"logps/chosen": -518.3139038085938, |
|
"logps/rejected": -491.35693359375, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.2721414566040039, |
|
"rewards/margins": 0.8471077084541321, |
|
"rewards/rejected": -1.1192492246627808, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.199004975124378, |
|
"grad_norm": 3.8627357482910156, |
|
"learning_rate": 7.72839201880587e-06, |
|
"logits/chosen": 0.18529945611953735, |
|
"logits/rejected": 0.0777958407998085, |
|
"logps/chosen": -406.6213684082031, |
|
"logps/rejected": -376.0231628417969, |
|
"loss": 0.4521, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.16672272980213165, |
|
"rewards/margins": 0.8920981884002686, |
|
"rewards/rejected": -1.0588209629058838, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.2039800995024876, |
|
"grad_norm": 3.3319764137268066, |
|
"learning_rate": 7.647318968970528e-06, |
|
"logits/chosen": 0.2665182650089264, |
|
"logits/rejected": 0.11450497806072235, |
|
"logps/chosen": -494.59991455078125, |
|
"logps/rejected": -406.59857177734375, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.35199272632598877, |
|
"rewards/margins": 0.6968194246292114, |
|
"rewards/rejected": -1.0488121509552002, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.208955223880597, |
|
"grad_norm": 3.6332457065582275, |
|
"learning_rate": 7.566409291119008e-06, |
|
"logits/chosen": 0.16549383103847504, |
|
"logits/rejected": -0.042187366634607315, |
|
"logps/chosen": -411.47100830078125, |
|
"logps/rejected": -402.9810791015625, |
|
"loss": 0.4496, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.3397058844566345, |
|
"rewards/margins": 0.9274094700813293, |
|
"rewards/rejected": -1.2671154737472534, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.2139303482587065, |
|
"grad_norm": 6.592203617095947, |
|
"learning_rate": 7.485668603681706e-06, |
|
"logits/chosen": 0.42461320757865906, |
|
"logits/rejected": 0.21914049983024597, |
|
"logps/chosen": -555.8026123046875, |
|
"logps/rejected": -517.2098388671875, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5179316401481628, |
|
"rewards/margins": 0.991509735584259, |
|
"rewards/rejected": -1.5094413757324219, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.2189054726368158, |
|
"grad_norm": 2.970505952835083, |
|
"learning_rate": 7.405102513354166e-06, |
|
"logits/chosen": 0.33260223269462585, |
|
"logits/rejected": 0.3816107511520386, |
|
"logps/chosen": -447.697509765625, |
|
"logps/rejected": -462.2044677734375, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.3011395037174225, |
|
"rewards/margins": 0.949279248714447, |
|
"rewards/rejected": -1.250418782234192, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.2238805970149254, |
|
"grad_norm": 3.419555187225342, |
|
"learning_rate": 7.324716614707794e-06, |
|
"logits/chosen": 0.2947157025337219, |
|
"logits/rejected": 0.11895683407783508, |
|
"logps/chosen": -479.42059326171875, |
|
"logps/rejected": -443.11993408203125, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.37127968668937683, |
|
"rewards/margins": 0.907949686050415, |
|
"rewards/rejected": -1.2792294025421143, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.228855721393035, |
|
"grad_norm": 3.920330286026001, |
|
"learning_rate": 7.2445164898013345e-06, |
|
"logits/chosen": 0.2471471130847931, |
|
"logits/rejected": 0.16002610325813293, |
|
"logps/chosen": -482.1630859375, |
|
"logps/rejected": -452.62213134765625, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4815042316913605, |
|
"rewards/margins": 0.6901217699050903, |
|
"rewards/rejected": -1.171626091003418, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.2338308457711442, |
|
"grad_norm": 3.4007022380828857, |
|
"learning_rate": 7.1645077077932666e-06, |
|
"logits/chosen": 0.13965100049972534, |
|
"logits/rejected": -0.005473073571920395, |
|
"logps/chosen": -492.3451843261719, |
|
"logps/rejected": -430.3792419433594, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.6226473450660706, |
|
"rewards/margins": 0.7819827795028687, |
|
"rewards/rejected": -1.404630184173584, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.2388059701492538, |
|
"grad_norm": 3.3789188861846924, |
|
"learning_rate": 7.084695824555074e-06, |
|
"logits/chosen": 0.12412463128566742, |
|
"logits/rejected": -0.04749886691570282, |
|
"logps/chosen": -456.4656982421875, |
|
"logps/rejected": -449.3288269042969, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.37792813777923584, |
|
"rewards/margins": 0.9587810635566711, |
|
"rewards/rejected": -1.3367091417312622, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.243781094527363, |
|
"grad_norm": 3.2594857215881348, |
|
"learning_rate": 7.005086382285426e-06, |
|
"logits/chosen": 0.19527438282966614, |
|
"logits/rejected": -0.04300277307629585, |
|
"logps/chosen": -524.0037841796875, |
|
"logps/rejected": -452.43988037109375, |
|
"loss": 0.4483, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.3103996217250824, |
|
"rewards/margins": 0.7246525883674622, |
|
"rewards/rejected": -1.0350522994995117, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.243781094527363, |
|
"eval_logits/chosen": 0.1812911480665207, |
|
"eval_logits/rejected": 0.043385788798332214, |
|
"eval_logps/chosen": -458.6221923828125, |
|
"eval_logps/rejected": -422.3155822753906, |
|
"eval_loss": 0.6238306164741516, |
|
"eval_rewards/accuracies": 0.6319444179534912, |
|
"eval_rewards/chosen": -0.6325181126594543, |
|
"eval_rewards/margins": 0.41889193654060364, |
|
"eval_rewards/rejected": -1.05141019821167, |
|
"eval_runtime": 150.5346, |
|
"eval_samples_per_second": 7.593, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2487562189054726, |
|
"grad_norm": 3.286379098892212, |
|
"learning_rate": 6.925684909125354e-06, |
|
"logits/chosen": 0.3578662574291229, |
|
"logits/rejected": 0.16462884843349457, |
|
"logps/chosen": -489.2138366699219, |
|
"logps/rejected": -452.0278015136719, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4701817035675049, |
|
"rewards/margins": 0.689208984375, |
|
"rewards/rejected": -1.1593906879425049, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.2537313432835822, |
|
"grad_norm": 3.4609503746032715, |
|
"learning_rate": 6.84649691877433e-06, |
|
"logits/chosen": 0.292858362197876, |
|
"logits/rejected": 0.3071328401565552, |
|
"logps/chosen": -498.725341796875, |
|
"logps/rejected": -514.9793701171875, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.3274274468421936, |
|
"rewards/margins": 0.735092282295227, |
|
"rewards/rejected": -1.0625197887420654, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.2587064676616915, |
|
"grad_norm": 4.149216175079346, |
|
"learning_rate": 6.767527910107437e-06, |
|
"logits/chosen": 0.41340234875679016, |
|
"logits/rejected": 0.23155152797698975, |
|
"logps/chosen": -589.994384765625, |
|
"logps/rejected": -512.817138671875, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.050317004323005676, |
|
"rewards/margins": 0.8621648550033569, |
|
"rewards/rejected": -0.9124818444252014, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.263681592039801, |
|
"grad_norm": 3.068629026412964, |
|
"learning_rate": 6.688783366793488e-06, |
|
"logits/chosen": 0.07183945924043655, |
|
"logits/rejected": 0.1859855055809021, |
|
"logps/chosen": -416.20281982421875, |
|
"logps/rejected": -504.38360595703125, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.47665709257125854, |
|
"rewards/margins": 1.1034715175628662, |
|
"rewards/rejected": -1.5801286697387695, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 3.3486008644104004, |
|
"learning_rate": 6.610268756914254e-06, |
|
"logits/chosen": 0.2134770154953003, |
|
"logits/rejected": 0.20936183631420135, |
|
"logps/chosen": -456.9624328613281, |
|
"logps/rejected": -467.43695068359375, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4169631004333496, |
|
"rewards/margins": 0.7615378499031067, |
|
"rewards/rejected": -1.1785008907318115, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.2736318407960199, |
|
"grad_norm": 2.8429603576660156, |
|
"learning_rate": 6.5319895325847535e-06, |
|
"logits/chosen": 0.37751051783561707, |
|
"logits/rejected": 0.24831168353557587, |
|
"logps/chosen": -482.39044189453125, |
|
"logps/rejected": -445.41131591796875, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.3370966613292694, |
|
"rewards/margins": 1.0583980083465576, |
|
"rewards/rejected": -1.3954945802688599, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.2786069651741294, |
|
"grad_norm": 3.422572374343872, |
|
"learning_rate": 6.453951129574644e-06, |
|
"logits/chosen": 0.09760095179080963, |
|
"logits/rejected": -0.02564432844519615, |
|
"logps/chosen": -478.0529479980469, |
|
"logps/rejected": -460.9502258300781, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4741438329219818, |
|
"rewards/margins": 0.7156703472137451, |
|
"rewards/rejected": -1.1898140907287598, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.2835820895522387, |
|
"grad_norm": 7.709662914276123, |
|
"learning_rate": 6.3761589669307745e-06, |
|
"logits/chosen": 0.2523835301399231, |
|
"logits/rejected": -0.018656061962246895, |
|
"logps/chosen": -464.8309326171875, |
|
"logps/rejected": -421.08831787109375, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.46211400628089905, |
|
"rewards/margins": 0.7744930982589722, |
|
"rewards/rejected": -1.2366070747375488, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.2885572139303483, |
|
"grad_norm": 3.1016786098480225, |
|
"learning_rate": 6.298618446600856e-06, |
|
"logits/chosen": 0.28200894594192505, |
|
"logits/rejected": 0.07383685559034348, |
|
"logps/chosen": -498.76904296875, |
|
"logps/rejected": -477.27203369140625, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.26558923721313477, |
|
"rewards/margins": 1.0237658023834229, |
|
"rewards/rejected": -1.2893550395965576, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.2935323383084576, |
|
"grad_norm": 3.2327277660369873, |
|
"learning_rate": 6.221334953058389e-06, |
|
"logits/chosen": 0.15867388248443604, |
|
"logits/rejected": 0.08303281664848328, |
|
"logps/chosen": -409.2590026855469, |
|
"logps/rejected": -455.77191162109375, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4104706346988678, |
|
"rewards/margins": 0.7841976881027222, |
|
"rewards/rejected": -1.1946682929992676, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.2985074626865671, |
|
"grad_norm": 3.339369058609009, |
|
"learning_rate": 6.144313852928712e-06, |
|
"logits/chosen": -0.03164299577474594, |
|
"logits/rejected": -0.07064341753721237, |
|
"logps/chosen": -476.3907470703125, |
|
"logps/rejected": -485.3206787109375, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3814237713813782, |
|
"rewards/margins": 0.6672918796539307, |
|
"rewards/rejected": -1.048715591430664, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.3034825870646767, |
|
"grad_norm": 3.5485079288482666, |
|
"learning_rate": 6.067560494616374e-06, |
|
"logits/chosen": 0.07933502644300461, |
|
"logits/rejected": -0.09825630486011505, |
|
"logps/chosen": -476.7882080078125, |
|
"logps/rejected": -423.75634765625, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.4646296203136444, |
|
"rewards/margins": 0.9645313620567322, |
|
"rewards/rejected": -1.4291609525680542, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.3059701492537314, |
|
"eval_logits/chosen": 0.2900795340538025, |
|
"eval_logits/rejected": 0.1563159078359604, |
|
"eval_logps/chosen": -461.8538818359375, |
|
"eval_logps/rejected": -425.2582702636719, |
|
"eval_loss": 0.6297169923782349, |
|
"eval_rewards/accuracies": 0.6284722089767456, |
|
"eval_rewards/chosen": -0.9556920528411865, |
|
"eval_rewards/margins": 0.38998663425445557, |
|
"eval_rewards/rejected": -1.3456788063049316, |
|
"eval_runtime": 150.3263, |
|
"eval_samples_per_second": 7.603, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.308457711442786, |
|
"grad_norm": 3.0293073654174805, |
|
"learning_rate": 5.9910802079337285e-06, |
|
"logits/chosen": 0.6111765503883362, |
|
"logits/rejected": 0.5151335000991821, |
|
"logps/chosen": -502.5360412597656, |
|
"logps/rejected": -504.69293212890625, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5117080807685852, |
|
"rewards/margins": 0.8272175788879395, |
|
"rewards/rejected": -1.338925838470459, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.3134328358208955, |
|
"grad_norm": 3.2000210285186768, |
|
"learning_rate": 5.9148783037308154e-06, |
|
"logits/chosen": 0.513329029083252, |
|
"logits/rejected": 0.3263680338859558, |
|
"logps/chosen": -450.2699890136719, |
|
"logps/rejected": -474.0347900390625, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.664053201675415, |
|
"rewards/margins": 0.9260993003845215, |
|
"rewards/rejected": -1.5901525020599365, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.3184079601990049, |
|
"grad_norm": 3.6587066650390625, |
|
"learning_rate": 5.838960073526589e-06, |
|
"logits/chosen": 0.306156724691391, |
|
"logits/rejected": 0.25492236018180847, |
|
"logps/chosen": -410.9796142578125, |
|
"logps/rejected": -433.6587829589844, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.671193540096283, |
|
"rewards/margins": 0.6943222880363464, |
|
"rewards/rejected": -1.365515947341919, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.3233830845771144, |
|
"grad_norm": 3.327662706375122, |
|
"learning_rate": 5.763330789141457e-06, |
|
"logits/chosen": 0.48469293117523193, |
|
"logits/rejected": 0.19550618529319763, |
|
"logps/chosen": -453.54766845703125, |
|
"logps/rejected": -384.0401611328125, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7684299945831299, |
|
"rewards/margins": 0.6500393152236938, |
|
"rewards/rejected": -1.4184694290161133, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.328358208955224, |
|
"grad_norm": 3.2267775535583496, |
|
"learning_rate": 5.687995702331211e-06, |
|
"logits/chosen": 0.2688726484775543, |
|
"logits/rejected": -0.08813167363405228, |
|
"logps/chosen": -501.2934265136719, |
|
"logps/rejected": -409.6513671875, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.40029340982437134, |
|
"rewards/margins": 1.1426159143447876, |
|
"rewards/rejected": -1.5429092645645142, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 2.840391159057617, |
|
"learning_rate": 5.612960044422335e-06, |
|
"logits/chosen": 0.149551659822464, |
|
"logits/rejected": 0.09185415506362915, |
|
"logps/chosen": -478.0235290527344, |
|
"logps/rejected": -465.431640625, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.33660268783569336, |
|
"rewards/margins": 0.9488120079040527, |
|
"rewards/rejected": -1.2854145765304565, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.3383084577114428, |
|
"grad_norm": 5.434293270111084, |
|
"learning_rate": 5.538229025948729e-06, |
|
"logits/chosen": 0.27715224027633667, |
|
"logits/rejected": 0.15390388667583466, |
|
"logps/chosen": -462.994873046875, |
|
"logps/rejected": -443.705322265625, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3295069932937622, |
|
"rewards/margins": 0.7583127617835999, |
|
"rewards/rejected": -1.0878196954727173, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 2.998685121536255, |
|
"learning_rate": 5.463807836289921e-06, |
|
"logits/chosen": 0.21240472793579102, |
|
"logits/rejected": 0.08427554368972778, |
|
"logps/chosen": -515.3600463867188, |
|
"logps/rejected": -489.23199462890625, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.37038007378578186, |
|
"rewards/margins": 0.8079636693000793, |
|
"rewards/rejected": -1.1783437728881836, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.3482587064676617, |
|
"grad_norm": 3.10492205619812, |
|
"learning_rate": 5.389701643310661e-06, |
|
"logits/chosen": 0.025207914412021637, |
|
"logits/rejected": -0.02044026553630829, |
|
"logps/chosen": -456.87506103515625, |
|
"logps/rejected": -439.67376708984375, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.6905679106712341, |
|
"rewards/margins": 0.7245473861694336, |
|
"rewards/rejected": -1.4151153564453125, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.3532338308457712, |
|
"grad_norm": 3.226285934448242, |
|
"learning_rate": 5.3159155930021e-06, |
|
"logits/chosen": 0.3893488645553589, |
|
"logits/rejected": 0.08346641063690186, |
|
"logps/chosen": -531.7063598632812, |
|
"logps/rejected": -439.24896240234375, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.45566755533218384, |
|
"rewards/margins": 0.9592388868331909, |
|
"rewards/rejected": -1.4149065017700195, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.3582089552238805, |
|
"grad_norm": 3.0475516319274902, |
|
"learning_rate": 5.2424548091244334e-06, |
|
"logits/chosen": 0.2637353539466858, |
|
"logits/rejected": 0.1712309867143631, |
|
"logps/chosen": -587.0075073242188, |
|
"logps/rejected": -532.0603637695312, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4887734055519104, |
|
"rewards/margins": 0.7975496053695679, |
|
"rewards/rejected": -1.286323070526123, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.36318407960199, |
|
"grad_norm": 3.3424243927001953, |
|
"learning_rate": 5.169324392851105e-06, |
|
"logits/chosen": 0.08936847001314163, |
|
"logits/rejected": 0.15995635092258453, |
|
"logps/chosen": -441.9477233886719, |
|
"logps/rejected": -479.75811767578125, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.15634143352508545, |
|
"rewards/margins": 0.9987101554870605, |
|
"rewards/rejected": -1.155051589012146, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.3681592039800994, |
|
"grad_norm": 3.5509424209594727, |
|
"learning_rate": 5.096529422414571e-06, |
|
"logits/chosen": 0.23796138167381287, |
|
"logits/rejected": 0.1327465921640396, |
|
"logps/chosen": -397.3380126953125, |
|
"logps/rejected": -398.999755859375, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.27862298488616943, |
|
"rewards/margins": 0.8094170093536377, |
|
"rewards/rejected": -1.0880398750305176, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.3681592039800994, |
|
"eval_logits/chosen": 0.2271488755941391, |
|
"eval_logits/rejected": 0.09045371413230896, |
|
"eval_logps/chosen": -458.12158203125, |
|
"eval_logps/rejected": -421.81396484375, |
|
"eval_loss": 0.6310929656028748, |
|
"eval_rewards/accuracies": 0.6319444179534912, |
|
"eval_rewards/chosen": -0.5824543833732605, |
|
"eval_rewards/margins": 0.4187923073768616, |
|
"eval_rewards/rejected": -1.001246690750122, |
|
"eval_runtime": 150.3083, |
|
"eval_samples_per_second": 7.604, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.373134328358209, |
|
"grad_norm": 3.0843594074249268, |
|
"learning_rate": 5.0240749527536845e-06, |
|
"logits/chosen": 0.34869927167892456, |
|
"logits/rejected": 0.4646757245063782, |
|
"logps/chosen": -461.98406982421875, |
|
"logps/rejected": -484.62005615234375, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.35243409872055054, |
|
"rewards/margins": 0.8469762206077576, |
|
"rewards/rejected": -1.199410319328308, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.3781094527363185, |
|
"grad_norm": 3.259739637374878, |
|
"learning_rate": 4.951966015162652e-06, |
|
"logits/chosen": 0.2850785255432129, |
|
"logits/rejected": 0.18751974403858185, |
|
"logps/chosen": -446.40777587890625, |
|
"logps/rejected": -452.7511291503906, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5133770108222961, |
|
"rewards/margins": 0.7468600273132324, |
|
"rewards/rejected": -1.2602368593215942, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.3830845771144278, |
|
"grad_norm": 3.8932905197143555, |
|
"learning_rate": 4.880207616941663e-06, |
|
"logits/chosen": 0.12189581990242004, |
|
"logits/rejected": -0.021296532824635506, |
|
"logps/chosen": -558.7091674804688, |
|
"logps/rejected": -541.033203125, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5688372254371643, |
|
"rewards/margins": 0.9365439414978027, |
|
"rewards/rejected": -1.5053812265396118, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.3880597014925373, |
|
"grad_norm": 3.545090913772583, |
|
"learning_rate": 4.8088047410492e-06, |
|
"logits/chosen": 0.388312965631485, |
|
"logits/rejected": 0.4156542122364044, |
|
"logps/chosen": -481.10894775390625, |
|
"logps/rejected": -492.54351806640625, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.45832952857017517, |
|
"rewards/margins": 0.7087246179580688, |
|
"rewards/rejected": -1.1670540571212769, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.3930348258706466, |
|
"grad_norm": 6.003669738769531, |
|
"learning_rate": 4.737762345755975e-06, |
|
"logits/chosen": 0.4008483588695526, |
|
"logits/rejected": 0.18298931419849396, |
|
"logps/chosen": -427.1964111328125, |
|
"logps/rejected": -415.52569580078125, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3898906111717224, |
|
"rewards/margins": 0.851486086845398, |
|
"rewards/rejected": -1.2413768768310547, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.3980099502487562, |
|
"grad_norm": 3.3979361057281494, |
|
"learning_rate": 4.667085364300678e-06, |
|
"logits/chosen": 0.2219407558441162, |
|
"logits/rejected": 0.34066152572631836, |
|
"logps/chosen": -412.3261413574219, |
|
"logps/rejected": -455.014404296875, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.659538745880127, |
|
"rewards/margins": 0.5176219344139099, |
|
"rewards/rejected": -1.1771607398986816, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.4029850746268657, |
|
"grad_norm": 3.4212944507598877, |
|
"learning_rate": 4.596778704547359e-06, |
|
"logits/chosen": 0.26894667744636536, |
|
"logits/rejected": 0.4513319134712219, |
|
"logps/chosen": -422.888916015625, |
|
"logps/rejected": -494.77490234375, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5094490647315979, |
|
"rewards/margins": 0.8285016417503357, |
|
"rewards/rejected": -1.3379508256912231, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.407960199004975, |
|
"grad_norm": 3.561112403869629, |
|
"learning_rate": 4.526847248644652e-06, |
|
"logits/chosen": 0.4178231358528137, |
|
"logits/rejected": 0.2783817648887634, |
|
"logps/chosen": -483.21710205078125, |
|
"logps/rejected": -457.58880615234375, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2329222410917282, |
|
"rewards/margins": 0.928954005241394, |
|
"rewards/rejected": -1.1618762016296387, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.4129353233830846, |
|
"grad_norm": 2.9117441177368164, |
|
"learning_rate": 4.457295852686746e-06, |
|
"logits/chosen": 0.0378599688410759, |
|
"logits/rejected": -0.03257442265748978, |
|
"logps/chosen": -457.05810546875, |
|
"logps/rejected": -469.375244140625, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.36898645758628845, |
|
"rewards/margins": 0.7519980072975159, |
|
"rewards/rejected": -1.120984435081482, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 2.848207950592041, |
|
"learning_rate": 4.388129346376177e-06, |
|
"logits/chosen": 0.286159873008728, |
|
"logits/rejected": 0.172444686293602, |
|
"logps/chosen": -449.7042541503906, |
|
"logps/rejected": -421.1620178222656, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.26226454973220825, |
|
"rewards/margins": 0.8386867046356201, |
|
"rewards/rejected": -1.1009511947631836, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.4228855721393034, |
|
"grad_norm": 3.3167192935943604, |
|
"learning_rate": 4.319352532688444e-06, |
|
"logits/chosen": 0.5666424632072449, |
|
"logits/rejected": 0.4337669014930725, |
|
"logps/chosen": -525.3414306640625, |
|
"logps/rejected": -470.1195373535156, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21915507316589355, |
|
"rewards/margins": 0.6568694114685059, |
|
"rewards/rejected": -0.8760244250297546, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.427860696517413, |
|
"grad_norm": 3.3861823081970215, |
|
"learning_rate": 4.250970187538484e-06, |
|
"logits/chosen": 0.5643165707588196, |
|
"logits/rejected": 0.32966622710227966, |
|
"logps/chosen": -515.8458251953125, |
|
"logps/rejected": -457.2875061035156, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.32475122809410095, |
|
"rewards/margins": 0.6942940950393677, |
|
"rewards/rejected": -1.0190452337265015, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.4303482587064678, |
|
"eval_logits/chosen": 0.1860918253660202, |
|
"eval_logits/rejected": 0.04717594385147095, |
|
"eval_logps/chosen": -457.6623229980469, |
|
"eval_logps/rejected": -421.17510986328125, |
|
"eval_loss": 0.6247898936271667, |
|
"eval_rewards/accuracies": 0.6423611044883728, |
|
"eval_rewards/chosen": -0.5365298390388489, |
|
"eval_rewards/margins": 0.40083229541778564, |
|
"eval_rewards/rejected": -0.9373621344566345, |
|
"eval_runtime": 150.2353, |
|
"eval_samples_per_second": 7.608, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.4328358208955223, |
|
"grad_norm": 3.2325477600097656, |
|
"learning_rate": 4.182987059449056e-06, |
|
"logits/chosen": 0.7428713440895081, |
|
"logits/rejected": 0.5120058059692383, |
|
"logps/chosen": -538.3825073242188, |
|
"logps/rejected": -495.9022521972656, |
|
"loss": 0.4357, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06033702194690704, |
|
"rewards/margins": 0.8632436990737915, |
|
"rewards/rejected": -0.9235806465148926, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.4378109452736318, |
|
"grad_norm": 3.037522554397583, |
|
"learning_rate": 4.115407869220948e-06, |
|
"logits/chosen": 0.14752769470214844, |
|
"logits/rejected": 0.11974264681339264, |
|
"logps/chosen": -451.0633544921875, |
|
"logps/rejected": -483.72320556640625, |
|
"loss": 0.4238, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.340376079082489, |
|
"rewards/margins": 0.8940660953521729, |
|
"rewards/rejected": -1.2344422340393066, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.4427860696517412, |
|
"grad_norm": 3.1680057048797607, |
|
"learning_rate": 4.048237309605216e-06, |
|
"logits/chosen": 0.09094828367233276, |
|
"logits/rejected": 0.04780995845794678, |
|
"logps/chosen": -484.3055725097656, |
|
"logps/rejected": -510.98077392578125, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.12246174365282059, |
|
"rewards/margins": 0.9445063471794128, |
|
"rewards/rejected": -1.0669679641723633, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.4477611940298507, |
|
"grad_norm": 3.2691431045532227, |
|
"learning_rate": 3.981480044977284e-06, |
|
"logits/chosen": 0.40636903047561646, |
|
"logits/rejected": 0.1688281148672104, |
|
"logps/chosen": -479.45855712890625, |
|
"logps/rejected": -447.90863037109375, |
|
"loss": 0.4313, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.18339478969573975, |
|
"rewards/margins": 0.978028416633606, |
|
"rewards/rejected": -1.1614230871200562, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.4527363184079602, |
|
"grad_norm": 4.486429691314697, |
|
"learning_rate": 3.915140711013044e-06, |
|
"logits/chosen": 0.27190345525741577, |
|
"logits/rejected": -0.040328770875930786, |
|
"logps/chosen": -491.3775634765625, |
|
"logps/rejected": -391.1009521484375, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.08996576815843582, |
|
"rewards/margins": 0.9748902916908264, |
|
"rewards/rejected": -1.064855933189392, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.4577114427860698, |
|
"grad_norm": 3.12208890914917, |
|
"learning_rate": 3.849223914366981e-06, |
|
"logits/chosen": 0.47814592719078064, |
|
"logits/rejected": 0.34353479743003845, |
|
"logps/chosen": -412.7701110839844, |
|
"logps/rejected": -392.3418884277344, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.47262948751449585, |
|
"rewards/margins": 0.8124217987060547, |
|
"rewards/rejected": -1.2850513458251953, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.462686567164179, |
|
"grad_norm": 3.506924629211426, |
|
"learning_rate": 3.7837342323522454e-06, |
|
"logits/chosen": 0.21991969645023346, |
|
"logits/rejected": 0.07681813836097717, |
|
"logps/chosen": -441.36126708984375, |
|
"logps/rejected": -480.6995849609375, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.3392900824546814, |
|
"rewards/margins": 0.7776792645454407, |
|
"rewards/rejected": -1.116969347000122, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.4676616915422884, |
|
"grad_norm": 3.4964208602905273, |
|
"learning_rate": 3.7186762126228227e-06, |
|
"logits/chosen": 0.22460336983203888, |
|
"logits/rejected": 0.20655813813209534, |
|
"logps/chosen": -462.06292724609375, |
|
"logps/rejected": -461.53619384765625, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4264296293258667, |
|
"rewards/margins": 0.894850492477417, |
|
"rewards/rejected": -1.3212801218032837, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.472636815920398, |
|
"grad_norm": 3.5613043308258057, |
|
"learning_rate": 3.654054372857738e-06, |
|
"logits/chosen": 0.5799933075904846, |
|
"logits/rejected": 0.6048757433891296, |
|
"logps/chosen": -396.9797058105469, |
|
"logps/rejected": -425.9620361328125, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5064554810523987, |
|
"rewards/margins": 0.8282725214958191, |
|
"rewards/rejected": -1.3347280025482178, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.4776119402985075, |
|
"grad_norm": 3.3799402713775635, |
|
"learning_rate": 3.5898732004473523e-06, |
|
"logits/chosen": 0.12272289395332336, |
|
"logits/rejected": 0.06375124305486679, |
|
"logps/chosen": -490.4371337890625, |
|
"logps/rejected": -490.0047607421875, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.523765504360199, |
|
"rewards/margins": 0.788061261177063, |
|
"rewards/rejected": -1.3118268251419067, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.482587064676617, |
|
"grad_norm": 3.9501986503601074, |
|
"learning_rate": 3.5261371521817247e-06, |
|
"logits/chosen": 0.410488486289978, |
|
"logits/rejected": 0.24566176533699036, |
|
"logps/chosen": -488.9652099609375, |
|
"logps/rejected": -474.0276794433594, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.2862902283668518, |
|
"rewards/margins": 0.930047869682312, |
|
"rewards/rejected": -1.216338038444519, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.4875621890547264, |
|
"grad_norm": 3.150517702102661, |
|
"learning_rate": 3.462850653941171e-06, |
|
"logits/chosen": 0.478097140789032, |
|
"logits/rejected": 0.35722148418426514, |
|
"logps/chosen": -488.7939453125, |
|
"logps/rejected": -501.26995849609375, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.44928714632987976, |
|
"rewards/margins": 1.0384962558746338, |
|
"rewards/rejected": -1.487783432006836, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 3.1727919578552246, |
|
"learning_rate": 3.4000181003889e-06, |
|
"logits/chosen": 0.5604240298271179, |
|
"logits/rejected": 0.5023808479309082, |
|
"logps/chosen": -495.4609069824219, |
|
"logps/rejected": -486.8149108886719, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5019670128822327, |
|
"rewards/margins": 0.7741233110427856, |
|
"rewards/rejected": -1.276090383529663, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"eval_logits/chosen": 0.0996675118803978, |
|
"eval_logits/rejected": -0.04232680797576904, |
|
"eval_logps/chosen": -458.75396728515625, |
|
"eval_logps/rejected": -422.38055419921875, |
|
"eval_loss": 0.625542402267456, |
|
"eval_rewards/accuracies": 0.6423611044883728, |
|
"eval_rewards/chosen": -0.6456986665725708, |
|
"eval_rewards/margins": 0.4122096002101898, |
|
"eval_rewards/rejected": -1.057908296585083, |
|
"eval_runtime": 150.4735, |
|
"eval_samples_per_second": 7.596, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.4975124378109452, |
|
"grad_norm": 3.5961523056030273, |
|
"learning_rate": 3.337643854665843e-06, |
|
"logits/chosen": 0.24791333079338074, |
|
"logits/rejected": 0.09919527173042297, |
|
"logps/chosen": -472.2637939453125, |
|
"logps/rejected": -468.1837463378906, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3583456873893738, |
|
"rewards/margins": 0.6298627853393555, |
|
"rewards/rejected": -0.988208532333374, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.5024875621890548, |
|
"grad_norm": 3.5279345512390137, |
|
"learning_rate": 3.2757322480876996e-06, |
|
"logits/chosen": 0.5340238213539124, |
|
"logits/rejected": 0.27070313692092896, |
|
"logps/chosen": -559.6838989257812, |
|
"logps/rejected": -504.8668212890625, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.36284855008125305, |
|
"rewards/margins": 0.9084014892578125, |
|
"rewards/rejected": -1.2712500095367432, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.5074626865671643, |
|
"grad_norm": 3.2980010509490967, |
|
"learning_rate": 3.2142875798441376e-06, |
|
"logits/chosen": 0.4306156635284424, |
|
"logits/rejected": 0.39325863122940063, |
|
"logps/chosen": -497.91082763671875, |
|
"logps/rejected": -504.37799072265625, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.36765068769454956, |
|
"rewards/margins": 0.9293129444122314, |
|
"rewards/rejected": -1.2969635725021362, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.5124378109452736, |
|
"grad_norm": 3.0862483978271484, |
|
"learning_rate": 3.15331411670027e-06, |
|
"logits/chosen": 0.3046882450580597, |
|
"logits/rejected": 0.16192057728767395, |
|
"logps/chosen": -494.2255859375, |
|
"logps/rejected": -420.78521728515625, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.43021082878112793, |
|
"rewards/margins": 0.6426270008087158, |
|
"rewards/rejected": -1.0728377103805542, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.517412935323383, |
|
"grad_norm": 3.462331533432007, |
|
"learning_rate": 3.092816092700366e-06, |
|
"logits/chosen": 0.5411734580993652, |
|
"logits/rejected": 0.49231088161468506, |
|
"logps/chosen": -453.7169189453125, |
|
"logps/rejected": -449.0989990234375, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.22313016653060913, |
|
"rewards/margins": 0.8612147569656372, |
|
"rewards/rejected": -1.0843448638916016, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.5223880597014925, |
|
"grad_norm": 3.7653286457061768, |
|
"learning_rate": 3.032797708873828e-06, |
|
"logits/chosen": 0.11857330799102783, |
|
"logits/rejected": 0.07505325227975845, |
|
"logps/chosen": -410.9967956542969, |
|
"logps/rejected": -394.2019348144531, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18447177112102509, |
|
"rewards/margins": 0.8445629477500916, |
|
"rewards/rejected": -1.0290347337722778, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.527363184079602, |
|
"grad_norm": 3.707679033279419, |
|
"learning_rate": 2.97326313294349e-06, |
|
"logits/chosen": 0.40369507670402527, |
|
"logits/rejected": 0.31983357667922974, |
|
"logps/chosen": -493.98748779296875, |
|
"logps/rejected": -492.9511413574219, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.09158361703157425, |
|
"rewards/margins": 0.8120929598808289, |
|
"rewards/rejected": -0.9036765694618225, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.5323383084577116, |
|
"grad_norm": 3.2079174518585205, |
|
"learning_rate": 2.914216499036178e-06, |
|
"logits/chosen": 0.25405532121658325, |
|
"logits/rejected": 0.0883391723036766, |
|
"logps/chosen": -499.791259765625, |
|
"logps/rejected": -481.15985107421875, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.11543658375740051, |
|
"rewards/margins": 0.9941234588623047, |
|
"rewards/rejected": -1.1095600128173828, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.537313432835821, |
|
"grad_norm": 2.983884811401367, |
|
"learning_rate": 2.855661907395655e-06, |
|
"logits/chosen": 0.03389931470155716, |
|
"logits/rejected": -0.021514683961868286, |
|
"logps/chosen": -491.31011962890625, |
|
"logps/rejected": -476.60400390625, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.34667280316352844, |
|
"rewards/margins": 0.7871711850166321, |
|
"rewards/rejected": -1.133844017982483, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.5422885572139302, |
|
"grad_norm": 3.2897229194641113, |
|
"learning_rate": 2.7976034240978834e-06, |
|
"logits/chosen": 0.2967681884765625, |
|
"logits/rejected": 0.21248680353164673, |
|
"logps/chosen": -450.992431640625, |
|
"logps/rejected": -448.6529541015625, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4360540509223938, |
|
"rewards/margins": 0.8572646379470825, |
|
"rewards/rejected": -1.2933186292648315, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.5472636815920398, |
|
"grad_norm": 3.180680990219116, |
|
"learning_rate": 2.740045080768694e-06, |
|
"logits/chosen": 0.18130186200141907, |
|
"logits/rejected": 0.08365779370069504, |
|
"logps/chosen": -490.65093994140625, |
|
"logps/rejected": -455.1732482910156, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.30239832401275635, |
|
"rewards/margins": 1.023730993270874, |
|
"rewards/rejected": -1.3261293172836304, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.5522388059701493, |
|
"grad_norm": 3.139068841934204, |
|
"learning_rate": 2.6829908743037936e-06, |
|
"logits/chosen": 0.09111860394477844, |
|
"logits/rejected": 0.002367449924349785, |
|
"logps/chosen": -452.03741455078125, |
|
"logps/rejected": -420.84130859375, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.44431072473526, |
|
"rewards/margins": 0.6989957690238953, |
|
"rewards/rejected": -1.1433064937591553, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.554726368159204, |
|
"eval_logits/chosen": 0.2652171850204468, |
|
"eval_logits/rejected": 0.12997165322303772, |
|
"eval_logps/chosen": -459.63043212890625, |
|
"eval_logps/rejected": -423.3202209472656, |
|
"eval_loss": 0.629449188709259, |
|
"eval_rewards/accuracies": 0.6319444179534912, |
|
"eval_rewards/chosen": -0.733344554901123, |
|
"eval_rewards/margins": 0.41853055357933044, |
|
"eval_rewards/rejected": -1.1518750190734863, |
|
"eval_runtime": 149.9641, |
|
"eval_samples_per_second": 7.622, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.5572139303482588, |
|
"grad_norm": 2.9897727966308594, |
|
"learning_rate": 2.626444766591253e-06, |
|
"logits/chosen": 0.2800312340259552, |
|
"logits/rejected": 0.3279171586036682, |
|
"logps/chosen": -443.030517578125, |
|
"logps/rejected": -504.1892395019531, |
|
"loss": 0.4307, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.6528493762016296, |
|
"rewards/margins": 0.8220282793045044, |
|
"rewards/rejected": -1.4748777151107788, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.5621890547263682, |
|
"grad_norm": 3.8915977478027344, |
|
"learning_rate": 2.570410684236365e-06, |
|
"logits/chosen": 0.422254741191864, |
|
"logits/rejected": 0.20921355485916138, |
|
"logps/chosen": -451.8226318359375, |
|
"logps/rejected": -409.6569519042969, |
|
"loss": 0.4575, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5334627628326416, |
|
"rewards/margins": 0.7321829199790955, |
|
"rewards/rejected": -1.2656457424163818, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 3.2011289596557617, |
|
"learning_rate": 2.514892518288988e-06, |
|
"logits/chosen": 0.2908586859703064, |
|
"logits/rejected": 0.2285086065530777, |
|
"logps/chosen": -540.8156127929688, |
|
"logps/rejected": -594.375244140625, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.4963512718677521, |
|
"rewards/margins": 1.1527037620544434, |
|
"rewards/rejected": -1.6490551233291626, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.572139303482587, |
|
"grad_norm": 3.1737494468688965, |
|
"learning_rate": 2.4598941239733555e-06, |
|
"logits/chosen": 0.3123033046722412, |
|
"logits/rejected": 0.09384813904762268, |
|
"logps/chosen": -506.7591552734375, |
|
"logps/rejected": -445.96966552734375, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5109487771987915, |
|
"rewards/margins": 0.6419615745544434, |
|
"rewards/rejected": -1.1529103517532349, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.5771144278606966, |
|
"grad_norm": 3.3242666721343994, |
|
"learning_rate": 2.4054193204203457e-06, |
|
"logits/chosen": 0.4143025279045105, |
|
"logits/rejected": 0.3821703791618347, |
|
"logps/chosen": -444.6474609375, |
|
"logps/rejected": -441.0220031738281, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.30891337990760803, |
|
"rewards/margins": 0.7618473768234253, |
|
"rewards/rejected": -1.0707608461380005, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.582089552238806, |
|
"grad_norm": 3.3792619705200195, |
|
"learning_rate": 2.3514718904022993e-06, |
|
"logits/chosen": 0.4942702651023865, |
|
"logits/rejected": 0.4701668620109558, |
|
"logps/chosen": -440.89813232421875, |
|
"logps/rejected": -436.6291809082031, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.30445003509521484, |
|
"rewards/margins": 0.7473883032798767, |
|
"rewards/rejected": -1.0518382787704468, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.5870646766169154, |
|
"grad_norm": 3.4865262508392334, |
|
"learning_rate": 2.2980555800703273e-06, |
|
"logits/chosen": 0.03151869773864746, |
|
"logits/rejected": -0.11891334503889084, |
|
"logps/chosen": -446.2289123535156, |
|
"logps/rejected": -399.5888977050781, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.37324345111846924, |
|
"rewards/margins": 0.8816293478012085, |
|
"rewards/rejected": -1.2548727989196777, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.5920398009950247, |
|
"grad_norm": 3.55745530128479, |
|
"learning_rate": 2.2451740986941905e-06, |
|
"logits/chosen": 0.06370651721954346, |
|
"logits/rejected": 0.08302780240774155, |
|
"logps/chosen": -445.5990905761719, |
|
"logps/rejected": -471.1882629394531, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.45704060792922974, |
|
"rewards/margins": 0.803365170955658, |
|
"rewards/rejected": -1.2604056596755981, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.5970149253731343, |
|
"grad_norm": 4.591226100921631, |
|
"learning_rate": 2.1928311184046967e-06, |
|
"logits/chosen": 0.3119097650051117, |
|
"logits/rejected": 0.196340873837471, |
|
"logps/chosen": -478.5575256347656, |
|
"logps/rejected": -448.47967529296875, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.293584406375885, |
|
"rewards/margins": 0.9845431447029114, |
|
"rewards/rejected": -1.2781274318695068, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.6019900497512438, |
|
"grad_norm": 3.375105142593384, |
|
"learning_rate": 2.1410302739387424e-06, |
|
"logits/chosen": 0.35102376341819763, |
|
"logits/rejected": 0.2489197850227356, |
|
"logps/chosen": -476.27801513671875, |
|
"logps/rejected": -478.7754821777344, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.42445307970046997, |
|
"rewards/margins": 0.7885385751724243, |
|
"rewards/rejected": -1.2129915952682495, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.6069651741293534, |
|
"grad_norm": 3.0185811519622803, |
|
"learning_rate": 2.0897751623868833e-06, |
|
"logits/chosen": 0.150477796792984, |
|
"logits/rejected": 0.00734228640794754, |
|
"logps/chosen": -409.4201354980469, |
|
"logps/rejected": -382.40716552734375, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.45322197675704956, |
|
"rewards/margins": 0.8744790554046631, |
|
"rewards/rejected": -1.3277010917663574, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.6119402985074627, |
|
"grad_norm": 3.5438005924224854, |
|
"learning_rate": 2.0390693429435626e-06, |
|
"logits/chosen": 0.27790558338165283, |
|
"logits/rejected": 0.18525969982147217, |
|
"logps/chosen": -441.6915283203125, |
|
"logps/rejected": -437.93292236328125, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.37093889713287354, |
|
"rewards/margins": 0.9228270053863525, |
|
"rewards/rejected": -1.2937657833099365, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.616915422885572, |
|
"grad_norm": 3.5567455291748047, |
|
"learning_rate": 1.9889163366599607e-06, |
|
"logits/chosen": 0.11875329911708832, |
|
"logits/rejected": -0.03504091128706932, |
|
"logps/chosen": -443.64599609375, |
|
"logps/rejected": -427.08154296875, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.6551141738891602, |
|
"rewards/margins": 0.9976121187210083, |
|
"rewards/rejected": -1.652726411819458, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.616915422885572, |
|
"eval_logits/chosen": 0.1787891387939453, |
|
"eval_logits/rejected": 0.03969912976026535, |
|
"eval_logps/chosen": -460.4245300292969, |
|
"eval_logps/rejected": -424.1896057128906, |
|
"eval_loss": 0.6253213286399841, |
|
"eval_rewards/accuracies": 0.6493055820465088, |
|
"eval_rewards/chosen": -0.8127551674842834, |
|
"eval_rewards/margins": 0.42605745792388916, |
|
"eval_rewards/rejected": -1.2388125658035278, |
|
"eval_runtime": 149.9009, |
|
"eval_samples_per_second": 7.625, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.6218905472636815, |
|
"grad_norm": 3.5520968437194824, |
|
"learning_rate": 1.939319626199483e-06, |
|
"logits/chosen": 0.2985292375087738, |
|
"logits/rejected": 0.2237393856048584, |
|
"logps/chosen": -436.99053955078125, |
|
"logps/rejected": -440.2374267578125, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5354505777359009, |
|
"rewards/margins": 0.9315750598907471, |
|
"rewards/rejected": -1.4670255184173584, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.626865671641791, |
|
"grad_norm": 3.226693868637085, |
|
"learning_rate": 1.890282655595922e-06, |
|
"logits/chosen": 0.07176372408866882, |
|
"logits/rejected": 0.027363533154129982, |
|
"logps/chosen": -474.4735107421875, |
|
"logps/rejected": -492.5865173339844, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5504629611968994, |
|
"rewards/margins": 0.9078149795532227, |
|
"rewards/rejected": -1.458277940750122, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.6318407960199006, |
|
"grad_norm": 3.6644186973571777, |
|
"learning_rate": 1.8418088300143044e-06, |
|
"logits/chosen": 0.07038739323616028, |
|
"logits/rejected": -0.047772906720638275, |
|
"logps/chosen": -434.7318115234375, |
|
"logps/rejected": -420.2578430175781, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6160821914672852, |
|
"rewards/margins": 0.760805606842041, |
|
"rewards/rejected": -1.3768879175186157, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.63681592039801, |
|
"grad_norm": 4.1779093742370605, |
|
"learning_rate": 1.7939015155144378e-06, |
|
"logits/chosen": 0.40807458758354187, |
|
"logits/rejected": 0.42695319652557373, |
|
"logps/chosen": -497.0574645996094, |
|
"logps/rejected": -563.171142578125, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.484649658203125, |
|
"rewards/margins": 0.8875025510787964, |
|
"rewards/rejected": -1.3721522092819214, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 4.379684925079346, |
|
"learning_rate": 1.7465640388171589e-06, |
|
"logits/chosen": 0.34882089495658875, |
|
"logits/rejected": 0.1509179174900055, |
|
"logps/chosen": -489.57470703125, |
|
"logps/rejected": -458.5868225097656, |
|
"loss": 0.428, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.40813666582107544, |
|
"rewards/margins": 0.9864634871482849, |
|
"rewards/rejected": -1.3946000337600708, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.6467661691542288, |
|
"grad_norm": 2.909369945526123, |
|
"learning_rate": 1.6997996870733268e-06, |
|
"logits/chosen": 0.5466185808181763, |
|
"logits/rejected": 0.4700179696083069, |
|
"logps/chosen": -430.5594787597656, |
|
"logps/rejected": -413.28466796875, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.2856665849685669, |
|
"rewards/margins": 0.9960864782333374, |
|
"rewards/rejected": -1.2817531824111938, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.6517412935323383, |
|
"grad_norm": 2.9255692958831787, |
|
"learning_rate": 1.6536117076355652e-06, |
|
"logits/chosen": 0.3447165787220001, |
|
"logits/rejected": 0.2050694227218628, |
|
"logps/chosen": -502.8382568359375, |
|
"logps/rejected": -489.395263671875, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.17628344893455505, |
|
"rewards/margins": 0.8542786240577698, |
|
"rewards/rejected": -1.0305620431900024, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.6567164179104479, |
|
"grad_norm": 3.957753896713257, |
|
"learning_rate": 1.6080033078327585e-06, |
|
"logits/chosen": 0.037751637399196625, |
|
"logits/rejected": -0.0011347047984600067, |
|
"logps/chosen": -483.31829833984375, |
|
"logps/rejected": -538.3037719726562, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5082539319992065, |
|
"rewards/margins": 0.6922823190689087, |
|
"rewards/rejected": -1.2005363702774048, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.6616915422885572, |
|
"grad_norm": 3.4213998317718506, |
|
"learning_rate": 1.5629776547473397e-06, |
|
"logits/chosen": 0.4570158123970032, |
|
"logits/rejected": 0.31136855483055115, |
|
"logps/chosen": -430.8033447265625, |
|
"logps/rejected": -415.0570068359375, |
|
"loss": 0.4232, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.4616736173629761, |
|
"rewards/margins": 0.7074974179267883, |
|
"rewards/rejected": -1.1691709756851196, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 4.027344226837158, |
|
"learning_rate": 1.5185378749953538e-06, |
|
"logits/chosen": 0.4271657466888428, |
|
"logits/rejected": 0.4088464379310608, |
|
"logps/chosen": -447.46575927734375, |
|
"logps/rejected": -485.32666015625, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.26651304960250854, |
|
"rewards/margins": 0.8962290287017822, |
|
"rewards/rejected": -1.162742018699646, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.671641791044776, |
|
"grad_norm": 2.962707281112671, |
|
"learning_rate": 1.4746870545093528e-06, |
|
"logits/chosen": 0.45913419127464294, |
|
"logits/rejected": 0.1819644272327423, |
|
"logps/chosen": -413.649658203125, |
|
"logps/rejected": -396.4461669921875, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.3579864203929901, |
|
"rewards/margins": 0.9220394492149353, |
|
"rewards/rejected": -1.2800259590148926, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.6766169154228856, |
|
"grad_norm": 3.308551788330078, |
|
"learning_rate": 1.4314282383241097e-06, |
|
"logits/chosen": 0.3890434205532074, |
|
"logits/rejected": 0.17695972323417664, |
|
"logps/chosen": -442.94415283203125, |
|
"logps/rejected": -399.24871826171875, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.38554510474205017, |
|
"rewards/margins": 1.0001806020736694, |
|
"rewards/rejected": -1.385725736618042, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 1.6791044776119404, |
|
"eval_logits/chosen": 0.2587340176105499, |
|
"eval_logits/rejected": 0.12335896492004395, |
|
"eval_logps/chosen": -459.9505615234375, |
|
"eval_logps/rejected": -423.8654479980469, |
|
"eval_loss": 0.6228974461555481, |
|
"eval_rewards/accuracies": 0.6423611044883728, |
|
"eval_rewards/chosen": -0.7653533220291138, |
|
"eval_rewards/margins": 0.44104525446891785, |
|
"eval_rewards/rejected": -1.2063984870910645, |
|
"eval_runtime": 149.9993, |
|
"eval_samples_per_second": 7.62, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.6815920398009951, |
|
"grad_norm": 3.7813925743103027, |
|
"learning_rate": 1.388764430365147e-06, |
|
"logits/chosen": 0.04878038913011551, |
|
"logits/rejected": 0.14894048869609833, |
|
"logps/chosen": -482.2070617675781, |
|
"logps/rejected": -522.3347778320312, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.40297234058380127, |
|
"rewards/margins": 0.8282972574234009, |
|
"rewards/rejected": -1.2312694787979126, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 1.6865671641791045, |
|
"grad_norm": 4.5991363525390625, |
|
"learning_rate": 1.3466985932401743e-06, |
|
"logits/chosen": 0.328086256980896, |
|
"logits/rejected": 0.15323612093925476, |
|
"logps/chosen": -475.05078125, |
|
"logps/rejected": -424.8678894042969, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.5331354737281799, |
|
"rewards/margins": 0.6546344757080078, |
|
"rewards/rejected": -1.187769889831543, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.6915422885572138, |
|
"grad_norm": 3.5663769245147705, |
|
"learning_rate": 1.3052336480333372e-06, |
|
"logits/chosen": 0.2575068771839142, |
|
"logits/rejected": 0.05139423906803131, |
|
"logps/chosen": -488.58538818359375, |
|
"logps/rejected": -455.89971923828125, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3154524862766266, |
|
"rewards/margins": 1.0843840837478638, |
|
"rewards/rejected": -1.399836540222168, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.6965174129353233, |
|
"grad_norm": 2.9855713844299316, |
|
"learning_rate": 1.2643724741023845e-06, |
|
"logits/chosen": 0.19129760563373566, |
|
"logits/rejected": 0.0936068594455719, |
|
"logps/chosen": -416.364501953125, |
|
"logps/rejected": -433.6866455078125, |
|
"loss": 0.4275, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.408183217048645, |
|
"rewards/margins": 1.019007921218872, |
|
"rewards/rejected": -1.427191138267517, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.7014925373134329, |
|
"grad_norm": 3.654536485671997, |
|
"learning_rate": 1.2241179088787192e-06, |
|
"logits/chosen": 0.39111489057540894, |
|
"logits/rejected": 0.035055145621299744, |
|
"logps/chosen": -540.2343139648438, |
|
"logps/rejected": -456.66424560546875, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.567006528377533, |
|
"rewards/margins": 0.8900810480117798, |
|
"rewards/rejected": -1.4570876359939575, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.7064676616915424, |
|
"grad_norm": 3.8228235244750977, |
|
"learning_rate": 1.1844727476703776e-06, |
|
"logits/chosen": 0.42539361119270325, |
|
"logits/rejected": 0.1684579700231552, |
|
"logps/chosen": -524.5021362304688, |
|
"logps/rejected": -491.33587646484375, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.535822868347168, |
|
"rewards/margins": 1.0067797899246216, |
|
"rewards/rejected": -1.5426026582717896, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 1.7114427860696517, |
|
"grad_norm": 3.2348790168762207, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"logits/chosen": 0.40201398730278015, |
|
"logits/rejected": 0.2579033672809601, |
|
"logps/chosen": -561.11328125, |
|
"logps/rejected": -525.084716796875, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.30076974630355835, |
|
"rewards/margins": 1.0572198629379272, |
|
"rewards/rejected": -1.3579895496368408, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 3.4407033920288086, |
|
"learning_rate": 1.1070216067531825e-06, |
|
"logits/chosen": 0.25583919882774353, |
|
"logits/rejected": 0.3665779232978821, |
|
"logps/chosen": -391.76739501953125, |
|
"logps/rejected": -447.6612854003906, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5686730742454529, |
|
"rewards/margins": 0.7188047170639038, |
|
"rewards/rejected": -1.287477731704712, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.7213930348258706, |
|
"grad_norm": 3.3550174236297607, |
|
"learning_rate": 1.0692210053112451e-06, |
|
"logits/chosen": 0.09708093851804733, |
|
"logits/rejected": -0.10916668176651001, |
|
"logps/chosen": -493.13922119140625, |
|
"logps/rejected": -448.70391845703125, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.7006162405014038, |
|
"rewards/margins": 0.6611677408218384, |
|
"rewards/rejected": -1.3617841005325317, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 1.7263681592039801, |
|
"grad_norm": 3.5417962074279785, |
|
"learning_rate": 1.032040564044975e-06, |
|
"logits/chosen": 0.10944227129220963, |
|
"logits/rejected": 0.03197764605283737, |
|
"logps/chosen": -475.2284851074219, |
|
"logps/rejected": -441.09356689453125, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5546907186508179, |
|
"rewards/margins": 0.8569729328155518, |
|
"rewards/rejected": -1.4116637706756592, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 1.7313432835820897, |
|
"grad_norm": 2.918147087097168, |
|
"learning_rate": 9.954828647928727e-07, |
|
"logits/chosen": 0.22849802672863007, |
|
"logits/rejected": 0.1010328084230423, |
|
"logps/chosen": -401.5469055175781, |
|
"logps/rejected": -382.9391174316406, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.3927709460258484, |
|
"rewards/margins": 0.9075154066085815, |
|
"rewards/rejected": -1.3002864122390747, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.736318407960199, |
|
"grad_norm": 3.2414426803588867, |
|
"learning_rate": 9.595504461497441e-07, |
|
"logits/chosen": 0.6792712807655334, |
|
"logits/rejected": 0.5262346863746643, |
|
"logps/chosen": -517.4805297851562, |
|
"logps/rejected": -470.47845458984375, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5418896675109863, |
|
"rewards/margins": 0.7864224910736084, |
|
"rewards/rejected": -1.3283122777938843, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 1.7412935323383083, |
|
"grad_norm": 2.9658279418945312, |
|
"learning_rate": 9.242458032904311e-07, |
|
"logits/chosen": 0.32243314385414124, |
|
"logits/rejected": 0.08803755044937134, |
|
"logps/chosen": -545.0567626953125, |
|
"logps/rejected": -452.5140686035156, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7034752368927002, |
|
"rewards/margins": 0.48704254627227783, |
|
"rewards/rejected": -1.1905179023742676, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.7412935323383083, |
|
"eval_logits/chosen": 0.1930580586194992, |
|
"eval_logits/rejected": 0.05467061698436737, |
|
"eval_logps/chosen": -459.8746643066406, |
|
"eval_logps/rejected": -423.8709716796875, |
|
"eval_loss": 0.6216272115707397, |
|
"eval_rewards/accuracies": 0.6388888955116272, |
|
"eval_rewards/chosen": -0.7577680945396423, |
|
"eval_rewards/margins": 0.4491753578186035, |
|
"eval_rewards/rejected": -1.2069435119628906, |
|
"eval_runtime": 150.434, |
|
"eval_samples_per_second": 7.598, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.7462686567164178, |
|
"grad_norm": 3.2509801387786865, |
|
"learning_rate": 8.895713877965373e-07, |
|
"logits/chosen": 0.4595485031604767, |
|
"logits/rejected": 0.1782127022743225, |
|
"logps/chosen": -443.97564697265625, |
|
"logps/rejected": -366.19793701171875, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.40020307898521423, |
|
"rewards/margins": 0.7735263109207153, |
|
"rewards/rejected": -1.173729419708252, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.7512437810945274, |
|
"grad_norm": 3.9130449295043945, |
|
"learning_rate": 8.555296074861996e-07, |
|
"logits/chosen": 0.10901626199483871, |
|
"logits/rejected": -0.14974814653396606, |
|
"logps/chosen": -456.7862548828125, |
|
"logps/rejected": -415.9285888671875, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.35942861437797546, |
|
"rewards/margins": 1.0868324041366577, |
|
"rewards/rejected": -1.4462610483169556, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 1.756218905472637, |
|
"grad_norm": 4.010313510894775, |
|
"learning_rate": 8.22122826246875e-07, |
|
"logits/chosen": 0.6070827841758728, |
|
"logits/rejected": 0.44983193278312683, |
|
"logps/chosen": -474.1615295410156, |
|
"logps/rejected": -467.8525390625, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.38996899127960205, |
|
"rewards/margins": 0.9174278974533081, |
|
"rewards/rejected": -1.3073970079421997, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 1.7611940298507462, |
|
"grad_norm": 3.6952216625213623, |
|
"learning_rate": 7.89353363871197e-07, |
|
"logits/chosen": 0.5432174205780029, |
|
"logits/rejected": 0.16360357403755188, |
|
"logps/chosen": -484.909423828125, |
|
"logps/rejected": -380.3939514160156, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5896638631820679, |
|
"rewards/margins": 0.679851233959198, |
|
"rewards/rejected": -1.2695151567459106, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 1.7661691542288556, |
|
"grad_norm": 3.4239590167999268, |
|
"learning_rate": 7.572234958958846e-07, |
|
"logits/chosen": 0.5283284187316895, |
|
"logits/rejected": 0.45787736773490906, |
|
"logps/chosen": -476.11383056640625, |
|
"logps/rejected": -496.3194580078125, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.6096381545066833, |
|
"rewards/margins": 0.8067396283149719, |
|
"rewards/rejected": -1.4163777828216553, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.771144278606965, |
|
"grad_norm": 3.3646957874298096, |
|
"learning_rate": 7.2573545344373e-07, |
|
"logits/chosen": 0.32012930512428284, |
|
"logits/rejected": 0.18036966025829315, |
|
"logps/chosen": -473.15997314453125, |
|
"logps/rejected": -461.828369140625, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.468191534280777, |
|
"rewards/margins": 0.9105731248855591, |
|
"rewards/rejected": -1.3787648677825928, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 1.7761194029850746, |
|
"grad_norm": 3.111020088195801, |
|
"learning_rate": 6.948914230686688e-07, |
|
"logits/chosen": 0.072984479367733, |
|
"logits/rejected": -0.05354681983590126, |
|
"logps/chosen": -499.5512390136719, |
|
"logps/rejected": -478.2958679199219, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.33797788619995117, |
|
"rewards/margins": 1.0079048871994019, |
|
"rewards/rejected": -1.345882773399353, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 1.7810945273631842, |
|
"grad_norm": 3.493523597717285, |
|
"learning_rate": 6.646935466039373e-07, |
|
"logits/chosen": 0.1697852909564972, |
|
"logits/rejected": 0.06225850433111191, |
|
"logps/chosen": -436.7352600097656, |
|
"logps/rejected": -451.919677734375, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.30590546131134033, |
|
"rewards/margins": 1.0445671081542969, |
|
"rewards/rejected": -1.3504725694656372, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 1.7860696517412935, |
|
"grad_norm": 3.5960781574249268, |
|
"learning_rate": 6.351439210133492e-07, |
|
"logits/chosen": 0.16463078558444977, |
|
"logits/rejected": -0.0510396808385849, |
|
"logps/chosen": -489.61944580078125, |
|
"logps/rejected": -507.6096496582031, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5339682698249817, |
|
"rewards/margins": 0.8638878464698792, |
|
"rewards/rejected": -1.3978562355041504, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 2.9790070056915283, |
|
"learning_rate": 6.062445982456777e-07, |
|
"logits/chosen": 0.34399691224098206, |
|
"logits/rejected": 0.11060275137424469, |
|
"logps/chosen": -484.4259338378906, |
|
"logps/rejected": -441.9401550292969, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.41451165080070496, |
|
"rewards/margins": 0.9917902946472168, |
|
"rewards/rejected": -1.4063019752502441, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.7960199004975124, |
|
"grad_norm": 3.2462868690490723, |
|
"learning_rate": 5.77997585092166e-07, |
|
"logits/chosen": 0.24684180319309235, |
|
"logits/rejected": 0.09757015109062195, |
|
"logps/chosen": -501.04132080078125, |
|
"logps/rejected": -473.86407470703125, |
|
"loss": 0.4557, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5149147510528564, |
|
"rewards/margins": 0.7671060562133789, |
|
"rewards/rejected": -1.2820206880569458, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 1.800995024875622, |
|
"grad_norm": 4.118817329406738, |
|
"learning_rate": 5.504048430471753e-07, |
|
"logits/chosen": 0.05877215415239334, |
|
"logits/rejected": 0.0584217831492424, |
|
"logps/chosen": -416.53216552734375, |
|
"logps/rejected": -467.2144775390625, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.606140673160553, |
|
"rewards/margins": 0.9090366363525391, |
|
"rewards/rejected": -1.5151773691177368, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 1.8034825870646767, |
|
"eval_logits/chosen": 0.20400173962116241, |
|
"eval_logits/rejected": 0.06605671346187592, |
|
"eval_logps/chosen": -459.7663879394531, |
|
"eval_logps/rejected": -423.7731018066406, |
|
"eval_loss": 0.6204391121864319, |
|
"eval_rewards/accuracies": 0.6493055820465088, |
|
"eval_rewards/chosen": -0.7469313144683838, |
|
"eval_rewards/margins": 0.45023012161254883, |
|
"eval_rewards/rejected": -1.1971614360809326, |
|
"eval_runtime": 150.447, |
|
"eval_samples_per_second": 7.597, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.8059701492537314, |
|
"grad_norm": 3.226379156112671, |
|
"learning_rate": 5.234682881719766e-07, |
|
"logits/chosen": 0.1486922800540924, |
|
"logits/rejected": 0.2820119261741638, |
|
"logps/chosen": -438.1523132324219, |
|
"logps/rejected": -485.3592529296875, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4801374673843384, |
|
"rewards/margins": 0.7025588750839233, |
|
"rewards/rejected": -1.1826963424682617, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.8109452736318408, |
|
"grad_norm": 4.159534454345703, |
|
"learning_rate": 4.971897909616985e-07, |
|
"logits/chosen": 0.42471548914909363, |
|
"logits/rejected": 0.16627195477485657, |
|
"logps/chosen": -582.96630859375, |
|
"logps/rejected": -513.5985107421875, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4713844358921051, |
|
"rewards/margins": 0.9388619661331177, |
|
"rewards/rejected": -1.4102462530136108, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 1.81592039800995, |
|
"grad_norm": 3.511375665664673, |
|
"learning_rate": 4.715711762154362e-07, |
|
"logits/chosen": 0.11827405542135239, |
|
"logits/rejected": 0.02628401480615139, |
|
"logps/chosen": -472.3614501953125, |
|
"logps/rejected": -421.99163818359375, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5915001034736633, |
|
"rewards/margins": 0.8902807235717773, |
|
"rewards/rejected": -1.481780767440796, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.8208955223880596, |
|
"grad_norm": 3.314199686050415, |
|
"learning_rate": 4.4661422290954495e-07, |
|
"logits/chosen": 0.1869126260280609, |
|
"logits/rejected": 0.040468111634254456, |
|
"logps/chosen": -461.4591064453125, |
|
"logps/rejected": -420.57196044921875, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.6002779006958008, |
|
"rewards/margins": 0.8377397060394287, |
|
"rewards/rejected": -1.438017725944519, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 1.8258706467661692, |
|
"grad_norm": 3.301818609237671, |
|
"learning_rate": 4.2232066407409067e-07, |
|
"logits/chosen": 0.21843373775482178, |
|
"logits/rejected": 0.028878776356577873, |
|
"logps/chosen": -522.05029296875, |
|
"logps/rejected": -468.34906005859375, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.30435776710510254, |
|
"rewards/margins": 0.8217609524726868, |
|
"rewards/rejected": -1.126118779182434, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 1.8308457711442787, |
|
"grad_norm": 3.323982000350952, |
|
"learning_rate": 3.986921866725202e-07, |
|
"logits/chosen": 0.37691932916641235, |
|
"logits/rejected": 0.059887684881687164, |
|
"logps/chosen": -472.496337890625, |
|
"logps/rejected": -386.56976318359375, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.31983205676078796, |
|
"rewards/margins": 1.0095765590667725, |
|
"rewards/rejected": -1.3294085264205933, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 1.835820895522388, |
|
"grad_norm": 3.2554380893707275, |
|
"learning_rate": 3.7573043148451673e-07, |
|
"logits/chosen": 0.21050050854682922, |
|
"logits/rejected": 0.1986107975244522, |
|
"logps/chosen": -456.1379699707031, |
|
"logps/rejected": -496.9686584472656, |
|
"loss": 0.4472, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4782811999320984, |
|
"rewards/margins": 0.9004664421081543, |
|
"rewards/rejected": -1.3787477016448975, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 1.8407960199004973, |
|
"grad_norm": 3.1780014038085938, |
|
"learning_rate": 3.5343699299205003e-07, |
|
"logits/chosen": 0.4725267291069031, |
|
"logits/rejected": 0.20679879188537598, |
|
"logps/chosen": -454.1123352050781, |
|
"logps/rejected": -402.17718505859375, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6523603200912476, |
|
"rewards/margins": 0.8148846626281738, |
|
"rewards/rejected": -1.467245101928711, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.845771144278607, |
|
"grad_norm": 3.745753765106201, |
|
"learning_rate": 3.3181341926867283e-07, |
|
"logits/chosen": 0.48626863956451416, |
|
"logits/rejected": 0.5913352966308594, |
|
"logps/chosen": -453.683349609375, |
|
"logps/rejected": -447.6007385253906, |
|
"loss": 0.4875, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.3742063045501709, |
|
"rewards/margins": 0.7547400593757629, |
|
"rewards/rejected": -1.128946304321289, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 1.8507462686567164, |
|
"grad_norm": 3.743507146835327, |
|
"learning_rate": 3.1086121187200667e-07, |
|
"logits/chosen": 0.11791680753231049, |
|
"logits/rejected": 0.10027449578046799, |
|
"logps/chosen": -442.95611572265625, |
|
"logps/rejected": -485.8495788574219, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3810359537601471, |
|
"rewards/margins": 0.9448322653770447, |
|
"rewards/rejected": -1.3258682489395142, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.855721393034826, |
|
"grad_norm": 3.817042350769043, |
|
"learning_rate": 2.905818257394799e-07, |
|
"logits/chosen": 0.15434856712818146, |
|
"logits/rejected": 0.07496091723442078, |
|
"logps/chosen": -409.6748046875, |
|
"logps/rejected": -421.5605773925781, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.3658009171485901, |
|
"rewards/margins": 0.9456602334976196, |
|
"rewards/rejected": -1.3114612102508545, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 1.8606965174129353, |
|
"grad_norm": 3.0990395545959473, |
|
"learning_rate": 2.7097666908729283e-07, |
|
"logits/chosen": 0.46521398425102234, |
|
"logits/rejected": 0.3093582093715668, |
|
"logps/chosen": -508.86285400390625, |
|
"logps/rejected": -487.97943115234375, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.479577898979187, |
|
"rewards/margins": 0.6940962672233582, |
|
"rewards/rejected": -1.17367422580719, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 3.0450475215911865, |
|
"learning_rate": 2.520471033126326e-07, |
|
"logits/chosen": 0.261200487613678, |
|
"logits/rejected": 0.14318135380744934, |
|
"logps/chosen": -501.09991455078125, |
|
"logps/rejected": -465.6934509277344, |
|
"loss": 0.4419, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5811474323272705, |
|
"rewards/margins": 0.7728549838066101, |
|
"rewards/rejected": -1.3540023565292358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"eval_logits/chosen": 0.1892445683479309, |
|
"eval_logits/rejected": 0.05091705545783043, |
|
"eval_logps/chosen": -458.9963073730469, |
|
"eval_logps/rejected": -422.9080810546875, |
|
"eval_loss": 0.6194455623626709, |
|
"eval_rewards/accuracies": 0.6458333134651184, |
|
"eval_rewards/chosen": -0.6699296832084656, |
|
"eval_rewards/margins": 0.4407287836074829, |
|
"eval_rewards/rejected": -1.1106584072113037, |
|
"eval_runtime": 150.1801, |
|
"eval_samples_per_second": 7.611, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8706467661691542, |
|
"grad_norm": 3.545064687728882, |
|
"learning_rate": 2.3379444289913344e-07, |
|
"logits/chosen": 0.47270292043685913, |
|
"logits/rejected": 0.26645568013191223, |
|
"logps/chosen": -409.9461975097656, |
|
"logps/rejected": -400.95550537109375, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.3254129886627197, |
|
"rewards/margins": 0.9240537881851196, |
|
"rewards/rejected": -1.2494667768478394, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 1.8756218905472637, |
|
"grad_norm": 3.722698926925659, |
|
"learning_rate": 2.1621995532559947e-07, |
|
"logits/chosen": 0.2734871506690979, |
|
"logits/rejected": 0.05334743112325668, |
|
"logps/chosen": -501.898193359375, |
|
"logps/rejected": -427.0452880859375, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5731074213981628, |
|
"rewards/margins": 0.7717230916023254, |
|
"rewards/rejected": -1.3448305130004883, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 1.8805970149253732, |
|
"grad_norm": 3.6065824031829834, |
|
"learning_rate": 1.9932486097799408e-07, |
|
"logits/chosen": 0.33175939321517944, |
|
"logits/rejected": 0.2642689049243927, |
|
"logps/chosen": -384.1080627441406, |
|
"logps/rejected": -377.58319091796875, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.4119167923927307, |
|
"rewards/margins": 0.9615055322647095, |
|
"rewards/rejected": -1.373422384262085, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.8855721393034826, |
|
"grad_norm": 2.6977298259735107, |
|
"learning_rate": 1.8311033306468552e-07, |
|
"logits/chosen": 0.4200694262981415, |
|
"logits/rejected": 0.023060984909534454, |
|
"logps/chosen": -495.07916259765625, |
|
"logps/rejected": -384.0296630859375, |
|
"loss": 0.4238, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.41816192865371704, |
|
"rewards/margins": 0.9803006649017334, |
|
"rewards/rejected": -1.3984625339508057, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.890547263681592, |
|
"grad_norm": 3.5309643745422363, |
|
"learning_rate": 1.6757749753498865e-07, |
|
"logits/chosen": 0.17390736937522888, |
|
"logits/rejected": 0.16996516287326813, |
|
"logps/chosen": -438.53564453125, |
|
"logps/rejected": -477.38470458984375, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.47648751735687256, |
|
"rewards/margins": 0.8215656876564026, |
|
"rewards/rejected": -1.2980531454086304, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.8955223880597014, |
|
"grad_norm": 3.5672707557678223, |
|
"learning_rate": 1.5272743300097316e-07, |
|
"logits/chosen": 0.35543692111968994, |
|
"logits/rejected": 0.38684284687042236, |
|
"logps/chosen": -425.6787414550781, |
|
"logps/rejected": -453.45623779296875, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.47601401805877686, |
|
"rewards/margins": 0.7526903748512268, |
|
"rewards/rejected": -1.2287043333053589, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.900497512437811, |
|
"grad_norm": 3.358347177505493, |
|
"learning_rate": 1.3856117066256225e-07, |
|
"logits/chosen": 0.2838931679725647, |
|
"logits/rejected": 0.07619883120059967, |
|
"logps/chosen": -547.037353515625, |
|
"logps/rejected": -493.8963623046875, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.11599953472614288, |
|
"rewards/margins": 0.964540958404541, |
|
"rewards/rejected": -1.080540657043457, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 1.9054726368159205, |
|
"grad_norm": 3.064255714416504, |
|
"learning_rate": 1.2507969423593225e-07, |
|
"logits/chosen": 0.29778000712394714, |
|
"logits/rejected": 0.24798990786075592, |
|
"logps/chosen": -478.83734130859375, |
|
"logps/rejected": -486.351806640625, |
|
"loss": 0.411, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.28864914178848267, |
|
"rewards/margins": 1.0248101949691772, |
|
"rewards/rejected": -1.3134592771530151, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 1.9104477611940298, |
|
"grad_norm": 3.4975333213806152, |
|
"learning_rate": 1.1228393988519381e-07, |
|
"logits/chosen": -0.012770354747772217, |
|
"logits/rejected": 0.11666233092546463, |
|
"logps/chosen": -442.9209899902344, |
|
"logps/rejected": -564.3067626953125, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.6616036891937256, |
|
"rewards/margins": 0.6503514647483826, |
|
"rewards/rejected": -1.311955213546753, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.9154228855721394, |
|
"grad_norm": 3.4289166927337646, |
|
"learning_rate": 1.0017479615738957e-07, |
|
"logits/chosen": 0.4374559223651886, |
|
"logits/rejected": 0.37482768297195435, |
|
"logps/chosen": -523.0839233398438, |
|
"logps/rejected": -634.0493774414062, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.4454476237297058, |
|
"rewards/margins": 0.5911861658096313, |
|
"rewards/rejected": -1.036633849143982, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.9203980099502487, |
|
"grad_norm": 3.3477044105529785, |
|
"learning_rate": 8.875310392079118e-08, |
|
"logits/chosen": 0.22588732838630676, |
|
"logits/rejected": -0.04192977398633957, |
|
"logps/chosen": -507.6371765136719, |
|
"logps/rejected": -440.7511901855469, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.3868103325366974, |
|
"rewards/margins": 1.0209940671920776, |
|
"rewards/rejected": -1.4078043699264526, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 1.9253731343283582, |
|
"grad_norm": 3.4166507720947266, |
|
"learning_rate": 7.801965630651165e-08, |
|
"logits/chosen": 0.0826062485575676, |
|
"logits/rejected": 0.03505164384841919, |
|
"logps/chosen": -470.573974609375, |
|
"logps/rejected": -484.4287109375, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5422804951667786, |
|
"rewards/margins": 0.6872562170028687, |
|
"rewards/rejected": -1.2295366525650024, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.927860696517413, |
|
"eval_logits/chosen": 0.2005145251750946, |
|
"eval_logits/rejected": 0.06279084831476212, |
|
"eval_logps/chosen": -459.1916809082031, |
|
"eval_logps/rejected": -423.0291442871094, |
|
"eval_loss": 0.6213585138320923, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -0.6894701719284058, |
|
"eval_rewards/margins": 0.433290034532547, |
|
"eval_rewards/rejected": -1.1227601766586304, |
|
"eval_runtime": 150.4688, |
|
"eval_samples_per_second": 7.596, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.9303482587064678, |
|
"grad_norm": 2.936657190322876, |
|
"learning_rate": 6.797519865342161e-08, |
|
"logits/chosen": 0.6600261926651001, |
|
"logits/rejected": 0.5058936476707458, |
|
"logps/chosen": -422.639404296875, |
|
"logps/rejected": -443.1512145996094, |
|
"loss": 0.4434, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5597524642944336, |
|
"rewards/margins": 0.8031996488571167, |
|
"rewards/rejected": -1.3629521131515503, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 1.935323383084577, |
|
"grad_norm": 2.7770910263061523, |
|
"learning_rate": 5.862042845640403e-08, |
|
"logits/chosen": 0.510213315486908, |
|
"logits/rejected": 0.3406936824321747, |
|
"logps/chosen": -503.9508361816406, |
|
"logps/rejected": -470.0325622558594, |
|
"loss": 0.4498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21441341936588287, |
|
"rewards/margins": 1.0556612014770508, |
|
"rewards/rejected": -1.270074486732483, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 3.034893751144409, |
|
"learning_rate": 4.9955995317908514e-08, |
|
"logits/chosen": 0.41465142369270325, |
|
"logits/rejected": 0.19684141874313354, |
|
"logps/chosen": -474.51544189453125, |
|
"logps/rejected": -428.1244201660156, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.16932249069213867, |
|
"rewards/margins": 1.1101325750350952, |
|
"rewards/rejected": -1.2794551849365234, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.945273631840796, |
|
"grad_norm": 2.902290105819702, |
|
"learning_rate": 4.198250090284961e-08, |
|
"logits/chosen": 0.22269777953624725, |
|
"logits/rejected": 0.017038095742464066, |
|
"logps/chosen": -470.39324951171875, |
|
"logps/rejected": -418.3542785644531, |
|
"loss": 0.4143, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.31770819425582886, |
|
"rewards/margins": 0.9097498655319214, |
|
"rewards/rejected": -1.227458119392395, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 1.9502487562189055, |
|
"grad_norm": 3.539262533187866, |
|
"learning_rate": 3.47004988968247e-08, |
|
"logits/chosen": 0.5226894021034241, |
|
"logits/rejected": 0.30332833528518677, |
|
"logps/chosen": -520.8106689453125, |
|
"logps/rejected": -476.7620544433594, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.49939191341400146, |
|
"rewards/margins": 0.961308479309082, |
|
"rewards/rejected": -1.460700273513794, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 1.955223880597015, |
|
"grad_norm": 3.8761661052703857, |
|
"learning_rate": 2.8110494967664713e-08, |
|
"logits/chosen": 0.30280035734176636, |
|
"logits/rejected": 0.13733740150928497, |
|
"logps/chosen": -457.7912292480469, |
|
"logps/rejected": -432.34967041015625, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.2639601230621338, |
|
"rewards/margins": 0.9372557401657104, |
|
"rewards/rejected": -1.2012157440185547, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 1.9601990049751243, |
|
"grad_norm": 3.9091761112213135, |
|
"learning_rate": 2.221294673032004e-08, |
|
"logits/chosen": -0.02172435261309147, |
|
"logits/rejected": -0.22200414538383484, |
|
"logps/chosen": -475.1850891113281, |
|
"logps/rejected": -424.7291259765625, |
|
"loss": 0.4323, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.49128663539886475, |
|
"rewards/margins": 0.7886272668838501, |
|
"rewards/rejected": -1.2799140214920044, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 1.9651741293532339, |
|
"grad_norm": 3.5163590908050537, |
|
"learning_rate": 1.7008263715085904e-08, |
|
"logits/chosen": 0.2808230519294739, |
|
"logits/rejected": 0.15157818794250488, |
|
"logps/chosen": -547.7991943359375, |
|
"logps/rejected": -508.9268798828125, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3645941913127899, |
|
"rewards/margins": 1.0243759155273438, |
|
"rewards/rejected": -1.388970136642456, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.9701492537313432, |
|
"grad_norm": 3.4658432006835938, |
|
"learning_rate": 1.24968073391607e-08, |
|
"logits/chosen": 0.16077536344528198, |
|
"logits/rejected": 0.02570854127407074, |
|
"logps/chosen": -450.45086669921875, |
|
"logps/rejected": -430.0142517089844, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4632072448730469, |
|
"rewards/margins": 0.7755333185195923, |
|
"rewards/rejected": -1.2387404441833496, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.9751243781094527, |
|
"grad_norm": 3.7998785972595215, |
|
"learning_rate": 8.678890881552715e-09, |
|
"logits/chosen": 0.16036288440227509, |
|
"logits/rejected": 0.17200781404972076, |
|
"logps/chosen": -450.34710693359375, |
|
"logps/rejected": -467.6290283203125, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.44261491298675537, |
|
"rewards/margins": 0.9358773231506348, |
|
"rewards/rejected": -1.3784922361373901, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.9800995024875623, |
|
"grad_norm": 3.656935214996338, |
|
"learning_rate": 5.554779461323101e-09, |
|
"logits/chosen": 0.13178521394729614, |
|
"logits/rejected": -0.02235669642686844, |
|
"logps/chosen": -462.8760070800781, |
|
"logps/rejected": -402.28424072265625, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.23067457973957062, |
|
"rewards/margins": 0.9117249846458435, |
|
"rewards/rejected": -1.142399549484253, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 1.9850746268656716, |
|
"grad_norm": 3.9424142837524414, |
|
"learning_rate": 3.1246900191761463e-09, |
|
"logits/chosen": 0.27911561727523804, |
|
"logits/rejected": 0.14084932208061218, |
|
"logps/chosen": -568.9130249023438, |
|
"logps/rejected": -536.7129516601562, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.47243887186050415, |
|
"rewards/margins": 0.8830912113189697, |
|
"rewards/rejected": -1.355530023574829, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.9900497512437811, |
|
"grad_norm": 3.261087656021118, |
|
"learning_rate": 1.3887913023946652e-09, |
|
"logits/chosen": 0.40209636092185974, |
|
"logits/rejected": 0.20681683719158173, |
|
"logps/chosen": -534.0907592773438, |
|
"logps/rejected": -446.65838623046875, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3766060769557953, |
|
"rewards/margins": 0.7923564910888672, |
|
"rewards/rejected": -1.1689625978469849, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.9900497512437811, |
|
"eval_logits/chosen": 0.22255222499370575, |
|
"eval_logits/rejected": 0.08632177859544754, |
|
"eval_logps/chosen": -459.1236572265625, |
|
"eval_logps/rejected": -423.0472106933594, |
|
"eval_loss": 0.6228893399238586, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": -0.6826636791229248, |
|
"eval_rewards/margins": 0.441908061504364, |
|
"eval_rewards/rejected": -1.1245719194412231, |
|
"eval_runtime": 150.7224, |
|
"eval_samples_per_second": 7.583, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.9950248756218905, |
|
"grad_norm": 3.3245160579681396, |
|
"learning_rate": 3.4720385312492223e-10, |
|
"logits/chosen": 0.24731820821762085, |
|
"logits/rejected": 0.46343696117401123, |
|
"logps/chosen": -394.77288818359375, |
|
"logps/rejected": -496.1209716796875, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.31272804737091064, |
|
"rewards/margins": 0.8752219080924988, |
|
"rewards/rejected": -1.1879500150680542, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.5245065689086914, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.11977434158325195, |
|
"logits/rejected": 0.1967284381389618, |
|
"logps/chosen": -452.91552734375, |
|
"logps/rejected": -495.7862243652344, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.47581416368484497, |
|
"rewards/margins": 0.5203736424446106, |
|
"rewards/rejected": -0.9961878657341003, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1608, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5581450056080794, |
|
"train_runtime": 39294.0243, |
|
"train_samples_per_second": 2.619, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 1608, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|