|
{ |
|
"best_metric": 0.3238605260848999, |
|
"best_model_checkpoint": "/share/project/zhaolulu/LLama-factory/saves/qwen-7B/full/med_Aquila3_dpo_2e-7_0.03_v1/checkpoint-350", |
|
"epoch": 1.9553072625698324, |
|
"eval_steps": 50, |
|
"global_step": 350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.13675436076791, |
|
"learning_rate": 1e-08, |
|
"logits/chosen": -2.169825315475464, |
|
"logits/rejected": -2.3517727851867676, |
|
"logps/chosen": -370.6702575683594, |
|
"logps/rejected": -186.08995056152344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 10.37360079104283, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": -2.1483254432678223, |
|
"logits/rejected": -2.422952890396118, |
|
"logps/chosen": -364.8175048828125, |
|
"logps/rejected": -104.55035400390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 15.677882625329767, |
|
"learning_rate": 3e-08, |
|
"logits/chosen": -2.1419529914855957, |
|
"logits/rejected": -2.258122682571411, |
|
"logps/chosen": -282.2867431640625, |
|
"logps/rejected": -143.6208953857422, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0066271936520934105, |
|
"rewards/margins": -0.009020809084177017, |
|
"rewards/rejected": 0.0023936154320836067, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.428617731690887, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.110610008239746, |
|
"logits/rejected": -2.4376320838928223, |
|
"logps/chosen": -369.72955322265625, |
|
"logps/rejected": -118.06014251708984, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.003459886647760868, |
|
"rewards/margins": 0.0005237694713287055, |
|
"rewards/rejected": -0.003983656410127878, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 13.025493935888877, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -2.2498154640197754, |
|
"logits/rejected": -2.409123420715332, |
|
"logps/chosen": -144.5589599609375, |
|
"logps/rejected": -99.77885437011719, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.006283703725785017, |
|
"rewards/margins": -0.003716606879606843, |
|
"rewards/rejected": -0.0025670973118394613, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.265681159744263, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": -2.1781773567199707, |
|
"logits/rejected": -2.3208632469177246, |
|
"logps/chosen": -194.74285888671875, |
|
"logps/rejected": -97.00983428955078, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0076182084158062935, |
|
"rewards/margins": -0.010748159140348434, |
|
"rewards/rejected": 0.00312995002605021, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.347084629630716, |
|
"learning_rate": 6.999999999999999e-08, |
|
"logits/chosen": -2.0979971885681152, |
|
"logits/rejected": -2.405662775039673, |
|
"logps/chosen": -313.389404296875, |
|
"logps/rejected": -66.95777130126953, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.004818655550479889, |
|
"rewards/margins": -0.0025399725418537855, |
|
"rewards/rejected": 0.007358627859503031, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.350533701450699, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -2.0918493270874023, |
|
"logits/rejected": -2.3058016300201416, |
|
"logps/chosen": -431.85345458984375, |
|
"logps/rejected": -151.113037109375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002554692327976227, |
|
"rewards/margins": 0.0028948320541530848, |
|
"rewards/rejected": -0.00034014007542282343, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.289403141621039, |
|
"learning_rate": 9e-08, |
|
"logits/chosen": -2.180878162384033, |
|
"logits/rejected": -2.5295894145965576, |
|
"logps/chosen": -463.82879638671875, |
|
"logps/rejected": -202.85638427734375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.004108843859285116, |
|
"rewards/margins": 0.004055410623550415, |
|
"rewards/rejected": 5.343282828107476e-05, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 12.99458204799066, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -2.042452573776245, |
|
"logits/rejected": -2.2264420986175537, |
|
"logps/chosen": -298.3495788574219, |
|
"logps/rejected": -61.18866729736328, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.0035140065010637045, |
|
"rewards/margins": -0.0031997794285416603, |
|
"rewards/rejected": 0.0067137861624360085, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 14.055290984256162, |
|
"learning_rate": 1.1e-07, |
|
"logits/chosen": -2.081472873687744, |
|
"logits/rejected": -2.1692185401916504, |
|
"logps/chosen": -245.54763793945312, |
|
"logps/rejected": -73.15451049804688, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005330131854861975, |
|
"rewards/margins": 0.00029961264226585627, |
|
"rewards/rejected": 0.00503051932901144, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 11.702245206286774, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -2.1000940799713135, |
|
"logits/rejected": -2.440720558166504, |
|
"logps/chosen": -283.462646484375, |
|
"logps/rejected": -72.38239288330078, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003204283071681857, |
|
"rewards/margins": 0.002218131674453616, |
|
"rewards/rejected": 0.0009861512808129191, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 13.763456208726748, |
|
"learning_rate": 1.3e-07, |
|
"logits/chosen": -2.158162832260132, |
|
"logits/rejected": -2.407245635986328, |
|
"logps/chosen": -393.5798645019531, |
|
"logps/rejected": -142.84457397460938, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0038590808399021626, |
|
"rewards/margins": 0.0019199349917471409, |
|
"rewards/rejected": 0.0019391458481550217, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.270738607964903, |
|
"learning_rate": 1.3999999999999998e-07, |
|
"logits/chosen": -2.1504528522491455, |
|
"logits/rejected": -2.278064250946045, |
|
"logps/chosen": -228.805908203125, |
|
"logps/rejected": -110.54583740234375, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0015889224596321583, |
|
"rewards/margins": -0.0030970978550612926, |
|
"rewards/rejected": 0.001508174929767847, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 15.508438946473573, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -2.2077159881591797, |
|
"logits/rejected": -2.457319736480713, |
|
"logps/chosen": -420.102294921875, |
|
"logps/rejected": -168.63453674316406, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0016078853514045477, |
|
"rewards/margins": -0.007479728199541569, |
|
"rewards/rejected": 0.005871844012290239, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 13.409686768799023, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -2.0918636322021484, |
|
"logits/rejected": -2.2362842559814453, |
|
"logps/chosen": -361.9263916015625, |
|
"logps/rejected": -204.3945770263672, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01383258868008852, |
|
"rewards/margins": 0.010981044732034206, |
|
"rewards/rejected": 0.0028515439480543137, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 16.153489802534672, |
|
"learning_rate": 1.7e-07, |
|
"logits/chosen": -2.137455701828003, |
|
"logits/rejected": -2.4219908714294434, |
|
"logps/chosen": -340.22601318359375, |
|
"logps/rejected": -149.10215759277344, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0055633848533034325, |
|
"rewards/margins": 0.007908429950475693, |
|
"rewards/rejected": -0.0023450437001883984, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.924094161067577, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": -2.3819501399993896, |
|
"logits/rejected": -2.3850955963134766, |
|
"logps/chosen": -257.6195373535156, |
|
"logps/rejected": -229.90989685058594, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.005492844618856907, |
|
"rewards/margins": -0.01068685669451952, |
|
"rewards/rejected": 0.0051940125413239, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 14.629325842518503, |
|
"learning_rate": 1.8999999999999998e-07, |
|
"logits/chosen": -2.1740121841430664, |
|
"logits/rejected": -2.4789979457855225, |
|
"logps/chosen": -357.2406311035156, |
|
"logps/rejected": -125.6826400756836, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005801108665764332, |
|
"rewards/margins": 0.0024439399130642414, |
|
"rewards/rejected": 0.003357168985530734, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 12.509652455926858, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -2.1090314388275146, |
|
"logits/rejected": -2.3186936378479004, |
|
"logps/chosen": -437.3341979980469, |
|
"logps/rejected": -127.853759765625, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00899372436106205, |
|
"rewards/margins": 0.005673188716173172, |
|
"rewards/rejected": 0.0033205365762114525, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.868342718304051, |
|
"learning_rate": 1.999956805025437e-07, |
|
"logits/chosen": -2.0439090728759766, |
|
"logits/rejected": -2.2628588676452637, |
|
"logps/chosen": -329.084716796875, |
|
"logps/rejected": -135.65786743164062, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00382827827706933, |
|
"rewards/margins": -0.00935549009591341, |
|
"rewards/rejected": 0.005527212284505367, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 15.025885684471918, |
|
"learning_rate": 1.9998272238333603e-07, |
|
"logits/chosen": -2.1477479934692383, |
|
"logits/rejected": -2.210273504257202, |
|
"logps/chosen": -313.54058837890625, |
|
"logps/rejected": -290.3620300292969, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.014177151955664158, |
|
"rewards/margins": 0.014184605330228806, |
|
"rewards/rejected": -7.453141734004021e-06, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 13.632297626115403, |
|
"learning_rate": 1.9996112676182827e-07, |
|
"logits/chosen": -2.164405345916748, |
|
"logits/rejected": -2.393259286880493, |
|
"logps/chosen": -378.54864501953125, |
|
"logps/rejected": -167.57174682617188, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0553203821182251, |
|
"rewards/margins": 0.05659153684973717, |
|
"rewards/rejected": -0.0012711524032056332, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 13.912168029436412, |
|
"learning_rate": 1.9993089550366496e-07, |
|
"logits/chosen": -2.123502016067505, |
|
"logits/rejected": -2.4436779022216797, |
|
"logps/chosen": -382.0030822753906, |
|
"logps/rejected": -100.3667221069336, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04434547945857048, |
|
"rewards/margins": 0.044589102268218994, |
|
"rewards/rejected": -0.00024362141266465187, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.126485544662991, |
|
"learning_rate": 1.9989203122052308e-07, |
|
"logits/chosen": -2.1825976371765137, |
|
"logits/rejected": -2.349247455596924, |
|
"logps/chosen": -336.5179443359375, |
|
"logps/rejected": -156.41336059570312, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.012656629085540771, |
|
"rewards/margins": 0.01318385824561119, |
|
"rewards/rejected": -0.0005272297421470284, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 12.881133768022176, |
|
"learning_rate": 1.9984453726988597e-07, |
|
"logits/chosen": -2.162121057510376, |
|
"logits/rejected": -2.342599630355835, |
|
"logps/chosen": -351.52069091796875, |
|
"logps/rejected": -105.91226959228516, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.061001330614089966, |
|
"rewards/margins": 0.07135318219661713, |
|
"rewards/rejected": -0.010351852513849735, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.920957867570188, |
|
"learning_rate": 1.9978841775475365e-07, |
|
"logits/chosen": -2.109081745147705, |
|
"logits/rejected": -2.3539679050445557, |
|
"logps/chosen": -291.2568664550781, |
|
"logps/rejected": -106.35763549804688, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.040990542620420456, |
|
"rewards/margins": 0.0412316657602787, |
|
"rewards/rejected": -0.0002411194145679474, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 14.475145205567406, |
|
"learning_rate": 1.9972367752328823e-07, |
|
"logits/chosen": -2.0869147777557373, |
|
"logits/rejected": -2.354045867919922, |
|
"logps/chosen": -456.2073669433594, |
|
"logps/rejected": -169.84324645996094, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.055279482156038284, |
|
"rewards/margins": 0.05866669863462448, |
|
"rewards/rejected": -0.0033872141502797604, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 12.532121666063755, |
|
"learning_rate": 1.996503221683949e-07, |
|
"logits/chosen": -2.0160179138183594, |
|
"logits/rejected": -2.3588764667510986, |
|
"logps/chosen": -390.41290283203125, |
|
"logps/rejected": -133.29124450683594, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.038773227483034134, |
|
"rewards/margins": 0.042331479489803314, |
|
"rewards/rejected": -0.0035582496784627438, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.419020536226967, |
|
"learning_rate": 1.9956835802723916e-07, |
|
"logits/chosen": -2.209843873977661, |
|
"logits/rejected": -2.4064574241638184, |
|
"logps/chosen": -405.7186279296875, |
|
"logps/rejected": -167.26657104492188, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05138365924358368, |
|
"rewards/margins": 0.04216768592596054, |
|
"rewards/rejected": 0.009215973317623138, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.86765582737344, |
|
"learning_rate": 1.9947779218069888e-07, |
|
"logits/chosen": -2.1761481761932373, |
|
"logits/rejected": -2.1804559230804443, |
|
"logps/chosen": -173.52847290039062, |
|
"logps/rejected": -113.20588684082031, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.04976405203342438, |
|
"rewards/margins": 0.04497087001800537, |
|
"rewards/rejected": 0.004793173633515835, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.502969061621119, |
|
"learning_rate": 1.9937863245275302e-07, |
|
"logits/chosen": -2.07767915725708, |
|
"logits/rejected": -2.260467052459717, |
|
"logps/chosen": -349.59149169921875, |
|
"logps/rejected": -194.00537109375, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.054776448756456375, |
|
"rewards/margins": 0.057643141597509384, |
|
"rewards/rejected": -0.002866692841053009, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.857022449597613, |
|
"learning_rate": 1.9927088740980536e-07, |
|
"logits/chosen": -2.1136598587036133, |
|
"logits/rejected": -2.4379379749298096, |
|
"logps/chosen": -431.517822265625, |
|
"logps/rejected": -155.318603515625, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09774884581565857, |
|
"rewards/margins": 0.09220996499061584, |
|
"rewards/rejected": 0.00553888501599431, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.458981035263896, |
|
"learning_rate": 1.991545663599448e-07, |
|
"logits/chosen": -2.1781678199768066, |
|
"logits/rejected": -2.398679494857788, |
|
"logps/chosen": -308.6724548339844, |
|
"logps/rejected": -104.43415069580078, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.11533545702695847, |
|
"rewards/margins": 0.11893679946660995, |
|
"rewards/rejected": -0.003601343370974064, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.628647934096392, |
|
"learning_rate": 1.990296793521408e-07, |
|
"logits/chosen": -1.9812856912612915, |
|
"logits/rejected": -2.246541738510132, |
|
"logps/chosen": -308.601806640625, |
|
"logps/rejected": -106.56471252441406, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.1276446282863617, |
|
"rewards/margins": 0.12667663395404816, |
|
"rewards/rejected": 0.0009679919457994401, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.989241900876351, |
|
"learning_rate": 1.9889623717537563e-07, |
|
"logits/chosen": -2.0774359703063965, |
|
"logits/rejected": -2.271974802017212, |
|
"logps/chosen": -310.50506591796875, |
|
"logps/rejected": -144.49606323242188, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1192081868648529, |
|
"rewards/margins": 0.11847703158855438, |
|
"rewards/rejected": 0.0007311587687581778, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.048092750050337, |
|
"learning_rate": 1.9875425135771217e-07, |
|
"logits/chosen": -2.0446596145629883, |
|
"logits/rejected": -2.348402738571167, |
|
"logps/chosen": -317.9515075683594, |
|
"logps/rejected": -67.82337951660156, |
|
"loss": 0.64, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.11614331603050232, |
|
"rewards/margins": 0.116356760263443, |
|
"rewards/rejected": -0.00021345657296478748, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.41812256063119, |
|
"learning_rate": 1.9860373416529801e-07, |
|
"logits/chosen": -2.1628098487854004, |
|
"logits/rejected": -2.5593316555023193, |
|
"logps/chosen": -365.1604309082031, |
|
"logps/rejected": -121.07073974609375, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09028297662734985, |
|
"rewards/margins": 0.0999036654829979, |
|
"rewards/rejected": -0.009620685130357742, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.66925837158948, |
|
"learning_rate": 1.9844469860130572e-07, |
|
"logits/chosen": -2.1404829025268555, |
|
"logits/rejected": -2.406595468521118, |
|
"logps/chosen": -387.9655456542969, |
|
"logps/rejected": -159.52334594726562, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13599559664726257, |
|
"rewards/margins": 0.12349364906549454, |
|
"rewards/rejected": 0.012501951307058334, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 8.314112135425724, |
|
"learning_rate": 1.9827715840480958e-07, |
|
"logits/chosen": -2.1365151405334473, |
|
"logits/rejected": -2.225376605987549, |
|
"logps/chosen": -170.3944091796875, |
|
"logps/rejected": -76.3488540649414, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0686836838722229, |
|
"rewards/margins": 0.08776295185089111, |
|
"rewards/rejected": -0.019079256802797318, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.050564274359012, |
|
"learning_rate": 1.9810112804959865e-07, |
|
"logits/chosen": -2.1081488132476807, |
|
"logits/rejected": -2.4641435146331787, |
|
"logps/chosen": -432.20794677734375, |
|
"logps/rejected": -157.08131408691406, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1458844095468521, |
|
"rewards/margins": 0.15924030542373657, |
|
"rewards/rejected": -0.013355905190110207, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.532623517273787, |
|
"learning_rate": 1.9791662274292634e-07, |
|
"logits/chosen": -2.1743876934051514, |
|
"logits/rejected": -2.4913887977600098, |
|
"logps/chosen": -322.1396484375, |
|
"logps/rejected": -71.1148681640625, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.15435950458049774, |
|
"rewards/margins": 0.1565544456243515, |
|
"rewards/rejected": -0.002194945001974702, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.51412373772502, |
|
"learning_rate": 1.9772365842419675e-07, |
|
"logits/chosen": -1.9813337326049805, |
|
"logits/rejected": -2.4765429496765137, |
|
"logps/chosen": -403.84197998046875, |
|
"logps/rejected": -107.24089050292969, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.12991374731063843, |
|
"rewards/margins": 0.14337915182113647, |
|
"rewards/rejected": -0.01346538309007883, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 13.245694792329783, |
|
"learning_rate": 1.9752225176358755e-07, |
|
"logits/chosen": -2.0392189025878906, |
|
"logits/rejected": -2.2406013011932373, |
|
"logps/chosen": -236.0980682373047, |
|
"logps/rejected": -62.11045455932617, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08092227578163147, |
|
"rewards/margins": 0.082358218729496, |
|
"rewards/rejected": -0.001435947371646762, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.762023866110251, |
|
"learning_rate": 1.9731242016060984e-07, |
|
"logits/chosen": -2.055626153945923, |
|
"logits/rejected": -2.515719175338745, |
|
"logps/chosen": -431.6874694824219, |
|
"logps/rejected": -88.8346176147461, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.17793263494968414, |
|
"rewards/margins": 0.18582700192928314, |
|
"rewards/rejected": -0.007894383743405342, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.28929712626817, |
|
"learning_rate": 1.970941817426052e-07, |
|
"logits/chosen": -1.9879474639892578, |
|
"logits/rejected": -2.3104357719421387, |
|
"logps/chosen": -211.4118194580078, |
|
"logps/rejected": -30.24435043334961, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07762061804533005, |
|
"rewards/margins": 0.08949979394674301, |
|
"rewards/rejected": -0.01187918335199356, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.941394429559, |
|
"learning_rate": 1.9686755536317942e-07, |
|
"logits/chosen": -1.982191562652588, |
|
"logits/rejected": -2.0665199756622314, |
|
"logps/chosen": -139.63328552246094, |
|
"logps/rejected": -49.644195556640625, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0728769525885582, |
|
"rewards/margins": 0.07204664498567581, |
|
"rewards/rejected": 0.0008303160429932177, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 10.284161862377836, |
|
"learning_rate": 1.9663256060057392e-07, |
|
"logits/chosen": -2.2932796478271484, |
|
"logits/rejected": -2.4093167781829834, |
|
"logps/chosen": -330.951416015625, |
|
"logps/rejected": -176.99734497070312, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.13722312450408936, |
|
"rewards/margins": 0.1075534000992775, |
|
"rewards/rejected": 0.02966972626745701, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 10.587404352257455, |
|
"learning_rate": 1.9638921775597422e-07, |
|
"logits/chosen": -1.95592200756073, |
|
"logits/rejected": -2.303375482559204, |
|
"logps/chosen": -498.4439697265625, |
|
"logps/rejected": -218.24583435058594, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.15323582291603088, |
|
"rewards/margins": 0.1492384374141693, |
|
"rewards/rejected": 0.00399737898260355, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 10.723511661626334, |
|
"learning_rate": 1.9613754785175638e-07, |
|
"logits/chosen": -2.0556998252868652, |
|
"logits/rejected": -2.219463586807251, |
|
"logps/chosen": -224.0782012939453, |
|
"logps/rejected": -103.49295806884766, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08105921745300293, |
|
"rewards/margins": 0.06892888247966766, |
|
"rewards/rejected": 0.01213033776730299, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -2.063699722290039, |
|
"eval_logits/rejected": -2.314316987991333, |
|
"eval_logps/chosen": -331.750244140625, |
|
"eval_logps/rejected": -116.3936538696289, |
|
"eval_loss": 0.6081522703170776, |
|
"eval_rewards/accuracies": 0.875, |
|
"eval_rewards/chosen": 0.1798781156539917, |
|
"eval_rewards/margins": 0.19427311420440674, |
|
"eval_rewards/rejected": -0.014395004138350487, |
|
"eval_runtime": 186.2235, |
|
"eval_samples_per_second": 6.836, |
|
"eval_steps_per_second": 0.859, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 10.09005570072562, |
|
"learning_rate": 1.9587757262967054e-07, |
|
"logits/chosen": -2.181702136993408, |
|
"logits/rejected": -2.3205502033233643, |
|
"logps/chosen": -341.22662353515625, |
|
"logps/rejected": -122.17086791992188, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24404488503932953, |
|
"rewards/margins": 0.24747334420681, |
|
"rewards/rejected": -0.0034284451976418495, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.81518303930105, |
|
"learning_rate": 1.9560931454896297e-07, |
|
"logits/chosen": -2.1127026081085205, |
|
"logits/rejected": -2.3931002616882324, |
|
"logps/chosen": -210.6508026123047, |
|
"logps/rejected": -48.36506652832031, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.15659283101558685, |
|
"rewards/margins": 0.1508672684431076, |
|
"rewards/rejected": 0.00572555884718895, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 9.208140684891914, |
|
"learning_rate": 1.9533279678443557e-07, |
|
"logits/chosen": -2.1042847633361816, |
|
"logits/rejected": -2.1663877964019775, |
|
"logps/chosen": -252.5467987060547, |
|
"logps/rejected": -164.3186492919922, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.128390371799469, |
|
"rewards/margins": 0.13332779705524445, |
|
"rewards/rejected": -0.004937426187098026, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 7.509168689756416, |
|
"learning_rate": 1.95048043224444e-07, |
|
"logits/chosen": -2.0268847942352295, |
|
"logits/rejected": -2.2786049842834473, |
|
"logps/chosen": -317.0534973144531, |
|
"logps/rejected": -171.33995056152344, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21348123252391815, |
|
"rewards/margins": 0.20053228735923767, |
|
"rewards/rejected": 0.012948956340551376, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.171851534420124, |
|
"learning_rate": 1.9475507846883373e-07, |
|
"logits/chosen": -2.0638015270233154, |
|
"logits/rejected": -2.220968723297119, |
|
"logps/chosen": -384.039794921875, |
|
"logps/rejected": -282.49658203125, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24091266095638275, |
|
"rewards/margins": 0.24727410078048706, |
|
"rewards/rejected": -0.0063614556565880775, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.56365071461843, |
|
"learning_rate": 1.944539278268152e-07, |
|
"logits/chosen": -1.9981962442398071, |
|
"logits/rejected": -2.3010456562042236, |
|
"logps/chosen": -343.3883361816406, |
|
"logps/rejected": -97.42654418945312, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24590067565441132, |
|
"rewards/margins": 0.2593224346637726, |
|
"rewards/rejected": -0.013421745970845222, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 7.892289862886692, |
|
"learning_rate": 1.94144617314777e-07, |
|
"logits/chosen": -2.0351200103759766, |
|
"logits/rejected": -2.301356792449951, |
|
"logps/chosen": -372.68609619140625, |
|
"logps/rejected": -174.614013671875, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2827102541923523, |
|
"rewards/margins": 0.2959131598472595, |
|
"rewards/rejected": -0.013202919624745846, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 7.3420110886371885, |
|
"learning_rate": 1.938271736540385e-07, |
|
"logits/chosen": -2.204134464263916, |
|
"logits/rejected": -2.3048620223999023, |
|
"logps/chosen": -346.56146240234375, |
|
"logps/rejected": -271.0859375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22849659621715546, |
|
"rewards/margins": 0.20856241881847382, |
|
"rewards/rejected": 0.01993417926132679, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 6.00666118956556, |
|
"learning_rate": 1.935016242685415e-07, |
|
"logits/chosen": -2.0446014404296875, |
|
"logits/rejected": -2.142392635345459, |
|
"logps/chosen": -238.8208770751953, |
|
"logps/rejected": -118.88178253173828, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.22309349477291107, |
|
"rewards/margins": 0.1636679619550705, |
|
"rewards/rejected": 0.05942551791667938, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 7.656028095081895, |
|
"learning_rate": 1.931679972824807e-07, |
|
"logits/chosen": -2.0129806995391846, |
|
"logits/rejected": -2.261230230331421, |
|
"logps/chosen": -305.9075927734375, |
|
"logps/rejected": -101.50321197509766, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.25226718187332153, |
|
"rewards/margins": 0.2614496052265167, |
|
"rewards/rejected": -0.00918243546038866, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 12.171466631603687, |
|
"learning_rate": 1.928263215178746e-07, |
|
"logits/chosen": -1.9546587467193604, |
|
"logits/rejected": -2.1961617469787598, |
|
"logps/chosen": -279.5615539550781, |
|
"logps/rejected": -115.06743621826172, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17554552853107452, |
|
"rewards/margins": 0.18559010326862335, |
|
"rewards/rejected": -0.010044600814580917, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.610218016041575, |
|
"learning_rate": 1.9247662649207506e-07, |
|
"logits/chosen": -2.1121904850006104, |
|
"logits/rejected": -2.3557066917419434, |
|
"logps/chosen": -336.6860046386719, |
|
"logps/rejected": -159.52703857421875, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.32165953516960144, |
|
"rewards/margins": 0.3067389726638794, |
|
"rewards/rejected": 0.014920572750270367, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 7.941038400658689, |
|
"learning_rate": 1.9211894241521757e-07, |
|
"logits/chosen": -2.0181961059570312, |
|
"logits/rejected": -2.23909592628479, |
|
"logps/chosen": -240.93853759765625, |
|
"logps/rejected": -104.80455780029297, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.19158579409122467, |
|
"rewards/margins": 0.2334558218717575, |
|
"rewards/rejected": -0.041870009154081345, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 6.351871394862074, |
|
"learning_rate": 1.917533001876113e-07, |
|
"logits/chosen": -2.025303840637207, |
|
"logits/rejected": -2.2152905464172363, |
|
"logps/chosen": -288.76513671875, |
|
"logps/rejected": -166.4485321044922, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.18773403763771057, |
|
"rewards/margins": 0.14920204877853394, |
|
"rewards/rejected": 0.038532011210918427, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 6.998604694555862, |
|
"learning_rate": 1.913797313970697e-07, |
|
"logits/chosen": -1.9997467994689941, |
|
"logits/rejected": -2.1551594734191895, |
|
"logps/chosen": -210.7803955078125, |
|
"logps/rejected": -86.51407623291016, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.1796235293149948, |
|
"rewards/margins": 0.19531936943531036, |
|
"rewards/rejected": -0.0156958419829607, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.577421777934189, |
|
"learning_rate": 1.9099826831618167e-07, |
|
"logits/chosen": -2.1465611457824707, |
|
"logits/rejected": -2.345120906829834, |
|
"logps/chosen": -377.3128662109375, |
|
"logps/rejected": -223.61959838867188, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39910346269607544, |
|
"rewards/margins": 0.4542591869831085, |
|
"rewards/rejected": -0.055155716836452484, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.2250131829631865, |
|
"learning_rate": 1.9060894389952328e-07, |
|
"logits/chosen": -2.024362802505493, |
|
"logits/rejected": -2.0789616107940674, |
|
"logps/chosen": -199.9416961669922, |
|
"logps/rejected": -98.576416015625, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2663131654262543, |
|
"rewards/margins": 0.2556077837944031, |
|
"rewards/rejected": 0.010705405846238136, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.189726097166299, |
|
"learning_rate": 1.9021179178081103e-07, |
|
"logits/chosen": -2.0059540271759033, |
|
"logits/rejected": -2.2304420471191406, |
|
"logps/chosen": -380.8197326660156, |
|
"logps/rejected": -169.16343688964844, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4966823160648346, |
|
"rewards/margins": 0.5529358386993408, |
|
"rewards/rejected": -0.05625356361269951, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 6.249636636104176, |
|
"learning_rate": 1.8980684626999638e-07, |
|
"logits/chosen": -2.033315896987915, |
|
"logits/rejected": -2.303514242172241, |
|
"logps/chosen": -260.1401672363281, |
|
"logps/rejected": -92.7634506225586, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2530212700366974, |
|
"rewards/margins": 0.29110509157180786, |
|
"rewards/rejected": -0.03808382526040077, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 6.437391045028375, |
|
"learning_rate": 1.8939414235030132e-07, |
|
"logits/chosen": -1.8933184146881104, |
|
"logits/rejected": -2.15160870552063, |
|
"logps/chosen": -208.13137817382812, |
|
"logps/rejected": -48.8433952331543, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.22771571576595306, |
|
"rewards/margins": 0.2676602900028229, |
|
"rewards/rejected": -0.0399446040391922, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 7.0182488830117675, |
|
"learning_rate": 1.889737156751965e-07, |
|
"logits/chosen": -1.9940942525863647, |
|
"logits/rejected": -2.2075793743133545, |
|
"logps/chosen": -337.55322265625, |
|
"logps/rejected": -134.0843505859375, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3136310279369354, |
|
"rewards/margins": 0.3688744008541107, |
|
"rewards/rejected": -0.0552433617413044, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.875925492294041, |
|
"learning_rate": 1.8854560256532097e-07, |
|
"logits/chosen": -2.1553783416748047, |
|
"logits/rejected": -2.242018938064575, |
|
"logps/chosen": -217.90322875976562, |
|
"logps/rejected": -135.96380615234375, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2514539659023285, |
|
"rewards/margins": 0.24648477137088776, |
|
"rewards/rejected": 0.004969220608472824, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.420556289383338, |
|
"learning_rate": 1.8810984000534455e-07, |
|
"logits/chosen": -2.171513319015503, |
|
"logits/rejected": -2.4142439365386963, |
|
"logps/chosen": -372.5497741699219, |
|
"logps/rejected": -107.42914581298828, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3514203429222107, |
|
"rewards/margins": 0.3896327614784241, |
|
"rewards/rejected": -0.03821243345737457, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.375186295982661, |
|
"learning_rate": 1.8766646564077262e-07, |
|
"logits/chosen": -1.966844081878662, |
|
"logits/rejected": -2.2431657314300537, |
|
"logps/chosen": -319.42822265625, |
|
"logps/rejected": -84.1899185180664, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3488491177558899, |
|
"rewards/margins": 0.4105728566646576, |
|
"rewards/rejected": -0.06172379106283188, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.838393126841468, |
|
"learning_rate": 1.8721551777469395e-07, |
|
"logits/chosen": -2.1016697883605957, |
|
"logits/rejected": -2.328932762145996, |
|
"logps/chosen": -336.9084777832031, |
|
"logps/rejected": -195.07691955566406, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3171999156475067, |
|
"rewards/margins": 0.34964245557785034, |
|
"rewards/rejected": -0.03244255110621452, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.149492969231708, |
|
"learning_rate": 1.8675703536447177e-07, |
|
"logits/chosen": -2.071046829223633, |
|
"logits/rejected": -2.1905810832977295, |
|
"logps/chosen": -261.36724853515625, |
|
"logps/rejected": -151.13186645507812, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2710087299346924, |
|
"rewards/margins": 0.234308123588562, |
|
"rewards/rejected": 0.03670059144496918, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.008863437719162, |
|
"learning_rate": 1.8629105801837816e-07, |
|
"logits/chosen": -2.0235931873321533, |
|
"logits/rejected": -2.2131049633026123, |
|
"logps/chosen": -299.67791748046875, |
|
"logps/rejected": -113.56851196289062, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16502772271633148, |
|
"rewards/margins": 0.21942166984081268, |
|
"rewards/rejected": -0.0543939545750618, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.378526341274013, |
|
"learning_rate": 1.8581762599217238e-07, |
|
"logits/chosen": -1.9962934255599976, |
|
"logits/rejected": -2.1097159385681152, |
|
"logps/chosen": -151.64990234375, |
|
"logps/rejected": -54.187625885009766, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.16601191461086273, |
|
"rewards/margins": 0.19980715215206146, |
|
"rewards/rejected": -0.03379523381590843, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 6.104689013268376, |
|
"learning_rate": 1.8533678018562307e-07, |
|
"logits/chosen": -1.9824721813201904, |
|
"logits/rejected": -2.10068678855896, |
|
"logps/chosen": -222.87936401367188, |
|
"logps/rejected": -121.28485870361328, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.19363561272621155, |
|
"rewards/margins": 0.1619020700454712, |
|
"rewards/rejected": 0.031733546406030655, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.874550794444151, |
|
"learning_rate": 1.8484856213897496e-07, |
|
"logits/chosen": -2.178605079650879, |
|
"logits/rejected": -2.4736976623535156, |
|
"logps/chosen": -378.0633544921875, |
|
"logps/rejected": -142.17327880859375, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.556702196598053, |
|
"rewards/margins": 0.6439647674560547, |
|
"rewards/rejected": -0.08726256340742111, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.569584190243061, |
|
"learning_rate": 1.843530140293603e-07, |
|
"logits/chosen": -2.123403787612915, |
|
"logits/rejected": -2.3839967250823975, |
|
"logps/chosen": -307.99310302734375, |
|
"logps/rejected": -94.0133285522461, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4116560220718384, |
|
"rewards/margins": 0.46545499563217163, |
|
"rewards/rejected": -0.053798969835042953, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.280242196576865, |
|
"learning_rate": 1.8385017866715505e-07, |
|
"logits/chosen": -1.9856494665145874, |
|
"logits/rejected": -2.2972702980041504, |
|
"logps/chosen": -420.359619140625, |
|
"logps/rejected": -149.61878967285156, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.41265326738357544, |
|
"rewards/margins": 0.4080853760242462, |
|
"rewards/rejected": 0.00456790579482913, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.313182239745635, |
|
"learning_rate": 1.833400994922806e-07, |
|
"logits/chosen": -1.9181156158447266, |
|
"logits/rejected": -2.2001190185546875, |
|
"logps/chosen": -341.0047302246094, |
|
"logps/rejected": -157.48458862304688, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3060517907142639, |
|
"rewards/margins": 0.2681809067726135, |
|
"rewards/rejected": 0.037870921194553375, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.27246004101099, |
|
"learning_rate": 1.8282282057045086e-07, |
|
"logits/chosen": -2.0791337490081787, |
|
"logits/rejected": -2.3352248668670654, |
|
"logps/chosen": -395.2298278808594, |
|
"logps/rejected": -159.21627807617188, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3726465106010437, |
|
"rewards/margins": 0.3795578181743622, |
|
"rewards/rejected": -0.006911378353834152, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.5921705697506665, |
|
"learning_rate": 1.8229838658936564e-07, |
|
"logits/chosen": -2.108044385910034, |
|
"logits/rejected": -2.3148159980773926, |
|
"logps/chosen": -296.7893981933594, |
|
"logps/rejected": -119.67386627197266, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4559445381164551, |
|
"rewards/margins": 0.5166485905647278, |
|
"rewards/rejected": -0.06070411577820778, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.011168548073458, |
|
"learning_rate": 1.8176684285484982e-07, |
|
"logits/chosen": -2.0125370025634766, |
|
"logits/rejected": -2.4413373470306396, |
|
"logps/chosen": -489.0357360839844, |
|
"logps/rejected": -143.520751953125, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46227729320526123, |
|
"rewards/margins": 0.5473066568374634, |
|
"rewards/rejected": -0.08502937108278275, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.544525494627247, |
|
"learning_rate": 1.8122823528693966e-07, |
|
"logits/chosen": -2.0247550010681152, |
|
"logits/rejected": -2.2515745162963867, |
|
"logps/chosen": -409.3858947753906, |
|
"logps/rejected": -185.4442596435547, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.37112706899642944, |
|
"rewards/margins": 0.4832947850227356, |
|
"rewards/rejected": -0.11216770112514496, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.225956930747786, |
|
"learning_rate": 1.8068261041591546e-07, |
|
"logits/chosen": -1.9530370235443115, |
|
"logits/rejected": -2.0214812755584717, |
|
"logps/chosen": -193.56881713867188, |
|
"logps/rejected": -113.12052154541016, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1621086150407791, |
|
"rewards/margins": 0.21262691915035248, |
|
"rewards/rejected": -0.05051829293370247, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.362015290793359, |
|
"learning_rate": 1.8013001537828212e-07, |
|
"logits/chosen": -1.9773004055023193, |
|
"logits/rejected": -2.4064035415649414, |
|
"logps/chosen": -473.7355651855469, |
|
"logps/rejected": -108.94012451171875, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7071879506111145, |
|
"rewards/margins": 0.8128292560577393, |
|
"rewards/rejected": -0.10564135015010834, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.418254357850609, |
|
"learning_rate": 1.7957049791269684e-07, |
|
"logits/chosen": -1.9675734043121338, |
|
"logits/rejected": -2.1122231483459473, |
|
"logps/chosen": -188.61734008789062, |
|
"logps/rejected": -89.61772155761719, |
|
"loss": 0.511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26492881774902344, |
|
"rewards/margins": 0.3512020409107208, |
|
"rewards/rejected": -0.08627324551343918, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.872257218178851, |
|
"learning_rate": 1.7900410635584497e-07, |
|
"logits/chosen": -2.0316319465637207, |
|
"logits/rejected": -2.253246784210205, |
|
"logps/chosen": -251.0762176513672, |
|
"logps/rejected": -61.86017990112305, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.29612407088279724, |
|
"rewards/margins": 0.37898051738739014, |
|
"rewards/rejected": -0.0828564465045929, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.652130957766253, |
|
"learning_rate": 1.7843088963826433e-07, |
|
"logits/chosen": -2.0624449253082275, |
|
"logits/rejected": -2.2764549255371094, |
|
"logps/chosen": -370.9137268066406, |
|
"logps/rejected": -250.69602966308594, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4017767012119293, |
|
"rewards/margins": 0.43597471714019775, |
|
"rewards/rejected": -0.034198030829429626, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.102972695442928, |
|
"learning_rate": 1.7785089728011794e-07, |
|
"logits/chosen": -1.9869798421859741, |
|
"logits/rejected": -2.2077856063842773, |
|
"logps/chosen": -236.51177978515625, |
|
"logps/rejected": -61.656253814697266, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3908085227012634, |
|
"rewards/margins": 0.5067812204360962, |
|
"rewards/rejected": -0.11597264558076859, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.370818540058152, |
|
"learning_rate": 1.7726417938691619e-07, |
|
"logits/chosen": -1.967153787612915, |
|
"logits/rejected": -2.141418933868408, |
|
"logps/chosen": -231.7236328125, |
|
"logps/rejected": -116.03726196289062, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.22416304051876068, |
|
"rewards/margins": 0.2950986623764038, |
|
"rewards/rejected": -0.07093562185764313, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.189949899066027, |
|
"learning_rate": 1.7667078664518793e-07, |
|
"logits/chosen": -2.074751853942871, |
|
"logits/rejected": -2.0922069549560547, |
|
"logps/chosen": -173.9927978515625, |
|
"logps/rejected": -123.99789428710938, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.22283044457435608, |
|
"rewards/margins": 0.32154157757759094, |
|
"rewards/rejected": -0.09871112555265427, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 5.085022408643686, |
|
"learning_rate": 1.76070770318102e-07, |
|
"logits/chosen": -2.1210179328918457, |
|
"logits/rejected": -2.4016335010528564, |
|
"logps/chosen": -315.16082763671875, |
|
"logps/rejected": -154.37620544433594, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5534517765045166, |
|
"rewards/margins": 0.6325445175170898, |
|
"rewards/rejected": -0.07909276336431503, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.8993009503155225, |
|
"learning_rate": 1.7546418224103835e-07, |
|
"logits/chosen": -1.9710700511932373, |
|
"logits/rejected": -2.1767797470092773, |
|
"logps/chosen": -309.2714538574219, |
|
"logps/rejected": -161.23086547851562, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.45312583446502686, |
|
"rewards/margins": 0.6297840476036072, |
|
"rewards/rejected": -0.1766582429409027, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.374782063627258, |
|
"learning_rate": 1.748510748171101e-07, |
|
"logits/chosen": -1.9851081371307373, |
|
"logits/rejected": -2.043236494064331, |
|
"logps/chosen": -135.60049438476562, |
|
"logps/rejected": -68.2064437866211, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.18936334550380707, |
|
"rewards/margins": 0.26729437708854675, |
|
"rewards/rejected": -0.07793103158473969, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.392980085406151, |
|
"learning_rate": 1.7423150101263642e-07, |
|
"logits/chosen": -2.0348260402679443, |
|
"logits/rejected": -2.1746668815612793, |
|
"logps/chosen": -214.05282592773438, |
|
"logps/rejected": -100.34738159179688, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2590511441230774, |
|
"rewards/margins": 0.48322322964668274, |
|
"rewards/rejected": -0.22417207062244415, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.821003899193818, |
|
"learning_rate": 1.7360551435256673e-07, |
|
"logits/chosen": -2.0046215057373047, |
|
"logits/rejected": -2.2741754055023193, |
|
"logps/chosen": -451.2037353515625, |
|
"logps/rejected": -181.55972290039062, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.43423500657081604, |
|
"rewards/margins": 0.6116418838500977, |
|
"rewards/rejected": -0.17740684747695923, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -1.9775569438934326, |
|
"eval_logits/rejected": -2.2316997051239014, |
|
"eval_logps/chosen": -320.51092529296875, |
|
"eval_logps/rejected": -122.61784362792969, |
|
"eval_loss": 0.4467245638370514, |
|
"eval_rewards/accuracies": 0.918749988079071, |
|
"eval_rewards/chosen": 0.517056405544281, |
|
"eval_rewards/margins": 0.7181770205497742, |
|
"eval_rewards/rejected": -0.20112057030200958, |
|
"eval_runtime": 186.4668, |
|
"eval_samples_per_second": 6.827, |
|
"eval_steps_per_second": 0.858, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.15265430176453, |
|
"learning_rate": 1.7297316891585675e-07, |
|
"logits/chosen": -1.9079805612564087, |
|
"logits/rejected": -2.1030519008636475, |
|
"logps/chosen": -268.8660888671875, |
|
"logps/rejected": -115.76235961914062, |
|
"loss": 0.482, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4167216420173645, |
|
"rewards/margins": 0.5568986535072327, |
|
"rewards/rejected": -0.14017705619335175, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.0504682508727035, |
|
"learning_rate": 1.723345193307966e-07, |
|
"logits/chosen": -2.1078760623931885, |
|
"logits/rejected": -2.110827922821045, |
|
"logps/chosen": -220.6822967529297, |
|
"logps/rejected": -237.88125610351562, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.31573280692100525, |
|
"rewards/margins": 0.257499635219574, |
|
"rewards/rejected": 0.058233220130205154, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.073084534105835, |
|
"learning_rate": 1.7168962077029147e-07, |
|
"logits/chosen": -2.0834879875183105, |
|
"logits/rejected": -2.268869400024414, |
|
"logps/chosen": -368.4338684082031, |
|
"logps/rejected": -146.4982147216797, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6909863948822021, |
|
"rewards/margins": 0.9143600463867188, |
|
"rewards/rejected": -0.2233736366033554, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.37978318257449, |
|
"learning_rate": 1.7103852894709516e-07, |
|
"logits/chosen": -1.9810847043991089, |
|
"logits/rejected": -2.2168128490448, |
|
"logps/chosen": -315.2268981933594, |
|
"logps/rejected": -112.0322494506836, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5689502358436584, |
|
"rewards/margins": 0.8497422933578491, |
|
"rewards/rejected": -0.2807920277118683, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.889756281915941, |
|
"learning_rate": 1.7038130010899715e-07, |
|
"logits/chosen": -1.9811582565307617, |
|
"logits/rejected": -2.092210292816162, |
|
"logps/chosen": -289.2423095703125, |
|
"logps/rejected": -178.71807861328125, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3411446213722229, |
|
"rewards/margins": 0.4420354962348938, |
|
"rewards/rejected": -0.10089084506034851, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 4.020472458246672, |
|
"learning_rate": 1.6971799103396333e-07, |
|
"logits/chosen": -1.9673709869384766, |
|
"logits/rejected": -2.158069372177124, |
|
"logps/chosen": -243.9493408203125, |
|
"logps/rejected": -94.94026947021484, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5125857591629028, |
|
"rewards/margins": 0.6839098930358887, |
|
"rewards/rejected": -0.1713240146636963, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.7354805196661727, |
|
"learning_rate": 1.6904865902523097e-07, |
|
"logits/chosen": -1.9753053188323975, |
|
"logits/rejected": -2.1016111373901367, |
|
"logps/chosen": -220.32826232910156, |
|
"logps/rejected": -112.85079193115234, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.31250494718551636, |
|
"rewards/margins": 0.6757313013076782, |
|
"rewards/rejected": -0.36322641372680664, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.9245064384013704, |
|
"learning_rate": 1.6837336190635822e-07, |
|
"logits/chosen": -2.059147357940674, |
|
"logits/rejected": -2.1772873401641846, |
|
"logps/chosen": -324.65899658203125, |
|
"logps/rejected": -223.98416137695312, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.38603684306144714, |
|
"rewards/margins": 0.4854813516139984, |
|
"rewards/rejected": -0.09944455325603485, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 4.239398591437667, |
|
"learning_rate": 1.6769215801622881e-07, |
|
"logits/chosen": -1.8967094421386719, |
|
"logits/rejected": -2.27612566947937, |
|
"logps/chosen": -385.6495666503906, |
|
"logps/rejected": -123.2912826538086, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5274486541748047, |
|
"rewards/margins": 0.695229172706604, |
|
"rewards/rejected": -0.1677805632352829, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 3.4050976316519455, |
|
"learning_rate": 1.6700510620401222e-07, |
|
"logits/chosen": -1.9731296300888062, |
|
"logits/rejected": -2.123116970062256, |
|
"logps/chosen": -230.08668518066406, |
|
"logps/rejected": -102.91403198242188, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.45512545108795166, |
|
"rewards/margins": 0.5183148980140686, |
|
"rewards/rejected": -0.06318947672843933, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.722592336469093, |
|
"learning_rate": 1.6631226582407951e-07, |
|
"logits/chosen": -1.9364876747131348, |
|
"logits/rejected": -2.2068028450012207, |
|
"logps/chosen": -327.61737060546875, |
|
"logps/rejected": -150.93841552734375, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.530191957950592, |
|
"rewards/margins": 0.799619197845459, |
|
"rewards/rejected": -0.26942718029022217, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.9447811103480306, |
|
"learning_rate": 1.6561369673087585e-07, |
|
"logits/chosen": -1.9829171895980835, |
|
"logits/rejected": -2.2110044956207275, |
|
"logps/chosen": -412.00341796875, |
|
"logps/rejected": -145.0166778564453, |
|
"loss": 0.38, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8012962937355042, |
|
"rewards/margins": 1.1100391149520874, |
|
"rewards/rejected": -0.3087427020072937, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.484517066813305, |
|
"learning_rate": 1.6490945927374968e-07, |
|
"logits/chosen": -1.9895436763763428, |
|
"logits/rejected": -2.232736587524414, |
|
"logps/chosen": -420.51300048828125, |
|
"logps/rejected": -231.12796020507812, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6339386105537415, |
|
"rewards/margins": 0.8945336937904358, |
|
"rewards/rejected": -0.26059508323669434, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.8362793915513818, |
|
"learning_rate": 1.641996142917391e-07, |
|
"logits/chosen": -2.0275871753692627, |
|
"logits/rejected": -2.1543703079223633, |
|
"logps/chosen": -336.6465759277344, |
|
"logps/rejected": -169.4688262939453, |
|
"loss": 0.4334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5943139791488647, |
|
"rewards/margins": 0.9754756093025208, |
|
"rewards/rejected": -0.3811616003513336, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.288478315089954, |
|
"learning_rate": 1.6348422310831595e-07, |
|
"logits/chosen": -2.013012170791626, |
|
"logits/rejected": -2.29827618598938, |
|
"logps/chosen": -322.6732177734375, |
|
"logps/rejected": -139.569091796875, |
|
"loss": 0.4019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5506273508071899, |
|
"rewards/margins": 0.8705551624298096, |
|
"rewards/rejected": -0.31992778182029724, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.932854863653483, |
|
"learning_rate": 1.627633475260882e-07, |
|
"logits/chosen": -1.8620115518569946, |
|
"logits/rejected": -2.178684711456299, |
|
"logps/chosen": -432.8457946777344, |
|
"logps/rejected": -142.8162841796875, |
|
"loss": 0.4383, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7144624590873718, |
|
"rewards/margins": 1.1024161577224731, |
|
"rewards/rejected": -0.38795360922813416, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.5258705477452312, |
|
"learning_rate": 1.620370498214607e-07, |
|
"logits/chosen": -1.9928277730941772, |
|
"logits/rejected": -2.294769525527954, |
|
"logps/chosen": -327.63983154296875, |
|
"logps/rejected": -107.83856201171875, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5440024137496948, |
|
"rewards/margins": 0.7528430223464966, |
|
"rewards/rejected": -0.20884062349796295, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.9842534931020355, |
|
"learning_rate": 1.6130539273925528e-07, |
|
"logits/chosen": -2.0068583488464355, |
|
"logits/rejected": -2.042741060256958, |
|
"logps/chosen": -169.5665740966797, |
|
"logps/rejected": -145.59188842773438, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.28490278124809265, |
|
"rewards/margins": 0.25545331835746765, |
|
"rewards/rejected": 0.029449433088302612, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.2633771102193707, |
|
"learning_rate": 1.6056843948728997e-07, |
|
"logits/chosen": -2.04612135887146, |
|
"logits/rejected": -2.1641111373901367, |
|
"logps/chosen": -304.9841003417969, |
|
"logps/rejected": -259.2249755859375, |
|
"loss": 0.4496, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4365397095680237, |
|
"rewards/margins": 0.5547477006912231, |
|
"rewards/rejected": -0.11820797622203827, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 3.2661747416858713, |
|
"learning_rate": 1.5982625373091874e-07, |
|
"logits/chosen": -1.9853219985961914, |
|
"logits/rejected": -2.247044801712036, |
|
"logps/chosen": -274.5014953613281, |
|
"logps/rejected": -98.39961242675781, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3941829204559326, |
|
"rewards/margins": 0.7147579193115234, |
|
"rewards/rejected": -0.3205750286579132, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.6997185994891217, |
|
"learning_rate": 1.5907889958753132e-07, |
|
"logits/chosen": -1.9539928436279297, |
|
"logits/rejected": -2.259643316268921, |
|
"logps/chosen": -360.8001708984375, |
|
"logps/rejected": -139.05746459960938, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6151716113090515, |
|
"rewards/margins": 0.8953421115875244, |
|
"rewards/rejected": -0.2801705002784729, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.26180309012713, |
|
"learning_rate": 1.5832644162101417e-07, |
|
"logits/chosen": -1.9639915227890015, |
|
"logits/rejected": -2.3155434131622314, |
|
"logps/chosen": -341.48138427734375, |
|
"logps/rejected": -142.3020782470703, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6940339207649231, |
|
"rewards/margins": 1.0427274703979492, |
|
"rewards/rejected": -0.34869351983070374, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.475146283236811, |
|
"learning_rate": 1.5756894483617267e-07, |
|
"logits/chosen": -2.0319931507110596, |
|
"logits/rejected": -2.0845324993133545, |
|
"logps/chosen": -108.24813079833984, |
|
"logps/rejected": -56.015464782714844, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.175735205411911, |
|
"rewards/margins": 0.34109964966773987, |
|
"rewards/rejected": -0.16536441445350647, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.711332096082553, |
|
"learning_rate": 1.5680647467311557e-07, |
|
"logits/chosen": -2.0805673599243164, |
|
"logits/rejected": -2.3789658546447754, |
|
"logps/chosen": -322.76593017578125, |
|
"logps/rejected": -113.76570129394531, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6527657508850098, |
|
"rewards/margins": 0.8439643383026123, |
|
"rewards/rejected": -0.19119860231876373, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.565726315114208, |
|
"learning_rate": 1.560390970016015e-07, |
|
"logits/chosen": -2.0362613201141357, |
|
"logits/rejected": -2.2976279258728027, |
|
"logps/chosen": -359.05126953125, |
|
"logps/rejected": -207.07476806640625, |
|
"loss": 0.42, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6665505766868591, |
|
"rewards/margins": 1.1819754838943481, |
|
"rewards/rejected": -0.515424907207489, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.6197726640789805, |
|
"learning_rate": 1.5526687811534838e-07, |
|
"logits/chosen": -1.9589186906814575, |
|
"logits/rejected": -2.149359703063965, |
|
"logps/chosen": -383.7507629394531, |
|
"logps/rejected": -239.794677734375, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5040831565856934, |
|
"rewards/margins": 0.9846323728561401, |
|
"rewards/rejected": -0.48054924607276917, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.4172376150457113, |
|
"learning_rate": 1.5448988472630654e-07, |
|
"logits/chosen": -2.003847599029541, |
|
"logits/rejected": -2.1804280281066895, |
|
"logps/chosen": -319.24346923828125, |
|
"logps/rejected": -137.32794189453125, |
|
"loss": 0.4327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48472583293914795, |
|
"rewards/margins": 0.8298259973526001, |
|
"rewards/rejected": -0.34510016441345215, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.2166471546285473, |
|
"learning_rate": 1.5370818395889537e-07, |
|
"logits/chosen": -2.06440806388855, |
|
"logits/rejected": -2.2347941398620605, |
|
"logps/chosen": -379.2797546386719, |
|
"logps/rejected": -195.081298828125, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8205428123474121, |
|
"rewards/margins": 1.3144938945770264, |
|
"rewards/rejected": -0.4939510226249695, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.142444037300061, |
|
"learning_rate": 1.5292184334420432e-07, |
|
"logits/chosen": -2.001006841659546, |
|
"logits/rejected": -2.2717456817626953, |
|
"logps/chosen": -307.4756774902344, |
|
"logps/rejected": -124.45487976074219, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49742940068244934, |
|
"rewards/margins": 0.9193170070648193, |
|
"rewards/rejected": -0.4218876361846924, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.0019240217136964, |
|
"learning_rate": 1.5213093081415919e-07, |
|
"logits/chosen": -2.0484771728515625, |
|
"logits/rejected": -2.3329827785491943, |
|
"logps/chosen": -382.6004943847656, |
|
"logps/rejected": -139.74937438964844, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7651157379150391, |
|
"rewards/margins": 1.1857589483261108, |
|
"rewards/rejected": -0.4206430912017822, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.324382092882249, |
|
"learning_rate": 1.513355146956531e-07, |
|
"logits/chosen": -1.9227899312973022, |
|
"logits/rejected": -2.203601598739624, |
|
"logps/chosen": -398.6375732421875, |
|
"logps/rejected": -149.57205200195312, |
|
"loss": 0.426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5919572114944458, |
|
"rewards/margins": 1.1777626276016235, |
|
"rewards/rejected": -0.585805356502533, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.665359099834768, |
|
"learning_rate": 1.5053566370464416e-07, |
|
"logits/chosen": -2.04799222946167, |
|
"logits/rejected": -2.237955331802368, |
|
"logps/chosen": -287.4353942871094, |
|
"logps/rejected": -117.89936065673828, |
|
"loss": 0.3617, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7061726450920105, |
|
"rewards/margins": 1.3360240459442139, |
|
"rewards/rejected": -0.6298512816429138, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.0978268391650627, |
|
"learning_rate": 1.4973144694021873e-07, |
|
"logits/chosen": -1.8632404804229736, |
|
"logits/rejected": -2.116689682006836, |
|
"logps/chosen": -297.6752014160156, |
|
"logps/rejected": -127.87348937988281, |
|
"loss": 0.4035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4323517680168152, |
|
"rewards/margins": 0.7440455555915833, |
|
"rewards/rejected": -0.31169381737709045, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.259874240443474, |
|
"learning_rate": 1.489229338786222e-07, |
|
"logits/chosen": -1.9522405862808228, |
|
"logits/rejected": -2.1952342987060547, |
|
"logps/chosen": -405.1477355957031, |
|
"logps/rejected": -266.0748291015625, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6451842784881592, |
|
"rewards/margins": 0.8864087462425232, |
|
"rewards/rejected": -0.2412244975566864, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.19860848209686, |
|
"learning_rate": 1.4811019436725682e-07, |
|
"logits/chosen": -2.013823986053467, |
|
"logits/rejected": -1.9999889135360718, |
|
"logps/chosen": -173.16363525390625, |
|
"logps/rejected": -175.79783630371094, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2981584668159485, |
|
"rewards/margins": 0.20658811926841736, |
|
"rewards/rejected": 0.09157033264636993, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.6728926343933406, |
|
"learning_rate": 1.4729329861864768e-07, |
|
"logits/chosen": -2.057213306427002, |
|
"logits/rejected": -2.1428587436676025, |
|
"logps/chosen": -313.03656005859375, |
|
"logps/rejected": -199.88522338867188, |
|
"loss": 0.3831, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5142601728439331, |
|
"rewards/margins": 1.0320128202438354, |
|
"rewards/rejected": -0.5177526473999023, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.42354969285846, |
|
"learning_rate": 1.4647231720437685e-07, |
|
"logits/chosen": -2.0417561531066895, |
|
"logits/rejected": -2.2340712547302246, |
|
"logps/chosen": -366.47406005859375, |
|
"logps/rejected": -236.64149475097656, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4168028235435486, |
|
"rewards/margins": 0.5594974160194397, |
|
"rewards/rejected": -0.1426945924758911, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.681331408633769, |
|
"learning_rate": 1.45647321048987e-07, |
|
"logits/chosen": -2.0262880325317383, |
|
"logits/rejected": -2.1669650077819824, |
|
"logps/chosen": -264.099609375, |
|
"logps/rejected": -138.3554229736328, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3780781924724579, |
|
"rewards/margins": 0.6789387464523315, |
|
"rewards/rejected": -0.30086055397987366, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.1553640769838704, |
|
"learning_rate": 1.4481838142385402e-07, |
|
"logits/chosen": -2.100149631500244, |
|
"logits/rejected": -2.246675729751587, |
|
"logps/chosen": -292.51287841796875, |
|
"logps/rejected": -152.18310546875, |
|
"loss": 0.3896, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.460287868976593, |
|
"rewards/margins": 1.0902036428451538, |
|
"rewards/rejected": -0.629915714263916, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.34078252938261, |
|
"learning_rate": 1.4398556994102994e-07, |
|
"logits/chosen": -1.9987834692001343, |
|
"logits/rejected": -2.1652636528015137, |
|
"logps/chosen": -336.60321044921875, |
|
"logps/rejected": -197.3555145263672, |
|
"loss": 0.4285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7229927778244019, |
|
"rewards/margins": 1.0515646934509277, |
|
"rewards/rejected": -0.32857200503349304, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.2837068260200324, |
|
"learning_rate": 1.431489585470564e-07, |
|
"logits/chosen": -1.9157692193984985, |
|
"logits/rejected": -2.0339536666870117, |
|
"logps/chosen": -246.109130859375, |
|
"logps/rejected": -140.8620147705078, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4217866063117981, |
|
"rewards/margins": 0.8604772686958313, |
|
"rewards/rejected": -0.4386906027793884, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.9978563958102957, |
|
"learning_rate": 1.4230861951674913e-07, |
|
"logits/chosen": -1.864048957824707, |
|
"logits/rejected": -2.2588746547698975, |
|
"logps/chosen": -398.6397705078125, |
|
"logps/rejected": -112.80824279785156, |
|
"loss": 0.4073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7403268814086914, |
|
"rewards/margins": 1.137719988822937, |
|
"rewards/rejected": -0.39739301800727844, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.692917813565496, |
|
"learning_rate": 1.4146462544695427e-07, |
|
"logits/chosen": -1.9859681129455566, |
|
"logits/rejected": -2.0894880294799805, |
|
"logps/chosen": -252.89605712890625, |
|
"logps/rejected": -177.11151123046875, |
|
"loss": 0.4258, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.41960960626602173, |
|
"rewards/margins": 0.7052587270736694, |
|
"rewards/rejected": -0.2856491208076477, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.0493282694050374, |
|
"learning_rate": 1.4061704925027652e-07, |
|
"logits/chosen": -1.9278736114501953, |
|
"logits/rejected": -2.2551167011260986, |
|
"logps/chosen": -409.3521728515625, |
|
"logps/rejected": -139.8559112548828, |
|
"loss": 0.4258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7342904210090637, |
|
"rewards/margins": 1.162542462348938, |
|
"rewards/rejected": -0.4282519817352295, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.8366166452321324, |
|
"learning_rate": 1.3976596414878042e-07, |
|
"logits/chosen": -1.9339745044708252, |
|
"logits/rejected": -2.0851519107818604, |
|
"logps/chosen": -315.5764465332031, |
|
"logps/rejected": -161.580810546875, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4443025588989258, |
|
"rewards/margins": 0.8001634478569031, |
|
"rewards/rejected": -0.3558608591556549, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.7752964496805377, |
|
"learning_rate": 1.3891144366766457e-07, |
|
"logits/chosen": -1.9236942529678345, |
|
"logits/rejected": -2.1249706745147705, |
|
"logps/chosen": -464.9728698730469, |
|
"logps/rejected": -287.4863586425781, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.17092488706111908, |
|
"rewards/margins": 0.4706559181213379, |
|
"rewards/rejected": -0.29973104596138, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.3368235459802063, |
|
"learning_rate": 1.380535616289099e-07, |
|
"logits/chosen": -1.956026554107666, |
|
"logits/rejected": -2.0740966796875, |
|
"logps/chosen": -301.49847412109375, |
|
"logps/rejected": -199.72262573242188, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4182983636856079, |
|
"rewards/margins": 0.6974088549613953, |
|
"rewards/rejected": -0.27911046147346497, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.617033810085924, |
|
"learning_rate": 1.3719239214490203e-07, |
|
"logits/chosen": -1.961683750152588, |
|
"logits/rejected": -2.088846445083618, |
|
"logps/chosen": -185.6378173828125, |
|
"logps/rejected": -52.31831741333008, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42800790071487427, |
|
"rewards/margins": 0.7910484075546265, |
|
"rewards/rejected": -0.36304041743278503, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.9547282694046486, |
|
"learning_rate": 1.3632800961202888e-07, |
|
"logits/chosen": -2.000007152557373, |
|
"logits/rejected": -2.2719762325286865, |
|
"logps/chosen": -264.94610595703125, |
|
"logps/rejected": -60.602142333984375, |
|
"loss": 0.4003, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.47297602891921997, |
|
"rewards/margins": 0.7190853953361511, |
|
"rewards/rejected": -0.24610941112041473, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.672734779611145, |
|
"learning_rate": 1.3546048870425355e-07, |
|
"logits/chosen": -1.8334035873413086, |
|
"logits/rejected": -2.062422752380371, |
|
"logps/chosen": -295.311767578125, |
|
"logps/rejected": -144.99703979492188, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.27950870990753174, |
|
"rewards/margins": 0.37616291642189026, |
|
"rewards/rejected": -0.09665423631668091, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.9373823404312134, |
|
"eval_logits/rejected": -2.1733014583587646, |
|
"eval_logps/chosen": -317.19122314453125, |
|
"eval_logps/rejected": -132.44723510742188, |
|
"eval_loss": 0.375163197517395, |
|
"eval_rewards/accuracies": 0.9437500238418579, |
|
"eval_rewards/chosen": 0.6166489720344543, |
|
"eval_rewards/margins": 1.1126511096954346, |
|
"eval_rewards/rejected": -0.4960021376609802, |
|
"eval_runtime": 184.2667, |
|
"eval_samples_per_second": 6.908, |
|
"eval_steps_per_second": 0.868, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.8687751990991144, |
|
"learning_rate": 1.345899043666631e-07, |
|
"logits/chosen": -2.0048770904541016, |
|
"logits/rejected": -2.2049386501312256, |
|
"logps/chosen": -202.36087036132812, |
|
"logps/rejected": -59.840599060058594, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6141016483306885, |
|
"rewards/margins": 0.914747953414917, |
|
"rewards/rejected": -0.3006463646888733, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.7728421710924707, |
|
"learning_rate": 1.3371633180899414e-07, |
|
"logits/chosen": -1.927375078201294, |
|
"logits/rejected": -2.266674041748047, |
|
"logps/chosen": -390.30902099609375, |
|
"logps/rejected": -173.35003662109375, |
|
"loss": 0.3938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9191557168960571, |
|
"rewards/margins": 1.6978180408477783, |
|
"rewards/rejected": -0.7786622047424316, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.0360315000193454, |
|
"learning_rate": 1.328398464991355e-07, |
|
"logits/chosen": -1.8318389654159546, |
|
"logits/rejected": -2.1461503505706787, |
|
"logps/chosen": -339.9506530761719, |
|
"logps/rejected": -144.16354370117188, |
|
"loss": 0.3618, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5340854525566101, |
|
"rewards/margins": 1.0414400100708008, |
|
"rewards/rejected": -0.5073546171188354, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.7772677805340003, |
|
"learning_rate": 1.3196052415660855e-07, |
|
"logits/chosen": -1.9493682384490967, |
|
"logits/rejected": -2.2572414875030518, |
|
"logps/chosen": -297.68194580078125, |
|
"logps/rejected": -81.56572723388672, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.806023359298706, |
|
"rewards/margins": 1.2982468605041504, |
|
"rewards/rejected": -0.4922235906124115, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.8726649702254616, |
|
"learning_rate": 1.3107844074602566e-07, |
|
"logits/chosen": -2.0517020225524902, |
|
"logits/rejected": -2.3108386993408203, |
|
"logps/chosen": -284.3307800292969, |
|
"logps/rejected": -92.55622100830078, |
|
"loss": 0.3985, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7782894372940063, |
|
"rewards/margins": 1.382967472076416, |
|
"rewards/rejected": -0.6046780347824097, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.915466394554351, |
|
"learning_rate": 1.301936724705278e-07, |
|
"logits/chosen": -1.935138463973999, |
|
"logits/rejected": -2.229339599609375, |
|
"logps/chosen": -348.0072937011719, |
|
"logps/rejected": -179.30294799804688, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6688775420188904, |
|
"rewards/margins": 1.195144772529602, |
|
"rewards/rejected": -0.5262673497200012, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.9307421779325877, |
|
"learning_rate": 1.2930629576520132e-07, |
|
"logits/chosen": -1.937353491783142, |
|
"logits/rejected": -2.111067295074463, |
|
"logps/chosen": -232.2332763671875, |
|
"logps/rejected": -105.97891998291016, |
|
"loss": 0.3727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4184137284755707, |
|
"rewards/margins": 0.6602100729942322, |
|
"rewards/rejected": -0.2417963743209839, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.704607717596039, |
|
"learning_rate": 1.2841638729047463e-07, |
|
"logits/chosen": -1.896262764930725, |
|
"logits/rejected": -2.1101086139678955, |
|
"logps/chosen": -276.5863037109375, |
|
"logps/rejected": -109.18448638916016, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.822554886341095, |
|
"rewards/margins": 1.2940058708190918, |
|
"rewards/rejected": -0.47145119309425354, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.030614573668553, |
|
"learning_rate": 1.2752402392549554e-07, |
|
"logits/chosen": -1.9761974811553955, |
|
"logits/rejected": -2.10685658454895, |
|
"logps/chosen": -211.11436462402344, |
|
"logps/rejected": -134.52835083007812, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.24754217267036438, |
|
"rewards/margins": 0.4351383447647095, |
|
"rewards/rejected": -0.1875961720943451, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.298515748860272, |
|
"learning_rate": 1.2662928276148983e-07, |
|
"logits/chosen": -2.1360230445861816, |
|
"logits/rejected": -2.257654905319214, |
|
"logps/chosen": -249.20669555664062, |
|
"logps/rejected": -131.0137176513672, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.386399507522583, |
|
"rewards/margins": 0.8481476306915283, |
|
"rewards/rejected": -0.4617480933666229, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.0649725875120377, |
|
"learning_rate": 1.257322410951011e-07, |
|
"logits/chosen": -1.9837696552276611, |
|
"logits/rejected": -2.2624270915985107, |
|
"logps/chosen": -298.5034484863281, |
|
"logps/rejected": -119.38221740722656, |
|
"loss": 0.3324, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8645215034484863, |
|
"rewards/margins": 1.5025858879089355, |
|
"rewards/rejected": -0.6380643248558044, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 3.0712945490963937, |
|
"learning_rate": 1.248329764217133e-07, |
|
"logits/chosen": -1.90183424949646, |
|
"logits/rejected": -2.1589910984039307, |
|
"logps/chosen": -363.40972900390625, |
|
"logps/rejected": -109.76152801513672, |
|
"loss": 0.3242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6098992824554443, |
|
"rewards/margins": 1.0329221487045288, |
|
"rewards/rejected": -0.4230228662490845, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.9440897282331484, |
|
"learning_rate": 1.2393156642875578e-07, |
|
"logits/chosen": -1.9756544828414917, |
|
"logits/rejected": -2.169515609741211, |
|
"logps/chosen": -272.5865173339844, |
|
"logps/rejected": -102.83159637451172, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.38845062255859375, |
|
"rewards/margins": 0.74920654296875, |
|
"rewards/rejected": -0.36075589060783386, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.7376552759938884, |
|
"learning_rate": 1.2302808898899197e-07, |
|
"logits/chosen": -1.858717918395996, |
|
"logits/rejected": -2.072749137878418, |
|
"logps/chosen": -332.71893310546875, |
|
"logps/rejected": -134.6237030029297, |
|
"loss": 0.3703, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6717262864112854, |
|
"rewards/margins": 1.630864143371582, |
|
"rewards/rejected": -0.9591379165649414, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.702952272294126, |
|
"learning_rate": 1.2212262215379195e-07, |
|
"logits/chosen": -1.915556788444519, |
|
"logits/rejected": -2.1649136543273926, |
|
"logps/chosen": -353.37335205078125, |
|
"logps/rejected": -116.70270538330078, |
|
"loss": 0.4008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6816975474357605, |
|
"rewards/margins": 1.3016923666000366, |
|
"rewards/rejected": -0.6199949979782104, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.860968950223889, |
|
"learning_rate": 1.2121524414638958e-07, |
|
"logits/chosen": -1.981323003768921, |
|
"logits/rejected": -2.0863146781921387, |
|
"logps/chosen": -278.7300109863281, |
|
"logps/rejected": -162.6415252685547, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5619724988937378, |
|
"rewards/margins": 0.8598495721817017, |
|
"rewards/rejected": -0.2978770434856415, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.9772601665108738, |
|
"learning_rate": 1.2030603335512466e-07, |
|
"logits/chosen": -2.0251593589782715, |
|
"logits/rejected": -2.337674617767334, |
|
"logps/chosen": -397.1326904296875, |
|
"logps/rejected": -146.89852905273438, |
|
"loss": 0.417, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8214364051818848, |
|
"rewards/margins": 1.3918532133102417, |
|
"rewards/rejected": -0.5704168081283569, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.6713639375367237, |
|
"learning_rate": 1.1939506832667128e-07, |
|
"logits/chosen": -1.9390000104904175, |
|
"logits/rejected": -2.1995325088500977, |
|
"logps/chosen": -366.1788330078125, |
|
"logps/rejected": -156.14825439453125, |
|
"loss": 0.363, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9185819029808044, |
|
"rewards/margins": 1.6624879837036133, |
|
"rewards/rejected": -0.7439059615135193, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.5368366922940884, |
|
"learning_rate": 1.1848242775925185e-07, |
|
"logits/chosen": -1.8598655462265015, |
|
"logits/rejected": -2.018507719039917, |
|
"logps/chosen": -140.5003204345703, |
|
"logps/rejected": -81.32078552246094, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.22455516457557678, |
|
"rewards/margins": 0.4118771553039551, |
|
"rewards/rejected": -0.18732202053070068, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.9108161123614926, |
|
"learning_rate": 1.175681904958386e-07, |
|
"logits/chosen": -2.0405771732330322, |
|
"logits/rejected": -2.107203483581543, |
|
"logps/chosen": -227.36375427246094, |
|
"logps/rejected": -147.64981079101562, |
|
"loss": 0.3762, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.35193291306495667, |
|
"rewards/margins": 0.7042461633682251, |
|
"rewards/rejected": -0.35231325030326843, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.2027237105876867, |
|
"learning_rate": 1.1665243551734219e-07, |
|
"logits/chosen": -1.9685847759246826, |
|
"logits/rejected": -2.2867276668548584, |
|
"logps/chosen": -288.57080078125, |
|
"logps/rejected": -110.56059265136719, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7529231905937195, |
|
"rewards/margins": 1.138110876083374, |
|
"rewards/rejected": -0.38518771529197693, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.6386128158434756, |
|
"learning_rate": 1.157352419357886e-07, |
|
"logits/chosen": -1.964128851890564, |
|
"logits/rejected": -2.035123348236084, |
|
"logps/chosen": -198.30899047851562, |
|
"logps/rejected": -119.32826232910156, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22413687407970428, |
|
"rewards/margins": 0.5808830261230469, |
|
"rewards/rejected": -0.3567461669445038, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.5439840312023967, |
|
"learning_rate": 1.1481668898748473e-07, |
|
"logits/chosen": -1.8627464771270752, |
|
"logits/rejected": -2.195005178451538, |
|
"logps/chosen": -508.2653503417969, |
|
"logps/rejected": -201.6099090576172, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7898422479629517, |
|
"rewards/margins": 1.3333909511566162, |
|
"rewards/rejected": -0.5435488224029541, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.7840535206098735, |
|
"learning_rate": 1.1389685602617301e-07, |
|
"logits/chosen": -1.9687414169311523, |
|
"logits/rejected": -2.079282760620117, |
|
"logps/chosen": -273.3905944824219, |
|
"logps/rejected": -167.68350219726562, |
|
"loss": 0.4538, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.44472670555114746, |
|
"rewards/margins": 0.6912115216255188, |
|
"rewards/rejected": -0.2464849352836609, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 3.1528869938353474, |
|
"learning_rate": 1.1297582251617617e-07, |
|
"logits/chosen": -2.000757932662964, |
|
"logits/rejected": -2.3343327045440674, |
|
"logps/chosen": -323.3289489746094, |
|
"logps/rejected": -116.5455551147461, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5140612125396729, |
|
"rewards/margins": 0.8541378974914551, |
|
"rewards/rejected": -0.340076744556427, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.5396783882345075, |
|
"learning_rate": 1.1205366802553229e-07, |
|
"logits/chosen": -1.9811201095581055, |
|
"logits/rejected": -2.090970516204834, |
|
"logps/chosen": -265.3904113769531, |
|
"logps/rejected": -177.4838409423828, |
|
"loss": 0.3467, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1779039204120636, |
|
"rewards/margins": 0.2935892939567566, |
|
"rewards/rejected": -0.115685373544693, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.1452451342239893, |
|
"learning_rate": 1.1113047221912095e-07, |
|
"logits/chosen": -1.8996617794036865, |
|
"logits/rejected": -2.1304678916931152, |
|
"logps/chosen": -225.79818725585938, |
|
"logps/rejected": -57.702632904052734, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6985273361206055, |
|
"rewards/margins": 1.1475094556808472, |
|
"rewards/rejected": -0.4489821195602417, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.9774371938674387, |
|
"learning_rate": 1.1020631485178082e-07, |
|
"logits/chosen": -1.9348338842391968, |
|
"logits/rejected": -2.2529170513153076, |
|
"logps/chosen": -398.8677978515625, |
|
"logps/rejected": -131.35577392578125, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9068738222122192, |
|
"rewards/margins": 1.5566710233688354, |
|
"rewards/rejected": -0.649797260761261, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.5452995220193033, |
|
"learning_rate": 1.092812757614199e-07, |
|
"logits/chosen": -1.9580453634262085, |
|
"logits/rejected": -2.2709877490997314, |
|
"logps/chosen": -397.8183898925781, |
|
"logps/rejected": -121.63996124267578, |
|
"loss": 0.3738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0113794803619385, |
|
"rewards/margins": 1.6582667827606201, |
|
"rewards/rejected": -0.6468873023986816, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.4951114841562623, |
|
"learning_rate": 1.0835543486211813e-07, |
|
"logits/chosen": -1.9591948986053467, |
|
"logits/rejected": -2.068274974822998, |
|
"logps/chosen": -218.165771484375, |
|
"logps/rejected": -147.83116149902344, |
|
"loss": 0.3839, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38550132513046265, |
|
"rewards/margins": 1.0801736116409302, |
|
"rewards/rejected": -0.6946722865104675, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.739194097290467, |
|
"learning_rate": 1.0742887213722372e-07, |
|
"logits/chosen": -1.8857489824295044, |
|
"logits/rejected": -2.036141872406006, |
|
"logps/chosen": -347.1433410644531, |
|
"logps/rejected": -185.33444213867188, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.666320264339447, |
|
"rewards/margins": 1.2669157981872559, |
|
"rewards/rejected": -0.6005955338478088, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.7874477837091036, |
|
"learning_rate": 1.0650166763244328e-07, |
|
"logits/chosen": -1.9431777000427246, |
|
"logits/rejected": -2.0786421298980713, |
|
"logps/chosen": -217.96388244628906, |
|
"logps/rejected": -161.750732421875, |
|
"loss": 0.3242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3562370538711548, |
|
"rewards/margins": 0.847759485244751, |
|
"rewards/rejected": -0.4915224611759186, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.5576723261926944, |
|
"learning_rate": 1.0557390144892684e-07, |
|
"logits/chosen": -1.9544168710708618, |
|
"logits/rejected": -2.2021868228912354, |
|
"logps/chosen": -460.05682373046875, |
|
"logps/rejected": -235.51014709472656, |
|
"loss": 0.3113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7885012030601501, |
|
"rewards/margins": 1.7033418416976929, |
|
"rewards/rejected": -0.914840579032898, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.4953298951762664, |
|
"learning_rate": 1.0464565373634781e-07, |
|
"logits/chosen": -1.8733316659927368, |
|
"logits/rejected": -2.055713415145874, |
|
"logps/chosen": -170.8631591796875, |
|
"logps/rejected": -65.48750305175781, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37032586336135864, |
|
"rewards/margins": 0.8034341335296631, |
|
"rewards/rejected": -0.43310827016830444, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.6489494096336954, |
|
"learning_rate": 1.0371700468597883e-07, |
|
"logits/chosen": -1.8701441287994385, |
|
"logits/rejected": -2.035473585128784, |
|
"logps/chosen": -155.58811950683594, |
|
"logps/rejected": -72.45659637451172, |
|
"loss": 0.3746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2507627010345459, |
|
"rewards/margins": 0.5653121471405029, |
|
"rewards/rejected": -0.3145494759082794, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.8950171966527543, |
|
"learning_rate": 1.0278803452376414e-07, |
|
"logits/chosen": -2.0079855918884277, |
|
"logits/rejected": -2.0556740760803223, |
|
"logps/chosen": -259.5736999511719, |
|
"logps/rejected": -169.33746337890625, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.393430233001709, |
|
"rewards/margins": 0.8617305755615234, |
|
"rewards/rejected": -0.4683002829551697, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.7819221061446946, |
|
"learning_rate": 1.0185882350338879e-07, |
|
"logits/chosen": -1.8643912076950073, |
|
"logits/rejected": -2.1565229892730713, |
|
"logps/chosen": -332.9527587890625, |
|
"logps/rejected": -144.50399780273438, |
|
"loss": 0.3721, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.554010272026062, |
|
"rewards/margins": 1.1745232343673706, |
|
"rewards/rejected": -0.6205129027366638, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 2.8762918304755365, |
|
"learning_rate": 1.0092945189934557e-07, |
|
"logits/chosen": -2.03650164604187, |
|
"logits/rejected": -2.168185234069824, |
|
"logps/chosen": -310.5986633300781, |
|
"logps/rejected": -188.8811492919922, |
|
"loss": 0.3686, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5031630396842957, |
|
"rewards/margins": 0.8894560933113098, |
|
"rewards/rejected": -0.38629305362701416, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.6024918959221424, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -1.9812936782836914, |
|
"logits/rejected": -2.0917391777038574, |
|
"logps/chosen": -264.7093811035156, |
|
"logps/rejected": -173.5496368408203, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.48823249340057373, |
|
"rewards/margins": 0.9868167638778687, |
|
"rewards/rejected": -0.49858415126800537, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.2471126430767163, |
|
"learning_rate": 9.907054810065445e-08, |
|
"logits/chosen": -2.0062572956085205, |
|
"logits/rejected": -2.277808666229248, |
|
"logps/chosen": -332.92169189453125, |
|
"logps/rejected": -126.70708465576172, |
|
"loss": 0.3629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8331867456436157, |
|
"rewards/margins": 1.6676141023635864, |
|
"rewards/rejected": -0.8344272375106812, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 2.698246167261979, |
|
"learning_rate": 9.81411764966112e-08, |
|
"logits/chosen": -1.9375407695770264, |
|
"logits/rejected": -2.0159647464752197, |
|
"logps/chosen": -309.874267578125, |
|
"logps/rejected": -234.45297241210938, |
|
"loss": 0.4218, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24357011914253235, |
|
"rewards/margins": 0.4638202488422394, |
|
"rewards/rejected": -0.22025014460086823, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 2.2470535964848417, |
|
"learning_rate": 9.721196547623583e-08, |
|
"logits/chosen": -1.9590727090835571, |
|
"logits/rejected": -2.049032211303711, |
|
"logps/chosen": -219.5438232421875, |
|
"logps/rejected": -159.00331115722656, |
|
"loss": 0.3911, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3394254446029663, |
|
"rewards/margins": 0.7338558435440063, |
|
"rewards/rejected": -0.39443033933639526, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.191536937514122, |
|
"learning_rate": 9.628299531402117e-08, |
|
"logits/chosen": -2.009471893310547, |
|
"logits/rejected": -2.1102893352508545, |
|
"logps/chosen": -288.97894287109375, |
|
"logps/rejected": -184.86932373046875, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5465821623802185, |
|
"rewards/margins": 1.0711835622787476, |
|
"rewards/rejected": -0.5246013402938843, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.3562073323406607, |
|
"learning_rate": 9.535434626365219e-08, |
|
"logits/chosen": -1.9866628646850586, |
|
"logits/rejected": -1.9806206226348877, |
|
"logps/chosen": -145.32054138183594, |
|
"logps/rejected": -112.49516296386719, |
|
"loss": 0.399, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06548295170068741, |
|
"rewards/margins": 0.25554153323173523, |
|
"rewards/rejected": -0.19005858898162842, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.5285073507638876, |
|
"learning_rate": 9.442609855107316e-08, |
|
"logits/chosen": -1.9528928995132446, |
|
"logits/rejected": -2.200953245162964, |
|
"logps/chosen": -309.15008544921875, |
|
"logps/rejected": -144.05575561523438, |
|
"loss": 0.3231, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.46039754152297974, |
|
"rewards/margins": 0.8913149833679199, |
|
"rewards/rejected": -0.43091750144958496, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.0784906826561236, |
|
"learning_rate": 9.349833236755673e-08, |
|
"logits/chosen": -1.8770756721496582, |
|
"logits/rejected": -2.105363607406616, |
|
"logps/chosen": -317.50152587890625, |
|
"logps/rejected": -153.7784881591797, |
|
"loss": 0.3486, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5030040144920349, |
|
"rewards/margins": 0.996347963809967, |
|
"rewards/rejected": -0.4933440089225769, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.223176405056625, |
|
"learning_rate": 9.25711278627763e-08, |
|
"logits/chosen": -2.0445773601531982, |
|
"logits/rejected": -2.2006373405456543, |
|
"logps/chosen": -278.0598449707031, |
|
"logps/rejected": -142.84823608398438, |
|
"loss": 0.3397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6799702048301697, |
|
"rewards/margins": 1.194618582725525, |
|
"rewards/rejected": -0.5146484971046448, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 2.406512154679947, |
|
"learning_rate": 9.164456513788186e-08, |
|
"logits/chosen": -2.011723518371582, |
|
"logits/rejected": -1.9958322048187256, |
|
"logps/chosen": -247.50254821777344, |
|
"logps/rejected": -222.34512329101562, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3076442778110504, |
|
"rewards/margins": 0.4636233150959015, |
|
"rewards/rejected": -0.1559790074825287, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 2.4695974188706433, |
|
"learning_rate": 9.071872423858009e-08, |
|
"logits/chosen": -1.9512934684753418, |
|
"logits/rejected": -2.1975042819976807, |
|
"logps/chosen": -483.12774658203125, |
|
"logps/rejected": -263.1443176269531, |
|
"loss": 0.388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8154278993606567, |
|
"rewards/margins": 1.8745701313018799, |
|
"rewards/rejected": -1.0591423511505127, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 2.9791630871699613, |
|
"learning_rate": 8.979368514821916e-08, |
|
"logits/chosen": -2.008679151535034, |
|
"logits/rejected": -2.0934290885925293, |
|
"logps/chosen": -258.72503662109375, |
|
"logps/rejected": -192.31565856933594, |
|
"loss": 0.3708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44245606660842896, |
|
"rewards/margins": 0.9912886619567871, |
|
"rewards/rejected": -0.5488325357437134, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_logits/chosen": -1.9327764511108398, |
|
"eval_logits/rejected": -2.1473922729492188, |
|
"eval_logps/chosen": -316.40032958984375, |
|
"eval_logps/rejected": -138.82333374023438, |
|
"eval_loss": 0.34432658553123474, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": 0.6403750777244568, |
|
"eval_rewards/margins": 1.3276602029800415, |
|
"eval_rewards/rejected": -0.6872851848602295, |
|
"eval_runtime": 190.3561, |
|
"eval_samples_per_second": 6.687, |
|
"eval_steps_per_second": 0.841, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 2.259506635951963, |
|
"learning_rate": 8.886952778087908e-08, |
|
"logits/chosen": -1.9257737398147583, |
|
"logits/rejected": -2.1019535064697266, |
|
"logps/chosen": -170.0314178466797, |
|
"logps/rejected": -37.75267791748047, |
|
"loss": 0.4161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4945024251937866, |
|
"rewards/margins": 0.9052304029464722, |
|
"rewards/rejected": -0.41072794795036316, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.4528744856208076, |
|
"learning_rate": 8.79463319744677e-08, |
|
"logits/chosen": -1.9754432439804077, |
|
"logits/rejected": -2.2768192291259766, |
|
"logps/chosen": -404.13897705078125, |
|
"logps/rejected": -176.906005859375, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8032380938529968, |
|
"rewards/margins": 1.7221509218215942, |
|
"rewards/rejected": -0.9189128875732422, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 3.28097929328298, |
|
"learning_rate": 8.702417748382384e-08, |
|
"logits/chosen": -2.0560543537139893, |
|
"logits/rejected": -1.9956598281860352, |
|
"logps/chosen": -257.294677734375, |
|
"logps/rejected": -254.02484130859375, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1712501496076584, |
|
"rewards/margins": 0.5382880568504333, |
|
"rewards/rejected": -0.36703795194625854, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.6393878582724704, |
|
"learning_rate": 8.6103143973827e-08, |
|
"logits/chosen": -2.0064706802368164, |
|
"logits/rejected": -2.2147860527038574, |
|
"logps/chosen": -283.7664794921875, |
|
"logps/rejected": -125.88124084472656, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9503887891769409, |
|
"rewards/margins": 1.508412480354309, |
|
"rewards/rejected": -0.5580236911773682, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.0503962665532116, |
|
"learning_rate": 8.518331101251527e-08, |
|
"logits/chosen": -2.0628817081451416, |
|
"logits/rejected": -2.214637517929077, |
|
"logps/chosen": -342.5946350097656, |
|
"logps/rejected": -173.35684204101562, |
|
"loss": 0.3738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8031637072563171, |
|
"rewards/margins": 1.4461843967437744, |
|
"rewards/rejected": -0.6430206894874573, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.3772305238257907, |
|
"learning_rate": 8.426475806421139e-08, |
|
"logits/chosen": -1.9149799346923828, |
|
"logits/rejected": -2.1681957244873047, |
|
"logps/chosen": -321.2602844238281, |
|
"logps/rejected": -96.91996002197266, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5701212882995605, |
|
"rewards/margins": 1.2240173816680908, |
|
"rewards/rejected": -0.6538960337638855, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 2.3700618894565273, |
|
"learning_rate": 8.33475644826578e-08, |
|
"logits/chosen": -1.86668860912323, |
|
"logits/rejected": -2.027719020843506, |
|
"logps/chosen": -322.3348693847656, |
|
"logps/rejected": -189.26300048828125, |
|
"loss": 0.3527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7509647011756897, |
|
"rewards/margins": 1.6569186449050903, |
|
"rewards/rejected": -0.9059540629386902, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 2.3204387917178884, |
|
"learning_rate": 8.243180950416141e-08, |
|
"logits/chosen": -1.9971275329589844, |
|
"logits/rejected": -2.099766492843628, |
|
"logps/chosen": -222.3831787109375, |
|
"logps/rejected": -123.86671447753906, |
|
"loss": 0.3796, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40789657831192017, |
|
"rewards/margins": 0.9203974008560181, |
|
"rewards/rejected": -0.5125008821487427, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.544884720920933, |
|
"learning_rate": 8.151757224074815e-08, |
|
"logits/chosen": -1.9076409339904785, |
|
"logits/rejected": -2.030827522277832, |
|
"logps/chosen": -233.522216796875, |
|
"logps/rejected": -173.6394805908203, |
|
"loss": 0.3593, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.33733195066452026, |
|
"rewards/margins": 0.6667687296867371, |
|
"rewards/rejected": -0.329436719417572, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.37240337516102, |
|
"learning_rate": 8.060493167332874e-08, |
|
"logits/chosen": -2.0240964889526367, |
|
"logits/rejected": -2.0981948375701904, |
|
"logps/chosen": -324.484375, |
|
"logps/rejected": -219.33116149902344, |
|
"loss": 0.3292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6331191062927246, |
|
"rewards/margins": 1.715273141860962, |
|
"rewards/rejected": -1.0821539163589478, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 3.012051922259596, |
|
"learning_rate": 7.969396664487533e-08, |
|
"logits/chosen": -1.9107437133789062, |
|
"logits/rejected": -2.1371631622314453, |
|
"logps/chosen": -339.2627258300781, |
|
"logps/rejected": -230.1804962158203, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2044561207294464, |
|
"rewards/margins": 0.44547808170318604, |
|
"rewards/rejected": -0.24102194607257843, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 2.2052254247044116, |
|
"learning_rate": 7.878475585361043e-08, |
|
"logits/chosen": -1.9317848682403564, |
|
"logits/rejected": -2.19038724899292, |
|
"logps/chosen": -323.2309875488281, |
|
"logps/rejected": -125.88053894042969, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6508646011352539, |
|
"rewards/margins": 1.367360234260559, |
|
"rewards/rejected": -0.7164957523345947, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 2.235268190842379, |
|
"learning_rate": 7.787737784620802e-08, |
|
"logits/chosen": -1.909670352935791, |
|
"logits/rejected": -2.064894914627075, |
|
"logps/chosen": -219.2584228515625, |
|
"logps/rejected": -68.53303527832031, |
|
"loss": 0.3531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38158148527145386, |
|
"rewards/margins": 0.9134366512298584, |
|
"rewards/rejected": -0.5318551659584045, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.3948915922057386, |
|
"learning_rate": 7.6971911011008e-08, |
|
"logits/chosen": -1.9540221691131592, |
|
"logits/rejected": -2.157740592956543, |
|
"logps/chosen": -316.31884765625, |
|
"logps/rejected": -154.80227661132812, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45785102248191833, |
|
"rewards/margins": 1.0194741487503052, |
|
"rewards/rejected": -0.5616230964660645, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.4074374529306155, |
|
"learning_rate": 7.606843357124425e-08, |
|
"logits/chosen": -1.9372565746307373, |
|
"logits/rejected": -2.164517879486084, |
|
"logps/chosen": -308.13494873046875, |
|
"logps/rejected": -149.24195861816406, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7418156266212463, |
|
"rewards/margins": 1.4810779094696045, |
|
"rewards/rejected": -0.7392622232437134, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 2.130755933970431, |
|
"learning_rate": 7.516702357828671e-08, |
|
"logits/chosen": -1.990491509437561, |
|
"logits/rejected": -2.4479219913482666, |
|
"logps/chosen": -500.0743103027344, |
|
"logps/rejected": -156.37615966796875, |
|
"loss": 0.3141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9668587446212769, |
|
"rewards/margins": 1.7484171390533447, |
|
"rewards/rejected": -0.7815585136413574, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 2.519590433851525, |
|
"learning_rate": 7.426775890489889e-08, |
|
"logits/chosen": -1.8623852729797363, |
|
"logits/rejected": -2.0746591091156006, |
|
"logps/chosen": -345.7522277832031, |
|
"logps/rejected": -132.41415405273438, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9244540929794312, |
|
"rewards/margins": 1.5649265050888062, |
|
"rewards/rejected": -0.6404723525047302, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 2.1874875532732285, |
|
"learning_rate": 7.337071723851017e-08, |
|
"logits/chosen": -1.9167838096618652, |
|
"logits/rejected": -2.1459975242614746, |
|
"logps/chosen": -303.1180419921875, |
|
"logps/rejected": -105.34794616699219, |
|
"loss": 0.2966, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.881919264793396, |
|
"rewards/margins": 1.6922099590301514, |
|
"rewards/rejected": -0.8102905750274658, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 2.9186874702232544, |
|
"learning_rate": 7.247597607450445e-08, |
|
"logits/chosen": -2.002195119857788, |
|
"logits/rejected": -2.1577107906341553, |
|
"logps/chosen": -310.2653503417969, |
|
"logps/rejected": -124.31275939941406, |
|
"loss": 0.3963, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7073330879211426, |
|
"rewards/margins": 1.4355955123901367, |
|
"rewards/rejected": -0.7282623648643494, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.995931596526281, |
|
"learning_rate": 7.15836127095254e-08, |
|
"logits/chosen": -1.8800973892211914, |
|
"logits/rejected": -2.0754265785217285, |
|
"logps/chosen": -272.69476318359375, |
|
"logps/rejected": -99.58995056152344, |
|
"loss": 0.361, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5042135119438171, |
|
"rewards/margins": 1.2575228214263916, |
|
"rewards/rejected": -0.7533093690872192, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 6.773519914904181, |
|
"learning_rate": 7.06937042347987e-08, |
|
"logits/chosen": -1.9714473485946655, |
|
"logits/rejected": -2.09019136428833, |
|
"logps/chosen": -364.12158203125, |
|
"logps/rejected": -252.11019897460938, |
|
"loss": 0.4276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7859811186790466, |
|
"rewards/margins": 1.5761654376983643, |
|
"rewards/rejected": -0.7901844382286072, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.4224792412511054, |
|
"learning_rate": 6.98063275294722e-08, |
|
"logits/chosen": -2.033931255340576, |
|
"logits/rejected": -2.032353639602661, |
|
"logps/chosen": -182.28111267089844, |
|
"logps/rejected": -170.1015167236328, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.20503102242946625, |
|
"rewards/margins": 0.6778407692909241, |
|
"rewards/rejected": -0.4728098213672638, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.8578473574930796, |
|
"learning_rate": 6.892155925397436e-08, |
|
"logits/chosen": -1.863666296005249, |
|
"logits/rejected": -2.2041678428649902, |
|
"logps/chosen": -364.0481262207031, |
|
"logps/rejected": -116.85747528076172, |
|
"loss": 0.3863, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8524085283279419, |
|
"rewards/margins": 1.405036449432373, |
|
"rewards/rejected": -0.5526278614997864, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.4553850662794003, |
|
"learning_rate": 6.803947584339147e-08, |
|
"logits/chosen": -1.9987305402755737, |
|
"logits/rejected": -2.1810014247894287, |
|
"logps/chosen": -308.6746826171875, |
|
"logps/rejected": -156.14413452148438, |
|
"loss": 0.3519, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.757546067237854, |
|
"rewards/margins": 1.9111757278442383, |
|
"rewards/rejected": -1.1536295413970947, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 2.3959548460575517, |
|
"learning_rate": 6.716015350086448e-08, |
|
"logits/chosen": -1.9721763134002686, |
|
"logits/rejected": -2.15269136428833, |
|
"logps/chosen": -360.6725158691406, |
|
"logps/rejected": -232.14779663085938, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4904707968235016, |
|
"rewards/margins": 1.2632602453231812, |
|
"rewards/rejected": -0.772789478302002, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.8852130124655633, |
|
"learning_rate": 6.628366819100585e-08, |
|
"logits/chosen": -1.8738517761230469, |
|
"logits/rejected": -2.2090766429901123, |
|
"logps/chosen": -344.3710632324219, |
|
"logps/rejected": -123.95575714111328, |
|
"loss": 0.3684, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7661735415458679, |
|
"rewards/margins": 1.6990057229995728, |
|
"rewards/rejected": -0.9328320026397705, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 2.207509020107613, |
|
"learning_rate": 6.541009563333689e-08, |
|
"logits/chosen": -1.970944881439209, |
|
"logits/rejected": -2.104224681854248, |
|
"logps/chosen": -401.86761474609375, |
|
"logps/rejected": -276.5339050292969, |
|
"loss": 0.3071, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6489874720573425, |
|
"rewards/margins": 1.4881306886672974, |
|
"rewards/rejected": -0.8391432166099548, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 2.3035753230701586, |
|
"learning_rate": 6.453951129574643e-08, |
|
"logits/chosen": -1.8832050561904907, |
|
"logits/rejected": -2.253293514251709, |
|
"logps/chosen": -479.3350830078125, |
|
"logps/rejected": -133.14219665527344, |
|
"loss": 0.2556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2150325775146484, |
|
"rewards/margins": 2.2640023231506348, |
|
"rewards/rejected": -1.0489698648452759, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.563274710615486, |
|
"learning_rate": 6.367199038797109e-08, |
|
"logits/chosen": -1.9041603803634644, |
|
"logits/rejected": -2.0262975692749023, |
|
"logps/chosen": -199.96775817871094, |
|
"logps/rejected": -110.95515441894531, |
|
"loss": 0.3621, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.37201952934265137, |
|
"rewards/margins": 1.0209636688232422, |
|
"rewards/rejected": -0.6489440202713013, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.2766910704747567, |
|
"learning_rate": 6.2807607855098e-08, |
|
"logits/chosen": -1.885288119316101, |
|
"logits/rejected": -2.1321539878845215, |
|
"logps/chosen": -268.776611328125, |
|
"logps/rejected": -114.54924011230469, |
|
"loss": 0.2867, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36375609040260315, |
|
"rewards/margins": 0.8478094339370728, |
|
"rewards/rejected": -0.4840533137321472, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 2.2615311670632128, |
|
"learning_rate": 6.194643837109014e-08, |
|
"logits/chosen": -1.8483718633651733, |
|
"logits/rejected": -2.052673816680908, |
|
"logps/chosen": -231.9535369873047, |
|
"logps/rejected": -95.31845092773438, |
|
"loss": 0.3074, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4465009570121765, |
|
"rewards/margins": 1.0453064441680908, |
|
"rewards/rejected": -0.5988054275512695, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.8323460108873135, |
|
"learning_rate": 6.108855633233544e-08, |
|
"logits/chosen": -2.08075213432312, |
|
"logits/rejected": -2.3139994144439697, |
|
"logps/chosen": -300.26141357421875, |
|
"logps/rejected": -123.13629150390625, |
|
"loss": 0.4024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6193065047264099, |
|
"rewards/margins": 1.3384159803390503, |
|
"rewards/rejected": -0.7191093564033508, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.308172837492481, |
|
"learning_rate": 6.02340358512196e-08, |
|
"logits/chosen": -1.918696641921997, |
|
"logits/rejected": -2.0569751262664795, |
|
"logps/chosen": -186.91880798339844, |
|
"logps/rejected": -65.9375, |
|
"loss": 0.337, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5072881579399109, |
|
"rewards/margins": 0.9656892418861389, |
|
"rewards/rejected": -0.45840105414390564, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 3.2533259381605513, |
|
"learning_rate": 5.938295074972349e-08, |
|
"logits/chosen": -1.9810622930526733, |
|
"logits/rejected": -2.1882359981536865, |
|
"logps/chosen": -328.361328125, |
|
"logps/rejected": -165.9792938232422, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40560880303382874, |
|
"rewards/margins": 1.1706748008728027, |
|
"rewards/rejected": -0.7650659084320068, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 2.477262358757034, |
|
"learning_rate": 5.8535374553045746e-08, |
|
"logits/chosen": -1.893616795539856, |
|
"logits/rejected": -2.1727473735809326, |
|
"logps/chosen": -326.0169982910156, |
|
"logps/rejected": -136.06997680664062, |
|
"loss": 0.366, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6259748935699463, |
|
"rewards/margins": 1.5379406213760376, |
|
"rewards/rejected": -0.9119656682014465, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.9233773900803872, |
|
"learning_rate": 5.7691380483250864e-08, |
|
"logits/chosen": -1.9818511009216309, |
|
"logits/rejected": -2.2494049072265625, |
|
"logps/chosen": -303.03594970703125, |
|
"logps/rejected": -107.28809356689453, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6850159168243408, |
|
"rewards/margins": 1.286649465560913, |
|
"rewards/rejected": -0.6016334295272827, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.4029669436961596, |
|
"learning_rate": 5.6851041452943636e-08, |
|
"logits/chosen": -1.9243234395980835, |
|
"logits/rejected": -2.0990493297576904, |
|
"logps/chosen": -305.79205322265625, |
|
"logps/rejected": -156.5312042236328, |
|
"loss": 0.3443, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3460720479488373, |
|
"rewards/margins": 0.9225568175315857, |
|
"rewards/rejected": -0.576484739780426, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.7172071884731333, |
|
"learning_rate": 5.601443005897011e-08, |
|
"logits/chosen": -1.990132212638855, |
|
"logits/rejected": -2.103468179702759, |
|
"logps/chosen": -316.22296142578125, |
|
"logps/rejected": -216.75155639648438, |
|
"loss": 0.3121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46712929010391235, |
|
"rewards/margins": 1.2178815603256226, |
|
"rewards/rejected": -0.7507522106170654, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 2.331069676044177, |
|
"learning_rate": 5.5181618576145995e-08, |
|
"logits/chosen": -1.9477646350860596, |
|
"logits/rejected": -2.16232967376709, |
|
"logps/chosen": -326.668212890625, |
|
"logps/rejected": -136.16281127929688, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6249808669090271, |
|
"rewards/margins": 1.3888401985168457, |
|
"rewards/rejected": -0.7638593912124634, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 2.4198590554719344, |
|
"learning_rate": 5.435267895101302e-08, |
|
"logits/chosen": -2.0229196548461914, |
|
"logits/rejected": -2.160853624343872, |
|
"logps/chosen": -235.00137329101562, |
|
"logps/rejected": -92.64382934570312, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3488693833351135, |
|
"rewards/margins": 0.6747847199440002, |
|
"rewards/rejected": -0.3259153962135315, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 2.903826654030489, |
|
"learning_rate": 5.352768279562314e-08, |
|
"logits/chosen": -1.9650566577911377, |
|
"logits/rejected": -2.255340337753296, |
|
"logps/chosen": -419.90032958984375, |
|
"logps/rejected": -208.98171997070312, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7806905508041382, |
|
"rewards/margins": 1.5795037746429443, |
|
"rewards/rejected": -0.7988132238388062, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 2.104301012306948, |
|
"learning_rate": 5.2706701381352336e-08, |
|
"logits/chosen": -1.9476219415664673, |
|
"logits/rejected": -2.1780595779418945, |
|
"logps/chosen": -285.74176025390625, |
|
"logps/rejected": -128.45465087890625, |
|
"loss": 0.3659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5625160932540894, |
|
"rewards/margins": 1.3298622369766235, |
|
"rewards/rejected": -0.767346203327179, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 2.054448385701696, |
|
"learning_rate": 5.1889805632743145e-08, |
|
"logits/chosen": -2.205219030380249, |
|
"logits/rejected": -2.2034475803375244, |
|
"logps/chosen": -378.1851501464844, |
|
"logps/rejected": -274.8927001953125, |
|
"loss": 0.329, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.49013322591781616, |
|
"rewards/margins": 0.9879810810089111, |
|
"rewards/rejected": -0.4978478252887726, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 2.6121103191536537, |
|
"learning_rate": 5.107706612137775e-08, |
|
"logits/chosen": -1.9490768909454346, |
|
"logits/rejected": -2.1126646995544434, |
|
"logps/chosen": -360.6116943359375, |
|
"logps/rejected": -191.07272338867188, |
|
"loss": 0.2892, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7919806838035583, |
|
"rewards/margins": 1.782927393913269, |
|
"rewards/rejected": -0.9909467101097107, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 2.385541686750622, |
|
"learning_rate": 5.026855305978128e-08, |
|
"logits/chosen": -1.9246346950531006, |
|
"logits/rejected": -2.1882967948913574, |
|
"logps/chosen": -483.7081604003906, |
|
"logps/rejected": -254.4589080810547, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9386581182479858, |
|
"rewards/margins": 1.5556895732879639, |
|
"rewards/rejected": -0.6170313954353333, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 2.6107252036470188, |
|
"learning_rate": 4.946433629535585e-08, |
|
"logits/chosen": -1.9748541116714478, |
|
"logits/rejected": -1.9948114156723022, |
|
"logps/chosen": -188.89405822753906, |
|
"logps/rejected": -140.08143615722656, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.32503047585487366, |
|
"rewards/margins": 1.246155858039856, |
|
"rewards/rejected": -0.9211252927780151, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 2.406035236060261, |
|
"learning_rate": 4.8664485304346916e-08, |
|
"logits/chosen": -1.9609439373016357, |
|
"logits/rejected": -2.1323890686035156, |
|
"logps/chosen": -370.1985778808594, |
|
"logps/rejected": -151.9228515625, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8187414407730103, |
|
"rewards/margins": 1.6757946014404297, |
|
"rewards/rejected": -0.8570532202720642, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 2.3506751324584236, |
|
"learning_rate": 4.7869069185840824e-08, |
|
"logits/chosen": -1.9399272203445435, |
|
"logits/rejected": -2.1106154918670654, |
|
"logps/chosen": -385.4277038574219, |
|
"logps/rejected": -187.8704071044922, |
|
"loss": 0.2971, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8000746965408325, |
|
"rewards/margins": 1.7866300344467163, |
|
"rewards/rejected": -0.9865550994873047, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 2.037166364259834, |
|
"learning_rate": 4.707815665579569e-08, |
|
"logits/chosen": -1.9395835399627686, |
|
"logits/rejected": -2.105896234512329, |
|
"logps/chosen": -218.52117919921875, |
|
"logps/rejected": -102.7484359741211, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4448435306549072, |
|
"rewards/margins": 1.021249771118164, |
|
"rewards/rejected": -0.5764063000679016, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.0548700051829827, |
|
"learning_rate": 4.629181604110464e-08, |
|
"logits/chosen": -1.8626420497894287, |
|
"logits/rejected": -2.0713019371032715, |
|
"logps/chosen": -264.1625061035156, |
|
"logps/rejected": -78.62328338623047, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46505922079086304, |
|
"rewards/margins": 1.1402562856674194, |
|
"rewards/rejected": -0.6751970052719116, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_logits/chosen": -1.9314517974853516, |
|
"eval_logits/rejected": -2.156139850616455, |
|
"eval_logps/chosen": -316.2469787597656, |
|
"eval_logps/rejected": -142.72573852539062, |
|
"eval_loss": 0.32974618673324585, |
|
"eval_rewards/accuracies": 0.9312499761581421, |
|
"eval_rewards/chosen": 0.644976019859314, |
|
"eval_rewards/margins": 1.4493337869644165, |
|
"eval_rewards/rejected": -0.8043577075004578, |
|
"eval_runtime": 186.1915, |
|
"eval_samples_per_second": 6.837, |
|
"eval_steps_per_second": 0.859, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.1864725160708818, |
|
"learning_rate": 4.5510115273693473e-08, |
|
"logits/chosen": -1.94536554813385, |
|
"logits/rejected": -2.1171536445617676, |
|
"logps/chosen": -295.99072265625, |
|
"logps/rejected": -195.84324645996094, |
|
"loss": 0.3722, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.40991413593292236, |
|
"rewards/margins": 0.8959781527519226, |
|
"rewards/rejected": -0.48606404662132263, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 2.1951187853623693, |
|
"learning_rate": 4.473312188465166e-08, |
|
"logits/chosen": -1.919504165649414, |
|
"logits/rejected": -2.0694282054901123, |
|
"logps/chosen": -303.9930419921875, |
|
"logps/rejected": -130.7193603515625, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7677474021911621, |
|
"rewards/margins": 1.868253231048584, |
|
"rewards/rejected": -1.1005058288574219, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 3.1913113320998026, |
|
"learning_rate": 4.3960902998398517e-08, |
|
"logits/chosen": -1.9696305990219116, |
|
"logits/rejected": -2.1716620922088623, |
|
"logps/chosen": -374.0699462890625, |
|
"logps/rejected": -171.0696563720703, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3457423746585846, |
|
"rewards/margins": 1.1523842811584473, |
|
"rewards/rejected": -0.8066419363021851, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 2.1165374906752796, |
|
"learning_rate": 4.319352532688443e-08, |
|
"logits/chosen": -1.9709705114364624, |
|
"logits/rejected": -2.1842715740203857, |
|
"logps/chosen": -341.5719299316406, |
|
"logps/rejected": -153.75521850585938, |
|
"loss": 0.3503, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8018146753311157, |
|
"rewards/margins": 1.9906085729599, |
|
"rewards/rejected": -1.1887940168380737, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 2.713035319428413, |
|
"learning_rate": 4.2431055163827315e-08, |
|
"logits/chosen": -1.9593266248703003, |
|
"logits/rejected": -2.101644515991211, |
|
"logps/chosen": -299.6078796386719, |
|
"logps/rejected": -176.44241333007812, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7900698781013489, |
|
"rewards/margins": 1.7910141944885254, |
|
"rewards/rejected": -1.0009442567825317, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.1241918465749783, |
|
"learning_rate": 4.167355837898584e-08, |
|
"logits/chosen": -1.9116507768630981, |
|
"logits/rejected": -2.0239267349243164, |
|
"logps/chosen": -224.43470764160156, |
|
"logps/rejected": -121.06668090820312, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3974186182022095, |
|
"rewards/margins": 1.1547796726226807, |
|
"rewards/rejected": -0.7573609352111816, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 2.7982635154400732, |
|
"learning_rate": 4.092110041246865e-08, |
|
"logits/chosen": -1.8939900398254395, |
|
"logits/rejected": -2.0005412101745605, |
|
"logps/chosen": -175.42764282226562, |
|
"logps/rejected": -72.14698791503906, |
|
"loss": 0.4166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4599054157733917, |
|
"rewards/margins": 0.9669243097305298, |
|
"rewards/rejected": -0.5070188641548157, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 2.3714338867608507, |
|
"learning_rate": 4.017374626908124e-08, |
|
"logits/chosen": -1.9074149131774902, |
|
"logits/rejected": -2.0512611865997314, |
|
"logps/chosen": -270.6667175292969, |
|
"logps/rejected": -171.60247802734375, |
|
"loss": 0.3572, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6291319727897644, |
|
"rewards/margins": 1.2309937477111816, |
|
"rewards/rejected": -0.601861834526062, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.125630878152154, |
|
"learning_rate": 3.9431560512710026e-08, |
|
"logits/chosen": -1.9675098657608032, |
|
"logits/rejected": -2.1794068813323975, |
|
"logps/chosen": -383.7911682128906, |
|
"logps/rejected": -206.51065063476562, |
|
"loss": 0.3502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5929781794548035, |
|
"rewards/margins": 1.4138081073760986, |
|
"rewards/rejected": -0.8208298683166504, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.3708331370378977, |
|
"learning_rate": 3.869460726074474e-08, |
|
"logits/chosen": -1.9422714710235596, |
|
"logits/rejected": -2.1994147300720215, |
|
"logps/chosen": -375.3470458984375, |
|
"logps/rejected": -194.87181091308594, |
|
"loss": 0.3401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7767963409423828, |
|
"rewards/margins": 1.8286038637161255, |
|
"rewards/rejected": -1.0518076419830322, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 2.151865974418779, |
|
"learning_rate": 3.7962950178539277e-08, |
|
"logits/chosen": -1.8709148168563843, |
|
"logits/rejected": -2.118030548095703, |
|
"logps/chosen": -486.78729248046875, |
|
"logps/rejected": -245.8949737548828, |
|
"loss": 0.3139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.190994381904602, |
|
"rewards/margins": 2.3700695037841797, |
|
"rewards/rejected": -1.1790751218795776, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 2.073175030368598, |
|
"learning_rate": 3.723665247391181e-08, |
|
"logits/chosen": -1.998550295829773, |
|
"logits/rejected": -2.0971145629882812, |
|
"logps/chosen": -159.14913940429688, |
|
"logps/rejected": -89.764404296875, |
|
"loss": 0.3692, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37837278842926025, |
|
"rewards/margins": 0.7685126066207886, |
|
"rewards/rejected": -0.3901398181915283, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.6211615886977406, |
|
"learning_rate": 3.6515776891684046e-08, |
|
"logits/chosen": -1.947357177734375, |
|
"logits/rejected": -2.213313341140747, |
|
"logps/chosen": -407.5662536621094, |
|
"logps/rejected": -170.8851318359375, |
|
"loss": 0.3545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8491164445877075, |
|
"rewards/margins": 1.8658862113952637, |
|
"rewards/rejected": -1.0167697668075562, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.181847753329417, |
|
"learning_rate": 3.580038570826093e-08, |
|
"logits/chosen": -1.954185962677002, |
|
"logits/rejected": -1.9828972816467285, |
|
"logps/chosen": -225.95716857910156, |
|
"logps/rejected": -138.2814178466797, |
|
"loss": 0.3124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5291526317596436, |
|
"rewards/margins": 1.2315007448196411, |
|
"rewards/rejected": -0.7023481130599976, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 2.48244810599722, |
|
"learning_rate": 3.509054072625031e-08, |
|
"logits/chosen": -2.0149214267730713, |
|
"logits/rejected": -2.144228219985962, |
|
"logps/chosen": -233.55215454101562, |
|
"logps/rejected": -158.93942260742188, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.21644878387451172, |
|
"rewards/margins": 0.5699070692062378, |
|
"rewards/rejected": -0.3534582853317261, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 2.5289077187729605, |
|
"learning_rate": 3.438630326912414e-08, |
|
"logits/chosen": -1.9141268730163574, |
|
"logits/rejected": -2.064079761505127, |
|
"logps/chosen": -210.72866821289062, |
|
"logps/rejected": -109.2860336303711, |
|
"loss": 0.3904, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.49795305728912354, |
|
"rewards/margins": 0.9178673028945923, |
|
"rewards/rejected": -0.41991421580314636, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 2.571622602426196, |
|
"learning_rate": 3.36877341759205e-08, |
|
"logits/chosen": -1.9544380903244019, |
|
"logits/rejected": -2.116626262664795, |
|
"logps/chosen": -357.0731201171875, |
|
"logps/rejected": -189.14581298828125, |
|
"loss": 0.3862, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5316759943962097, |
|
"rewards/margins": 1.2794767618179321, |
|
"rewards/rejected": -0.7478006482124329, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.949899789832065, |
|
"learning_rate": 3.2994893795987765e-08, |
|
"logits/chosen": -1.91293466091156, |
|
"logits/rejected": -2.20516300201416, |
|
"logps/chosen": -343.4618835449219, |
|
"logps/rejected": -165.45309448242188, |
|
"loss": 0.3214, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6179537773132324, |
|
"rewards/margins": 1.422529697418213, |
|
"rewards/rejected": -0.8045759201049805, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.0690940688924604, |
|
"learning_rate": 3.2307841983771176e-08, |
|
"logits/chosen": -1.8847911357879639, |
|
"logits/rejected": -2.0202267169952393, |
|
"logps/chosen": -200.80783081054688, |
|
"logps/rejected": -104.58744049072266, |
|
"loss": 0.3988, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2336064577102661, |
|
"rewards/margins": 0.8035344481468201, |
|
"rewards/rejected": -0.569927990436554, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 3.361447315187423, |
|
"learning_rate": 3.1626638093641775e-08, |
|
"logits/chosen": -2.018540382385254, |
|
"logits/rejected": -2.115633726119995, |
|
"logps/chosen": -364.541259765625, |
|
"logps/rejected": -258.31201171875, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7286382913589478, |
|
"rewards/margins": 1.7735204696655273, |
|
"rewards/rejected": -1.0448822975158691, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 2.2699381072918072, |
|
"learning_rate": 3.0951340974769036e-08, |
|
"logits/chosen": -2.064911365509033, |
|
"logits/rejected": -2.240605354309082, |
|
"logps/chosen": -236.48304748535156, |
|
"logps/rejected": -107.43959045410156, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.588148832321167, |
|
"rewards/margins": 1.1847357749938965, |
|
"rewards/rejected": -0.5965870022773743, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.667750743463304, |
|
"learning_rate": 3.0282008966036645e-08, |
|
"logits/chosen": -1.9100630283355713, |
|
"logits/rejected": -2.129060983657837, |
|
"logps/chosen": -336.9149169921875, |
|
"logps/rejected": -199.61476135253906, |
|
"loss": 0.3072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.721794068813324, |
|
"rewards/margins": 1.6085577011108398, |
|
"rewards/rejected": -0.8867635726928711, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.8755306754711736, |
|
"learning_rate": 2.961869989100284e-08, |
|
"logits/chosen": -1.973421573638916, |
|
"logits/rejected": -2.1563172340393066, |
|
"logps/chosen": -216.48602294921875, |
|
"logps/rejected": -124.95690155029297, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5886920094490051, |
|
"rewards/margins": 1.2941111326217651, |
|
"rewards/rejected": -0.7054190635681152, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 5.227217314587311, |
|
"learning_rate": 2.8961471052904852e-08, |
|
"logits/chosen": -2.024624824523926, |
|
"logits/rejected": -2.251070499420166, |
|
"logps/chosen": -281.59869384765625, |
|
"logps/rejected": -135.55624389648438, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7197825312614441, |
|
"rewards/margins": 1.5994011163711548, |
|
"rewards/rejected": -0.8796184659004211, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.009469131089514, |
|
"learning_rate": 2.8310379229708548e-08, |
|
"logits/chosen": -1.9827313423156738, |
|
"logits/rejected": -2.371861457824707, |
|
"logps/chosen": -489.7880859375, |
|
"logps/rejected": -177.23590087890625, |
|
"loss": 0.3523, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.9191684126853943, |
|
"rewards/margins": 1.5952149629592896, |
|
"rewards/rejected": -0.6760467886924744, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.1048586567415457, |
|
"learning_rate": 2.7665480669203377e-08, |
|
"logits/chosen": -1.934632420539856, |
|
"logits/rejected": -2.1390960216522217, |
|
"logps/chosen": -299.7361145019531, |
|
"logps/rejected": -145.28750610351562, |
|
"loss": 0.2493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7144760489463806, |
|
"rewards/margins": 1.7577918767929077, |
|
"rewards/rejected": -1.0433157682418823, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.9269914350384307, |
|
"learning_rate": 2.7026831084143254e-08, |
|
"logits/chosen": -1.9214329719543457, |
|
"logits/rejected": -2.1084671020507812, |
|
"logps/chosen": -256.8978271484375, |
|
"logps/rejected": -152.1795196533203, |
|
"loss": 0.2297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6236655116081238, |
|
"rewards/margins": 1.38833487033844, |
|
"rewards/rejected": -0.7646692991256714, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.0675257149998636, |
|
"learning_rate": 2.6394485647433273e-08, |
|
"logits/chosen": -1.9709903001785278, |
|
"logits/rejected": -2.265573501586914, |
|
"logps/chosen": -394.1219482421875, |
|
"logps/rejected": -185.23574829101562, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8027446269989014, |
|
"rewards/margins": 1.6347885131835938, |
|
"rewards/rejected": -0.8320440053939819, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.788876731328824, |
|
"learning_rate": 2.5768498987363595e-08, |
|
"logits/chosen": -1.9366790056228638, |
|
"logits/rejected": -2.2282567024230957, |
|
"logps/chosen": -319.8380126953125, |
|
"logps/rejected": -102.86990356445312, |
|
"loss": 0.3694, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8548070192337036, |
|
"rewards/margins": 1.4455407857894897, |
|
"rewards/rejected": -0.5907337665557861, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.9890175505345056, |
|
"learning_rate": 2.514892518288988e-08, |
|
"logits/chosen": -1.9826221466064453, |
|
"logits/rejected": -2.222606658935547, |
|
"logps/chosen": -382.8952331542969, |
|
"logps/rejected": -154.84893798828125, |
|
"loss": 0.2993, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.884526252746582, |
|
"rewards/margins": 1.8476271629333496, |
|
"rewards/rejected": -0.9631009101867676, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.9892914243896007, |
|
"learning_rate": 2.453581775896164e-08, |
|
"logits/chosen": -1.8972669839859009, |
|
"logits/rejected": -2.1901392936706543, |
|
"logps/chosen": -385.07183837890625, |
|
"logps/rejected": -140.5347137451172, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8599714636802673, |
|
"rewards/margins": 1.7295414209365845, |
|
"rewards/rejected": -0.8695698380470276, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 3.1227195996097796, |
|
"learning_rate": 2.3929229681898e-08, |
|
"logits/chosen": -1.9234641790390015, |
|
"logits/rejected": -2.137057304382324, |
|
"logps/chosen": -378.1383361816406, |
|
"logps/rejected": -225.235107421875, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6592192053794861, |
|
"rewards/margins": 1.4562088251113892, |
|
"rewards/rejected": -0.7969896793365479, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.259095062423167, |
|
"learning_rate": 2.3329213354812048e-08, |
|
"logits/chosen": -1.8679221868515015, |
|
"logits/rejected": -2.045198678970337, |
|
"logps/chosen": -308.4653625488281, |
|
"logps/rejected": -166.0535430908203, |
|
"loss": 0.3296, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.636101245880127, |
|
"rewards/margins": 1.3980377912521362, |
|
"rewards/rejected": -0.7619364857673645, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 2.048149379545429, |
|
"learning_rate": 2.2735820613083835e-08, |
|
"logits/chosen": -1.9634499549865723, |
|
"logits/rejected": -2.1904802322387695, |
|
"logps/chosen": -245.1457061767578, |
|
"logps/rejected": -68.16486358642578, |
|
"loss": 0.348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4485977292060852, |
|
"rewards/margins": 0.9627138376235962, |
|
"rewards/rejected": -0.514116108417511, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.887396473086629, |
|
"learning_rate": 2.214910271988204e-08, |
|
"logits/chosen": -1.9823853969573975, |
|
"logits/rejected": -2.187169075012207, |
|
"logps/chosen": -291.22772216796875, |
|
"logps/rejected": -149.49351501464844, |
|
"loss": 0.3743, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3778786063194275, |
|
"rewards/margins": 1.0260906219482422, |
|
"rewards/rejected": -0.6482120156288147, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.3234230452821976, |
|
"learning_rate": 2.1569110361735675e-08, |
|
"logits/chosen": -2.0167317390441895, |
|
"logits/rejected": -2.172858238220215, |
|
"logps/chosen": -277.48248291015625, |
|
"logps/rejected": -144.8683319091797, |
|
"loss": 0.3099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5940869450569153, |
|
"rewards/margins": 1.403023362159729, |
|
"rewards/rejected": -0.8089364171028137, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.5446773204041664, |
|
"learning_rate": 2.0995893644155004e-08, |
|
"logits/chosen": -1.9866153001785278, |
|
"logits/rejected": -2.0847387313842773, |
|
"logps/chosen": -278.55133056640625, |
|
"logps/rejected": -204.3761444091797, |
|
"loss": 0.377, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11578173190355301, |
|
"rewards/margins": 0.541024923324585, |
|
"rewards/rejected": -0.42524319887161255, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.8357875496740423, |
|
"learning_rate": 2.0429502087303165e-08, |
|
"logits/chosen": -1.9452314376831055, |
|
"logits/rejected": -2.0776212215423584, |
|
"logps/chosen": -377.2801208496094, |
|
"logps/rejected": -231.69754028320312, |
|
"loss": 0.3189, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7734673023223877, |
|
"rewards/margins": 1.5090703964233398, |
|
"rewards/rejected": -0.7356030941009521, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 2.592831499188782, |
|
"learning_rate": 1.9869984621717882e-08, |
|
"logits/chosen": -1.886064887046814, |
|
"logits/rejected": -2.187819719314575, |
|
"logps/chosen": -343.24664306640625, |
|
"logps/rejected": -130.65252685546875, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6091063618659973, |
|
"rewards/margins": 1.680466651916504, |
|
"rewards/rejected": -1.0713602304458618, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.8632324855903686, |
|
"learning_rate": 1.9317389584084564e-08, |
|
"logits/chosen": -2.046363592147827, |
|
"logits/rejected": -2.3104305267333984, |
|
"logps/chosen": -275.2044372558594, |
|
"logps/rejected": -81.00444030761719, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.362895131111145, |
|
"rewards/margins": 2.0078790187835693, |
|
"rewards/rejected": -0.6449840068817139, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 2.155918100213792, |
|
"learning_rate": 1.8771764713060355e-08, |
|
"logits/chosen": -1.9281926155090332, |
|
"logits/rejected": -2.106454610824585, |
|
"logps/chosen": -272.47381591796875, |
|
"logps/rejected": -113.74870300292969, |
|
"loss": 0.2838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8106430768966675, |
|
"rewards/margins": 1.7507152557373047, |
|
"rewards/rejected": -0.9400720596313477, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 2.1800107546257763, |
|
"learning_rate": 1.823315714515018e-08, |
|
"logits/chosen": -1.9668960571289062, |
|
"logits/rejected": -2.028254508972168, |
|
"logps/chosen": -374.9870300292969, |
|
"logps/rejected": -222.8441162109375, |
|
"loss": 0.3292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5992096662521362, |
|
"rewards/margins": 1.4854466915130615, |
|
"rewards/rejected": -0.8862370848655701, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 2.0467928150919388, |
|
"learning_rate": 1.7701613410634364e-08, |
|
"logits/chosen": -2.026965856552124, |
|
"logits/rejected": -2.1441550254821777, |
|
"logps/chosen": -362.87579345703125, |
|
"logps/rejected": -164.18157958984375, |
|
"loss": 0.3324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6154202818870544, |
|
"rewards/margins": 1.6078412532806396, |
|
"rewards/rejected": -0.9924209117889404, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.771060411498593, |
|
"learning_rate": 1.7177179429549138e-08, |
|
"logits/chosen": -1.9229191541671753, |
|
"logits/rejected": -2.0828027725219727, |
|
"logps/chosen": -307.8000793457031, |
|
"logps/rejected": -152.2085723876953, |
|
"loss": 0.3442, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5766472220420837, |
|
"rewards/margins": 1.5805346965789795, |
|
"rewards/rejected": -1.0038875341415405, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.1477749219966182, |
|
"learning_rate": 1.6659900507719403e-08, |
|
"logits/chosen": -1.956030011177063, |
|
"logits/rejected": -2.126971960067749, |
|
"logps/chosen": -277.1318664550781, |
|
"logps/rejected": -143.57424926757812, |
|
"loss": 0.4088, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5368402600288391, |
|
"rewards/margins": 1.3292208909988403, |
|
"rewards/rejected": -0.7923805713653564, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.0607954307134566, |
|
"learning_rate": 1.614982133284495e-08, |
|
"logits/chosen": -1.9652516841888428, |
|
"logits/rejected": -2.0515949726104736, |
|
"logps/chosen": -107.14926147460938, |
|
"logps/rejected": -52.71076583862305, |
|
"loss": 0.3249, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.14517386257648468, |
|
"rewards/margins": 0.4607216417789459, |
|
"rewards/rejected": -0.31554779410362244, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 2.1416211825003013, |
|
"learning_rate": 1.5646985970639714e-08, |
|
"logits/chosen": -1.8579660654067993, |
|
"logits/rejected": -2.1249003410339355, |
|
"logps/chosen": -384.6595153808594, |
|
"logps/rejected": -188.4464111328125, |
|
"loss": 0.3381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4016474485397339, |
|
"rewards/margins": 1.1661272048950195, |
|
"rewards/rejected": -0.7644796967506409, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 2.34908271482458, |
|
"learning_rate": 1.515143786102503e-08, |
|
"logits/chosen": -1.8695746660232544, |
|
"logits/rejected": -2.0640392303466797, |
|
"logps/chosen": -237.24574279785156, |
|
"logps/rejected": -136.90809631347656, |
|
"loss": 0.375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2771724462509155, |
|
"rewards/margins": 0.8384150862693787, |
|
"rewards/rejected": -0.5612426400184631, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 2.4924316107175666, |
|
"learning_rate": 1.4663219814376938e-08, |
|
"logits/chosen": -2.012023687362671, |
|
"logits/rejected": -2.0818071365356445, |
|
"logps/chosen": -273.9322814941406, |
|
"logps/rejected": -240.0913848876953, |
|
"loss": 0.3164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.328775554895401, |
|
"rewards/margins": 0.7493981122970581, |
|
"rewards/rejected": -0.4206225872039795, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.804749956277777, |
|
"learning_rate": 1.4182374007827603e-08, |
|
"logits/chosen": -1.9487468004226685, |
|
"logits/rejected": -2.167121410369873, |
|
"logps/chosen": -270.26507568359375, |
|
"logps/rejected": -154.2471923828125, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2056507021188736, |
|
"rewards/margins": 0.027986496686935425, |
|
"rewards/rejected": -0.2336372286081314, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_logits/chosen": -1.9295151233673096, |
|
"eval_logits/rejected": -2.141464948654175, |
|
"eval_logps/chosen": -316.00396728515625, |
|
"eval_logps/rejected": -144.11239624023438, |
|
"eval_loss": 0.32466286420822144, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": 0.6522671580314636, |
|
"eval_rewards/margins": 1.4982246160507202, |
|
"eval_rewards/rejected": -0.8459575772285461, |
|
"eval_runtime": 186.691, |
|
"eval_samples_per_second": 6.819, |
|
"eval_steps_per_second": 0.857, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.9411042495172686, |
|
"learning_rate": 1.3708941981621814e-08, |
|
"logits/chosen": -1.9763463735580444, |
|
"logits/rejected": -2.107821464538574, |
|
"logps/chosen": -155.54547119140625, |
|
"logps/rejected": -65.61723327636719, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37429141998291016, |
|
"rewards/margins": 0.8814553022384644, |
|
"rewards/rejected": -0.507163941860199, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 2.349563136392854, |
|
"learning_rate": 1.3242964635528208e-08, |
|
"logits/chosen": -1.9118015766143799, |
|
"logits/rejected": -2.0467817783355713, |
|
"logps/chosen": -196.6197052001953, |
|
"logps/rejected": -72.90583038330078, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.455363929271698, |
|
"rewards/margins": 0.8942562341690063, |
|
"rewards/rejected": -0.43889227509498596, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 2.0443074078432297, |
|
"learning_rate": 1.278448222530606e-08, |
|
"logits/chosen": -1.822318196296692, |
|
"logits/rejected": -2.1045279502868652, |
|
"logps/chosen": -255.2604522705078, |
|
"logps/rejected": -105.29451751708984, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7090134620666504, |
|
"rewards/margins": 1.4576520919799805, |
|
"rewards/rejected": -0.7486386895179749, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.329468016744532, |
|
"learning_rate": 1.2333534359227383e-08, |
|
"logits/chosen": -1.9810900688171387, |
|
"logits/rejected": -1.9879897832870483, |
|
"logps/chosen": -187.4176025390625, |
|
"logps/rejected": -137.21522521972656, |
|
"loss": 0.3768, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.21685890853405, |
|
"rewards/margins": 0.7572988867759705, |
|
"rewards/rejected": -0.5404399037361145, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.4935403946193335, |
|
"learning_rate": 1.1890159994655425e-08, |
|
"logits/chosen": -1.9184632301330566, |
|
"logits/rejected": -2.1819629669189453, |
|
"logps/chosen": -317.76031494140625, |
|
"logps/rejected": -118.99726104736328, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6526443958282471, |
|
"rewards/margins": 1.706324815750122, |
|
"rewards/rejected": -1.053680419921875, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 2.4833278286779588, |
|
"learning_rate": 1.145439743467902e-08, |
|
"logits/chosen": -1.944310188293457, |
|
"logits/rejected": -2.1891067028045654, |
|
"logps/chosen": -392.849853515625, |
|
"logps/rejected": -190.91070556640625, |
|
"loss": 0.3401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5746330618858337, |
|
"rewards/margins": 1.7914217710494995, |
|
"rewards/rejected": -1.2167887687683105, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.8943188797272705, |
|
"learning_rate": 1.1026284324803492e-08, |
|
"logits/chosen": -1.9464324712753296, |
|
"logits/rejected": -2.071516990661621, |
|
"logps/chosen": -250.5147247314453, |
|
"logps/rejected": -113.3388442993164, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5535628199577332, |
|
"rewards/margins": 1.0718276500701904, |
|
"rewards/rejected": -0.5182647109031677, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 2.8544492338017693, |
|
"learning_rate": 1.0605857649698669e-08, |
|
"logits/chosen": -1.902798056602478, |
|
"logits/rejected": -2.297213077545166, |
|
"logps/chosen": -473.9614562988281, |
|
"logps/rejected": -205.9657745361328, |
|
"loss": 0.347, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6481414437294006, |
|
"rewards/margins": 1.6791346073150635, |
|
"rewards/rejected": -1.030993103981018, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.0036521120268076, |
|
"learning_rate": 1.0193153730003601e-08, |
|
"logits/chosen": -1.9088249206542969, |
|
"logits/rejected": -2.0862650871276855, |
|
"logps/chosen": -252.01986694335938, |
|
"logps/rejected": -133.6540985107422, |
|
"loss": 0.3304, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.459441214799881, |
|
"rewards/margins": 0.782798171043396, |
|
"rewards/rejected": -0.3233569860458374, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.7913507857395947, |
|
"learning_rate": 9.78820821918893e-09, |
|
"logits/chosen": -1.8910510540008545, |
|
"logits/rejected": -2.1956124305725098, |
|
"logps/chosen": -254.30776977539062, |
|
"logps/rejected": -76.68388366699219, |
|
"loss": 0.4119, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5045368671417236, |
|
"rewards/margins": 1.1815844774246216, |
|
"rewards/rejected": -0.677047610282898, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.565223836267683, |
|
"learning_rate": 9.391056100476735e-09, |
|
"logits/chosen": -1.951928973197937, |
|
"logits/rejected": -2.1905078887939453, |
|
"logps/chosen": -359.3843994140625, |
|
"logps/rejected": -189.37869262695312, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8327935934066772, |
|
"rewards/margins": 1.7248027324676514, |
|
"rewards/rejected": -0.8920091390609741, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.519209117959205, |
|
"learning_rate": 9.001731683818336e-09, |
|
"logits/chosen": -1.8875353336334229, |
|
"logits/rejected": -2.0544509887695312, |
|
"logps/chosen": -331.9053955078125, |
|
"logps/rejected": -170.03109741210938, |
|
"loss": 0.3695, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8446786403656006, |
|
"rewards/margins": 1.7915393114089966, |
|
"rewards/rejected": -0.946860671043396, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.7585924805588735, |
|
"learning_rate": 8.620268602930269e-09, |
|
"logits/chosen": -1.9674144983291626, |
|
"logits/rejected": -1.948687195777893, |
|
"logps/chosen": -274.6681823730469, |
|
"logps/rejected": -228.2263641357422, |
|
"loss": 0.2954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7268482446670532, |
|
"rewards/margins": 1.5142990350723267, |
|
"rewards/rejected": -0.7874506711959839, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.314971302336536, |
|
"learning_rate": 8.246699812388713e-09, |
|
"logits/chosen": -1.9341325759887695, |
|
"logits/rejected": -2.092617988586426, |
|
"logps/chosen": -338.190673828125, |
|
"logps/rejected": -150.9620819091797, |
|
"loss": 0.3171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7337810397148132, |
|
"rewards/margins": 1.5115951299667358, |
|
"rewards/rejected": -0.7778141498565674, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 2.0178597083087175, |
|
"learning_rate": 7.881057584782447e-09, |
|
"logits/chosen": -1.9406661987304688, |
|
"logits/rejected": -2.1710622310638428, |
|
"logps/chosen": -285.05120849609375, |
|
"logps/rejected": -94.08568572998047, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5303260684013367, |
|
"rewards/margins": 1.2865056991577148, |
|
"rewards/rejected": -0.7561795711517334, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 2.2008626151145534, |
|
"learning_rate": 7.523373507924947e-09, |
|
"logits/chosen": -1.8313828706741333, |
|
"logits/rejected": -2.026157855987549, |
|
"logps/chosen": -331.306884765625, |
|
"logps/rejected": -151.93821716308594, |
|
"loss": 0.3016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.559520959854126, |
|
"rewards/margins": 1.3658560514450073, |
|
"rewards/rejected": -0.806334912776947, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 2.3365923631145225, |
|
"learning_rate": 7.173678482125389e-09, |
|
"logits/chosen": -2.0174169540405273, |
|
"logits/rejected": -2.2585856914520264, |
|
"logps/chosen": -267.46875, |
|
"logps/rejected": -115.35077667236328, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4813220202922821, |
|
"rewards/margins": 1.1780027151107788, |
|
"rewards/rejected": -0.6966806650161743, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 2.481179658279427, |
|
"learning_rate": 6.83200271751927e-09, |
|
"logits/chosen": -1.8875877857208252, |
|
"logits/rejected": -2.227430820465088, |
|
"logps/chosen": -371.4129638671875, |
|
"logps/rejected": -164.30038452148438, |
|
"loss": 0.36, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8697217106819153, |
|
"rewards/margins": 1.7959797382354736, |
|
"rewards/rejected": -0.9262580275535583, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.8970176783719166, |
|
"learning_rate": 6.498375731458528e-09, |
|
"logits/chosen": -1.9906672239303589, |
|
"logits/rejected": -2.092620611190796, |
|
"logps/chosen": -159.76637268066406, |
|
"logps/rejected": -73.02427673339844, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.289163202047348, |
|
"rewards/margins": 0.7067059874534607, |
|
"rewards/rejected": -0.41754278540611267, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 2.0293931798898868, |
|
"learning_rate": 6.172826345961479e-09, |
|
"logits/chosen": -1.8877040147781372, |
|
"logits/rejected": -2.1244630813598633, |
|
"logps/chosen": -468.999755859375, |
|
"logps/rejected": -173.8732147216797, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5770574808120728, |
|
"rewards/margins": 1.7707054615020752, |
|
"rewards/rejected": -1.193647861480713, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 2.505226667316074, |
|
"learning_rate": 5.8553826852230095e-09, |
|
"logits/chosen": -1.9780012369155884, |
|
"logits/rejected": -2.1993906497955322, |
|
"logps/chosen": -371.938720703125, |
|
"logps/rejected": -178.89736938476562, |
|
"loss": 0.3714, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.41208526492118835, |
|
"rewards/margins": 1.3043644428253174, |
|
"rewards/rejected": -0.8922792077064514, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.6481034388278855, |
|
"learning_rate": 5.54607217318479e-09, |
|
"logits/chosen": -1.971070408821106, |
|
"logits/rejected": -2.187108278274536, |
|
"logps/chosen": -375.25604248046875, |
|
"logps/rejected": -157.79771423339844, |
|
"loss": 0.299, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4872535765171051, |
|
"rewards/margins": 1.6334389448165894, |
|
"rewards/rejected": -1.146185278892517, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.4584799350769444, |
|
"learning_rate": 5.244921531166246e-09, |
|
"logits/chosen": -1.9654029607772827, |
|
"logits/rejected": -2.007652997970581, |
|
"logps/chosen": -234.24276733398438, |
|
"logps/rejected": -158.03627014160156, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5839539766311646, |
|
"rewards/margins": 1.3285213708877563, |
|
"rewards/rejected": -0.744567334651947, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 2.5817479172502, |
|
"learning_rate": 4.951956775555999e-09, |
|
"logits/chosen": -2.014432430267334, |
|
"logits/rejected": -2.1201930046081543, |
|
"logps/chosen": -320.6275634765625, |
|
"logps/rejected": -213.67913818359375, |
|
"loss": 0.3529, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1901564747095108, |
|
"rewards/margins": 0.7247769236564636, |
|
"rewards/rejected": -0.534620463848114, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 2.0825760729013036, |
|
"learning_rate": 4.6672032155644305e-09, |
|
"logits/chosen": -1.882224202156067, |
|
"logits/rejected": -1.9761323928833008, |
|
"logps/chosen": -116.42622375488281, |
|
"logps/rejected": -46.48798370361328, |
|
"loss": 0.329, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.17195764183998108, |
|
"rewards/margins": 0.4231622815132141, |
|
"rewards/rejected": -0.2512046992778778, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 2.080471104569788, |
|
"learning_rate": 4.390685451037024e-09, |
|
"logits/chosen": -1.9777361154556274, |
|
"logits/rejected": -2.40352201461792, |
|
"logps/chosen": -445.56231689453125, |
|
"logps/rejected": -186.0106201171875, |
|
"loss": 0.3198, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8660315275192261, |
|
"rewards/margins": 2.2276761531829834, |
|
"rewards/rejected": -1.3616446256637573, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.6682834833589366, |
|
"learning_rate": 4.122427370329451e-09, |
|
"logits/chosen": -1.9932342767715454, |
|
"logits/rejected": -2.1058011054992676, |
|
"logps/chosen": -290.7546691894531, |
|
"logps/rejected": -160.9483642578125, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7041120529174805, |
|
"rewards/margins": 1.5494294166564941, |
|
"rewards/rejected": -0.8453173637390137, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.4402456764277964, |
|
"learning_rate": 3.862452148243622e-09, |
|
"logits/chosen": -1.8214092254638672, |
|
"logits/rejected": -2.0429835319519043, |
|
"logps/chosen": -294.1298828125, |
|
"logps/rejected": -129.96456909179688, |
|
"loss": 0.3295, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3714965283870697, |
|
"rewards/margins": 1.1992201805114746, |
|
"rewards/rejected": -0.8277237415313721, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.34982397767895, |
|
"learning_rate": 3.6107822440257675e-09, |
|
"logits/chosen": -2.0181312561035156, |
|
"logits/rejected": -2.2349228858947754, |
|
"logps/chosen": -374.7834167480469, |
|
"logps/rejected": -183.97332763671875, |
|
"loss": 0.3781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7463111281394958, |
|
"rewards/margins": 1.571717381477356, |
|
"rewards/rejected": -0.8254062533378601, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 3.505631357350379, |
|
"learning_rate": 3.3674393994260863e-09, |
|
"logits/chosen": -1.9698312282562256, |
|
"logits/rejected": -2.2149014472961426, |
|
"logps/chosen": -351.15777587890625, |
|
"logps/rejected": -143.33502197265625, |
|
"loss": 0.4017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6873552799224854, |
|
"rewards/margins": 1.7417612075805664, |
|
"rewards/rejected": -1.0544058084487915, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 2.45112122987896, |
|
"learning_rate": 3.132444636820575e-09, |
|
"logits/chosen": -1.9869520664215088, |
|
"logits/rejected": -2.1476073265075684, |
|
"logps/chosen": -330.85552978515625, |
|
"logps/rejected": -184.3342742919922, |
|
"loss": 0.3518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6776991486549377, |
|
"rewards/margins": 1.1076922416687012, |
|
"rewards/rejected": -0.4299930930137634, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 2.18016599869388, |
|
"learning_rate": 2.9058182573947987e-09, |
|
"logits/chosen": -1.9990299940109253, |
|
"logits/rejected": -2.189465284347534, |
|
"logps/chosen": -263.61297607421875, |
|
"logps/rejected": -127.04591369628906, |
|
"loss": 0.3608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4782324433326721, |
|
"rewards/margins": 1.0925434827804565, |
|
"rewards/rejected": -0.6143109798431396, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 5.079415618213185, |
|
"learning_rate": 2.687579839390153e-09, |
|
"logits/chosen": -2.04642915725708, |
|
"logits/rejected": -2.0504298210144043, |
|
"logps/chosen": -368.04278564453125, |
|
"logps/rejected": -330.5069580078125, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3292311429977417, |
|
"rewards/margins": 0.7461775541305542, |
|
"rewards/rejected": -0.4169463515281677, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 2.4400731985711492, |
|
"learning_rate": 2.477748236412469e-09, |
|
"logits/chosen": -1.8668960332870483, |
|
"logits/rejected": -2.124622106552124, |
|
"logps/chosen": -311.70550537109375, |
|
"logps/rejected": -141.21267700195312, |
|
"loss": 0.3444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6067173480987549, |
|
"rewards/margins": 1.432044267654419, |
|
"rewards/rejected": -0.8253269195556641, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 2.1709665403473495, |
|
"learning_rate": 2.2763415758032314e-09, |
|
"logits/chosen": -2.0512101650238037, |
|
"logits/rejected": -2.173598051071167, |
|
"logps/chosen": -420.999267578125, |
|
"logps/rejected": -267.1089172363281, |
|
"loss": 0.373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9176816940307617, |
|
"rewards/margins": 1.998265266418457, |
|
"rewards/rejected": -1.0805835723876953, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 2.434479209137729, |
|
"learning_rate": 2.0833772570736373e-09, |
|
"logits/chosen": -1.9979689121246338, |
|
"logits/rejected": -2.113584518432617, |
|
"logps/chosen": -347.56341552734375, |
|
"logps/rejected": -191.31202697753906, |
|
"loss": 0.381, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.769294023513794, |
|
"rewards/margins": 1.7911299467086792, |
|
"rewards/rejected": -1.0218359231948853, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 2.3107985454840714, |
|
"learning_rate": 1.898871950401337e-09, |
|
"logits/chosen": -1.9912981986999512, |
|
"logits/rejected": -2.03279709815979, |
|
"logps/chosen": -330.6841735839844, |
|
"logps/rejected": -269.5788879394531, |
|
"loss": 0.3441, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5533915162086487, |
|
"rewards/margins": 1.3993961811065674, |
|
"rewards/rejected": -0.8460047245025635, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 2.1169045580497508, |
|
"learning_rate": 1.7228415951904163e-09, |
|
"logits/chosen": -1.9560718536376953, |
|
"logits/rejected": -2.2123360633850098, |
|
"logps/chosen": -327.4252014160156, |
|
"logps/rejected": -157.6983184814453, |
|
"loss": 0.3534, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8153419494628906, |
|
"rewards/margins": 1.6094058752059937, |
|
"rewards/rejected": -0.7940639853477478, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 2.0334452969637873, |
|
"learning_rate": 1.5553013986942642e-09, |
|
"logits/chosen": -2.0031869411468506, |
|
"logits/rejected": -2.055514335632324, |
|
"logps/chosen": -245.74737548828125, |
|
"logps/rejected": -185.9472198486328, |
|
"loss": 0.3302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3879874646663666, |
|
"rewards/margins": 0.9499048590660095, |
|
"rewards/rejected": -0.5619173645973206, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 2.361728211596182, |
|
"learning_rate": 1.3962658347019819e-09, |
|
"logits/chosen": -2.008934497833252, |
|
"logits/rejected": -2.1810312271118164, |
|
"logps/chosen": -275.261962890625, |
|
"logps/rejected": -139.23110961914062, |
|
"loss": 0.3062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5827020406723022, |
|
"rewards/margins": 1.514220952987671, |
|
"rewards/rejected": -0.9315187335014343, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 2.4538771688499867, |
|
"learning_rate": 1.2457486422878138e-09, |
|
"logits/chosen": -1.9547114372253418, |
|
"logits/rejected": -2.1434385776519775, |
|
"logps/chosen": -314.6387023925781, |
|
"logps/rejected": -157.8639678955078, |
|
"loss": 0.3439, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2895660400390625, |
|
"rewards/margins": 0.775239109992981, |
|
"rewards/rejected": -0.48567306995391846, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.783407147587096, |
|
"learning_rate": 1.1037628246243769e-09, |
|
"logits/chosen": -1.960133671760559, |
|
"logits/rejected": -2.021034002304077, |
|
"logps/chosen": -156.89002990722656, |
|
"logps/rejected": -101.70440673828125, |
|
"loss": 0.3248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2527593970298767, |
|
"rewards/margins": 0.6134127378463745, |
|
"rewards/rejected": -0.3606533706188202, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.9545553326476837, |
|
"learning_rate": 9.70320647859213e-10, |
|
"logits/chosen": -1.9362705945968628, |
|
"logits/rejected": -2.137017250061035, |
|
"logps/chosen": -236.40830993652344, |
|
"logps/rejected": -113.88504791259766, |
|
"loss": 0.2863, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.44171327352523804, |
|
"rewards/margins": 1.3618862628936768, |
|
"rewards/rejected": -0.9201730489730835, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 2.611315971432463, |
|
"learning_rate": 8.454336400552154e-10, |
|
"logits/chosen": -1.94658625125885, |
|
"logits/rejected": -2.1128108501434326, |
|
"logps/chosen": -318.1528625488281, |
|
"logps/rejected": -129.91864013671875, |
|
"loss": 0.3379, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.557726263999939, |
|
"rewards/margins": 1.4944050312042236, |
|
"rewards/rejected": -0.9366787672042847, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.982035369334317, |
|
"learning_rate": 7.291125901946027e-10, |
|
"logits/chosen": -1.9644968509674072, |
|
"logits/rejected": -2.26253604888916, |
|
"logps/chosen": -368.5287780761719, |
|
"logps/rejected": -166.108154296875, |
|
"loss": 0.281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8076134324073792, |
|
"rewards/margins": 1.7294673919677734, |
|
"rewards/rejected": -0.9218538999557495, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 2.52608570768576, |
|
"learning_rate": 6.21367547246976e-10, |
|
"logits/chosen": -1.9160689115524292, |
|
"logits/rejected": -2.1490960121154785, |
|
"logps/chosen": -239.294189453125, |
|
"logps/rejected": -85.73963165283203, |
|
"loss": 0.3805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7477155923843384, |
|
"rewards/margins": 1.5674340724945068, |
|
"rewards/rejected": -0.819718599319458, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 2.319706584271849, |
|
"learning_rate": 5.222078193011125e-10, |
|
"logits/chosen": -1.9619083404541016, |
|
"logits/rejected": -2.0621178150177, |
|
"logps/chosen": -207.6097869873047, |
|
"logps/rejected": -122.38450622558594, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3802430033683777, |
|
"rewards/margins": 0.986553966999054, |
|
"rewards/rejected": -0.6063109040260315, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 2.3736151183896417, |
|
"learning_rate": 4.3164197276084336e-10, |
|
"logits/chosen": -1.9763154983520508, |
|
"logits/rejected": -2.198726177215576, |
|
"logps/chosen": -359.36981201171875, |
|
"logps/rejected": -161.1787109375, |
|
"loss": 0.3248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6110528707504272, |
|
"rewards/margins": 1.4726297855377197, |
|
"rewards/rejected": -0.861577033996582, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.828908050044205, |
|
"learning_rate": 3.4967783160507747e-10, |
|
"logits/chosen": -1.923030972480774, |
|
"logits/rejected": -2.0622808933258057, |
|
"logps/chosen": -292.14422607421875, |
|
"logps/rejected": -191.8591766357422, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6675036549568176, |
|
"rewards/margins": 1.3652247190475464, |
|
"rewards/rejected": -0.6977209448814392, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 2.6272657192149906, |
|
"learning_rate": 2.7632247671177665e-10, |
|
"logits/chosen": -1.8597277402877808, |
|
"logits/rejected": -2.125450849533081, |
|
"logps/chosen": -281.3786926269531, |
|
"logps/rejected": -157.38714599609375, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2373468577861786, |
|
"rewards/margins": 0.5166945457458496, |
|
"rewards/rejected": -0.27934765815734863, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -1.929107666015625, |
|
"eval_logits/rejected": -2.14176344871521, |
|
"eval_logps/chosen": -316.05657958984375, |
|
"eval_logps/rejected": -144.36167907714844, |
|
"eval_loss": 0.3238605260848999, |
|
"eval_rewards/accuracies": 0.9437500238418579, |
|
"eval_rewards/chosen": 0.6506881713867188, |
|
"eval_rewards/margins": 1.5041240453720093, |
|
"eval_rewards/rejected": -0.8534358739852905, |
|
"eval_runtime": 187.279, |
|
"eval_samples_per_second": 6.797, |
|
"eval_steps_per_second": 0.854, |
|
"step": 350 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 358, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"total_flos": 33870165049344.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|
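Reader's note (not part of the original log): the record above is the raw trainer state emitted during this DPO run, with training entries logged every step and eval entries every 50 steps. As an aid only, the short Python sketch below shows one way to pull those periodic eval records out of the log and print the eval_loss / eval_rewards trajectory. It assumes the file has been restored to plain JSON (the pipe wrapping used in this rendering is a storage artifact) and is saved locally as trainer_state.json; both the path and the use of the standard "log_history" field are assumptions about the surrounding setup, not part of the log itself.

# Reader's sketch, not part of the original training artifacts.
# Assumptions: plain-JSON copy of this log saved as trainer_state.json (placeholder path).
import json

with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

records = state.get("log_history", [])

# Per-step training records carry "loss"; periodic eval records carry "eval_loss".
train = [(r["step"], r["loss"]) for r in records if "loss" in r]
evals = [(r["step"], r["eval_loss"], r["eval_rewards/margins"], r["eval_rewards/accuracies"])
         for r in records if "eval_loss" in r]

print(f"{len(train)} training log entries, {len(evals)} eval entries")
for step, loss, margin, acc in evals:
    print(f"step {step:>4}: eval_loss={loss:.4f}  "
          f"eval_rewards/margins={margin:.4f}  eval_rewards/accuracies={acc:.4f}")

Run against this file, the eval rows would reproduce the checkpoints logged above in this section: steps 250, 300, and 350, with eval_loss falling from roughly 0.3297 to 0.3247 to 0.3239, and eval_rewards/accuracies rising from about 0.931 to 0.944.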