|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 2430, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 2.05761316872428e-09, |
|
"logits/chosen": -0.12849420309066772, |
|
"logits/rejected": 0.32615596055984497, |
|
"logps/chosen": -277.55615234375, |
|
"logps/rejected": -196.8867950439453, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 2.0576131687242796e-08, |
|
"logits/chosen": -0.10881485790014267, |
|
"logits/rejected": -0.0043433839455246925, |
|
"logps/chosen": -201.5467529296875, |
|
"logps/rejected": -227.90283203125, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.00041578023228794336, |
|
"rewards/margins": -0.0005395316984504461, |
|
"rewards/margins_max": 0.0015237904153764248, |
|
"rewards/margins_min": -0.002602853812277317, |
|
"rewards/margins_std": 0.0029179779812693596, |
|
"rewards/rejected": 0.00012375140795484185, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 4.115226337448559e-08, |
|
"logits/chosen": -0.09926486760377884, |
|
"logits/rejected": 0.07927028834819794, |
|
"logps/chosen": -214.200927734375, |
|
"logps/rejected": -222.8786163330078, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.019688589731231e-05, |
|
"rewards/margins": 0.00048257355228997767, |
|
"rewards/margins_max": 0.003266632091253996, |
|
"rewards/margins_min": -0.0023014850448817015, |
|
"rewards/margins_std": 0.003937253262847662, |
|
"rewards/rejected": -0.0005427704309113324, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 6.172839506172839e-08, |
|
"logits/chosen": -0.09111969918012619, |
|
"logits/rejected": 0.19229279458522797, |
|
"logps/chosen": -243.2583465576172, |
|
"logps/rejected": -227.18716430664062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0003833642113022506, |
|
"rewards/margins": 0.0004754146502818912, |
|
"rewards/margins_max": 0.003324592486023903, |
|
"rewards/margins_min": -0.0023737631272524595, |
|
"rewards/margins_std": 0.004029345698654652, |
|
"rewards/rejected": -9.20505408430472e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 8.230452674897118e-08, |
|
"logits/chosen": -0.19453440606594086, |
|
"logits/rejected": 0.028327126055955887, |
|
"logps/chosen": -221.5961151123047, |
|
"logps/rejected": -247.38626098632812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0006131277186796069, |
|
"rewards/margins": -0.00020894096815027297, |
|
"rewards/margins_max": 0.0022117348853498697, |
|
"rewards/margins_min": -0.0026296167634427547, |
|
"rewards/margins_std": 0.0034233524929732084, |
|
"rewards/rejected": -0.0004041867796331644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.02880658436214e-07, |
|
"logits/chosen": -0.15002524852752686, |
|
"logits/rejected": 0.0392913818359375, |
|
"logps/chosen": -179.68746948242188, |
|
"logps/rejected": -196.04762268066406, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0005380930379033089, |
|
"rewards/margins": -0.0005233940901234746, |
|
"rewards/margins_max": 0.0024810037575662136, |
|
"rewards/margins_min": -0.0035277921706438065, |
|
"rewards/margins_std": 0.004248860292136669, |
|
"rewards/rejected": -1.4698971426696517e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.2345679012345677e-07, |
|
"logits/chosen": -0.10200711339712143, |
|
"logits/rejected": 0.0844399482011795, |
|
"logps/chosen": -211.0288543701172, |
|
"logps/rejected": -226.002685546875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0015867957845330238, |
|
"rewards/margins": -0.0001534456678200513, |
|
"rewards/margins_max": 0.003357082139700651, |
|
"rewards/margins_min": -0.003663973417133093, |
|
"rewards/margins_std": 0.004964635707437992, |
|
"rewards/rejected": -0.0014333500294014812, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.4403292181069958e-07, |
|
"logits/chosen": -0.07794054597616196, |
|
"logits/rejected": 0.18598364293575287, |
|
"logps/chosen": -198.8175048828125, |
|
"logps/rejected": -218.30746459960938, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0009691319428384304, |
|
"rewards/margins": 0.000832684978377074, |
|
"rewards/margins_max": 0.002835240215063095, |
|
"rewards/margins_min": -0.001169870374724269, |
|
"rewards/margins_std": 0.0028320408891886473, |
|
"rewards/rejected": -0.0018018169794231653, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 1.6460905349794237e-07, |
|
"logits/chosen": -0.12472915649414062, |
|
"logits/rejected": 0.1473190039396286, |
|
"logps/chosen": -239.3271942138672, |
|
"logps/rejected": -240.5177459716797, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00019421194156166166, |
|
"rewards/margins": 0.002306972863152623, |
|
"rewards/margins_max": 0.0059133050963282585, |
|
"rewards/margins_min": -0.0012993593700230122, |
|
"rewards/margins_std": 0.005100123584270477, |
|
"rewards/rejected": -0.0021127606742084026, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -0.10487590730190277, |
|
"logits/rejected": 0.12480039894580841, |
|
"logps/chosen": -207.2847137451172, |
|
"logps/rejected": -217.23159790039062, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0011818298371508718, |
|
"rewards/margins": 0.00026366618112660944, |
|
"rewards/margins_max": 0.003499214071780443, |
|
"rewards/margins_min": -0.002971881767734885, |
|
"rewards/margins_std": 0.004575755912810564, |
|
"rewards/rejected": -0.0014454961055889726, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 2.05761316872428e-07, |
|
"logits/chosen": -0.13099896907806396, |
|
"logits/rejected": 0.1086927056312561, |
|
"logps/chosen": -225.9371795654297, |
|
"logps/rejected": -218.25045776367188, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0010145825799554586, |
|
"rewards/margins": 0.0017641137819737196, |
|
"rewards/margins_max": 0.004758741240948439, |
|
"rewards/margins_min": -0.0012305134441703558, |
|
"rewards/margins_std": 0.004235042724758387, |
|
"rewards/rejected": -0.002778696594759822, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 2.2633744855967078e-07, |
|
"logits/chosen": -0.05056118965148926, |
|
"logits/rejected": 0.2104618102312088, |
|
"logps/chosen": -216.330078125, |
|
"logps/rejected": -227.7096405029297, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0010366869391873479, |
|
"rewards/margins": 0.001504565472714603, |
|
"rewards/margins_max": 0.004530596546828747, |
|
"rewards/margins_min": -0.0015214652521535754, |
|
"rewards/margins_std": 0.004279454238712788, |
|
"rewards/rejected": -0.002541252411901951, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 2.4691358024691354e-07, |
|
"logits/chosen": -0.06995914876461029, |
|
"logits/rejected": 0.1886831820011139, |
|
"logps/chosen": -231.55233764648438, |
|
"logps/rejected": -236.476806640625, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0009716992499306798, |
|
"rewards/margins": 0.0028838925063610077, |
|
"rewards/margins_max": 0.005884943995624781, |
|
"rewards/margins_min": -0.00011715893924701959, |
|
"rewards/margins_std": 0.004244127310812473, |
|
"rewards/rejected": -0.003855592105537653, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 2.6748971193415635e-07, |
|
"logits/chosen": -0.1631317138671875, |
|
"logits/rejected": 0.07130730152130127, |
|
"logps/chosen": -209.4818115234375, |
|
"logps/rejected": -209.85134887695312, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0010341443121433258, |
|
"rewards/margins": 0.0034868132788687944, |
|
"rewards/margins_max": 0.0071867769584059715, |
|
"rewards/margins_min": -0.0002131500223185867, |
|
"rewards/margins_std": 0.005232538096606731, |
|
"rewards/rejected": -0.004520958289504051, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 2.8806584362139917e-07, |
|
"logits/chosen": -0.09385956078767776, |
|
"logits/rejected": 0.1125023365020752, |
|
"logps/chosen": -211.3016357421875, |
|
"logps/rejected": -221.1059112548828, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0018074193503707647, |
|
"rewards/margins": 0.0037975527811795473, |
|
"rewards/margins_max": 0.006774452514946461, |
|
"rewards/margins_min": 0.0008206538623198867, |
|
"rewards/margins_std": 0.004209971055388451, |
|
"rewards/rejected": -0.005604972131550312, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 3.086419753086419e-07, |
|
"logits/chosen": -0.15894190967082977, |
|
"logits/rejected": 0.14013248682022095, |
|
"logps/chosen": -206.8115234375, |
|
"logps/rejected": -206.34732055664062, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.001109520555473864, |
|
"rewards/margins": 0.004203209187835455, |
|
"rewards/margins_max": 0.0073582506738603115, |
|
"rewards/margins_min": 0.001048167236149311, |
|
"rewards/margins_std": 0.004461902659386396, |
|
"rewards/rejected": -0.0053127300925552845, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 3.2921810699588474e-07, |
|
"logits/chosen": -0.16771957278251648, |
|
"logits/rejected": 0.03995511680841446, |
|
"logps/chosen": -192.62110900878906, |
|
"logps/rejected": -207.176025390625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0027189915999770164, |
|
"rewards/margins": 0.005160785745829344, |
|
"rewards/margins_max": 0.008886894211173058, |
|
"rewards/margins_min": 0.0014346761163324118, |
|
"rewards/margins_std": 0.00526951439678669, |
|
"rewards/rejected": -0.007879776880145073, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 3.4979423868312755e-07, |
|
"logits/chosen": -0.12725508213043213, |
|
"logits/rejected": 0.13450825214385986, |
|
"logps/chosen": -229.01223754882812, |
|
"logps/rejected": -225.0189971923828, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0019386851927265525, |
|
"rewards/margins": 0.0065582552924752235, |
|
"rewards/margins_max": 0.010505530051887035, |
|
"rewards/margins_min": 0.0026109800674021244, |
|
"rewards/margins_std": 0.005582289770245552, |
|
"rewards/rejected": -0.00849694013595581, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -0.11525268852710724, |
|
"logits/rejected": 0.029247064143419266, |
|
"logps/chosen": -222.8477325439453, |
|
"logps/rejected": -272.62139892578125, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.002625895431265235, |
|
"rewards/margins": 0.0063320668414235115, |
|
"rewards/margins_max": 0.010786894708871841, |
|
"rewards/margins_min": 0.0018772392068058252, |
|
"rewards/margins_std": 0.006300077773630619, |
|
"rewards/rejected": -0.008957963436841965, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.909465020576131e-07, |
|
"logits/chosen": -0.09508004784584045, |
|
"logits/rejected": 0.12103313207626343, |
|
"logps/chosen": -209.8905792236328, |
|
"logps/rejected": -215.1144561767578, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.00340329110622406, |
|
"rewards/margins": 0.0074819354340434074, |
|
"rewards/margins_max": 0.01133053284138441, |
|
"rewards/margins_min": 0.003633336629718542, |
|
"rewards/margins_std": 0.0054427399300038815, |
|
"rewards/rejected": -0.010885225608944893, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 4.11522633744856e-07, |
|
"logits/chosen": -0.13388411700725555, |
|
"logits/rejected": 0.08588583767414093, |
|
"logps/chosen": -222.95236206054688, |
|
"logps/rejected": -217.00717163085938, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.005303654354065657, |
|
"rewards/margins": 0.0068459659814834595, |
|
"rewards/margins_max": 0.01136021874845028, |
|
"rewards/margins_min": 0.002331711584702134, |
|
"rewards/margins_std": 0.006384119391441345, |
|
"rewards/rejected": -0.012149619869887829, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 4.320987654320987e-07, |
|
"logits/chosen": -0.04203199967741966, |
|
"logits/rejected": 0.1425343006849289, |
|
"logps/chosen": -206.8761444091797, |
|
"logps/rejected": -228.0805206298828, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.004312532022595406, |
|
"rewards/margins": 0.008573906496167183, |
|
"rewards/margins_max": 0.013535317964851856, |
|
"rewards/margins_min": 0.003612496657297015, |
|
"rewards/margins_std": 0.00701649347320199, |
|
"rewards/rejected": -0.012886440381407738, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 4.5267489711934156e-07, |
|
"logits/chosen": -0.09616607427597046, |
|
"logits/rejected": 0.13670727610588074, |
|
"logps/chosen": -203.38113403320312, |
|
"logps/rejected": -206.166259765625, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.003925122786313295, |
|
"rewards/margins": 0.010814773850142956, |
|
"rewards/margins_max": 0.01591581106185913, |
|
"rewards/margins_min": 0.005713737104088068, |
|
"rewards/margins_std": 0.007213953882455826, |
|
"rewards/rejected": -0.01473989523947239, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 4.732510288065844e-07, |
|
"logits/chosen": -0.045470915734767914, |
|
"logits/rejected": 0.10779553651809692, |
|
"logps/chosen": -189.78988647460938, |
|
"logps/rejected": -224.5202178955078, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.0036609836388379335, |
|
"rewards/margins": 0.011346762999892235, |
|
"rewards/margins_max": 0.018040811643004417, |
|
"rewards/margins_min": 0.004652712494134903, |
|
"rewards/margins_std": 0.009466813877224922, |
|
"rewards/rejected": -0.015007746405899525, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 4.938271604938271e-07, |
|
"logits/chosen": -0.19475580751895905, |
|
"logits/rejected": 0.12157033383846283, |
|
"logps/chosen": -208.2560577392578, |
|
"logps/rejected": -196.8409881591797, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.005628727376461029, |
|
"rewards/margins": 0.012200703844428062, |
|
"rewards/margins_max": 0.01744863949716091, |
|
"rewards/margins_min": 0.0069527653977274895, |
|
"rewards/margins_std": 0.007421704940497875, |
|
"rewards/rejected": -0.01782943308353424, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 4.999873612357511e-07, |
|
"logits/chosen": -0.2264724224805832, |
|
"logits/rejected": 0.03944239020347595, |
|
"logps/chosen": -222.04580688476562, |
|
"logps/rejected": -220.2604522705078, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.00675012543797493, |
|
"rewards/margins": 0.013585137203335762, |
|
"rewards/margins_max": 0.020951781421899796, |
|
"rewards/margins_min": 0.006218491587787867, |
|
"rewards/margins_std": 0.010418008081614971, |
|
"rewards/rejected": -0.020335260778665543, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 4.999254601606523e-07, |
|
"logits/chosen": -0.11498390138149261, |
|
"logits/rejected": 0.15362046658992767, |
|
"logps/chosen": -192.99081420898438, |
|
"logps/rejected": -203.53524780273438, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.005103799514472485, |
|
"rewards/margins": 0.015132298693060875, |
|
"rewards/margins_max": 0.02228725515305996, |
|
"rewards/margins_min": 0.007977343164384365, |
|
"rewards/margins_std": 0.0101186353713274, |
|
"rewards/rejected": -0.020236099138855934, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.998119881260575e-07, |
|
"logits/chosen": -0.19641172885894775, |
|
"logits/rejected": 0.07705807685852051, |
|
"logps/chosen": -220.6441650390625, |
|
"logps/rejected": -218.833984375, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0068857064470648766, |
|
"rewards/margins": 0.017990007996559143, |
|
"rewards/margins_max": 0.02593587338924408, |
|
"rewards/margins_min": 0.010044138878583908, |
|
"rewards/margins_std": 0.011237152852118015, |
|
"rewards/rejected": -0.024875711649656296, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 4.996469685463948e-07, |
|
"logits/chosen": -0.17675986886024475, |
|
"logits/rejected": 0.053129892796278, |
|
"logps/chosen": -230.238525390625, |
|
"logps/rejected": -226.4733123779297, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.008551741018891335, |
|
"rewards/margins": 0.017831740900874138, |
|
"rewards/margins_max": 0.025221537798643112, |
|
"rewards/margins_min": 0.010441945865750313, |
|
"rewards/margins_std": 0.010450749658048153, |
|
"rewards/rejected": -0.02638348378241062, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 4.994304354726891e-07, |
|
"logits/chosen": -0.11211202293634415, |
|
"logits/rejected": 0.17212675511837006, |
|
"logps/chosen": -240.7134552001953, |
|
"logps/rejected": -235.8301239013672, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.008315947838127613, |
|
"rewards/margins": 0.018191199749708176, |
|
"rewards/margins_max": 0.024264231324195862, |
|
"rewards/margins_min": 0.012118167243897915, |
|
"rewards/margins_std": 0.008588564582169056, |
|
"rewards/rejected": -0.026507148519158363, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 4.991624335855357e-07, |
|
"logits/chosen": -0.16157573461532593, |
|
"logits/rejected": -0.003342109965160489, |
|
"logps/chosen": -192.2877655029297, |
|
"logps/rejected": -198.39122009277344, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.00918472371995449, |
|
"rewards/margins": 0.0205369982868433, |
|
"rewards/margins_max": 0.028297582641243935, |
|
"rewards/margins_min": 0.012776409275829792, |
|
"rewards/margins_std": 0.010975128039717674, |
|
"rewards/rejected": -0.029721718281507492, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.988430181858809e-07, |
|
"logits/chosen": -0.20107969641685486, |
|
"logits/rejected": 0.04424827918410301, |
|
"logps/chosen": -200.2167510986328, |
|
"logps/rejected": -193.597900390625, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.009901036508381367, |
|
"rewards/margins": 0.02123742178082466, |
|
"rewards/margins_max": 0.030068615451455116, |
|
"rewards/margins_min": 0.01240622065961361, |
|
"rewards/margins_std": 0.012489198707044125, |
|
"rewards/rejected": -0.031138455495238304, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 4.984722551836112e-07, |
|
"logits/chosen": -0.08247671276330948, |
|
"logits/rejected": 0.09890026599168777, |
|
"logps/chosen": -214.65396118164062, |
|
"logps/rejected": -244.7481231689453, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.01232508011162281, |
|
"rewards/margins": 0.022393036633729935, |
|
"rewards/margins_max": 0.032050006091594696, |
|
"rewards/margins_min": 0.012736069969832897, |
|
"rewards/margins_std": 0.01365701388567686, |
|
"rewards/rejected": -0.034718118607997894, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 4.980502210839523e-07, |
|
"logits/chosen": -0.19298240542411804, |
|
"logits/rejected": 0.05112982913851738, |
|
"logps/chosen": -216.5214385986328, |
|
"logps/rejected": -205.87411499023438, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.011784660629928112, |
|
"rewards/margins": 0.02403593435883522, |
|
"rewards/margins_max": 0.035633157938718796, |
|
"rewards/margins_min": 0.012438705191016197, |
|
"rewards/margins_std": 0.016400957480072975, |
|
"rewards/rejected": -0.03582059592008591, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 4.975770029716832e-07, |
|
"logits/chosen": -0.16089969873428345, |
|
"logits/rejected": 0.06704260408878326, |
|
"logps/chosen": -198.7475128173828, |
|
"logps/rejected": -214.4049835205078, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.01207501720637083, |
|
"rewards/margins": 0.027419626712799072, |
|
"rewards/margins_max": 0.038728706538677216, |
|
"rewards/margins_min": 0.016110548749566078, |
|
"rewards/margins_std": 0.015993457287549973, |
|
"rewards/rejected": -0.03949464112520218, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.970526984931663e-07, |
|
"logits/chosen": -0.12522733211517334, |
|
"logits/rejected": 0.02256820723414421, |
|
"logps/chosen": -189.53872680664062, |
|
"logps/rejected": -237.49057006835938, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.014810247346758842, |
|
"rewards/margins": 0.031076129525899887, |
|
"rewards/margins_max": 0.04324505478143692, |
|
"rewards/margins_min": 0.018907207995653152, |
|
"rewards/margins_std": 0.017209455370903015, |
|
"rewards/rejected": -0.04588637501001358, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 4.96477415836199e-07, |
|
"logits/chosen": -0.12060017883777618, |
|
"logits/rejected": 0.15660127997398376, |
|
"logps/chosen": -207.26455688476562, |
|
"logps/rejected": -198.59371948242188, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.01682819053530693, |
|
"rewards/margins": 0.025025326758623123, |
|
"rewards/margins_max": 0.037398561835289, |
|
"rewards/margins_min": 0.012652089819312096, |
|
"rewards/margins_std": 0.017498398199677467, |
|
"rewards/rejected": -0.041853513568639755, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 4.958512737076895e-07, |
|
"logits/chosen": -0.1564178466796875, |
|
"logits/rejected": 0.09775165468454361, |
|
"logps/chosen": -199.6991424560547, |
|
"logps/rejected": -201.5712432861328, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0173592958599329, |
|
"rewards/margins": 0.029393130913376808, |
|
"rewards/margins_max": 0.04372996464371681, |
|
"rewards/margins_min": 0.015056299977004528, |
|
"rewards/margins_std": 0.020275337621569633, |
|
"rewards/rejected": -0.04675242677330971, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 4.951744013091616e-07, |
|
"logits/chosen": -0.06468039751052856, |
|
"logits/rejected": 0.11344078928232193, |
|
"logps/chosen": -202.63296508789062, |
|
"logps/rejected": -218.40939331054688, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.014664116315543652, |
|
"rewards/margins": 0.035016417503356934, |
|
"rewards/margins_max": 0.04858310893177986, |
|
"rewards/margins_min": 0.021449726074934006, |
|
"rewards/margins_std": 0.019186200574040413, |
|
"rewards/rejected": -0.04968053475022316, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 4.944469383100954e-07, |
|
"logits/chosen": -0.16045762598514557, |
|
"logits/rejected": 0.10485055297613144, |
|
"logps/chosen": -204.5330810546875, |
|
"logps/rejected": -205.8036651611328, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02098998799920082, |
|
"rewards/margins": 0.03199451044201851, |
|
"rewards/margins_max": 0.048134349286556244, |
|
"rewards/margins_min": 0.015854666009545326, |
|
"rewards/margins_std": 0.02282518334686756, |
|
"rewards/rejected": -0.05298449844121933, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.936690348191063e-07, |
|
"logits/chosen": -0.1567983776330948, |
|
"logits/rejected": 0.08766036480665207, |
|
"logps/chosen": -227.46139526367188, |
|
"logps/rejected": -232.83517456054688, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.013347646221518517, |
|
"rewards/margins": 0.03657007962465286, |
|
"rewards/margins_max": 0.05410366132855415, |
|
"rewards/margins_min": 0.01903649792075157, |
|
"rewards/margins_std": 0.024796226993203163, |
|
"rewards/rejected": -0.04991772025823593, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 4.928408513529719e-07, |
|
"logits/chosen": -0.1951916515827179, |
|
"logits/rejected": -0.047125209122896194, |
|
"logps/chosen": -199.21746826171875, |
|
"logps/rejected": -224.54483032226562, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.01675131730735302, |
|
"rewards/margins": 0.03562582656741142, |
|
"rewards/margins_max": 0.047902870923280716, |
|
"rewards/margins_min": 0.023348785936832428, |
|
"rewards/margins_std": 0.017362359911203384, |
|
"rewards/rejected": -0.05237714573740959, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.919625588035091e-07, |
|
"logits/chosen": -0.12850052118301392, |
|
"logits/rejected": 0.22448399662971497, |
|
"logps/chosen": -234.625732421875, |
|
"logps/rejected": -220.24868774414062, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.014748236164450645, |
|
"rewards/margins": 0.039627060294151306, |
|
"rewards/margins_max": 0.05790294334292412, |
|
"rewards/margins_min": 0.021351177245378494, |
|
"rewards/margins_std": 0.025846004486083984, |
|
"rewards/rejected": -0.0543752983212471, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 4.910343384023118e-07, |
|
"logits/chosen": -0.08536979556083679, |
|
"logits/rejected": 0.1372174322605133, |
|
"logps/chosen": -233.83938598632812, |
|
"logps/rejected": -246.7130584716797, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.019801601767539978, |
|
"rewards/margins": 0.03844233602285385, |
|
"rewards/margins_max": 0.053383953869342804, |
|
"rewards/margins_min": 0.0235007144510746, |
|
"rewards/margins_std": 0.02113064005970955, |
|
"rewards/rejected": -0.05824393779039383, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.900563816833543e-07, |
|
"logits/chosen": -0.01743602380156517, |
|
"logits/rejected": 0.18241354823112488, |
|
"logps/chosen": -205.63961791992188, |
|
"logps/rejected": -239.1418914794922, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.018751228228211403, |
|
"rewards/margins": 0.04704046994447708, |
|
"rewards/margins_max": 0.06214872747659683, |
|
"rewards/margins_min": 0.03193220496177673, |
|
"rewards/margins_std": 0.021366309374570847, |
|
"rewards/rejected": -0.06579168885946274, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.890288904434699e-07, |
|
"logits/chosen": -0.13453516364097595, |
|
"logits/rejected": 0.11062689125537872, |
|
"logps/chosen": -203.70687866210938, |
|
"logps/rejected": -222.5287322998047, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.020012306049466133, |
|
"rewards/margins": 0.04848041012883186, |
|
"rewards/margins_max": 0.0692964643239975, |
|
"rewards/margins_min": 0.02766435220837593, |
|
"rewards/margins_std": 0.029438350349664688, |
|
"rewards/rejected": -0.06849271804094315, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 4.8795207670071e-07, |
|
"logits/chosen": -0.14224310219287872, |
|
"logits/rejected": 0.06176813691854477, |
|
"logps/chosen": -207.9291229248047, |
|
"logps/rejected": -238.2221221923828, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02270520105957985, |
|
"rewards/margins": 0.05173413082957268, |
|
"rewards/margins_max": 0.0734233409166336, |
|
"rewards/margins_min": 0.0300449226051569, |
|
"rewards/margins_std": 0.03067317232489586, |
|
"rewards/rejected": -0.07443933188915253, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 4.868261626505958e-07, |
|
"logits/chosen": -0.09490348398685455, |
|
"logits/rejected": 0.11677880585193634, |
|
"logps/chosen": -215.65878295898438, |
|
"logps/rejected": -227.8511199951172, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.026142066344618797, |
|
"rewards/margins": 0.049620382487773895, |
|
"rewards/margins_max": 0.06977085769176483, |
|
"rewards/margins_min": 0.029469912871718407, |
|
"rewards/margins_std": 0.028497066348791122, |
|
"rewards/rejected": -0.07576245814561844, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 4.856513806202697e-07, |
|
"logits/chosen": -0.17281684279441833, |
|
"logits/rejected": 0.06971795111894608, |
|
"logps/chosen": -210.6538543701172, |
|
"logps/rejected": -217.97451782226562, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.020999779924750328, |
|
"rewards/margins": 0.05904274061322212, |
|
"rewards/margins_max": 0.08474047482013702, |
|
"rewards/margins_min": 0.03334500640630722, |
|
"rewards/margins_std": 0.036342088133096695, |
|
"rewards/rejected": -0.0800425186753273, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.844279730205544e-07, |
|
"logits/chosen": -0.10984311252832413, |
|
"logits/rejected": 0.0616462342441082, |
|
"logps/chosen": -230.95059204101562, |
|
"logps/rejected": -258.0708312988281, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.027716726064682007, |
|
"rewards/margins": 0.051834236830472946, |
|
"rewards/margins_max": 0.07494363188743591, |
|
"rewards/margins_min": 0.028724845498800278, |
|
"rewards/margins_std": 0.03268161416053772, |
|
"rewards/rejected": -0.07955096662044525, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 4.831561922959338e-07, |
|
"logits/chosen": -0.1495492160320282, |
|
"logits/rejected": 0.1394021064043045, |
|
"logps/chosen": -210.3924560546875, |
|
"logps/rejected": -208.85018920898438, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.022129181772470474, |
|
"rewards/margins": 0.060523491352796555, |
|
"rewards/margins_max": 0.08514624834060669, |
|
"rewards/margins_min": 0.03590074181556702, |
|
"rewards/margins_std": 0.034821830689907074, |
|
"rewards/rejected": -0.08265267312526703, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.818363008724618e-07, |
|
"logits/chosen": -0.1585562378168106, |
|
"logits/rejected": 0.060911018401384354, |
|
"logps/chosen": -212.550537109375, |
|
"logps/rejected": -239.1901397705078, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.031042397022247314, |
|
"rewards/margins": 0.055754829198122025, |
|
"rewards/margins_max": 0.07818542420864105, |
|
"rewards/margins_min": 0.0333242304623127, |
|
"rewards/margins_std": 0.03172165900468826, |
|
"rewards/rejected": -0.08679722249507904, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 4.804685711036113e-07, |
|
"logits/chosen": -0.16393280029296875, |
|
"logits/rejected": 0.08432348072528839, |
|
"logps/chosen": -221.3743896484375, |
|
"logps/rejected": -245.53262329101562, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.030147483572363853, |
|
"rewards/margins": 0.058864571154117584, |
|
"rewards/margins_max": 0.07852182537317276, |
|
"rewards/margins_min": 0.039207302033901215, |
|
"rewards/margins_std": 0.027799565345048904, |
|
"rewards/rejected": -0.08901204913854599, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.790532852140767e-07, |
|
"logits/chosen": -0.15655621886253357, |
|
"logits/rejected": 0.1627272069454193, |
|
"logps/chosen": -224.1035919189453, |
|
"logps/rejected": -224.27749633789062, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03070450760424137, |
|
"rewards/margins": 0.059945207089185715, |
|
"rewards/margins_max": 0.08565831184387207, |
|
"rewards/margins_min": 0.03423209488391876, |
|
"rewards/margins_std": 0.03636383265256882, |
|
"rewards/rejected": -0.09064970910549164, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.775907352415367e-07, |
|
"logits/chosen": -0.20449629426002502, |
|
"logits/rejected": 0.07528124749660492, |
|
"logps/chosen": -225.3180694580078, |
|
"logps/rejected": -227.5177001953125, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.03224950283765793, |
|
"rewards/margins": 0.06405209004878998, |
|
"rewards/margins_max": 0.09230367094278336, |
|
"rewards/margins_min": 0.0358005091547966, |
|
"rewards/margins_std": 0.039953768253326416, |
|
"rewards/rejected": -0.0963016003370285, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 4.760812229763944e-07, |
|
"logits/chosen": -0.14346732199192047, |
|
"logits/rejected": 0.15249694883823395, |
|
"logps/chosen": -230.9304656982422, |
|
"logps/rejected": -227.08364868164062, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.03136484697461128, |
|
"rewards/margins": 0.0606791190803051, |
|
"rewards/margins_max": 0.09166625887155533, |
|
"rewards/margins_min": 0.029691975563764572, |
|
"rewards/margins_std": 0.043822430074214935, |
|
"rewards/rejected": -0.09204395860433578, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 4.7452505989950455e-07, |
|
"logits/chosen": -0.14922063052654266, |
|
"logits/rejected": 0.08096525818109512, |
|
"logps/chosen": -227.1333770751953, |
|
"logps/rejected": -244.99362182617188, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.02952210046350956, |
|
"rewards/margins": 0.0689021423459053, |
|
"rewards/margins_max": 0.09765410423278809, |
|
"rewards/margins_min": 0.04015018790960312, |
|
"rewards/margins_std": 0.04066140204668045, |
|
"rewards/rejected": -0.09842424839735031, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 4.729225671179e-07, |
|
"logits/chosen": -0.16192954778671265, |
|
"logits/rejected": 0.16453325748443604, |
|
"logps/chosen": -222.29306030273438, |
|
"logps/rejected": -210.7808074951172, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0332382507622242, |
|
"rewards/margins": 0.06441639363765717, |
|
"rewards/margins_max": 0.09518542140722275, |
|
"rewards/margins_min": 0.03364737331867218, |
|
"rewards/margins_std": 0.04351397603750229, |
|
"rewards/rejected": -0.09765465557575226, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.712740752985337e-07, |
|
"logits/chosen": -0.03260333463549614, |
|
"logits/rejected": 0.15674880146980286, |
|
"logps/chosen": -225.6151580810547, |
|
"logps/rejected": -230.8135223388672, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.0472460612654686, |
|
"rewards/margins": 0.05132395029067993, |
|
"rewards/margins_max": 0.07761866599321365, |
|
"rewards/margins_min": 0.02502923086285591, |
|
"rewards/margins_std": 0.03718634322285652, |
|
"rewards/rejected": -0.09857000410556793, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 4.695799246000464e-07, |
|
"logits/chosen": -0.16538329422473907, |
|
"logits/rejected": 0.07633324712514877, |
|
"logps/chosen": -208.8240203857422, |
|
"logps/rejected": -231.292724609375, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04221474379301071, |
|
"rewards/margins": 0.07459871470928192, |
|
"rewards/margins_max": 0.10690847784280777, |
|
"rewards/margins_min": 0.04228895902633667, |
|
"rewards/margins_std": 0.04569289833307266, |
|
"rewards/rejected": -0.11681344360113144, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 4.6784046460257694e-07, |
|
"logits/chosen": -0.1475997418165207, |
|
"logits/rejected": 0.10680235922336578, |
|
"logps/chosen": -234.5234375, |
|
"logps/rejected": -233.68527221679688, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.041780535131692886, |
|
"rewards/margins": 0.059098441153764725, |
|
"rewards/margins_max": 0.08605752140283585, |
|
"rewards/margins_min": 0.0321393683552742, |
|
"rewards/margins_std": 0.03812588378787041, |
|
"rewards/rejected": -0.10087897628545761, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.660560542356278e-07, |
|
"logits/chosen": -0.14557047188282013, |
|
"logits/rejected": 0.11930598318576813, |
|
"logps/chosen": -230.1462860107422, |
|
"logps/rejected": -230.4649200439453, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.039344362914562225, |
|
"rewards/margins": 0.0685037225484848, |
|
"rewards/margins_max": 0.09414297342300415, |
|
"rewards/margins_min": 0.04286447912454605, |
|
"rewards/margins_std": 0.03625936806201935, |
|
"rewards/rejected": -0.10784808546304703, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.6422706170400175e-07, |
|
"logits/chosen": -0.18832182884216309, |
|
"logits/rejected": 0.102397121489048, |
|
"logps/chosen": -221.0863037109375, |
|
"logps/rejected": -231.1044464111328, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03650045394897461, |
|
"rewards/margins": 0.0659482330083847, |
|
"rewards/margins_max": 0.09619072079658508, |
|
"rewards/margins_min": 0.03570573776960373, |
|
"rewards/margins_std": 0.04276934266090393, |
|
"rewards/rejected": -0.10244867950677872, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 4.6235386441182434e-07, |
|
"logits/chosen": -0.12858518958091736, |
|
"logits/rejected": 0.04834365099668503, |
|
"logps/chosen": -221.7181854248047, |
|
"logps/rejected": -244.804443359375, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04021327942609787, |
|
"rewards/margins": 0.07245869934558868, |
|
"rewards/margins_max": 0.10741807520389557, |
|
"rewards/margins_min": 0.03749933838844299, |
|
"rewards/margins_std": 0.049440011382102966, |
|
"rewards/rejected": -0.11267199367284775, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 4.604368488846686e-07, |
|
"logits/chosen": -0.14893962442874908, |
|
"logits/rejected": 0.021318774670362473, |
|
"logps/chosen": -199.5926513671875, |
|
"logps/rejected": -227.07766723632812, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.041552286595106125, |
|
"rewards/margins": 0.07733511924743652, |
|
"rewards/margins_max": 0.11415763199329376, |
|
"rewards/margins_min": 0.04051261395215988, |
|
"rewards/margins_std": 0.052074890583753586, |
|
"rewards/rejected": -0.11888740956783295, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 4.58476410689797e-07, |
|
"logits/chosen": -0.10416440665721893, |
|
"logits/rejected": 0.03748173266649246, |
|
"logps/chosen": -217.5540771484375, |
|
"logps/rejected": -244.89523315429688, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.039487071335315704, |
|
"rewards/margins": 0.07235467433929443, |
|
"rewards/margins_max": 0.10398920625448227, |
|
"rewards/margins_min": 0.040720134973526, |
|
"rewards/margins_std": 0.04473799094557762, |
|
"rewards/rejected": -0.11184175312519073, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.5647295435453817e-07, |
|
"logits/chosen": -0.13231520354747772, |
|
"logits/rejected": 0.022942349314689636, |
|
"logps/chosen": -237.333740234375, |
|
"logps/rejected": -246.2041778564453, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05165981128811836, |
|
"rewards/margins": 0.06908587366342545, |
|
"rewards/margins_max": 0.10448731482028961, |
|
"rewards/margins_min": 0.033684439957141876, |
|
"rewards/margins_std": 0.05006518214941025, |
|
"rewards/rejected": -0.12074568122625351, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.544268932828144e-07, |
|
"logits/chosen": -0.17096921801567078, |
|
"logits/rejected": 0.009365534409880638, |
|
"logps/chosen": -211.588623046875, |
|
"logps/rejected": -257.00726318359375, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.040882695466279984, |
|
"rewards/margins": 0.081394262611866, |
|
"rewards/margins_max": 0.11418579518795013, |
|
"rewards/margins_min": 0.048602718859910965, |
|
"rewards/margins_std": 0.04637424275279045, |
|
"rewards/rejected": -0.12227696180343628, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 4.523386496698376e-07, |
|
"logits/chosen": -0.22352655231952667, |
|
"logits/rejected": 0.08650527149438858, |
|
"logps/chosen": -225.86953735351562, |
|
"logps/rejected": -221.8078155517578, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.04626091942191124, |
|
"rewards/margins": 0.07565927505493164, |
|
"rewards/margins_max": 0.10747319459915161, |
|
"rewards/margins_min": 0.043845366686582565, |
|
"rewards/margins_std": 0.044991664588451385, |
|
"rewards/rejected": -0.12192019075155258, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 4.502086544149918e-07, |
|
"logits/chosen": -0.1821189820766449, |
|
"logits/rejected": 0.08161283284425735, |
|
"logps/chosen": -209.0590057373047, |
|
"logps/rejected": -238.8436737060547, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04758086055517197, |
|
"rewards/margins": 0.08119155466556549, |
|
"rewards/margins_max": 0.11566410213708878, |
|
"rewards/margins_min": 0.0467190146446228, |
|
"rewards/margins_std": 0.04875154048204422, |
|
"rewards/rejected": -0.12877242267131805, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 4.4803734703291845e-07, |
|
"logits/chosen": -0.17991140484809875, |
|
"logits/rejected": 0.10490355640649796, |
|
"logps/chosen": -228.5984649658203, |
|
"logps/rejected": -212.1452178955078, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.043927937746047974, |
|
"rewards/margins": 0.07159805297851562, |
|
"rewards/margins_max": 0.10323642194271088, |
|
"rewards/margins_min": 0.039959684014320374, |
|
"rewards/margins_std": 0.044743407517671585, |
|
"rewards/rejected": -0.1155259981751442, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.4582517556282474e-07, |
|
"logits/chosen": -0.18320028483867645, |
|
"logits/rejected": 0.013312360271811485, |
|
"logps/chosen": -206.8184814453125, |
|
"logps/rejected": -252.7014923095703, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04943935200572014, |
|
"rewards/margins": 0.08512347936630249, |
|
"rewards/margins_max": 0.12365541607141495, |
|
"rewards/margins_min": 0.04659154266119003, |
|
"rewards/margins_std": 0.054492391645908356, |
|
"rewards/rejected": -0.13456283509731293, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 4.435725964760331e-07, |
|
"logits/chosen": -0.1689848005771637, |
|
"logits/rejected": 0.07571324706077576, |
|
"logps/chosen": -207.1726531982422, |
|
"logps/rejected": -228.133056640625, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04197344928979874, |
|
"rewards/margins": 0.07342037558555603, |
|
"rewards/margins_max": 0.10903932899236679, |
|
"rewards/margins_min": 0.037801433354616165, |
|
"rewards/margins_std": 0.05037280172109604, |
|
"rewards/rejected": -0.11539383232593536, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 4.412800745817901e-07, |
|
"logits/chosen": -0.16182328760623932, |
|
"logits/rejected": 0.07639019191265106, |
|
"logps/chosen": -229.777587890625, |
|
"logps/rejected": -233.11746215820312, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.04751753434538841, |
|
"rewards/margins": 0.07238186150789261, |
|
"rewards/margins_max": 0.10116531699895859, |
|
"rewards/margins_min": 0.04359840601682663, |
|
"rewards/margins_std": 0.040705952793359756, |
|
"rewards/rejected": -0.11989939212799072, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 4.3894808293135526e-07, |
|
"logits/chosen": -0.11428213119506836, |
|
"logits/rejected": 0.13538585603237152, |
|
"logps/chosen": -223.481689453125, |
|
"logps/rejected": -238.7801055908203, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04576458781957626, |
|
"rewards/margins": 0.08550850301980972, |
|
"rewards/margins_max": 0.11636098474264145, |
|
"rewards/margins_min": 0.0546560175716877, |
|
"rewards/margins_std": 0.04363200441002846, |
|
"rewards/rejected": -0.131273090839386, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 4.365771027203896e-07, |
|
"logits/chosen": -0.14200684428215027, |
|
"logits/rejected": 0.09855206310749054, |
|
"logps/chosen": -213.05581665039062, |
|
"logps/rejected": -232.3656463623047, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.03935530036687851, |
|
"rewards/margins": 0.09331099689006805, |
|
"rewards/margins_max": 0.1294441670179367, |
|
"rewards/margins_min": 0.0571778230369091, |
|
"rewards/margins_std": 0.051100023090839386, |
|
"rewards/rejected": -0.13266630470752716, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.3416762318966236e-07, |
|
"logits/chosen": -0.10148487240076065, |
|
"logits/rejected": 0.1706521213054657, |
|
"logps/chosen": -225.71914672851562, |
|
"logps/rejected": -217.4258270263672, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.04290129616856575, |
|
"rewards/margins": 0.07890793681144714, |
|
"rewards/margins_max": 0.10971459001302719, |
|
"rewards/margins_min": 0.0481012761592865, |
|
"rewards/margins_std": 0.04356719180941582, |
|
"rewards/rejected": -0.12180924415588379, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 4.317201415240992e-07, |
|
"logits/chosen": -0.049225617200136185, |
|
"logits/rejected": 0.1614571064710617, |
|
"logps/chosen": -219.39144897460938, |
|
"logps/rejected": -221.80313110351562, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.06035063415765762, |
|
"rewards/margins": 0.07851085066795349, |
|
"rewards/margins_max": 0.12034505605697632, |
|
"rewards/margins_min": 0.03667663782835007, |
|
"rewards/margins_std": 0.05916251987218857, |
|
"rewards/rejected": -0.13886147737503052, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 4.2923516275018974e-07, |
|
"logits/chosen": -0.14591281116008759, |
|
"logits/rejected": 0.050023000687360764, |
|
"logps/chosen": -237.15017700195312, |
|
"logps/rejected": -258.215576171875, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05067404359579086, |
|
"rewards/margins": 0.092898890376091, |
|
"rewards/margins_max": 0.12995196878910065, |
|
"rewards/margins_min": 0.055845797061920166, |
|
"rewards/margins_std": 0.05240098387002945, |
|
"rewards/rejected": -0.14357292652130127, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 4.267131996317781e-07, |
|
"logits/chosen": -0.13919471204280853, |
|
"logits/rejected": 0.15345291793346405, |
|
"logps/chosen": -203.8855743408203, |
|
"logps/rejected": -212.9399871826172, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04519537463784218, |
|
"rewards/margins": 0.08230610936880112, |
|
"rewards/margins_max": 0.11458753049373627, |
|
"rewards/margins_min": 0.05002468824386597, |
|
"rewards/margins_std": 0.045652832835912704, |
|
"rewards/rejected": -0.1275014877319336, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 4.2415477256425634e-07, |
|
"logits/chosen": -0.17129512131214142, |
|
"logits/rejected": 0.020465224981307983, |
|
"logps/chosen": -208.89291381835938, |
|
"logps/rejected": -218.3040771484375, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05368215963244438, |
|
"rewards/margins": 0.07769324630498886, |
|
"rewards/margins_max": 0.11353801190853119, |
|
"rewards/margins_min": 0.04184848070144653, |
|
"rewards/margins_std": 0.050692152231931686, |
|
"rewards/rejected": -0.13137540221214294, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 4.2156040946718343e-07, |
|
"logits/chosen": -0.10920798778533936, |
|
"logits/rejected": 0.08190996944904327, |
|
"logps/chosen": -204.00723266601562, |
|
"logps/rejected": -246.7364501953125, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04529748111963272, |
|
"rewards/margins": 0.08185549080371857, |
|
"rewards/margins_max": 0.11279450356960297, |
|
"rewards/margins_min": 0.05091645568609238, |
|
"rewards/margins_std": 0.043754395097494125, |
|
"rewards/rejected": -0.1271529644727707, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.5, |
|
"learning_rate": 4.189306456753511e-07, |
|
"logits/chosen": -0.08927767723798752, |
|
"logits/rejected": 0.16780522465705872, |
|
"logps/chosen": -216.26211547851562, |
|
"logps/rejected": -223.88839721679688, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04724791273474693, |
|
"rewards/margins": 0.08486290276050568, |
|
"rewards/margins_max": 0.1154739111661911, |
|
"rewards/margins_min": 0.05425189062952995, |
|
"rewards/margins_std": 0.043290503323078156, |
|
"rewards/rejected": -0.1321108192205429, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 4.1626602382832044e-07, |
|
"logits/chosen": -0.11900673061609268, |
|
"logits/rejected": 0.10318160057067871, |
|
"logps/chosen": -221.74072265625, |
|
"logps/rejected": -248.9683837890625, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04588564485311508, |
|
"rewards/margins": 0.08740084618330002, |
|
"rewards/margins_max": 0.11989488452672958, |
|
"rewards/margins_min": 0.05490681529045105, |
|
"rewards/margins_std": 0.045953501015901566, |
|
"rewards/rejected": -0.1332865059375763, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 4.1356709375845046e-07, |
|
"logits/chosen": -0.1787930279970169, |
|
"logits/rejected": 0.05423184484243393, |
|
"logps/chosen": -203.95889282226562, |
|
"logps/rejected": -226.582763671875, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.05451052263379097, |
|
"rewards/margins": 0.10125939548015594, |
|
"rewards/margins_max": 0.14539141952991486, |
|
"rewards/margins_min": 0.057127393782138824, |
|
"rewards/margins_std": 0.0624120831489563, |
|
"rewards/rejected": -0.1557699292898178, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.1083441237744285e-07, |
|
"logits/chosen": -0.07366688549518585, |
|
"logits/rejected": 0.04007618874311447, |
|
"logps/chosen": -222.0310516357422, |
|
"logps/rejected": -271.1426696777344, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05247300863265991, |
|
"rewards/margins": 0.09073988348245621, |
|
"rewards/margins_max": 0.1382082849740982, |
|
"rewards/margins_min": 0.043271489441394806, |
|
"rewards/margins_std": 0.06713045388460159, |
|
"rewards/rejected": -0.14321288466453552, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.0806854356142597e-07, |
|
"logits/chosen": -0.11528744548559189, |
|
"logits/rejected": 0.17569738626480103, |
|
"logps/chosen": -233.05810546875, |
|
"logps/rejected": -237.28506469726562, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.058273959904909134, |
|
"rewards/margins": 0.08572045713663101, |
|
"rewards/margins_max": 0.12326614558696747, |
|
"rewards/margins_min": 0.048174768686294556, |
|
"rewards/margins_std": 0.05309762433171272, |
|
"rewards/rejected": -0.14399442076683044, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.052700580346011e-07, |
|
"logits/chosen": -0.1549403816461563, |
|
"logits/rejected": 0.08772721141576767, |
|
"logps/chosen": -223.6434783935547, |
|
"logps/rejected": -235.5021514892578, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.061686016619205475, |
|
"rewards/margins": 0.08836236596107483, |
|
"rewards/margins_max": 0.13330301642417908, |
|
"rewards/margins_min": 0.04342171922326088, |
|
"rewards/margins_std": 0.06355567276477814, |
|
"rewards/rejected": -0.1500483751296997, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 4.024395332514768e-07, |
|
"logits/chosen": -0.1552925854921341, |
|
"logits/rejected": 0.08893848955631256, |
|
"logps/chosen": -224.326171875, |
|
"logps/rejected": -226.89663696289062, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04646015912294388, |
|
"rewards/margins": 0.09568478912115097, |
|
"rewards/margins_max": 0.13710376620292664, |
|
"rewards/margins_min": 0.0542658269405365, |
|
"rewards/margins_std": 0.058575280010700226, |
|
"rewards/rejected": -0.14214494824409485, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 3.9957755327771357e-07, |
|
"logits/chosen": -0.1727294921875, |
|
"logits/rejected": -0.014213940128684044, |
|
"logps/chosen": -200.10498046875, |
|
"logps/rejected": -253.10232543945312, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.060309626162052155, |
|
"rewards/margins": 0.08810073137283325, |
|
"rewards/margins_max": 0.12239007651805878, |
|
"rewards/margins_min": 0.05381138250231743, |
|
"rewards/margins_std": 0.04849245399236679, |
|
"rewards/rejected": -0.1484103500843048, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 3.966847086696045e-07, |
|
"logits/chosen": -0.14839962124824524, |
|
"logits/rejected": 0.09455759823322296, |
|
"logps/chosen": -228.38784790039062, |
|
"logps/rejected": -245.74832153320312, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04339542239904404, |
|
"rewards/margins": 0.1028498187661171, |
|
"rewards/margins_max": 0.14537741243839264, |
|
"rewards/margins_min": 0.06032223626971245, |
|
"rewards/margins_std": 0.060143083333969116, |
|
"rewards/rejected": -0.14624525606632233, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 3.937615963522166e-07, |
|
"logits/chosen": -0.11433364450931549, |
|
"logits/rejected": 0.19467870891094208, |
|
"logps/chosen": -223.18154907226562, |
|
"logps/rejected": -213.2982940673828, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.058440931141376495, |
|
"rewards/margins": 0.08149015158414841, |
|
"rewards/margins_max": 0.12004747241735458, |
|
"rewards/margins_min": 0.04293282702565193, |
|
"rewards/margins_std": 0.054528284817934036, |
|
"rewards/rejected": -0.1399310827255249, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.5, |
|
"learning_rate": 3.9080881949621884e-07, |
|
"logits/chosen": -0.14250853657722473, |
|
"logits/rejected": 0.048256054520606995, |
|
"logps/chosen": -207.46792602539062, |
|
"logps/rejected": -238.5066680908203, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06639896333217621, |
|
"rewards/margins": 0.09544476121664047, |
|
"rewards/margins_max": 0.1366080939769745, |
|
"rewards/margins_min": 0.05428142473101616, |
|
"rewards/margins_std": 0.05821375176310539, |
|
"rewards/rejected": -0.16184373199939728, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 3.878269873934197e-07, |
|
"logits/chosen": -0.13014793395996094, |
|
"logits/rejected": 0.1661478579044342, |
|
"logps/chosen": -212.73648071289062, |
|
"logps/rejected": -206.10086059570312, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06022537872195244, |
|
"rewards/margins": 0.08250834047794342, |
|
"rewards/margins_max": 0.12295888364315033, |
|
"rewards/margins_min": 0.042057789862155914, |
|
"rewards/margins_std": 0.05720571428537369, |
|
"rewards/rejected": -0.14273372292518616, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 3.848167153310432e-07, |
|
"logits/chosen": -0.08797403424978256, |
|
"logits/rejected": 0.062249403446912766, |
|
"logps/chosen": -190.28912353515625, |
|
"logps/rejected": -227.6543426513672, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.05042362958192825, |
|
"rewards/margins": 0.09770865738391876, |
|
"rewards/margins_max": 0.13965031504631042, |
|
"rewards/margins_min": 0.055767010897397995, |
|
"rewards/margins_std": 0.05931444838643074, |
|
"rewards/rejected": -0.1481322944164276, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 3.817786244647671e-07, |
|
"logits/chosen": -0.1679493486881256, |
|
"logits/rejected": 0.10348667949438095, |
|
"logps/chosen": -212.8212432861328, |
|
"logps/rejected": -224.01748657226562, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05369574576616287, |
|
"rewards/margins": 0.09801065921783447, |
|
"rewards/margins_max": 0.14342837035655975, |
|
"rewards/margins_min": 0.052592933177948, |
|
"rewards/margins_std": 0.06423036009073257, |
|
"rewards/rejected": -0.15170639753341675, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 3.787133416905504e-07, |
|
"logits/chosen": -0.11426540464162827, |
|
"logits/rejected": 0.06826993077993393, |
|
"logps/chosen": -224.8111114501953, |
|
"logps/rejected": -261.72686767578125, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06286445260047913, |
|
"rewards/margins": 0.10180015861988068, |
|
"rewards/margins_max": 0.14011150598526, |
|
"rewards/margins_min": 0.06348879635334015, |
|
"rewards/margins_std": 0.054180435836315155, |
|
"rewards/rejected": -0.1646645963191986, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 3.7562149951527614e-07, |
|
"logits/chosen": -0.16833610832691193, |
|
"logits/rejected": 0.05001373961567879, |
|
"logps/chosen": -193.0453643798828, |
|
"logps/rejected": -209.85385131835938, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05429987236857414, |
|
"rewards/margins": 0.09220142662525177, |
|
"rewards/margins_max": 0.1386091709136963, |
|
"rewards/margins_min": 0.04579367861151695, |
|
"rewards/margins_std": 0.0656304582953453, |
|
"rewards/rejected": -0.14650128781795502, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.7250373592623654e-07, |
|
"logits/chosen": -0.15326061844825745, |
|
"logits/rejected": 0.10640069097280502, |
|
"logps/chosen": -200.30386352539062, |
|
"logps/rejected": -214.9497833251953, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04672769457101822, |
|
"rewards/margins": 0.10547290742397308, |
|
"rewards/margins_max": 0.15153531730175018, |
|
"rewards/margins_min": 0.05941050127148628, |
|
"rewards/margins_std": 0.06514209508895874, |
|
"rewards/rejected": -0.1522006094455719, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 3.693606942594872e-07, |
|
"logits/chosen": -0.12123314291238785, |
|
"logits/rejected": 0.036874063313007355, |
|
"logps/chosen": -208.25991821289062, |
|
"logps/rejected": -238.3395233154297, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.053240977227687836, |
|
"rewards/margins": 0.09769239276647568, |
|
"rewards/margins_max": 0.1466548591852188, |
|
"rewards/margins_min": 0.04872991517186165, |
|
"rewards/margins_std": 0.06924339383840561, |
|
"rewards/rejected": -0.15093335509300232, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.661930230670982e-07, |
|
"logits/chosen": -0.2230033129453659, |
|
"logits/rejected": 0.09608611464500427, |
|
"logps/chosen": -267.37774658203125, |
|
"logps/rejected": -247.0121307373047, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.04814675822854042, |
|
"rewards/margins": 0.10081255435943604, |
|
"rewards/margins_max": 0.1392827332019806, |
|
"rewards/margins_min": 0.06234236806631088, |
|
"rewards/margins_std": 0.05440504476428032, |
|
"rewards/rejected": -0.14895930886268616, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 3.6300137598332745e-07, |
|
"logits/chosen": -0.1410978138446808, |
|
"logits/rejected": 0.1180311068892479, |
|
"logps/chosen": -248.922607421875, |
|
"logps/rejected": -238.7857208251953, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0673481673002243, |
|
"rewards/margins": 0.08777225762605667, |
|
"rewards/margins_max": 0.13753345608711243, |
|
"rewards/margins_min": 0.03801106661558151, |
|
"rewards/margins_std": 0.07037295401096344, |
|
"rewards/rejected": -0.15512043237686157, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 3.5978641158974746e-07, |
|
"logits/chosen": -0.2249755561351776, |
|
"logits/rejected": 0.06120014935731888, |
|
"logps/chosen": -230.6334686279297, |
|
"logps/rejected": -235.4031982421875, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.057920414954423904, |
|
"rewards/margins": 0.1049647331237793, |
|
"rewards/margins_max": 0.14824087917804718, |
|
"rewards/margins_min": 0.06168859079480171, |
|
"rewards/margins_std": 0.06120172142982483, |
|
"rewards/rejected": -0.1628851592540741, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 3.565487932793489e-07, |
|
"logits/chosen": -0.17121955752372742, |
|
"logits/rejected": 0.12345151603221893, |
|
"logps/chosen": -200.95846557617188, |
|
"logps/rejected": -201.8996124267578, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.053819794207811356, |
|
"rewards/margins": 0.09484090656042099, |
|
"rewards/margins_max": 0.13828353583812714, |
|
"rewards/margins_min": 0.05139826610684395, |
|
"rewards/margins_std": 0.06143715977668762, |
|
"rewards/rejected": -0.14866070449352264, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 3.5328918911965344e-07, |
|
"logits/chosen": -0.13216093182563782, |
|
"logits/rejected": 0.06172620505094528, |
|
"logps/chosen": -219.61856079101562, |
|
"logps/rejected": -247.0506591796875, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0629672110080719, |
|
"rewards/margins": 0.09889484941959381, |
|
"rewards/margins_max": 0.1449834108352661, |
|
"rewards/margins_min": 0.05280628800392151, |
|
"rewards/margins_std": 0.06517906486988068, |
|
"rewards/rejected": -0.1618620604276657, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 3.500082717148606e-07, |
|
"logits/chosen": -0.17225618660449982, |
|
"logits/rejected": 0.060709256678819656, |
|
"logps/chosen": -204.1416015625, |
|
"logps/rejected": -227.08505249023438, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.055190980434417725, |
|
"rewards/margins": 0.09612666070461273, |
|
"rewards/margins_max": 0.143478125333786, |
|
"rewards/margins_min": 0.048775214701890945, |
|
"rewards/margins_std": 0.06696505844593048, |
|
"rewards/rejected": -0.15131765604019165, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.4670671806705946e-07, |
|
"logits/chosen": -0.10742886364459991, |
|
"logits/rejected": 0.12255527079105377, |
|
"logps/chosen": -227.4519500732422, |
|
"logps/rejected": -244.0882110595703, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06157033517956734, |
|
"rewards/margins": 0.09928043186664581, |
|
"rewards/margins_max": 0.14087219536304474, |
|
"rewards/margins_min": 0.057688675820827484, |
|
"rewards/margins_std": 0.05881963297724724, |
|
"rewards/rejected": -0.16085079312324524, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 3.433852094365318e-07, |
|
"logits/chosen": -0.14116446673870087, |
|
"logits/rejected": 0.18814103305339813, |
|
"logps/chosen": -214.08493041992188, |
|
"logps/rejected": -220.2405548095703, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.053702183067798615, |
|
"rewards/margins": 0.09958843886852264, |
|
"rewards/margins_max": 0.13894253969192505, |
|
"rewards/margins_min": 0.06023435667157173, |
|
"rewards/margins_std": 0.055655092000961304, |
|
"rewards/rejected": -0.15329062938690186, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 3.400444312011776e-07, |
|
"logits/chosen": -0.11480595916509628, |
|
"logits/rejected": 0.08137498050928116, |
|
"logps/chosen": -205.24142456054688, |
|
"logps/rejected": -236.08151245117188, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.060475945472717285, |
|
"rewards/margins": 0.08602551370859146, |
|
"rewards/margins_max": 0.12408311665058136, |
|
"rewards/margins_min": 0.047967903316020966, |
|
"rewards/margins_std": 0.05382157489657402, |
|
"rewards/rejected": -0.14650145173072815, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 3.3668507271509057e-07, |
|
"logits/chosen": -0.11696537584066391, |
|
"logits/rejected": 0.15271435678005219, |
|
"logps/chosen": -206.26171875, |
|
"logps/rejected": -234.06503295898438, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05601676180958748, |
|
"rewards/margins": 0.09412574768066406, |
|
"rewards/margins_max": 0.1347564160823822, |
|
"rewards/margins_min": 0.05349506065249443, |
|
"rewards/margins_std": 0.05746046453714371, |
|
"rewards/rejected": -0.15014250576496124, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 3.333078271663128e-07, |
|
"logits/chosen": -0.16351190209388733, |
|
"logits/rejected": 0.14817702770233154, |
|
"logps/chosen": -247.7356719970703, |
|
"logps/rejected": -209.0570068359375, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.050507061183452606, |
|
"rewards/margins": 0.10204179584980011, |
|
"rewards/margins_max": 0.14540044963359833, |
|
"rewards/margins_min": 0.0586831197142601, |
|
"rewards/margins_std": 0.06131840869784355, |
|
"rewards/rejected": -0.1525488644838333, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 3.299133914337989e-07, |
|
"logits/chosen": -0.16040000319480896, |
|
"logits/rejected": 0.1360020935535431, |
|
"logps/chosen": -238.80850219726562, |
|
"logps/rejected": -253.80984497070312, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.055037401616573334, |
|
"rewards/margins": 0.11829885095357895, |
|
"rewards/margins_max": 0.1685524433851242, |
|
"rewards/margins_min": 0.0680452212691307, |
|
"rewards/margins_std": 0.07106934487819672, |
|
"rewards/rejected": -0.1733362376689911, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 3.265024659436183e-07, |
|
"logits/chosen": -0.14927372336387634, |
|
"logits/rejected": 0.09233134239912033, |
|
"logps/chosen": -236.8211212158203, |
|
"logps/rejected": -256.06243896484375, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06843633949756622, |
|
"rewards/margins": 0.10929034650325775, |
|
"rewards/margins_max": 0.15579745173454285, |
|
"rewards/margins_min": 0.06278324127197266, |
|
"rewards/margins_std": 0.06577096879482269, |
|
"rewards/rejected": -0.17772668600082397, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.5, |
|
"learning_rate": 3.230757545244251e-07, |
|
"logits/chosen": -0.16293886303901672, |
|
"logits/rejected": 0.12322285026311874, |
|
"logps/chosen": -208.5253448486328, |
|
"logps/rejected": -223.73648071289062, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05477524548768997, |
|
"rewards/margins": 0.09718328714370728, |
|
"rewards/margins_max": 0.14000651240348816, |
|
"rewards/margins_min": 0.0543600432574749, |
|
"rewards/margins_std": 0.06056120991706848, |
|
"rewards/rejected": -0.15195852518081665, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.196339642622269e-07, |
|
"logits/chosen": -0.04625851660966873, |
|
"logits/rejected": 0.06496497243642807, |
|
"logps/chosen": -208.4033660888672, |
|
"logps/rejected": -250.89321899414062, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.06276147067546844, |
|
"rewards/margins": 0.10249187797307968, |
|
"rewards/margins_max": 0.15007010102272034, |
|
"rewards/margins_min": 0.054913658648729324, |
|
"rewards/margins_std": 0.06728576868772507, |
|
"rewards/rejected": -0.16525335609912872, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 3.1617780535448053e-07, |
|
"logits/chosen": -0.09090803563594818, |
|
"logits/rejected": 0.13836640119552612, |
|
"logps/chosen": -224.8212890625, |
|
"logps/rejected": -236.4337158203125, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06297770887613297, |
|
"rewards/margins": 0.08947329223155975, |
|
"rewards/margins_max": 0.12863615155220032, |
|
"rewards/margins_min": 0.0503104105591774, |
|
"rewards/margins_std": 0.055384665727615356, |
|
"rewards/rejected": -0.15245100855827332, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 3.127079909635462e-07, |
|
"logits/chosen": -0.1454629898071289, |
|
"logits/rejected": 0.09381814301013947, |
|
"logps/chosen": -210.1219940185547, |
|
"logps/rejected": -247.29519653320312, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.05111883953213692, |
|
"rewards/margins": 0.11492305994033813, |
|
"rewards/margins_max": 0.15684781968593597, |
|
"rewards/margins_min": 0.07299830764532089, |
|
"rewards/margins_std": 0.0592905655503273, |
|
"rewards/rejected": -0.16604191064834595, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 3.0922523706952976e-07, |
|
"logits/chosen": -0.12923486530780792, |
|
"logits/rejected": 0.00274011492729187, |
|
"logps/chosen": -192.57505798339844, |
|
"logps/rejected": -229.5814971923828, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06112251803278923, |
|
"rewards/margins": 0.11229976266622543, |
|
"rewards/margins_max": 0.15655739605426788, |
|
"rewards/margins_min": 0.06804212182760239, |
|
"rewards/margins_std": 0.06258974969387054, |
|
"rewards/rejected": -0.17342229187488556, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 3.057302623225434e-07, |
|
"logits/chosen": -0.08845367282629013, |
|
"logits/rejected": 0.15953145921230316, |
|
"logps/chosen": -224.7268524169922, |
|
"logps/rejected": -228.22573852539062, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06610189378261566, |
|
"rewards/margins": 0.09007870405912399, |
|
"rewards/margins_max": 0.1336970031261444, |
|
"rewards/margins_min": 0.046460412442684174, |
|
"rewards/margins_std": 0.06168559193611145, |
|
"rewards/rejected": -0.15618060529232025, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 3.0222378789441585e-07, |
|
"logits/chosen": -0.13951900601387024, |
|
"logits/rejected": 0.21554584801197052, |
|
"logps/chosen": -241.9784698486328, |
|
"logps/rejected": -247.6939239501953, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.0631137415766716, |
|
"rewards/margins": 0.10374633967876434, |
|
"rewards/margins_max": 0.15403683483600616, |
|
"rewards/margins_min": 0.05345584824681282, |
|
"rewards/margins_std": 0.07112149894237518, |
|
"rewards/rejected": -0.16686007380485535, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 2.9870653732988137e-07, |
|
"logits/chosen": -0.15974149107933044, |
|
"logits/rejected": 0.055376578122377396, |
|
"logps/chosen": -207.2960662841797, |
|
"logps/rejected": -221.9731903076172, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.0634765774011612, |
|
"rewards/margins": 0.08953996747732162, |
|
"rewards/margins_max": 0.12941452860832214, |
|
"rewards/margins_min": 0.04966540262103081, |
|
"rewards/margins_std": 0.056391142308712006, |
|
"rewards/rejected": -0.15301653742790222, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 2.951792363972804e-07, |
|
"logits/chosen": -0.13279682397842407, |
|
"logits/rejected": 0.10459411144256592, |
|
"logps/chosen": -224.2257080078125, |
|
"logps/rejected": -227.02102661132812, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06181638687849045, |
|
"rewards/margins": 0.10990460962057114, |
|
"rewards/margins_max": 0.15748900175094604, |
|
"rewards/margins_min": 0.062320221215486526, |
|
"rewards/margins_std": 0.06729448586702347, |
|
"rewards/rejected": -0.17172099649906158, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 2.9164261293879984e-07, |
|
"logits/chosen": -0.1285082995891571, |
|
"logits/rejected": 0.08084109425544739, |
|
"logps/chosen": -206.4801788330078, |
|
"logps/rejected": -229.85586547851562, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06227404996752739, |
|
"rewards/margins": 0.09520912915468216, |
|
"rewards/margins_max": 0.1408213973045349, |
|
"rewards/margins_min": 0.04959685727953911, |
|
"rewards/margins_std": 0.06450549513101578, |
|
"rewards/rejected": -0.15748317539691925, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 2.8809739672028677e-07, |
|
"logits/chosen": -0.12225770950317383, |
|
"logits/rejected": 0.03362155705690384, |
|
"logps/chosen": -217.4171905517578, |
|
"logps/rejected": -241.16458129882812, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.07442399859428406, |
|
"rewards/margins": 0.08279917389154434, |
|
"rewards/margins_max": 0.11553443968296051, |
|
"rewards/margins_min": 0.050063878297805786, |
|
"rewards/margins_std": 0.0462946780025959, |
|
"rewards/rejected": -0.1572231650352478, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.845443192806644e-07, |
|
"logits/chosen": -0.18313539028167725, |
|
"logits/rejected": 0.1276620626449585, |
|
"logps/chosen": -229.458740234375, |
|
"logps/rejected": -231.79013061523438, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.06001945212483406, |
|
"rewards/margins": 0.08784504234790802, |
|
"rewards/margins_max": 0.1322353482246399, |
|
"rewards/margins_min": 0.043454740196466446, |
|
"rewards/margins_std": 0.06277737021446228, |
|
"rewards/rejected": -0.14786449074745178, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 2.809841137809825e-07, |
|
"logits/chosen": -0.17046763002872467, |
|
"logits/rejected": 0.05243430286645889, |
|
"logps/chosen": -207.2881317138672, |
|
"logps/rejected": -244.39028930664062, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05877915769815445, |
|
"rewards/margins": 0.10812593996524811, |
|
"rewards/margins_max": 0.14523643255233765, |
|
"rewards/margins_min": 0.07101544737815857, |
|
"rewards/margins_std": 0.05248216539621353, |
|
"rewards/rejected": -0.16690510511398315, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 2.774175148531329e-07, |
|
"logits/chosen": -0.18015912175178528, |
|
"logits/rejected": 0.0010575338965281844, |
|
"logps/chosen": -221.28512573242188, |
|
"logps/rejected": -250.00222778320312, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.05312822386622429, |
|
"rewards/margins": 0.11167536675930023, |
|
"rewards/margins_max": 0.15390248596668243, |
|
"rewards/margins_min": 0.06944824755191803, |
|
"rewards/margins_std": 0.05971817299723625, |
|
"rewards/rejected": -0.16480359435081482, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 2.738452584482617e-07, |
|
"logits/chosen": -0.1896088570356369, |
|
"logits/rejected": 0.06044679880142212, |
|
"logps/chosen": -195.58468627929688, |
|
"logps/rejected": -227.13525390625, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.052375711500644684, |
|
"rewards/margins": 0.11357314884662628, |
|
"rewards/margins_max": 0.15889129042625427, |
|
"rewards/margins_min": 0.0682549923658371, |
|
"rewards/margins_std": 0.06408955156803131, |
|
"rewards/rejected": -0.16594885289669037, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 2.702680816849091e-07, |
|
"logits/chosen": -0.15573439002037048, |
|
"logits/rejected": 0.09830964356660843, |
|
"logps/chosen": -217.51754760742188, |
|
"logps/rejected": -240.4334259033203, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06747113913297653, |
|
"rewards/margins": 0.10181452333927155, |
|
"rewards/margins_max": 0.14106084406375885, |
|
"rewards/margins_min": 0.06256819516420364, |
|
"rewards/margins_std": 0.055502694100141525, |
|
"rewards/rejected": -0.1692856401205063, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 2.666867226969087e-07, |
|
"logits/chosen": -0.15920008718967438, |
|
"logits/rejected": 0.07830671966075897, |
|
"logps/chosen": -216.7744903564453, |
|
"logps/rejected": -231.097412109375, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06382476538419724, |
|
"rewards/margins": 0.08724324405193329, |
|
"rewards/margins_max": 0.1375073939561844, |
|
"rewards/margins_min": 0.03697910159826279, |
|
"rewards/margins_std": 0.07108423113822937, |
|
"rewards/rejected": -0.15106801688671112, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 2.631019204810763e-07, |
|
"logits/chosen": -0.15687043964862823, |
|
"logits/rejected": -0.0006535470602102578, |
|
"logps/chosen": -218.9697265625, |
|
"logps/rejected": -246.04977416992188, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.061477065086364746, |
|
"rewards/margins": 0.08852384984493256, |
|
"rewards/margins_max": 0.1352422535419464, |
|
"rewards/margins_min": 0.0418054573237896, |
|
"rewards/margins_std": 0.06606978923082352, |
|
"rewards/rejected": -0.1500009298324585, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 2.5951441474472206e-07, |
|
"logits/chosen": -0.19294488430023193, |
|
"logits/rejected": 0.11017533391714096, |
|
"logps/chosen": -235.78421020507812, |
|
"logps/rejected": -242.24685668945312, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05141814798116684, |
|
"rewards/margins": 0.11200641095638275, |
|
"rewards/margins_max": 0.1588469296693802, |
|
"rewards/margins_min": 0.06516589224338531, |
|
"rewards/margins_std": 0.06624249368906021, |
|
"rewards/rejected": -0.163424551486969, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 2.5592494575301533e-07, |
|
"logits/chosen": -0.1245236024260521, |
|
"logits/rejected": 0.016547679901123047, |
|
"logps/chosen": -225.09353637695312, |
|
"logps/rejected": -267.2383117675781, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.053593188524246216, |
|
"rewards/margins": 0.10814990848302841, |
|
"rewards/margins_max": 0.14369070529937744, |
|
"rewards/margins_min": 0.07260910421609879, |
|
"rewards/margins_std": 0.050262290984392166, |
|
"rewards/rejected": -0.16174308955669403, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 2.523342541762335e-07, |
|
"logits/chosen": -0.20755529403686523, |
|
"logits/rejected": 0.07851056009531021, |
|
"logps/chosen": -198.47854614257812, |
|
"logps/rejected": -213.96701049804688, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05272821709513664, |
|
"rewards/margins": 0.11356940120458603, |
|
"rewards/margins_max": 0.1586931347846985, |
|
"rewards/margins_min": 0.06844566762447357, |
|
"rewards/margins_std": 0.06381459534168243, |
|
"rewards/rejected": -0.16629762947559357, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 2.487430809369293e-07, |
|
"logits/chosen": -0.048032622784376144, |
|
"logits/rejected": 0.1702868640422821, |
|
"logps/chosen": -214.9596405029297, |
|
"logps/rejected": -229.90847778320312, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.058893002569675446, |
|
"rewards/margins": 0.10679246485233307, |
|
"rewards/margins_max": 0.15617462992668152, |
|
"rewards/margins_min": 0.057410307228565216, |
|
"rewards/margins_std": 0.06983692944049835, |
|
"rewards/rejected": -0.1656854748725891, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 2.4515216705704393e-07, |
|
"logits/chosen": -0.21574148535728455, |
|
"logits/rejected": 0.07479486614465714, |
|
"logps/chosen": -235.31396484375, |
|
"logps/rejected": -240.1641845703125, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.057291556149721146, |
|
"rewards/margins": 0.11093351989984512, |
|
"rewards/margins_max": 0.15275637805461884, |
|
"rewards/margins_min": 0.06911066174507141, |
|
"rewards/margins_std": 0.0591464526951313, |
|
"rewards/rejected": -0.16822507977485657, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 2.415622535050009e-07, |
|
"logits/chosen": -0.17604181170463562, |
|
"logits/rejected": 0.05615830421447754, |
|
"logps/chosen": -204.60494995117188, |
|
"logps/rejected": -226.7333984375, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.05647622421383858, |
|
"rewards/margins": 0.09753639996051788, |
|
"rewards/margins_max": 0.1477840393781662, |
|
"rewards/margins_min": 0.04728874936699867, |
|
"rewards/margins_std": 0.0710608959197998, |
|
"rewards/rejected": -0.15401262044906616, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 2.379740810428111e-07, |
|
"logits/chosen": -0.1344299018383026, |
|
"logits/rejected": 0.13296538591384888, |
|
"logps/chosen": -205.1040496826172, |
|
"logps/rejected": -212.88705444335938, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.059041477739810944, |
|
"rewards/margins": 0.09553243964910507, |
|
"rewards/margins_max": 0.14307790994644165, |
|
"rewards/margins_min": 0.0479869581758976, |
|
"rewards/margins_std": 0.06723945587873459, |
|
"rewards/rejected": -0.15457391738891602, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 2.3438839007321936e-07, |
|
"logits/chosen": -0.11420653760433197, |
|
"logits/rejected": 0.11103509366512299, |
|
"logps/chosen": -222.06143188476562, |
|
"logps/rejected": -249.2257843017578, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04806683585047722, |
|
"rewards/margins": 0.1108899936079979, |
|
"rewards/margins_max": 0.1588352620601654, |
|
"rewards/margins_min": 0.06294471770524979, |
|
"rewards/margins_std": 0.06780485063791275, |
|
"rewards/rejected": -0.15895681083202362, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 2.3080592048692593e-07, |
|
"logits/chosen": -0.18425148725509644, |
|
"logits/rejected": -0.025905439630150795, |
|
"logps/chosen": -223.4422149658203, |
|
"logps/rejected": -251.49514770507812, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06482952833175659, |
|
"rewards/margins": 0.09514541923999786, |
|
"rewards/margins_max": 0.1500011533498764, |
|
"rewards/margins_min": 0.04028966277837753, |
|
"rewards/margins_std": 0.07757773995399475, |
|
"rewards/rejected": -0.15997494757175446, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 2.2722741150991376e-07, |
|
"logits/chosen": -0.16047583520412445, |
|
"logits/rejected": 0.10932193696498871, |
|
"logps/chosen": -214.70315551757812, |
|
"logps/rejected": -212.7730712890625, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0567842535674572, |
|
"rewards/margins": 0.09505367279052734, |
|
"rewards/margins_max": 0.13385465741157532, |
|
"rewards/margins_min": 0.05625268071889877, |
|
"rewards/margins_std": 0.05487288907170296, |
|
"rewards/rejected": -0.15183793008327484, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 2.2365360155091238e-07, |
|
"logits/chosen": -0.11961637437343597, |
|
"logits/rejected": 0.0894673764705658, |
|
"logps/chosen": -209.631103515625, |
|
"logps/rejected": -264.8625183105469, |
|
"loss": 0.6374, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05508565157651901, |
|
"rewards/margins": 0.12142980098724365, |
|
"rewards/margins_max": 0.17250314354896545, |
|
"rewards/margins_min": 0.07035643607378006, |
|
"rewards/margins_std": 0.07222862541675568, |
|
"rewards/rejected": -0.17651543021202087, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 2.2008522804903062e-07, |
|
"logits/chosen": -0.0964190810918808, |
|
"logits/rejected": 0.10529766976833344, |
|
"logps/chosen": -212.2164306640625, |
|
"logps/rejected": -232.51168823242188, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04842360317707062, |
|
"rewards/margins": 0.10283006727695465, |
|
"rewards/margins_max": 0.14770516753196716, |
|
"rewards/margins_min": 0.05795495584607124, |
|
"rewards/margins_std": 0.063462994992733, |
|
"rewards/rejected": -0.15125367045402527, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 2.1652302732158988e-07, |
|
"logits/chosen": -0.15599027276039124, |
|
"logits/rejected": -0.01368700247257948, |
|
"logps/chosen": -196.22361755371094, |
|
"logps/rejected": -246.3559112548828, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06172167509794235, |
|
"rewards/margins": 0.10635526478290558, |
|
"rewards/margins_max": 0.1531476378440857, |
|
"rewards/margins_min": 0.05956289917230606, |
|
"rewards/margins_std": 0.06617439538240433, |
|
"rewards/rejected": -0.16807694733142853, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 2.1296773441218785e-07, |
|
"logits/chosen": -0.11377346515655518, |
|
"logits/rejected": 0.15727418661117554, |
|
"logps/chosen": -188.34353637695312, |
|
"logps/rejected": -210.76950073242188, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05929671972990036, |
|
"rewards/margins": 0.10617075115442276, |
|
"rewards/margins_max": 0.15472009778022766, |
|
"rewards/margins_min": 0.05762138217687607, |
|
"rewards/margins_std": 0.06865915656089783, |
|
"rewards/rejected": -0.16546745598316193, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 2.094200829390262e-07, |
|
"logits/chosen": -0.09401213377714157, |
|
"logits/rejected": 0.049455929547548294, |
|
"logps/chosen": -235.5387725830078, |
|
"logps/rejected": -273.2581787109375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0620415136218071, |
|
"rewards/margins": 0.10562853515148163, |
|
"rewards/margins_max": 0.1506195366382599, |
|
"rewards/margins_min": 0.06063752621412277, |
|
"rewards/margins_std": 0.06362690031528473, |
|
"rewards/rejected": -0.16767004132270813, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 2.0588080494353172e-07, |
|
"logits/chosen": -0.17110076546669006, |
|
"logits/rejected": 0.08025307953357697, |
|
"logps/chosen": -227.67440795898438, |
|
"logps/rejected": -259.4073486328125, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05723772570490837, |
|
"rewards/margins": 0.10702015459537506, |
|
"rewards/margins_max": 0.14093990623950958, |
|
"rewards/margins_min": 0.07310040295124054, |
|
"rewards/margins_std": 0.04796977713704109, |
|
"rewards/rejected": -0.16425786912441254, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 2.0235063073930276e-07, |
|
"logits/chosen": -0.09825171530246735, |
|
"logits/rejected": 0.13015155494213104, |
|
"logps/chosen": -212.82754516601562, |
|
"logps/rejected": -232.27713012695312, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0580797903239727, |
|
"rewards/margins": 0.10242825746536255, |
|
"rewards/margins_max": 0.13443560898303986, |
|
"rewards/margins_min": 0.07042091339826584, |
|
"rewards/margins_std": 0.045265212655067444, |
|
"rewards/rejected": -0.16050805151462555, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.9883028876141266e-07, |
|
"logits/chosen": -0.15827712416648865, |
|
"logits/rejected": 0.0378829650580883, |
|
"logps/chosen": -208.70877075195312, |
|
"logps/rejected": -225.761474609375, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06385836750268936, |
|
"rewards/margins": 0.09914499521255493, |
|
"rewards/margins_max": 0.14492857456207275, |
|
"rewards/margins_min": 0.05336139351129532, |
|
"rewards/margins_std": 0.06474778801202774, |
|
"rewards/rejected": -0.1630033552646637, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.9532050541610058e-07, |
|
"logits/chosen": -0.08087868988513947, |
|
"logits/rejected": 0.025525391101837158, |
|
"logps/chosen": -202.79037475585938, |
|
"logps/rejected": -251.94650268554688, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06054714322090149, |
|
"rewards/margins": 0.1071493998169899, |
|
"rewards/margins_max": 0.15883831679821014, |
|
"rewards/margins_min": 0.055460475385189056, |
|
"rewards/margins_std": 0.07309918105602264, |
|
"rewards/rejected": -0.16769655048847198, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 1.9182200493088052e-07, |
|
"logits/chosen": -0.16447165608406067, |
|
"logits/rejected": 0.11063267290592194, |
|
"logps/chosen": -216.5113067626953, |
|
"logps/rejected": -230.7987823486328, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0644717812538147, |
|
"rewards/margins": 0.09193485975265503, |
|
"rewards/margins_max": 0.1328367441892624, |
|
"rewards/margins_min": 0.05103297159075737, |
|
"rewards/margins_std": 0.057844001799821854, |
|
"rewards/rejected": -0.15640662610530853, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.883355092051009e-07, |
|
"logits/chosen": -0.07868603616952896, |
|
"logits/rejected": 0.05034567043185234, |
|
"logps/chosen": -218.24722290039062, |
|
"logps/rejected": -260.0995788574219, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.052408766001462936, |
|
"rewards/margins": 0.11046520620584488, |
|
"rewards/margins_max": 0.15805724263191223, |
|
"rewards/margins_min": 0.06287316977977753, |
|
"rewards/margins_std": 0.0673053115606308, |
|
"rewards/rejected": -0.1628739833831787, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.8486173766098362e-07, |
|
"logits/chosen": -0.14829647541046143, |
|
"logits/rejected": 0.0757719874382019, |
|
"logps/chosen": -204.81362915039062, |
|
"logps/rejected": -235.73849487304688, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.04796791076660156, |
|
"rewards/margins": 0.09321852028369904, |
|
"rewards/margins_max": 0.12461745738983154, |
|
"rewards/margins_min": 0.061819594353437424, |
|
"rewards/margins_std": 0.044404786080121994, |
|
"rewards/rejected": -0.1411864459514618, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.8140140709517465e-07, |
|
"logits/chosen": -0.07676380127668381, |
|
"logits/rejected": 0.15969929099082947, |
|
"logps/chosen": -228.54879760742188, |
|
"logps/rejected": -246.28616333007812, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.0546930655837059, |
|
"rewards/margins": 0.0984901636838913, |
|
"rewards/margins_max": 0.13989897072315216, |
|
"rewards/margins_min": 0.057081352919340134, |
|
"rewards/margins_std": 0.058560896664857864, |
|
"rewards/rejected": -0.1531832069158554, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.7795523153083653e-07, |
|
"logits/chosen": -0.08178448677062988, |
|
"logits/rejected": 0.004074615426361561, |
|
"logps/chosen": -195.57810974121094, |
|
"logps/rejected": -247.9669647216797, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.05888283997774124, |
|
"rewards/margins": 0.09247585386037827, |
|
"rewards/margins_max": 0.14980639517307281, |
|
"rewards/margins_min": 0.035145316272974014, |
|
"rewards/margins_std": 0.08107762038707733, |
|
"rewards/rejected": -0.1513586789369583, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.7452392207031286e-07, |
|
"logits/chosen": -0.10405842959880829, |
|
"logits/rejected": -0.0477495901286602, |
|
"logps/chosen": -199.43121337890625, |
|
"logps/rejected": -295.82403564453125, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.04707425460219383, |
|
"rewards/margins": 0.11261805146932602, |
|
"rewards/margins_max": 0.15691080689430237, |
|
"rewards/margins_min": 0.06832531839609146, |
|
"rewards/margins_std": 0.06263939291238785, |
|
"rewards/rejected": -0.15969231724739075, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.7110818674839563e-07, |
|
"logits/chosen": -0.08448558300733566, |
|
"logits/rejected": 0.16416208446025848, |
|
"logps/chosen": -202.5157012939453, |
|
"logps/rejected": -211.07785034179688, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.059366196393966675, |
|
"rewards/margins": 0.08933896571397781, |
|
"rewards/margins_max": 0.13254401087760925, |
|
"rewards/margins_min": 0.046133920550346375, |
|
"rewards/margins_std": 0.06110116094350815, |
|
"rewards/rejected": -0.1487051546573639, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 1.6770873038622562e-07, |
|
"logits/chosen": -0.12733057141304016, |
|
"logits/rejected": 0.0979214534163475, |
|
"logps/chosen": -226.008056640625, |
|
"logps/rejected": -237.6015167236328, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06018907576799393, |
|
"rewards/margins": 0.08392681181430817, |
|
"rewards/margins_max": 0.11851127445697784, |
|
"rewards/margins_min": 0.049342334270477295, |
|
"rewards/margins_std": 0.04890982061624527, |
|
"rewards/rejected": -0.1441158801317215, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.5, |
|
"learning_rate": 1.643262544458558e-07, |
|
"logits/chosen": -0.1266459971666336, |
|
"logits/rejected": 0.1119358167052269, |
|
"logps/chosen": -248.8621826171875, |
|
"logps/rejected": -259.1594543457031, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06370563060045242, |
|
"rewards/margins": 0.09041455388069153, |
|
"rewards/margins_max": 0.12537343800067902, |
|
"rewards/margins_min": 0.05545566603541374, |
|
"rewards/margins_std": 0.04943932592868805, |
|
"rewards/rejected": -0.15412016212940216, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.6096145688550772e-07, |
|
"logits/chosen": -0.13403485715389252, |
|
"logits/rejected": 0.11216270923614502, |
|
"logps/chosen": -197.7595672607422, |
|
"logps/rejected": -214.38229370117188, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06467537581920624, |
|
"rewards/margins": 0.08191889524459839, |
|
"rewards/margins_max": 0.12750712037086487, |
|
"rewards/margins_min": 0.03633067384362221, |
|
"rewards/margins_std": 0.06447147578001022, |
|
"rewards/rejected": -0.14659425616264343, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 1.5761503201555138e-07, |
|
"logits/chosen": -0.14292378723621368, |
|
"logits/rejected": 0.06406668573617935, |
|
"logps/chosen": -210.74838256835938, |
|
"logps/rejected": -227.1605987548828, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0532594732940197, |
|
"rewards/margins": 0.09269430488348007, |
|
"rewards/margins_max": 0.1299545019865036, |
|
"rewards/margins_min": 0.05543411523103714, |
|
"rewards/margins_std": 0.05269387364387512, |
|
"rewards/rejected": -0.14595378935337067, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.542876703552372e-07, |
|
"logits/chosen": -0.10188720375299454, |
|
"logits/rejected": 0.10029338300228119, |
|
"logps/chosen": -222.6920928955078, |
|
"logps/rejected": -241.28536987304688, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06753533333539963, |
|
"rewards/margins": 0.10507749021053314, |
|
"rewards/margins_max": 0.1470262110233307, |
|
"rewards/margins_min": 0.06312878429889679, |
|
"rewards/margins_std": 0.05932443216443062, |
|
"rewards/rejected": -0.17261283099651337, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.5098005849021078e-07, |
|
"logits/chosen": -0.14857852458953857, |
|
"logits/rejected": 0.04540370777249336, |
|
"logps/chosen": -213.944580078125, |
|
"logps/rejected": -238.8897247314453, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05818655341863632, |
|
"rewards/margins": 0.10537393391132355, |
|
"rewards/margins_max": 0.14596855640411377, |
|
"rewards/margins_min": 0.06477929651737213, |
|
"rewards/margins_std": 0.057409483939409256, |
|
"rewards/rejected": -0.16356047987937927, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.4769287893083905e-07, |
|
"logits/chosen": -0.20055250823497772, |
|
"logits/rejected": 0.12372901290655136, |
|
"logps/chosen": -214.1376190185547, |
|
"logps/rejected": -244.47225952148438, |
|
"loss": 0.6379, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05139562487602234, |
|
"rewards/margins": 0.1305696666240692, |
|
"rewards/margins_max": 0.1713367998600006, |
|
"rewards/margins_min": 0.08980251848697662, |
|
"rewards/margins_std": 0.05765343829989433, |
|
"rewards/rejected": -0.18196527659893036, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.444268099713775e-07, |
|
"logits/chosen": -0.17057690024375916, |
|
"logits/rejected": 0.061030395328998566, |
|
"logps/chosen": -211.06912231445312, |
|
"logps/rejected": -238.83682250976562, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0555347204208374, |
|
"rewards/margins": 0.097480908036232, |
|
"rewards/margins_max": 0.15012948215007782, |
|
"rewards/margins_min": 0.044832345098257065, |
|
"rewards/margins_std": 0.07445631921291351, |
|
"rewards/rejected": -0.1530156284570694, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.411825255500071e-07, |
|
"logits/chosen": -0.13909710943698883, |
|
"logits/rejected": 0.012036198750138283, |
|
"logps/chosen": -192.96359252929688, |
|
"logps/rejected": -238.0941925048828, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05943521112203598, |
|
"rewards/margins": 0.09797738492488861, |
|
"rewards/margins_max": 0.13452477753162384, |
|
"rewards/margins_min": 0.06142998859286308, |
|
"rewards/margins_std": 0.051685821264982224, |
|
"rewards/rejected": -0.1574126034975052, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.379606951097705e-07, |
|
"logits/chosen": -0.14935798943042755, |
|
"logits/rejected": 0.03216198831796646, |
|
"logps/chosen": -193.1946563720703, |
|
"logps/rejected": -228.28125, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05348304659128189, |
|
"rewards/margins": 0.10333843529224396, |
|
"rewards/margins_max": 0.14901982247829437, |
|
"rewards/margins_min": 0.057657063007354736, |
|
"rewards/margins_std": 0.06460321694612503, |
|
"rewards/rejected": -0.15682148933410645, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 1.3476198346043553e-07, |
|
"logits/chosen": -0.18985338509082794, |
|
"logits/rejected": 0.12341825664043427, |
|
"logps/chosen": -224.2623748779297, |
|
"logps/rejected": -220.1488037109375, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.059168003499507904, |
|
"rewards/margins": 0.09694478660821915, |
|
"rewards/margins_max": 0.14475250244140625, |
|
"rewards/margins_min": 0.049137067049741745, |
|
"rewards/margins_std": 0.06761031597852707, |
|
"rewards/rejected": -0.15611279010772705, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.3158705064131477e-07, |
|
"logits/chosen": -0.13782618939876556, |
|
"logits/rejected": 0.008584958501160145, |
|
"logps/chosen": -205.80709838867188, |
|
"logps/rejected": -230.9507293701172, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05955750495195389, |
|
"rewards/margins": 0.10197343677282333, |
|
"rewards/margins_max": 0.1481003761291504, |
|
"rewards/margins_min": 0.05584648996591568, |
|
"rewards/margins_std": 0.06523334980010986, |
|
"rewards/rejected": -0.16153094172477722, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.2843655178506943e-07, |
|
"logits/chosen": -0.18119294941425323, |
|
"logits/rejected": 0.03453055024147034, |
|
"logps/chosen": -224.9883270263672, |
|
"logps/rejected": -257.1782531738281, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05576961115002632, |
|
"rewards/margins": 0.10703034698963165, |
|
"rewards/margins_max": 0.15220855176448822, |
|
"rewards/margins_min": 0.06185212731361389, |
|
"rewards/margins_std": 0.06389166414737701, |
|
"rewards/rejected": -0.16279995441436768, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.2531113698252565e-07, |
|
"logits/chosen": -0.2272913008928299, |
|
"logits/rejected": 0.0765170305967331, |
|
"logps/chosen": -208.4404296875, |
|
"logps/rejected": -214.04281616210938, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0450308695435524, |
|
"rewards/margins": 0.10445519536733627, |
|
"rewards/margins_max": 0.14625009894371033, |
|
"rewards/margins_min": 0.06266029924154282, |
|
"rewards/margins_std": 0.05910690873861313, |
|
"rewards/rejected": -0.14948606491088867, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 1.222114511485317e-07, |
|
"logits/chosen": -0.12483775615692139, |
|
"logits/rejected": 0.09466644376516342, |
|
"logps/chosen": -208.94430541992188, |
|
"logps/rejected": -255.8892059326172, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05880628898739815, |
|
"rewards/margins": 0.11126656830310822, |
|
"rewards/margins_max": 0.16315510869026184, |
|
"rewards/margins_min": 0.059378039091825485, |
|
"rewards/margins_std": 0.0733814612030983, |
|
"rewards/rejected": -0.17007283866405487, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.191381338888825e-07, |
|
"logits/chosen": -0.13245120644569397, |
|
"logits/rejected": 0.10843801498413086, |
|
"logps/chosen": -204.6431121826172, |
|
"logps/rejected": -226.485595703125, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05664772912859917, |
|
"rewards/margins": 0.09273017942905426, |
|
"rewards/margins_max": 0.1266176402568817, |
|
"rewards/margins_min": 0.0588427297770977, |
|
"rewards/margins_std": 0.04792410135269165, |
|
"rewards/rejected": -0.14937791228294373, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 1.1609181936833965e-07, |
|
"logits/chosen": -0.09875744581222534, |
|
"logits/rejected": 0.03166166692972183, |
|
"logps/chosen": -204.11947631835938, |
|
"logps/rejected": -255.87258911132812, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05774586275219917, |
|
"rewards/margins": 0.10740862786769867, |
|
"rewards/margins_max": 0.1617855578660965, |
|
"rewards/margins_min": 0.053031690418720245, |
|
"rewards/margins_std": 0.07690059393644333, |
|
"rewards/rejected": -0.16515448689460754, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.1307313617977512e-07, |
|
"logits/chosen": -0.1509208381175995, |
|
"logits/rejected": 0.09706972539424896, |
|
"logps/chosen": -201.7398681640625, |
|
"logps/rejected": -210.61923217773438, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06123855710029602, |
|
"rewards/margins": 0.09280522167682648, |
|
"rewards/margins_max": 0.14272518455982208, |
|
"rewards/margins_min": 0.04288526624441147, |
|
"rewards/margins_std": 0.07059746980667114, |
|
"rewards/rejected": -0.1540437638759613, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 1.1008270721446358e-07, |
|
"logits/chosen": -0.15949265658855438, |
|
"logits/rejected": 0.1050555557012558, |
|
"logps/chosen": -207.06884765625, |
|
"logps/rejected": -211.36892700195312, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05307169631123543, |
|
"rewards/margins": 0.10218322277069092, |
|
"rewards/margins_max": 0.14148741960525513, |
|
"rewards/margins_min": 0.06287900358438492, |
|
"rewards/margins_std": 0.05558454990386963, |
|
"rewards/rejected": -0.15525491535663605, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 1.071211495335518e-07, |
|
"logits/chosen": -0.09772120416164398, |
|
"logits/rejected": 0.08581139892339706, |
|
"logps/chosen": -212.9148712158203, |
|
"logps/rejected": -235.0979461669922, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.064021997153759, |
|
"rewards/margins": 0.10210440307855606, |
|
"rewards/margins_max": 0.14792828261852264, |
|
"rewards/margins_min": 0.05628051236271858, |
|
"rewards/margins_std": 0.06480477005243301, |
|
"rewards/rejected": -0.16612640023231506, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 1.0418907424073081e-07, |
|
"logits/chosen": -0.1678512841463089, |
|
"logits/rejected": 0.04902017116546631, |
|
"logps/chosen": -200.6660614013672, |
|
"logps/rejected": -223.4239959716797, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05520619824528694, |
|
"rewards/margins": 0.0991910919547081, |
|
"rewards/margins_max": 0.13650527596473694, |
|
"rewards/margins_min": 0.06187691539525986, |
|
"rewards/margins_std": 0.052770208567380905, |
|
"rewards/rejected": -0.15439727902412415, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.012870863561377e-07, |
|
"logits/chosen": -0.10301700979471207, |
|
"logits/rejected": 0.13756810128688812, |
|
"logps/chosen": -206.919677734375, |
|
"logps/rejected": -227.0911865234375, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06356682628393173, |
|
"rewards/margins": 0.07415839284658432, |
|
"rewards/margins_max": 0.11350224167108536, |
|
"rewards/margins_min": 0.03481454402208328, |
|
"rewards/margins_std": 0.055640608072280884, |
|
"rewards/rejected": -0.13772521913051605, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 9.84157846915124e-08, |
|
"logits/chosen": -0.19909122586250305, |
|
"logits/rejected": 0.05684971809387207, |
|
"logps/chosen": -213.03915405273438, |
|
"logps/rejected": -251.5517120361328, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05604202672839165, |
|
"rewards/margins": 0.11602386087179184, |
|
"rewards/margins_max": 0.16751208901405334, |
|
"rewards/margins_min": 0.06453560292720795, |
|
"rewards/margins_std": 0.07281537353992462, |
|
"rewards/rejected": -0.1720658838748932, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 9.557576172663575e-08, |
|
"logits/chosen": -0.11178640276193619, |
|
"logits/rejected": 0.04836495593190193, |
|
"logps/chosen": -208.6399688720703, |
|
"logps/rejected": -242.5208740234375, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06215709447860718, |
|
"rewards/margins": 0.1025010198354721, |
|
"rewards/margins_max": 0.1522974669933319, |
|
"rewards/margins_min": 0.0527045838534832, |
|
"rewards/margins_std": 0.07042279839515686, |
|
"rewards/rejected": -0.16465812921524048, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 9.276760348707389e-08, |
|
"logits/chosen": -0.20817360281944275, |
|
"logits/rejected": 0.15238900482654572, |
|
"logps/chosen": -236.2836456298828, |
|
"logps/rejected": -212.127685546875, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.06291679292917252, |
|
"rewards/margins": 0.1133137121796608, |
|
"rewards/margins_max": 0.15874022245407104, |
|
"rewards/margins_min": 0.06788720935583115, |
|
"rewards/margins_std": 0.06424277275800705, |
|
"rewards/rejected": -0.17623049020767212, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 8.999188942325475e-08, |
|
"logits/chosen": -0.10632741451263428, |
|
"logits/rejected": 0.12391182035207748, |
|
"logps/chosen": -215.11575317382812, |
|
"logps/rejected": -262.51446533203125, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05760825797915459, |
|
"rewards/margins": 0.10929699242115021, |
|
"rewards/margins_max": 0.15289412438869476, |
|
"rewards/margins_min": 0.06569983065128326, |
|
"rewards/margins_std": 0.0616556778550148, |
|
"rewards/rejected": -0.1669052541255951, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.5, |
|
"learning_rate": 8.724919229090094e-08, |
|
"logits/chosen": -0.17001786828041077, |
|
"logits/rejected": 0.017346305772662163, |
|
"logps/chosen": -218.0846710205078, |
|
"logps/rejected": -244.435546875, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.059084583073854446, |
|
"rewards/margins": 0.0965479165315628, |
|
"rewards/margins_max": 0.14651286602020264, |
|
"rewards/margins_min": 0.04658297449350357, |
|
"rewards/margins_std": 0.07066110521554947, |
|
"rewards/rejected": -0.15563251078128815, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 8.454007803284452e-08, |
|
"logits/chosen": -0.10606809705495834, |
|
"logits/rejected": 0.05775570124387741, |
|
"logps/chosen": -199.2654266357422, |
|
"logps/rejected": -245.3488311767578, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.051395904272794724, |
|
"rewards/margins": 0.10947608947753906, |
|
"rewards/margins_max": 0.15262414515018463, |
|
"rewards/margins_min": 0.0663280338048935, |
|
"rewards/margins_std": 0.061020564287900925, |
|
"rewards/rejected": -0.16087199747562408, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 8.186510566224725e-08, |
|
"logits/chosen": -0.15745623409748077, |
|
"logits/rejected": 0.06878992170095444, |
|
"logps/chosen": -212.1492462158203, |
|
"logps/rejected": -233.72598266601562, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.050053782761096954, |
|
"rewards/margins": 0.11182372272014618, |
|
"rewards/margins_max": 0.1652067005634308, |
|
"rewards/margins_min": 0.05844072625041008, |
|
"rewards/margins_std": 0.07549495995044708, |
|
"rewards/rejected": -0.16187749803066254, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 7.922482714725065e-08, |
|
"logits/chosen": -0.12382777780294418, |
|
"logits/rejected": 0.10572858899831772, |
|
"logps/chosen": -218.09371948242188, |
|
"logps/rejected": -235.0614013671875, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.058691591024398804, |
|
"rewards/margins": 0.10704471170902252, |
|
"rewards/margins_max": 0.14886514842510223, |
|
"rewards/margins_min": 0.06522427499294281, |
|
"rewards/margins_std": 0.05914302542805672, |
|
"rewards/rejected": -0.16573630273342133, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 7.661978729708013e-08, |
|
"logits/chosen": -0.1819213330745697, |
|
"logits/rejected": 0.13269567489624023, |
|
"logps/chosen": -210.587158203125, |
|
"logps/rejected": -200.8339385986328, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0586087591946125, |
|
"rewards/margins": 0.08718381822109222, |
|
"rewards/margins_max": 0.12654808163642883, |
|
"rewards/margins_min": 0.04781955108046532, |
|
"rewards/margins_std": 0.05566948652267456, |
|
"rewards/rejected": -0.14579257369041443, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 7.405052364962603e-08, |
|
"logits/chosen": -0.13600589334964752, |
|
"logits/rejected": 0.14749519526958466, |
|
"logps/chosen": -207.6585693359375, |
|
"logps/rejected": -238.1978759765625, |
|
"loss": 0.64, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.05971568822860718, |
|
"rewards/margins": 0.11025593429803848, |
|
"rewards/margins_max": 0.15179547667503357, |
|
"rewards/margins_min": 0.0687163919210434, |
|
"rewards/margins_std": 0.058745790272951126, |
|
"rewards/rejected": -0.16997162997722626, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 7.151756636052527e-08, |
|
"logits/chosen": -0.15209710597991943, |
|
"logits/rejected": 0.15300539135932922, |
|
"logps/chosen": -214.08145141601562, |
|
"logps/rejected": -228.1832733154297, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05681489780545235, |
|
"rewards/margins": 0.114329494535923, |
|
"rewards/margins_max": 0.16191419959068298, |
|
"rewards/margins_min": 0.06674476712942123, |
|
"rewards/margins_std": 0.06729496270418167, |
|
"rewards/rejected": -0.17114439606666565, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 6.902143809376593e-08, |
|
"logits/chosen": -0.02033737674355507, |
|
"logits/rejected": 0.07773645222187042, |
|
"logps/chosen": -199.2742462158203, |
|
"logps/rejected": -251.02969360351562, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.055648110806941986, |
|
"rewards/margins": 0.10049048811197281, |
|
"rewards/margins_max": 0.13645590841770172, |
|
"rewards/margins_min": 0.06452508270740509, |
|
"rewards/margins_std": 0.050862766802310944, |
|
"rewards/rejected": -0.156138613820076, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 6.656265391383834e-08, |
|
"logits/chosen": -0.18140563368797302, |
|
"logits/rejected": 0.11698174476623535, |
|
"logps/chosen": -219.529541015625, |
|
"logps/rejected": -219.55859375, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.053447991609573364, |
|
"rewards/margins": 0.11373796314001083, |
|
"rewards/margins_max": 0.15492601692676544, |
|
"rewards/margins_min": 0.07254988700151443, |
|
"rewards/margins_std": 0.058248721063137054, |
|
"rewards/rejected": -0.1671859472990036, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 6.414172117945363e-08, |
|
"logits/chosen": -0.12691111862659454, |
|
"logits/rejected": -0.011705311946570873, |
|
"logps/chosen": -214.7986297607422, |
|
"logps/rejected": -251.7666015625, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.04762512072920799, |
|
"rewards/margins": 0.10763095319271088, |
|
"rewards/margins_max": 0.15741634368896484, |
|
"rewards/margins_min": 0.05784556269645691, |
|
"rewards/margins_std": 0.07040717452764511, |
|
"rewards/rejected": -0.15525606274604797, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 6.175913943885275e-08, |
|
"logits/chosen": -0.21015481650829315, |
|
"logits/rejected": 0.04729234799742699, |
|
"logps/chosen": -219.0660400390625, |
|
"logps/rejected": -230.38601684570312, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06361646950244904, |
|
"rewards/margins": 0.0891466736793518, |
|
"rewards/margins_max": 0.1280238926410675, |
|
"rewards/margins_min": 0.05026944726705551, |
|
"rewards/margins_std": 0.05498070642352104, |
|
"rewards/rejected": -0.15276315808296204, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 5.941540032672695e-08, |
|
"logits/chosen": -0.1390005648136139, |
|
"logits/rejected": 0.1156986802816391, |
|
"logps/chosen": -203.31509399414062, |
|
"logps/rejected": -213.8002166748047, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.05469059199094772, |
|
"rewards/margins": 0.09125205874443054, |
|
"rewards/margins_max": 0.12813611328601837, |
|
"rewards/margins_min": 0.05436800792813301, |
|
"rewards/margins_std": 0.05216192454099655, |
|
"rewards/rejected": -0.14594264328479767, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 5.711098746277135e-08, |
|
"logits/chosen": -0.18979230523109436, |
|
"logits/rejected": 0.02134443074464798, |
|
"logps/chosen": -241.7399444580078, |
|
"logps/rejected": -279.031005859375, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06025727838277817, |
|
"rewards/margins": 0.10567016899585724, |
|
"rewards/margins_max": 0.14187535643577576, |
|
"rewards/margins_min": 0.06946493685245514, |
|
"rewards/margins_std": 0.05120190232992172, |
|
"rewards/rejected": -0.1659274399280548, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 5.484637635189185e-08, |
|
"logits/chosen": -0.18471169471740723, |
|
"logits/rejected": -0.013999777846038342, |
|
"logps/chosen": -213.14492797851562, |
|
"logps/rejected": -244.3299560546875, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06022089719772339, |
|
"rewards/margins": 0.0926375538110733, |
|
"rewards/margins_max": 0.14156688749790192, |
|
"rewards/margins_min": 0.0437081977725029, |
|
"rewards/margins_std": 0.06919653713703156, |
|
"rewards/rejected": -0.1528584361076355, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 5.262203428608755e-08, |
|
"logits/chosen": -0.1870705783367157, |
|
"logits/rejected": 0.11638101190328598, |
|
"logps/chosen": -220.24398803710938, |
|
"logps/rejected": -216.3998260498047, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.061370521783828735, |
|
"rewards/margins": 0.10007043182849884, |
|
"rewards/margins_max": 0.1472715139389038, |
|
"rewards/margins_min": 0.05286933854222298, |
|
"rewards/margins_std": 0.06675241887569427, |
|
"rewards/rejected": -0.16144093871116638, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 5.0438420248026745e-08, |
|
"logits/chosen": -0.12526465952396393, |
|
"logits/rejected": 0.0398729033768177, |
|
"logps/chosen": -209.6382293701172, |
|
"logps/rejected": -229.21401977539062, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06217757612466812, |
|
"rewards/margins": 0.09538298845291138, |
|
"rewards/margins_max": 0.13602666556835175, |
|
"rewards/margins_min": 0.054739318788051605, |
|
"rewards/margins_std": 0.05747883766889572, |
|
"rewards/rejected": -0.1575605720281601, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 4.829598481633812e-08, |
|
"logits/chosen": -0.200607568025589, |
|
"logits/rejected": 0.017670905217528343, |
|
"logps/chosen": -221.0457000732422, |
|
"logps/rejected": -239.06143188476562, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06344129145145416, |
|
"rewards/margins": 0.10496222972869873, |
|
"rewards/margins_max": 0.14306050539016724, |
|
"rewards/margins_min": 0.06686393916606903, |
|
"rewards/margins_std": 0.053879112005233765, |
|
"rewards/rejected": -0.1684035062789917, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 4.619517007263596e-08, |
|
"logits/chosen": -0.12676799297332764, |
|
"logits/rejected": 0.16620242595672607, |
|
"logps/chosen": -207.8087615966797, |
|
"logps/rejected": -226.5636749267578, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.04663520306348801, |
|
"rewards/margins": 0.09131400287151337, |
|
"rewards/margins_max": 0.13556495308876038, |
|
"rewards/margins_min": 0.04706304520368576, |
|
"rewards/margins_std": 0.06258030235767365, |
|
"rewards/rejected": -0.13794919848442078, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.413640951029849e-08, |
|
"logits/chosen": -0.19667062163352966, |
|
"logits/rejected": 0.08986136317253113, |
|
"logps/chosen": -210.84982299804688, |
|
"logps/rejected": -226.7263641357422, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0630938857793808, |
|
"rewards/margins": 0.11106900870800018, |
|
"rewards/margins_max": 0.1641812026500702, |
|
"rewards/margins_min": 0.05795680359005928, |
|
"rewards/margins_std": 0.075111985206604, |
|
"rewards/rejected": -0.17416289448738098, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 4.212012794501888e-08, |
|
"logits/chosen": -0.1816846877336502, |
|
"logits/rejected": 0.028480231761932373, |
|
"logps/chosen": -219.585693359375, |
|
"logps/rejected": -236.0741729736328, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.058341920375823975, |
|
"rewards/margins": 0.09123341739177704, |
|
"rewards/margins_max": 0.13824895024299622, |
|
"rewards/margins_min": 0.04421788826584816, |
|
"rewards/margins_std": 0.06649000942707062, |
|
"rewards/rejected": -0.1495753526687622, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.014674142714605e-08, |
|
"logits/chosen": -0.22851577401161194, |
|
"logits/rejected": -0.011786893010139465, |
|
"logps/chosen": -220.0074920654297, |
|
"logps/rejected": -241.3643798828125, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.058182261884212494, |
|
"rewards/margins": 0.09729903191328049, |
|
"rewards/margins_max": 0.13660015165805817, |
|
"rewards/margins_min": 0.05799790471792221, |
|
"rewards/margins_std": 0.05558018758893013, |
|
"rewards/rejected": -0.15548129379749298, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 3.821665715583508e-08, |
|
"logits/chosen": -0.16805607080459595, |
|
"logits/rejected": 0.06839191168546677, |
|
"logps/chosen": -205.09890747070312, |
|
"logps/rejected": -233.2451934814453, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.05809404328465462, |
|
"rewards/margins": 0.09838706254959106, |
|
"rewards/margins_max": 0.15165673196315765, |
|
"rewards/margins_min": 0.04511738568544388, |
|
"rewards/margins_std": 0.07533469051122665, |
|
"rewards/rejected": -0.15648110210895538, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 3.633027339502318e-08, |
|
"logits/chosen": -0.12912589311599731, |
|
"logits/rejected": 0.10515755414962769, |
|
"logps/chosen": -203.35064697265625, |
|
"logps/rejected": -230.3003387451172, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06046416610479355, |
|
"rewards/margins": 0.0981229841709137, |
|
"rewards/margins_max": 0.1455102413892746, |
|
"rewards/margins_min": 0.05073573440313339, |
|
"rewards/margins_std": 0.06701570004224777, |
|
"rewards/rejected": -0.15858715772628784, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 3.448797939124992e-08, |
|
"logits/chosen": -0.18330267071723938, |
|
"logits/rejected": 0.10012233257293701, |
|
"logps/chosen": -236.8273162841797, |
|
"logps/rejected": -239.4166259765625, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06569047272205353, |
|
"rewards/margins": 0.10959680378437042, |
|
"rewards/margins_max": 0.1525718718767166, |
|
"rewards/margins_min": 0.06662173569202423, |
|
"rewards/margins_std": 0.060775917023420334, |
|
"rewards/rejected": -0.17528727650642395, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 3.269015529333805e-08, |
|
"logits/chosen": -0.14039239287376404, |
|
"logits/rejected": 0.017039867118000984, |
|
"logps/chosen": -220.995849609375, |
|
"logps/rejected": -262.5998840332031, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06117891147732735, |
|
"rewards/margins": 0.11040657758712769, |
|
"rewards/margins_max": 0.16529114544391632, |
|
"rewards/margins_min": 0.05552203208208084, |
|
"rewards/margins_std": 0.07761847972869873, |
|
"rewards/rejected": -0.17158548533916473, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 3.0937172073951525e-08, |
|
"logits/chosen": -0.1851753294467926, |
|
"logits/rejected": 0.10401411354541779, |
|
"logps/chosen": -227.9702606201172, |
|
"logps/rejected": -212.72158813476562, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.056978046894073486, |
|
"rewards/margins": 0.08857695013284683, |
|
"rewards/margins_max": 0.12364324182271957, |
|
"rewards/margins_min": 0.05351065471768379, |
|
"rewards/margins_std": 0.049591224640607834, |
|
"rewards/rejected": -0.14555500447750092, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 2.9229391453046814e-08, |
|
"logits/chosen": -0.10515342652797699, |
|
"logits/rejected": 0.07552903145551682, |
|
"logps/chosen": -205.2806396484375, |
|
"logps/rejected": -247.0736541748047, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.05599958822131157, |
|
"rewards/margins": 0.10005147755146027, |
|
"rewards/margins_max": 0.14496475458145142, |
|
"rewards/margins_min": 0.055138200521469116, |
|
"rewards/margins_std": 0.06351695954799652, |
|
"rewards/rejected": -0.15605106949806213, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 2.756716582323407e-08, |
|
"logits/chosen": -0.18915608525276184, |
|
"logits/rejected": 0.10052738338708878, |
|
"logps/chosen": -214.53921508789062, |
|
"logps/rejected": -217.12191772460938, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.055889736860990524, |
|
"rewards/margins": 0.10114419460296631, |
|
"rewards/margins_max": 0.13807928562164307, |
|
"rewards/margins_min": 0.06420911848545074, |
|
"rewards/margins_std": 0.052234094589948654, |
|
"rewards/rejected": -0.15703395009040833, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 2.5950838177062255e-08, |
|
"logits/chosen": -0.18466925621032715, |
|
"logits/rejected": -0.0018462598090991378, |
|
"logps/chosen": -188.68309020996094, |
|
"logps/rejected": -227.0560302734375, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.04883568361401558, |
|
"rewards/margins": 0.11194157600402832, |
|
"rewards/margins_max": 0.151218444108963, |
|
"rewards/margins_min": 0.07266470044851303, |
|
"rewards/margins_std": 0.05554589629173279, |
|
"rewards/rejected": -0.1607772707939148, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 2.438074203624424e-08, |
|
"logits/chosen": -0.18436864018440247, |
|
"logits/rejected": 0.04486365243792534, |
|
"logps/chosen": -236.3048553466797, |
|
"logps/rejected": -249.7228546142578, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.05570978671312332, |
|
"rewards/margins": 0.09718596935272217, |
|
"rewards/margins_max": 0.13665816187858582, |
|
"rewards/margins_min": 0.057713788002729416, |
|
"rewards/margins_std": 0.055822111666202545, |
|
"rewards/rejected": -0.1528957486152649, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 2.2857201382836282e-08, |
|
"logits/chosen": -0.14316371083259583, |
|
"logits/rejected": 0.12006983906030655, |
|
"logps/chosen": -209.4042510986328, |
|
"logps/rejected": -232.90353393554688, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.05837802216410637, |
|
"rewards/margins": 0.1196541041135788, |
|
"rewards/margins_max": 0.1695931851863861, |
|
"rewards/margins_min": 0.06971500813961029, |
|
"rewards/margins_std": 0.07062454521656036, |
|
"rewards/rejected": -0.17803213000297546, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.5, |
|
"learning_rate": 2.138053059238573e-08, |
|
"logits/chosen": -0.16864949464797974, |
|
"logits/rejected": 0.02723456546664238, |
|
"logps/chosen": -236.72793579101562, |
|
"logps/rejected": -249.60092163085938, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.06903600692749023, |
|
"rewards/margins": 0.08803755789995193, |
|
"rewards/margins_max": 0.1324770301580429, |
|
"rewards/margins_min": 0.043598074465990067, |
|
"rewards/margins_std": 0.06284691393375397, |
|
"rewards/rejected": -0.15707355737686157, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 1.9951034369060952e-08, |
|
"logits/chosen": -0.16016066074371338, |
|
"logits/rejected": 0.05830240249633789, |
|
"logps/chosen": -234.17642211914062, |
|
"logps/rejected": -261.7164001464844, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.07695670425891876, |
|
"rewards/margins": 0.10259735584259033, |
|
"rewards/margins_max": 0.15329495072364807, |
|
"rewards/margins_min": 0.05189976841211319, |
|
"rewards/margins_std": 0.07169721275568008, |
|
"rewards/rejected": -0.1795540750026703, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 1.8569007682777415e-08, |
|
"logits/chosen": -0.12369527667760849, |
|
"logits/rejected": 0.0878855437040329, |
|
"logps/chosen": -222.7164306640625, |
|
"logps/rejected": -236.99679565429688, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.06640324741601944, |
|
"rewards/margins": 0.09582678228616714, |
|
"rewards/margins_max": 0.14717131853103638, |
|
"rewards/margins_min": 0.04448222368955612, |
|
"rewards/margins_std": 0.07261216640472412, |
|
"rewards/rejected": -0.16223004460334778, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 1.7234735708331673e-08, |
|
"logits/chosen": -0.0934344008564949, |
|
"logits/rejected": 0.15260164439678192, |
|
"logps/chosen": -228.0308837890625, |
|
"logps/rejected": -225.59909057617188, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06557970494031906, |
|
"rewards/margins": 0.09566263109445572, |
|
"rewards/margins_max": 0.13359490036964417, |
|
"rewards/margins_min": 0.05773034691810608, |
|
"rewards/margins_std": 0.05364434793591499, |
|
"rewards/rejected": -0.16124233603477478, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.594849376655702e-08, |
|
"logits/chosen": -0.15817022323608398, |
|
"logits/rejected": 0.05617784336209297, |
|
"logps/chosen": -214.3910675048828, |
|
"logps/rejected": -237.107177734375, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05829305574297905, |
|
"rewards/margins": 0.1066914051771164, |
|
"rewards/margins_max": 0.1624380648136139, |
|
"rewards/margins_min": 0.05094476789236069, |
|
"rewards/margins_std": 0.07883764803409576, |
|
"rewards/rejected": -0.16498446464538574, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.4710547267512253e-08, |
|
"logits/chosen": -0.14244134724140167, |
|
"logits/rejected": 0.0684979110956192, |
|
"logps/chosen": -204.86105346679688, |
|
"logps/rejected": -225.93289184570312, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.05910477787256241, |
|
"rewards/margins": 0.0981929823756218, |
|
"rewards/margins_max": 0.15315920114517212, |
|
"rewards/margins_min": 0.04322676360607147, |
|
"rewards/margins_std": 0.07773397862911224, |
|
"rewards/rejected": -0.1572977602481842, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.3521151655715602e-08, |
|
"logits/chosen": -0.08284337818622589, |
|
"logits/rejected": 0.15880750119686127, |
|
"logps/chosen": -228.5080108642578, |
|
"logps/rejected": -237.3908233642578, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0625772625207901, |
|
"rewards/margins": 0.09644486010074615, |
|
"rewards/margins_max": 0.14257869124412537, |
|
"rewards/margins_min": 0.05031103640794754, |
|
"rewards/margins_std": 0.06524308770895004, |
|
"rewards/rejected": -0.15902213752269745, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.2380552357434932e-08, |
|
"logits/chosen": -0.12101718038320541, |
|
"logits/rejected": 0.08898299932479858, |
|
"logps/chosen": -206.19943237304688, |
|
"logps/rejected": -222.4971923828125, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0597076490521431, |
|
"rewards/margins": 0.09617511183023453, |
|
"rewards/margins_max": 0.13126114010810852, |
|
"rewards/margins_min": 0.06108907610177994, |
|
"rewards/margins_std": 0.049619145691394806, |
|
"rewards/rejected": -0.15588276088237762, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.1288984730044998e-08, |
|
"logits/chosen": -0.15863756835460663, |
|
"logits/rejected": 0.09168653935194016, |
|
"logps/chosen": -225.25711059570312, |
|
"logps/rejected": -244.12844848632812, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.05879504233598709, |
|
"rewards/margins": 0.105413056910038, |
|
"rewards/margins_max": 0.14556556940078735, |
|
"rewards/margins_min": 0.06526056677103043, |
|
"rewards/margins_std": 0.05678422003984451, |
|
"rewards/rejected": -0.16420809924602509, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.0246674013462852e-08, |
|
"logits/chosen": -0.1646089255809784, |
|
"logits/rejected": 0.06766968220472336, |
|
"logps/chosen": -235.84341430664062, |
|
"logps/rejected": -237.8670196533203, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.060948483645915985, |
|
"rewards/margins": 0.09341399371623993, |
|
"rewards/margins_max": 0.14046376943588257, |
|
"rewards/margins_min": 0.046364206820726395, |
|
"rewards/margins_std": 0.06653843820095062, |
|
"rewards/rejected": -0.15436246991157532, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 9.253835283670381e-09, |
|
"logits/chosen": -0.13539066910743713, |
|
"logits/rejected": 0.1348244845867157, |
|
"logps/chosen": -216.82565307617188, |
|
"logps/rejected": -221.89682006835938, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.04900694265961647, |
|
"rewards/margins": 0.11065097898244858, |
|
"rewards/margins_max": 0.16241362690925598, |
|
"rewards/margins_min": 0.058888327330350876, |
|
"rewards/margins_std": 0.07320345193147659, |
|
"rewards/rejected": -0.15965792536735535, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 8.310673408334496e-09, |
|
"logits/chosen": -0.1519135981798172, |
|
"logits/rejected": 0.039234552532434464, |
|
"logps/chosen": -198.13656616210938, |
|
"logps/rejected": -219.90493774414062, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05604385584592819, |
|
"rewards/margins": 0.10299549251794815, |
|
"rewards/margins_max": 0.13935771584510803, |
|
"rewards/margins_min": 0.06663324683904648, |
|
"rewards/margins_std": 0.05142395943403244, |
|
"rewards/rejected": -0.15903934836387634, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 7.417383004533567e-09, |
|
"logits/chosen": -0.18010476231575012, |
|
"logits/rejected": 0.00459135789424181, |
|
"logps/chosen": -200.7607421875, |
|
"logps/rejected": -229.78955078125, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06125596910715103, |
|
"rewards/margins": 0.08764808624982834, |
|
"rewards/margins_max": 0.1255454272031784, |
|
"rewards/margins_min": 0.04975075647234917, |
|
"rewards/margins_std": 0.0535949282348156, |
|
"rewards/rejected": -0.14890405535697937, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 6.574148398599183e-09, |
|
"logits/chosen": -0.10086911916732788, |
|
"logits/rejected": 0.08481906354427338, |
|
"logps/chosen": -219.78976440429688, |
|
"logps/rejected": -238.8533477783203, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.057928215712308884, |
|
"rewards/margins": 0.10362323373556137, |
|
"rewards/margins_max": 0.15504048764705658, |
|
"rewards/margins_min": 0.05220597982406616, |
|
"rewards/margins_std": 0.07271497696638107, |
|
"rewards/rejected": -0.16155146062374115, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 5.7811435880811e-09, |
|
"logits/chosen": -0.17055755853652954, |
|
"logits/rejected": 0.11661942303180695, |
|
"logps/chosen": -214.5771942138672, |
|
"logps/rejected": -227.61026000976562, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.04704277589917183, |
|
"rewards/margins": 0.14319387078285217, |
|
"rewards/margins_max": 0.18986010551452637, |
|
"rewards/margins_min": 0.09652762115001678, |
|
"rewards/margins_std": 0.06599602103233337, |
|
"rewards/rejected": -0.1902366429567337, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 5.03853220584391e-09, |
|
"logits/chosen": -0.13923177123069763, |
|
"logits/rejected": 0.1401730477809906, |
|
"logps/chosen": -230.38186645507812, |
|
"logps/rejected": -243.49148559570312, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.06614328920841217, |
|
"rewards/margins": 0.10860340297222137, |
|
"rewards/margins_max": 0.15587784349918365, |
|
"rewards/margins_min": 0.0613289400935173, |
|
"rewards/margins_std": 0.06685616821050644, |
|
"rewards/rejected": -0.17474667727947235, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.346467486301881e-09, |
|
"logits/chosen": -0.1736827790737152, |
|
"logits/rejected": 0.1112961396574974, |
|
"logps/chosen": -211.4966278076172, |
|
"logps/rejected": -222.78640747070312, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.053413182497024536, |
|
"rewards/margins": 0.09374302625656128, |
|
"rewards/margins_max": 0.1378607153892517, |
|
"rewards/margins_min": 0.04962532967329025, |
|
"rewards/margins_std": 0.062391847372055054, |
|
"rewards/rejected": -0.14715620875358582, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 3.7050922338e-09, |
|
"logits/chosen": -0.1892717033624649, |
|
"logits/rejected": 0.03548423945903778, |
|
"logps/chosen": -221.7522735595703, |
|
"logps/rejected": -260.614501953125, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.06389190256595612, |
|
"rewards/margins": 0.09883169084787369, |
|
"rewards/margins_max": 0.14532563090324402, |
|
"rewards/margins_min": 0.05233774706721306, |
|
"rewards/margins_std": 0.06575236469507217, |
|
"rewards/rejected": -0.162723571062088, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 3.1145387931467705e-09, |
|
"logits/chosen": -0.16101527214050293, |
|
"logits/rejected": 0.12721626460552216, |
|
"logps/chosen": -215.1056365966797, |
|
"logps/rejected": -217.00265502929688, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06273301690816879, |
|
"rewards/margins": 0.09039122611284256, |
|
"rewards/margins_max": 0.13275966048240662, |
|
"rewards/margins_min": 0.04802277684211731, |
|
"rewards/margins_std": 0.059918034821748734, |
|
"rewards/rejected": -0.15312424302101135, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 2.5749290223055498e-09, |
|
"logits/chosen": -0.0827338844537735, |
|
"logits/rejected": 0.14059332013130188, |
|
"logps/chosen": -216.60971069335938, |
|
"logps/rejected": -236.35986328125, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.061508674174547195, |
|
"rewards/margins": 0.10081305354833603, |
|
"rewards/margins_max": 0.13673627376556396, |
|
"rewards/margins_min": 0.0648898258805275, |
|
"rewards/margins_std": 0.05080310255289078, |
|
"rewards/rejected": -0.16232173144817352, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 2.086374267249724e-09, |
|
"logits/chosen": -0.16651371121406555, |
|
"logits/rejected": 0.09923712909221649, |
|
"logps/chosen": -231.68002319335938, |
|
"logps/rejected": -244.4033203125, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.056234586983919144, |
|
"rewards/margins": 0.10171985626220703, |
|
"rewards/margins_max": 0.14425083994865417, |
|
"rewards/margins_min": 0.05918886512517929, |
|
"rewards/margins_std": 0.06014790013432503, |
|
"rewards/rejected": -0.15795443952083588, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.6489753389869742e-09, |
|
"logits/chosen": -0.1336033046245575, |
|
"logits/rejected": 0.05479846149682999, |
|
"logps/chosen": -219.4292449951172, |
|
"logps/rejected": -253.1160125732422, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05078500509262085, |
|
"rewards/margins": 0.11668237298727036, |
|
"rewards/margins_max": 0.16264045238494873, |
|
"rewards/margins_min": 0.07072430849075317, |
|
"rewards/margins_std": 0.06499452143907547, |
|
"rewards/rejected": -0.1674673855304718, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.262822492757415e-09, |
|
"logits/chosen": -0.1401694118976593, |
|
"logits/rejected": 0.09876145422458649, |
|
"logps/chosen": -195.66046142578125, |
|
"logps/rejected": -218.9545135498047, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05541166663169861, |
|
"rewards/margins": 0.103818379342556, |
|
"rewards/margins_max": 0.15159131586551666, |
|
"rewards/margins_min": 0.05604543536901474, |
|
"rewards/margins_std": 0.06756114214658737, |
|
"rewards/rejected": -0.159230038523674, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 9.279954094097709e-10, |
|
"logits/chosen": -0.18476589024066925, |
|
"logits/rejected": 0.04040500894188881, |
|
"logps/chosen": -211.14932250976562, |
|
"logps/rejected": -221.921875, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06176955625414848, |
|
"rewards/margins": 0.09228489547967911, |
|
"rewards/margins_max": 0.14073041081428528, |
|
"rewards/margins_min": 0.04383937641978264, |
|
"rewards/margins_std": 0.06851230561733246, |
|
"rewards/rejected": -0.1540544331073761, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 6.445631789597228e-10, |
|
"logits/chosen": -0.1372963935136795, |
|
"logits/rejected": 0.2152913510799408, |
|
"logps/chosen": -240.0201873779297, |
|
"logps/rejected": -232.8347625732422, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0573449544608593, |
|
"rewards/margins": 0.09935126453638077, |
|
"rewards/margins_max": 0.15012647211551666, |
|
"rewards/margins_min": 0.04857606440782547, |
|
"rewards/margins_std": 0.07180698215961456, |
|
"rewards/rejected": -0.15669623017311096, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 4.1258428633339503e-10, |
|
"logits/chosen": -0.11732598394155502, |
|
"logits/rejected": 0.09575303643941879, |
|
"logps/chosen": -200.75241088867188, |
|
"logps/rejected": -224.1798553466797, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05652853846549988, |
|
"rewards/margins": 0.11160199344158173, |
|
"rewards/margins_max": 0.16687549650669098, |
|
"rewards/margins_min": 0.056328482925891876, |
|
"rewards/margins_std": 0.07816854864358902, |
|
"rewards/rejected": -0.1681305170059204, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 2.3210659929931432e-10, |
|
"logits/chosen": -0.1350639909505844, |
|
"logits/rejected": 0.05836183577775955, |
|
"logps/chosen": -198.38735961914062, |
|
"logps/rejected": -229.34182739257812, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.052884865552186966, |
|
"rewards/margins": 0.10049891471862793, |
|
"rewards/margins_max": 0.15602007508277893, |
|
"rewards/margins_min": 0.04497777670621872, |
|
"rewards/margins_std": 0.07851874828338623, |
|
"rewards/rejected": -0.1533837914466858, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.0316735859111636e-10, |
|
"logits/chosen": -0.18329744040966034, |
|
"logits/rejected": 0.12713107466697693, |
|
"logps/chosen": -213.11398315429688, |
|
"logps/rejected": -212.9873504638672, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.056636691093444824, |
|
"rewards/margins": 0.1115182638168335, |
|
"rewards/margins_max": 0.17401501536369324, |
|
"rewards/margins_min": 0.04902151972055435, |
|
"rewards/margins_std": 0.088383749127388, |
|
"rewards/rejected": -0.16815496981143951, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 2.5793170223026295e-11, |
|
"logits/chosen": -0.09687203168869019, |
|
"logits/rejected": 0.1417822688817978, |
|
"logps/chosen": -213.21426391601562, |
|
"logps/rejected": -236.4486541748047, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.06115274503827095, |
|
"rewards/margins": 0.10435257852077484, |
|
"rewards/margins_max": 0.14675767719745636, |
|
"rewards/margins_min": 0.06194749474525452, |
|
"rewards/margins_std": 0.05996985360980034, |
|
"rewards/rejected": -0.1655053198337555, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.09515249729156494, |
|
"logits/rejected": 0.09782592952251434, |
|
"logps/chosen": -215.6335906982422, |
|
"logps/rejected": -256.457763671875, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.053227148950099945, |
|
"rewards/margins": 0.10610984265804291, |
|
"rewards/margins_max": 0.14640632271766663, |
|
"rewards/margins_min": 0.06581337004899979, |
|
"rewards/margins_std": 0.05698782205581665, |
|
"rewards/rejected": -0.15933698415756226, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": 0.7168996334075928, |
|
"eval_logits/rejected": 0.8648675084114075, |
|
"eval_logps/chosen": -339.363525390625, |
|
"eval_logps/rejected": -325.4676208496094, |
|
"eval_loss": 0.6905444264411926, |
|
"eval_rewards/accuracies": 0.578000009059906, |
|
"eval_rewards/chosen": -0.05384029448032379, |
|
"eval_rewards/margins": 0.0059645208530128, |
|
"eval_rewards/margins_max": 0.06618467718362808, |
|
"eval_rewards/margins_min": -0.05284303426742554, |
|
"eval_rewards/margins_std": 0.038610368967056274, |
|
"eval_rewards/rejected": -0.05980480834841728, |
|
"eval_runtime": 835.6041, |
|
"eval_samples_per_second": 4.787, |
|
"eval_steps_per_second": 0.299, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2430, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6545025648894134, |
|
"train_runtime": 24035.423, |
|
"train_samples_per_second": 1.618, |
|
"train_steps_per_second": 0.101 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|