|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998424948810837, |
|
"eval_steps": 100, |
|
"global_step": 3174, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.5723270440251573e-08, |
|
"logits/chosen": -1.3876760005950928, |
|
"logits/rejected": -1.4584133625030518, |
|
"logps/chosen": -148.11717224121094, |
|
"logps/rejected": -197.28189086914062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.5723270440251575e-07, |
|
"logits/chosen": -1.2968941926956177, |
|
"logits/rejected": -1.006857991218567, |
|
"logps/chosen": -190.5150604248047, |
|
"logps/rejected": -182.05677795410156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.0008159870631061494, |
|
"rewards/margins": 0.0014503882266581059, |
|
"rewards/margins_max": 0.0036827889271080494, |
|
"rewards/margins_min": -0.0007820128812454641, |
|
"rewards/margins_std": 0.003157091559842229, |
|
"rewards/rejected": -0.0006344011053442955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 3.144654088050315e-07, |
|
"logits/chosen": -1.3660128116607666, |
|
"logits/rejected": -1.0527918338775635, |
|
"logps/chosen": -225.42782592773438, |
|
"logps/rejected": -200.10775756835938, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0007323303143493831, |
|
"rewards/margins": 0.0006205940153449774, |
|
"rewards/margins_max": 0.0026885834522545338, |
|
"rewards/margins_min": -0.0014473951887339354, |
|
"rewards/margins_std": 0.002924578730016947, |
|
"rewards/rejected": 0.0001117362771765329, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits/chosen": -1.2632051706314087, |
|
"logits/rejected": -0.9830085039138794, |
|
"logps/chosen": -180.41348266601562, |
|
"logps/rejected": -184.5921173095703, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0001449241244699806, |
|
"rewards/margins": 0.001389974495396018, |
|
"rewards/margins_max": 0.0028195646591484547, |
|
"rewards/margins_min": -3.9615635614609346e-05, |
|
"rewards/margins_std": 0.002021745778620243, |
|
"rewards/rejected": -0.001245050341822207, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.28930817610063e-07, |
|
"logits/chosen": -1.458979845046997, |
|
"logits/rejected": -1.1576180458068848, |
|
"logps/chosen": -225.39303588867188, |
|
"logps/rejected": -276.84759521484375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0010745488107204437, |
|
"rewards/margins": 0.0014787310501560569, |
|
"rewards/margins_max": 0.0033444215077906847, |
|
"rewards/margins_min": -0.00038695911644026637, |
|
"rewards/margins_std": 0.0026384841185063124, |
|
"rewards/rejected": -0.000404182355850935, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 7.861635220125787e-07, |
|
"logits/chosen": -1.367509126663208, |
|
"logits/rejected": -0.8635444641113281, |
|
"logps/chosen": -331.5768127441406, |
|
"logps/rejected": -205.92117309570312, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0009096821886487305, |
|
"rewards/margins": 0.001458239508792758, |
|
"rewards/margins_max": 0.0032467518467456102, |
|
"rewards/margins_min": -0.0003302727418486029, |
|
"rewards/margins_std": 0.0025293382350355387, |
|
"rewards/rejected": -0.0005485572619363666, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits/chosen": -1.1953330039978027, |
|
"logits/rejected": -1.012205958366394, |
|
"logps/chosen": -203.44515991210938, |
|
"logps/rejected": -264.71368408203125, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.003049404127523303, |
|
"rewards/margins": 0.003112142439931631, |
|
"rewards/margins_max": 0.004920116625726223, |
|
"rewards/margins_min": 0.0013041686033830047, |
|
"rewards/margins_std": 0.0025568611454218626, |
|
"rewards/rejected": -6.273845065152273e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 1.1006289308176102e-06, |
|
"logits/chosen": -1.4164549112319946, |
|
"logits/rejected": -1.0944883823394775, |
|
"logps/chosen": -218.58349609375, |
|
"logps/rejected": -225.0066375732422, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0034692329354584217, |
|
"rewards/margins": 0.003931135404855013, |
|
"rewards/margins_max": 0.006383487489074469, |
|
"rewards/margins_min": 0.0014787826221436262, |
|
"rewards/margins_std": 0.003468149807304144, |
|
"rewards/rejected": -0.00046190261491574347, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.257861635220126e-06, |
|
"logits/chosen": -1.273780345916748, |
|
"logits/rejected": -0.994576096534729, |
|
"logps/chosen": -284.58660888671875, |
|
"logps/rejected": -266.48455810546875, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0061839548870921135, |
|
"rewards/margins": 0.006205023266375065, |
|
"rewards/margins_max": 0.008886772207915783, |
|
"rewards/margins_min": 0.0035232752561569214, |
|
"rewards/margins_std": 0.0037925648503005505, |
|
"rewards/rejected": -2.1068564819870517e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits/chosen": -1.4617016315460205, |
|
"logits/rejected": -1.171229600906372, |
|
"logps/chosen": -211.6915283203125, |
|
"logps/rejected": -219.3947296142578, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.005883140489459038, |
|
"rewards/margins": 0.008139796555042267, |
|
"rewards/margins_max": 0.012691095471382141, |
|
"rewards/margins_min": 0.0035884971730411053, |
|
"rewards/margins_std": 0.006436510477215052, |
|
"rewards/rejected": -0.0022566565312445164, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.5723270440251573e-06, |
|
"logits/chosen": -1.4086381196975708, |
|
"logits/rejected": -0.9039764404296875, |
|
"logps/chosen": -256.61712646484375, |
|
"logps/rejected": -205.50588989257812, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.00955943949520588, |
|
"rewards/margins": 0.011472916230559349, |
|
"rewards/margins_max": 0.018265143036842346, |
|
"rewards/margins_min": 0.004680688492953777, |
|
"rewards/margins_std": 0.009605659171938896, |
|
"rewards/rejected": -0.0019134767353534698, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.7295597484276729e-06, |
|
"logits/chosen": -1.2530136108398438, |
|
"logits/rejected": -0.9792757034301758, |
|
"logps/chosen": -229.84579467773438, |
|
"logps/rejected": -190.1811065673828, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.009367749094963074, |
|
"rewards/margins": 0.01274518109858036, |
|
"rewards/margins_max": 0.01925867795944214, |
|
"rewards/margins_min": 0.006231681443750858, |
|
"rewards/margins_std": 0.009211478754878044, |
|
"rewards/rejected": -0.00337742967531085, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits/chosen": -1.4178739786148071, |
|
"logits/rejected": -1.0519492626190186, |
|
"logps/chosen": -194.27273559570312, |
|
"logps/rejected": -186.27613830566406, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01265544630587101, |
|
"rewards/margins": 0.014012080617249012, |
|
"rewards/margins_max": 0.020701151341199875, |
|
"rewards/margins_min": 0.007323009427636862, |
|
"rewards/margins_std": 0.009459775872528553, |
|
"rewards/rejected": -0.0013566340785473585, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 2.044025157232705e-06, |
|
"logits/chosen": -1.180983066558838, |
|
"logits/rejected": -0.9479328989982605, |
|
"logps/chosen": -218.3427734375, |
|
"logps/rejected": -249.2072296142578, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.013748427852988243, |
|
"rewards/margins": 0.01925661601126194, |
|
"rewards/margins_max": 0.027199331670999527, |
|
"rewards/margins_min": 0.011313898488879204, |
|
"rewards/margins_std": 0.011232697404921055, |
|
"rewards/rejected": -0.005508188158273697, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 2.2012578616352204e-06, |
|
"logits/chosen": -1.189428448677063, |
|
"logits/rejected": -0.9659063220024109, |
|
"logps/chosen": -266.5777282714844, |
|
"logps/rejected": -217.1243438720703, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.012634982354938984, |
|
"rewards/margins": 0.019540421664714813, |
|
"rewards/margins_max": 0.028719156980514526, |
|
"rewards/margins_min": 0.0103616863489151, |
|
"rewards/margins_std": 0.012980693951249123, |
|
"rewards/rejected": -0.006905439309775829, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits/chosen": -1.3495254516601562, |
|
"logits/rejected": -0.8819383382797241, |
|
"logps/chosen": -302.58099365234375, |
|
"logps/rejected": -235.6078643798828, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.025924265384674072, |
|
"rewards/margins": 0.03467489033937454, |
|
"rewards/margins_max": 0.04881124570965767, |
|
"rewards/margins_min": 0.02053852751851082, |
|
"rewards/margins_std": 0.019991829991340637, |
|
"rewards/rejected": -0.00875062309205532, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 2.515723270440252e-06, |
|
"logits/chosen": -1.3711656332015991, |
|
"logits/rejected": -1.1784591674804688, |
|
"logps/chosen": -166.46961975097656, |
|
"logps/rejected": -229.8678436279297, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.023975688964128494, |
|
"rewards/margins": 0.03182069957256317, |
|
"rewards/margins_max": 0.047649938613176346, |
|
"rewards/margins_min": 0.015991469845175743, |
|
"rewards/margins_std": 0.022385913878679276, |
|
"rewards/rejected": -0.00784501526504755, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 2.6729559748427675e-06, |
|
"logits/chosen": -1.2441256046295166, |
|
"logits/rejected": -0.9379784464836121, |
|
"logps/chosen": -218.8394775390625, |
|
"logps/rejected": -199.82815551757812, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022251352667808533, |
|
"rewards/margins": 0.03415388613939285, |
|
"rewards/margins_max": 0.045127563178539276, |
|
"rewards/margins_min": 0.023180212825536728, |
|
"rewards/margins_std": 0.015519115142524242, |
|
"rewards/rejected": -0.01190253347158432, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits/chosen": -1.3883864879608154, |
|
"logits/rejected": -1.0542776584625244, |
|
"logps/chosen": -210.83740234375, |
|
"logps/rejected": -217.66787719726562, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.031140008941292763, |
|
"rewards/margins": 0.048059821128845215, |
|
"rewards/margins_max": 0.07035170495510101, |
|
"rewards/margins_min": 0.02576792798936367, |
|
"rewards/margins_std": 0.031525496393442154, |
|
"rewards/rejected": -0.016919810324907303, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 2.987421383647799e-06, |
|
"logits/chosen": -1.2937629222869873, |
|
"logits/rejected": -1.0859944820404053, |
|
"logps/chosen": -193.8387451171875, |
|
"logps/rejected": -216.71841430664062, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.030585434287786484, |
|
"rewards/margins": 0.03754722326993942, |
|
"rewards/margins_max": 0.055405668914318085, |
|
"rewards/margins_min": 0.019688773900270462, |
|
"rewards/margins_std": 0.025255661457777023, |
|
"rewards/rejected": -0.00696178525686264, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 3.1446540880503146e-06, |
|
"logits/chosen": -1.3293890953063965, |
|
"logits/rejected": -1.1202318668365479, |
|
"logps/chosen": -217.463623046875, |
|
"logps/rejected": -208.9364013671875, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.017296183854341507, |
|
"rewards/margins": 0.037150561809539795, |
|
"rewards/margins_max": 0.05749017000198364, |
|
"rewards/margins_min": 0.016810955479741096, |
|
"rewards/margins_std": 0.028764545917510986, |
|
"rewards/rejected": -0.01985437609255314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits/chosen": -1.446415662765503, |
|
"logits/rejected": -1.1044580936431885, |
|
"logps/chosen": -234.07064819335938, |
|
"logps/rejected": -254.47964477539062, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0392422117292881, |
|
"rewards/margins": 0.05013582855463028, |
|
"rewards/margins_max": 0.07770033180713654, |
|
"rewards/margins_min": 0.022571321576833725, |
|
"rewards/margins_std": 0.038982093334198, |
|
"rewards/rejected": -0.01089361310005188, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 3.4591194968553458e-06, |
|
"logits/chosen": -1.411773681640625, |
|
"logits/rejected": -1.1437511444091797, |
|
"logps/chosen": -272.9713439941406, |
|
"logps/rejected": -201.37228393554688, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02809012308716774, |
|
"rewards/margins": 0.05969276279211044, |
|
"rewards/margins_max": 0.08930987864732742, |
|
"rewards/margins_min": 0.03007565811276436, |
|
"rewards/margins_std": 0.04188491404056549, |
|
"rewards/rejected": -0.031602635979652405, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 3.6163522012578618e-06, |
|
"logits/chosen": -1.4335668087005615, |
|
"logits/rejected": -1.1354362964630127, |
|
"logps/chosen": -249.6646270751953, |
|
"logps/rejected": -205.4978485107422, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04378952831029892, |
|
"rewards/margins": 0.08125929534435272, |
|
"rewards/margins_max": 0.11855404078960419, |
|
"rewards/margins_min": 0.04396456480026245, |
|
"rewards/margins_std": 0.052742719650268555, |
|
"rewards/rejected": -0.0374697707593441, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits/chosen": -1.422411561012268, |
|
"logits/rejected": -1.2927656173706055, |
|
"logps/chosen": -172.6126708984375, |
|
"logps/rejected": -264.3357849121094, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.04493634030222893, |
|
"rewards/margins": 0.08786293864250183, |
|
"rewards/margins_max": 0.12235504388809204, |
|
"rewards/margins_min": 0.053370825946331024, |
|
"rewards/margins_std": 0.0487792082130909, |
|
"rewards/rejected": -0.042926594614982605, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 3.930817610062894e-06, |
|
"logits/chosen": -1.4506088495254517, |
|
"logits/rejected": -1.2010631561279297, |
|
"logps/chosen": -260.5227966308594, |
|
"logps/rejected": -221.35153198242188, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.052806317806243896, |
|
"rewards/margins": 0.09446804225444794, |
|
"rewards/margins_max": 0.13791503012180328, |
|
"rewards/margins_min": 0.05102104693651199, |
|
"rewards/margins_std": 0.061443328857421875, |
|
"rewards/rejected": -0.041661716997623444, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 4.08805031446541e-06, |
|
"logits/chosen": -1.2909493446350098, |
|
"logits/rejected": -0.8009993433952332, |
|
"logps/chosen": -277.853271484375, |
|
"logps/rejected": -263.16009521484375, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.06023542210459709, |
|
"rewards/margins": 0.1075894683599472, |
|
"rewards/margins_max": 0.14478802680969238, |
|
"rewards/margins_min": 0.07039090245962143, |
|
"rewards/margins_std": 0.05260671302676201, |
|
"rewards/rejected": -0.047354042530059814, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits/chosen": -1.4090592861175537, |
|
"logits/rejected": -0.8552477955818176, |
|
"logps/chosen": -254.5354766845703, |
|
"logps/rejected": -248.50057983398438, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.06402402371168137, |
|
"rewards/margins": 0.1346406638622284, |
|
"rewards/margins_max": 0.18887916207313538, |
|
"rewards/margins_min": 0.08040215820074081, |
|
"rewards/margins_std": 0.07670482993125916, |
|
"rewards/rejected": -0.07061664760112762, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 4.402515723270441e-06, |
|
"logits/chosen": -1.5802761316299438, |
|
"logits/rejected": -1.236020565032959, |
|
"logps/chosen": -191.21524047851562, |
|
"logps/rejected": -195.28176879882812, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04768265783786774, |
|
"rewards/margins": 0.0977226123213768, |
|
"rewards/margins_max": 0.14488402009010315, |
|
"rewards/margins_min": 0.05056120082736015, |
|
"rewards/margins_std": 0.06669630855321884, |
|
"rewards/rejected": -0.050039954483509064, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 4.559748427672957e-06, |
|
"logits/chosen": -1.5027745962142944, |
|
"logits/rejected": -1.0609099864959717, |
|
"logps/chosen": -237.45034790039062, |
|
"logps/rejected": -216.4964141845703, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.05908682197332382, |
|
"rewards/margins": 0.14679358899593353, |
|
"rewards/margins_max": 0.21701796352863312, |
|
"rewards/margins_min": 0.07656919956207275, |
|
"rewards/margins_std": 0.09931226819753647, |
|
"rewards/rejected": -0.08770675212144852, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits/chosen": -1.6774566173553467, |
|
"logits/rejected": -1.2361747026443481, |
|
"logps/chosen": -220.8132781982422, |
|
"logps/rejected": -220.8450469970703, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0774054005742073, |
|
"rewards/margins": 0.17351576685905457, |
|
"rewards/margins_max": 0.25278240442276, |
|
"rewards/margins_min": 0.09424915909767151, |
|
"rewards/margins_std": 0.11209992319345474, |
|
"rewards/rejected": -0.09611036628484726, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 4.874213836477988e-06, |
|
"logits/chosen": -1.428739309310913, |
|
"logits/rejected": -1.1747839450836182, |
|
"logps/chosen": -194.78067016601562, |
|
"logps/rejected": -227.9475555419922, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.060627926141023636, |
|
"rewards/margins": 0.17353899776935577, |
|
"rewards/margins_max": 0.2618575692176819, |
|
"rewards/margins_min": 0.08522041887044907, |
|
"rewards/margins_std": 0.12490131705999374, |
|
"rewards/rejected": -0.11291106045246124, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 4.999993950030735e-06, |
|
"logits/chosen": -1.4527971744537354, |
|
"logits/rejected": -1.1030164957046509, |
|
"logps/chosen": -248.6017608642578, |
|
"logps/rejected": -257.7789001464844, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.023475894704461098, |
|
"rewards/margins": 0.2308409959077835, |
|
"rewards/margins_max": 0.37093719840049744, |
|
"rewards/margins_min": 0.09074478596448898, |
|
"rewards/margins_std": 0.1981259286403656, |
|
"rewards/rejected": -0.20736508071422577, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.999782204181027e-06, |
|
"logits/chosen": -1.5599935054779053, |
|
"logits/rejected": -1.0608078241348267, |
|
"logps/chosen": -236.848876953125, |
|
"logps/rejected": -287.7281188964844, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.042720407247543335, |
|
"rewards/margins": 0.3582765460014343, |
|
"rewards/margins_max": 0.6046980619430542, |
|
"rewards/margins_min": 0.11185508966445923, |
|
"rewards/margins_std": 0.3484925925731659, |
|
"rewards/rejected": -0.315556138753891, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 4.99926798914914e-06, |
|
"logits/chosen": -1.3914912939071655, |
|
"logits/rejected": -1.0357228517532349, |
|
"logps/chosen": -183.1966552734375, |
|
"logps/rejected": -208.01705932617188, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.006871527526527643, |
|
"rewards/margins": 0.2630093991756439, |
|
"rewards/margins_max": 0.37292978167533875, |
|
"rewards/margins_min": 0.1530890315771103, |
|
"rewards/margins_std": 0.15545089542865753, |
|
"rewards/rejected": -0.2698809504508972, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 4.9984513671541735e-06, |
|
"logits/chosen": -1.4279831647872925, |
|
"logits/rejected": -0.9789050817489624, |
|
"logps/chosen": -265.3249206542969, |
|
"logps/rejected": -276.2162780761719, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.011723552830517292, |
|
"rewards/margins": 0.3322909474372864, |
|
"rewards/margins_max": 0.561407208442688, |
|
"rewards/margins_min": 0.10317464172840118, |
|
"rewards/margins_std": 0.3240194022655487, |
|
"rewards/rejected": -0.3440144658088684, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 4.997332437005932e-06, |
|
"logits/chosen": -1.7668163776397705, |
|
"logits/rejected": -1.3288036584854126, |
|
"logps/chosen": -282.70648193359375, |
|
"logps/rejected": -300.6052551269531, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.04692164808511734, |
|
"rewards/margins": 0.7383378744125366, |
|
"rewards/margins_max": 1.2071880102157593, |
|
"rewards/margins_min": 0.26948752999305725, |
|
"rewards/margins_std": 0.663054347038269, |
|
"rewards/rejected": -0.7852594256401062, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 4.995911334092963e-06, |
|
"logits/chosen": -1.5333877801895142, |
|
"logits/rejected": -1.1322664022445679, |
|
"logps/chosen": -240.31625366210938, |
|
"logps/rejected": -247.5854034423828, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0783635601401329, |
|
"rewards/margins": 0.6869125366210938, |
|
"rewards/margins_max": 1.07400643825531, |
|
"rewards/margins_min": 0.2998184263706207, |
|
"rewards/margins_std": 0.547433614730835, |
|
"rewards/rejected": -0.7652760744094849, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 4.994188230366184e-06, |
|
"logits/chosen": -1.4324274063110352, |
|
"logits/rejected": -1.1852810382843018, |
|
"logps/chosen": -240.2544708251953, |
|
"logps/rejected": -266.61505126953125, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10970436036586761, |
|
"rewards/margins": 0.7477412223815918, |
|
"rewards/margins_max": 1.18809974193573, |
|
"rewards/margins_min": 0.3073826730251312, |
|
"rewards/margins_std": 0.6227611303329468, |
|
"rewards/rejected": -0.8574455976486206, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 4.9921633343180655e-06, |
|
"logits/chosen": -1.3936595916748047, |
|
"logits/rejected": -0.9535585641860962, |
|
"logps/chosen": -293.89422607421875, |
|
"logps/rejected": -426.3384704589844, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2569817900657654, |
|
"rewards/margins": 1.8755642175674438, |
|
"rewards/margins_max": 3.503901720046997, |
|
"rewards/margins_min": 0.24722722172737122, |
|
"rewards/margins_std": 2.302816152572632, |
|
"rewards/rejected": -2.1325459480285645, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 4.989836890957415e-06, |
|
"logits/chosen": -1.4002983570098877, |
|
"logits/rejected": -1.0570244789123535, |
|
"logps/chosen": -233.2489471435547, |
|
"logps/rejected": -288.95269775390625, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.22145280241966248, |
|
"rewards/margins": 0.6735883355140686, |
|
"rewards/margins_max": 1.1278815269470215, |
|
"rewards/margins_min": 0.2192951738834381, |
|
"rewards/margins_std": 0.6424675583839417, |
|
"rewards/rejected": -0.895041286945343, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 4.987209181779722e-06, |
|
"logits/chosen": -1.5520060062408447, |
|
"logits/rejected": -1.2624719142913818, |
|
"logps/chosen": -232.98684692382812, |
|
"logps/rejected": -404.1191711425781, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3905092179775238, |
|
"rewards/margins": 1.8865032196044922, |
|
"rewards/margins_max": 3.478367328643799, |
|
"rewards/margins_min": 0.2946400046348572, |
|
"rewards/margins_std": 2.251234769821167, |
|
"rewards/rejected": -2.277012586593628, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 4.984280524733107e-06, |
|
"logits/chosen": -1.4704313278198242, |
|
"logits/rejected": -1.0447412729263306, |
|
"logps/chosen": -309.2225036621094, |
|
"logps/rejected": -504.7037048339844, |
|
"loss": 0.3563, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5077094435691833, |
|
"rewards/margins": 2.0941860675811768, |
|
"rewards/margins_max": 3.6239044666290283, |
|
"rewards/margins_min": 0.5644680261611938, |
|
"rewards/margins_std": 2.1633481979370117, |
|
"rewards/rejected": -2.601895332336426, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 4.98105127417984e-06, |
|
"logits/chosen": -1.4012444019317627, |
|
"logits/rejected": -1.0951766967773438, |
|
"logps/chosen": -325.9887390136719, |
|
"logps/rejected": -676.073974609375, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6459981799125671, |
|
"rewards/margins": 3.602614164352417, |
|
"rewards/margins_max": 7.004115104675293, |
|
"rewards/margins_min": 0.2011127471923828, |
|
"rewards/margins_std": 4.810449600219727, |
|
"rewards/rejected": -4.248612403869629, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.9775218208534706e-06, |
|
"logits/chosen": -1.5487967729568481, |
|
"logits/rejected": -1.0010716915130615, |
|
"logps/chosen": -279.5751037597656, |
|
"logps/rejected": -468.4371032714844, |
|
"loss": 0.3252, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7016158699989319, |
|
"rewards/margins": 1.9516023397445679, |
|
"rewards/margins_max": 3.2723541259765625, |
|
"rewards/margins_min": 0.6308507919311523, |
|
"rewards/margins_std": 1.8678247928619385, |
|
"rewards/rejected": -2.6532185077667236, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 4.973692591811549e-06, |
|
"logits/chosen": -1.3033084869384766, |
|
"logits/rejected": -1.1313459873199463, |
|
"logps/chosen": -299.37701416015625, |
|
"logps/rejected": -767.0716552734375, |
|
"loss": 0.3568, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8542273640632629, |
|
"rewards/margins": 4.509963035583496, |
|
"rewards/margins_max": 7.8178558349609375, |
|
"rewards/margins_min": 1.2020692825317383, |
|
"rewards/margins_std": 4.678068161010742, |
|
"rewards/rejected": -5.364189624786377, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 4.9695640503839495e-06, |
|
"logits/chosen": -1.2340087890625, |
|
"logits/rejected": -0.912117600440979, |
|
"logps/chosen": -280.4734802246094, |
|
"logps/rejected": -791.8204345703125, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.067180871963501, |
|
"rewards/margins": 4.743367671966553, |
|
"rewards/margins_max": 8.377991676330566, |
|
"rewards/margins_min": 1.1087446212768555, |
|
"rewards/margins_std": 5.140133857727051, |
|
"rewards/rejected": -5.810548782348633, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 4.965136696116812e-06, |
|
"logits/chosen": -1.3413738012313843, |
|
"logits/rejected": -0.9947126507759094, |
|
"logps/chosen": -369.73773193359375, |
|
"logps/rejected": -1150.2530517578125, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5380936861038208, |
|
"rewards/margins": 7.368071556091309, |
|
"rewards/margins_max": 13.355749130249023, |
|
"rewards/margins_min": 1.380393385887146, |
|
"rewards/margins_std": 8.467855453491211, |
|
"rewards/rejected": -8.90616512298584, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 4.960411064712095e-06, |
|
"logits/chosen": -1.274917721748352, |
|
"logits/rejected": -0.9616276025772095, |
|
"logps/chosen": -348.9794616699219, |
|
"logps/rejected": -1353.219482421875, |
|
"loss": 0.3372, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.6370916366577148, |
|
"rewards/margins": 9.717118263244629, |
|
"rewards/margins_max": 17.675342559814453, |
|
"rewards/margins_min": 1.758894681930542, |
|
"rewards/margins_std": 11.254626274108887, |
|
"rewards/rejected": -11.354207992553711, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.95538772796276e-06, |
|
"logits/chosen": -1.451219916343689, |
|
"logits/rejected": -1.2027828693389893, |
|
"logps/chosen": -529.2562866210938, |
|
"logps/rejected": -1177.658203125, |
|
"loss": 1.0878, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.522387742996216, |
|
"rewards/margins": 6.5533647537231445, |
|
"rewards/margins_max": 15.11187744140625, |
|
"rewards/margins_min": -2.0051493644714355, |
|
"rewards/margins_std": 12.103565216064453, |
|
"rewards/rejected": -10.075750350952148, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 4.95006729368358e-06, |
|
"logits/chosen": -1.532649040222168, |
|
"logits/rejected": -1.1228234767913818, |
|
"logps/chosen": -519.6512451171875, |
|
"logps/rejected": -1331.0703125, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.026210308074951, |
|
"rewards/margins": 8.242622375488281, |
|
"rewards/margins_max": 15.09514045715332, |
|
"rewards/margins_min": 1.3901066780090332, |
|
"rewards/margins_std": 9.690921783447266, |
|
"rewards/rejected": -11.26883316040039, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 4.944450405637603e-06, |
|
"logits/chosen": -1.340835452079773, |
|
"logits/rejected": -1.0451310873031616, |
|
"logps/chosen": -471.9386291503906, |
|
"logps/rejected": -1201.742431640625, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.621904134750366, |
|
"rewards/margins": 7.457037448883057, |
|
"rewards/margins_max": 13.327784538269043, |
|
"rewards/margins_min": 1.5862904787063599, |
|
"rewards/margins_std": 8.302489280700684, |
|
"rewards/rejected": -10.078941345214844, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 4.938537743458248e-06, |
|
"logits/chosen": -1.366683006286621, |
|
"logits/rejected": -1.0551526546478271, |
|
"logps/chosen": -337.539794921875, |
|
"logps/rejected": -1212.2384033203125, |
|
"loss": 0.389, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5652153491973877, |
|
"rewards/margins": 8.746683120727539, |
|
"rewards/margins_max": 16.385772705078125, |
|
"rewards/margins_min": 1.1075944900512695, |
|
"rewards/margins_std": 10.803304672241211, |
|
"rewards/rejected": -10.311899185180664, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 4.932330022567083e-06, |
|
"logits/chosen": -1.3100754022598267, |
|
"logits/rejected": -1.0806552171707153, |
|
"logps/chosen": -364.4188537597656, |
|
"logps/rejected": -860.2052001953125, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5020630359649658, |
|
"rewards/margins": 5.068886756896973, |
|
"rewards/margins_max": 7.5261735916137695, |
|
"rewards/margins_min": 2.611598491668701, |
|
"rewards/margins_std": 3.4751293659210205, |
|
"rewards/rejected": -6.570949554443359, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 4.925827994087245e-06, |
|
"logits/chosen": -1.4696404933929443, |
|
"logits/rejected": -0.9993557929992676, |
|
"logps/chosen": -405.4161682128906, |
|
"logps/rejected": -894.1640625, |
|
"loss": 0.3182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0658953189849854, |
|
"rewards/margins": 4.750572204589844, |
|
"rewards/margins_max": 8.305654525756836, |
|
"rewards/margins_min": 1.195489764213562, |
|
"rewards/margins_std": 5.027646064758301, |
|
"rewards/rejected": -6.81646728515625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.8046875, |
|
"learning_rate": 4.91903244475257e-06, |
|
"logits/chosen": -1.4830572605133057, |
|
"logits/rejected": -1.27045738697052, |
|
"logps/chosen": -477.55389404296875, |
|
"logps/rejected": -1366.124267578125, |
|
"loss": 0.3149, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4173495769500732, |
|
"rewards/margins": 9.214771270751953, |
|
"rewards/margins_max": 13.362762451171875, |
|
"rewards/margins_min": 5.066779613494873, |
|
"rewards/margins_std": 5.866146087646484, |
|
"rewards/rejected": -11.632120132446289, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 4.911944196812391e-06, |
|
"logits/chosen": -1.428005576133728, |
|
"logits/rejected": -1.153662919998169, |
|
"logps/chosen": -409.9786071777344, |
|
"logps/rejected": -1460.192626953125, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0635440349578857, |
|
"rewards/margins": 10.500307083129883, |
|
"rewards/margins_max": 19.511022567749023, |
|
"rewards/margins_min": 1.4895923137664795, |
|
"rewards/margins_std": 12.743075370788574, |
|
"rewards/rejected": -12.563852310180664, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 4.904564107932048e-06, |
|
"logits/chosen": -1.2749096155166626, |
|
"logits/rejected": -0.9493370056152344, |
|
"logps/chosen": -543.0249633789062, |
|
"logps/rejected": -1391.12158203125, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.694972515106201, |
|
"rewards/margins": 8.827417373657227, |
|
"rewards/margins_max": 16.37221336364746, |
|
"rewards/margins_min": 1.2826213836669922, |
|
"rewards/margins_std": 10.669951438903809, |
|
"rewards/rejected": -11.522390365600586, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.875, |
|
"learning_rate": 4.896893071089116e-06, |
|
"logits/chosen": -1.375417947769165, |
|
"logits/rejected": -1.1153507232666016, |
|
"logps/chosen": -440.833984375, |
|
"logps/rejected": -1427.0830078125, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.082918643951416, |
|
"rewards/margins": 9.738504409790039, |
|
"rewards/margins_max": 14.975610733032227, |
|
"rewards/margins_min": 4.501399517059326, |
|
"rewards/margins_std": 7.4063849449157715, |
|
"rewards/rejected": -11.821423530578613, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 4.8889320144653525e-06, |
|
"logits/chosen": -1.3201481103897095, |
|
"logits/rejected": -1.0787646770477295, |
|
"logps/chosen": -460.324951171875, |
|
"logps/rejected": -1797.281982421875, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.4998037815093994, |
|
"rewards/margins": 13.246177673339844, |
|
"rewards/margins_max": 23.318471908569336, |
|
"rewards/margins_min": 3.1738815307617188, |
|
"rewards/margins_std": 14.244379043579102, |
|
"rewards/rejected": -15.74598217010498, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 4.88068190133439e-06, |
|
"logits/chosen": -1.4633710384368896, |
|
"logits/rejected": -1.2496354579925537, |
|
"logps/chosen": -714.4942016601562, |
|
"logps/rejected": -2540.6845703125, |
|
"loss": 0.3476, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.186011791229248, |
|
"rewards/margins": 18.637645721435547, |
|
"rewards/margins_max": 33.042198181152344, |
|
"rewards/margins_min": 4.233096122741699, |
|
"rewards/margins_std": 20.371110916137695, |
|
"rewards/rejected": -22.823659896850586, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.872143729945185e-06, |
|
"logits/chosen": -1.1824166774749756, |
|
"logits/rejected": -0.8578090667724609, |
|
"logps/chosen": -540.1989135742188, |
|
"logps/rejected": -1778.9107666015625, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.1951847076416016, |
|
"rewards/margins": 12.686628341674805, |
|
"rewards/margins_max": 22.70194435119629, |
|
"rewards/margins_min": 2.6713125705718994, |
|
"rewards/margins_std": 14.163797378540039, |
|
"rewards/rejected": -15.881813049316406, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 4.863318533401224e-06, |
|
"logits/chosen": -1.3596642017364502, |
|
"logits/rejected": -1.0070207118988037, |
|
"logps/chosen": -572.4644775390625, |
|
"logps/rejected": -1767.1966552734375, |
|
"loss": 0.2573, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2376201152801514, |
|
"rewards/margins": 11.738923072814941, |
|
"rewards/margins_max": 19.70199966430664, |
|
"rewards/margins_min": 3.7758469581604004, |
|
"rewards/margins_std": 11.261490821838379, |
|
"rewards/rejected": -14.976544380187988, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 4.85420737953553e-06, |
|
"logits/chosen": -1.5254818201065063, |
|
"logits/rejected": -1.1172149181365967, |
|
"logps/chosen": -552.1317138671875, |
|
"logps/rejected": -1063.9925537109375, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.961021900177002, |
|
"rewards/margins": 5.317422389984131, |
|
"rewards/margins_max": 10.766054153442383, |
|
"rewards/margins_min": -0.13120803236961365, |
|
"rewards/margins_std": 7.705528259277344, |
|
"rewards/rejected": -8.27844524383545, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.844811370781446e-06, |
|
"logits/chosen": -1.489282250404358, |
|
"logits/rejected": -1.1010894775390625, |
|
"logps/chosen": -437.63519287109375, |
|
"logps/rejected": -1001.1755981445312, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8960500955581665, |
|
"rewards/margins": 5.868886470794678, |
|
"rewards/margins_max": 9.640433311462402, |
|
"rewards/margins_min": 2.0973384380340576, |
|
"rewards/margins_std": 5.333773612976074, |
|
"rewards/rejected": -7.7649359703063965, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 4.835131644039251e-06, |
|
"logits/chosen": -1.585998296737671, |
|
"logits/rejected": -1.0765646696090698, |
|
"logps/chosen": -624.9088134765625, |
|
"logps/rejected": -1340.5115966796875, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.8306846618652344, |
|
"rewards/margins": 8.347230911254883, |
|
"rewards/margins_max": 14.0307035446167, |
|
"rewards/margins_min": 2.6637587547302246, |
|
"rewards/margins_std": 8.037643432617188, |
|
"rewards/rejected": -11.177915573120117, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 4.825169370538595e-06, |
|
"logits/chosen": -1.3587524890899658, |
|
"logits/rejected": -1.200235366821289, |
|
"logps/chosen": -486.85107421875, |
|
"logps/rejected": -1671.8179931640625, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.5282440185546875, |
|
"rewards/margins": 11.739642143249512, |
|
"rewards/margins_max": 22.0147705078125, |
|
"rewards/margins_min": 1.4645134210586548, |
|
"rewards/margins_std": 14.531225204467773, |
|
"rewards/rejected": -14.2678861618042, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.8149257556967776e-06, |
|
"logits/chosen": -1.5342085361480713, |
|
"logits/rejected": -1.1026307344436646, |
|
"logps/chosen": -690.6807861328125, |
|
"logps/rejected": -1562.658935546875, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.9973278045654297, |
|
"rewards/margins": 9.116189956665039, |
|
"rewards/margins_max": 16.03341293334961, |
|
"rewards/margins_min": 2.198969841003418, |
|
"rewards/margins_std": 9.782427787780762, |
|
"rewards/rejected": -13.113519668579102, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.796875, |
|
"learning_rate": 4.8044020389729e-06, |
|
"logits/chosen": -1.4870555400848389, |
|
"logits/rejected": -1.135543704032898, |
|
"logps/chosen": -645.024658203125, |
|
"logps/rejected": -1609.5645751953125, |
|
"loss": 0.2779, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.703730821609497, |
|
"rewards/margins": 9.667627334594727, |
|
"rewards/margins_max": 17.878780364990234, |
|
"rewards/margins_min": 1.456475853919983, |
|
"rewards/margins_std": 11.612321853637695, |
|
"rewards/rejected": -13.371357917785645, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 4.793599493717891e-06, |
|
"logits/chosen": -1.3815220594406128, |
|
"logits/rejected": -1.0400980710983276, |
|
"logps/chosen": -551.9547119140625, |
|
"logps/rejected": -1804.0220947265625, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.923583745956421, |
|
"rewards/margins": 12.87585163116455, |
|
"rewards/margins_max": 23.543264389038086, |
|
"rewards/margins_min": 2.208437919616699, |
|
"rewards/margins_std": 15.086000442504883, |
|
"rewards/rejected": -15.79943561553955, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.782519427020434e-06, |
|
"logits/chosen": -1.2928307056427002, |
|
"logits/rejected": -1.0188380479812622, |
|
"logps/chosen": -466.04290771484375, |
|
"logps/rejected": -1075.211181640625, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.593552350997925, |
|
"rewards/margins": 5.988112449645996, |
|
"rewards/margins_max": 10.20189094543457, |
|
"rewards/margins_min": 1.7743346691131592, |
|
"rewards/margins_std": 5.959182262420654, |
|
"rewards/rejected": -8.5816650390625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 4.771163179548809e-06, |
|
"logits/chosen": -1.4908405542373657, |
|
"logits/rejected": -1.0522329807281494, |
|
"logps/chosen": -562.4791870117188, |
|
"logps/rejected": -1309.4808349609375, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.262165069580078, |
|
"rewards/margins": 8.532692909240723, |
|
"rewards/margins_max": 13.401638984680176, |
|
"rewards/margins_min": 3.6637446880340576, |
|
"rewards/margins_std": 6.8857316970825195, |
|
"rewards/rejected": -10.7948579788208, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 4.759532125388681e-06, |
|
"logits/chosen": -1.2453266382217407, |
|
"logits/rejected": -0.9683879613876343, |
|
"logps/chosen": -486.449462890625, |
|
"logps/rejected": -1878.889892578125, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1956615447998047, |
|
"rewards/margins": 14.269218444824219, |
|
"rewards/margins_max": 24.373987197875977, |
|
"rewards/margins_min": 4.164450168609619, |
|
"rewards/margins_std": 14.290300369262695, |
|
"rewards/rejected": -16.464879989624023, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 4.747627671876829e-06, |
|
"logits/chosen": -1.5221184492111206, |
|
"logits/rejected": -1.213921308517456, |
|
"logps/chosen": -411.44805908203125, |
|
"logps/rejected": -1289.133056640625, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.027691125869751, |
|
"rewards/margins": 8.689915657043457, |
|
"rewards/margins_max": 13.808636665344238, |
|
"rewards/margins_min": 3.5711944103240967, |
|
"rewards/margins_std": 7.2389655113220215, |
|
"rewards/rejected": -10.717607498168945, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 4.735451259430866e-06, |
|
"logits/chosen": -1.4237302541732788, |
|
"logits/rejected": -1.3012243509292603, |
|
"logps/chosen": -456.906982421875, |
|
"logps/rejected": -1476.89453125, |
|
"loss": 0.2935, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6124205589294434, |
|
"rewards/margins": 10.188555717468262, |
|
"rewards/margins_max": 16.66933250427246, |
|
"rewards/margins_min": 3.707778215408325, |
|
"rewards/margins_std": 9.165203094482422, |
|
"rewards/rejected": -12.80097484588623, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.723004361374953e-06, |
|
"logits/chosen": -1.3836435079574585, |
|
"logits/rejected": -1.198193907737732, |
|
"logps/chosen": -473.876220703125, |
|
"logps/rejected": -1625.8421630859375, |
|
"loss": 0.2865, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4213483333587646, |
|
"rewards/margins": 11.857926368713379, |
|
"rewards/margins_max": 21.100528717041016, |
|
"rewards/margins_min": 2.615323543548584, |
|
"rewards/margins_std": 13.071016311645508, |
|
"rewards/rejected": -14.279275894165039, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 4.710288483761524e-06, |
|
"logits/chosen": -1.2236430644989014, |
|
"logits/rejected": -0.9467355012893677, |
|
"logps/chosen": -524.3106689453125, |
|
"logps/rejected": -1654.451416015625, |
|
"loss": 0.3292, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.8375871181488037, |
|
"rewards/margins": 11.601432800292969, |
|
"rewards/margins_max": 20.039447784423828, |
|
"rewards/margins_min": 3.1634178161621094, |
|
"rewards/margins_std": 11.933156967163086, |
|
"rewards/rejected": -14.439020156860352, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.697305165189062e-06, |
|
"logits/chosen": -1.4987045526504517, |
|
"logits/rejected": -1.1748701333999634, |
|
"logps/chosen": -520.372314453125, |
|
"logps/rejected": -1442.008056640625, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.8685879707336426, |
|
"rewards/margins": 9.27059268951416, |
|
"rewards/margins_max": 16.574522018432617, |
|
"rewards/margins_min": 1.9666624069213867, |
|
"rewards/margins_std": 10.329317092895508, |
|
"rewards/rejected": -12.139180183410645, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.875, |
|
"learning_rate": 4.684055976615924e-06, |
|
"logits/chosen": -1.4547721147537231, |
|
"logits/rejected": -1.1212984323501587, |
|
"logps/chosen": -433.8109436035156, |
|
"logps/rejected": -1971.857177734375, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.043619394302368, |
|
"rewards/margins": 15.325085639953613, |
|
"rewards/margins_max": 24.84339714050293, |
|
"rewards/margins_min": 5.806773662567139, |
|
"rewards/margins_std": 13.460925102233887, |
|
"rewards/rejected": -17.368703842163086, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 33.75, |
|
"learning_rate": 4.670542521170266e-06, |
|
"logits/chosen": -1.5136555433273315, |
|
"logits/rejected": -1.2549117803573608, |
|
"logps/chosen": -411.322509765625, |
|
"logps/rejected": -1319.593994140625, |
|
"loss": 0.3371, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.360672950744629, |
|
"rewards/margins": 8.951146125793457, |
|
"rewards/margins_max": 15.15422534942627, |
|
"rewards/margins_min": 2.7480692863464355, |
|
"rewards/margins_std": 8.772477149963379, |
|
"rewards/rejected": -11.311819076538086, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 4.656766433956063e-06, |
|
"logits/chosen": -1.4924864768981934, |
|
"logits/rejected": -1.0543395280838013, |
|
"logps/chosen": -545.2259521484375, |
|
"logps/rejected": -1888.3046875, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9292566776275635, |
|
"rewards/margins": 13.785478591918945, |
|
"rewards/margins_max": 23.153963088989258, |
|
"rewards/margins_min": 4.4169921875, |
|
"rewards/margins_std": 13.249038696289062, |
|
"rewards/rejected": -16.71473503112793, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.125, |
|
"learning_rate": 4.642729381855262e-06, |
|
"logits/chosen": -1.559021234512329, |
|
"logits/rejected": -1.1420828104019165, |
|
"logps/chosen": -477.46795654296875, |
|
"logps/rejected": -1219.603271484375, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.3907387256622314, |
|
"rewards/margins": 7.980464935302734, |
|
"rewards/margins_max": 14.719846725463867, |
|
"rewards/margins_min": 1.2410833835601807, |
|
"rewards/margins_std": 9.530924797058105, |
|
"rewards/rejected": -10.371204376220703, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 14.5, |
|
"learning_rate": 4.6284330633261e-06, |
|
"logits/chosen": -1.3948744535446167, |
|
"logits/rejected": -1.1108922958374023, |
|
"logps/chosen": -548.1687622070312, |
|
"logps/rejected": -1504.393310546875, |
|
"loss": 0.3685, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.439661741256714, |
|
"rewards/margins": 9.631498336791992, |
|
"rewards/margins_max": 16.037883758544922, |
|
"rewards/margins_min": 3.225111484527588, |
|
"rewards/margins_std": 9.059998512268066, |
|
"rewards/rejected": -13.071161270141602, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.328125, |
|
"learning_rate": 4.613879208197585e-06, |
|
"logits/chosen": -1.5194332599639893, |
|
"logits/rejected": -1.1743465662002563, |
|
"logps/chosen": -473.6636657714844, |
|
"logps/rejected": -1417.037841796875, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.4681811332702637, |
|
"rewards/margins": 9.82261848449707, |
|
"rewards/margins_max": 16.545597076416016, |
|
"rewards/margins_min": 3.099639415740967, |
|
"rewards/margins_std": 9.507728576660156, |
|
"rewards/rejected": -12.290799140930176, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 4.5990695774601945e-06, |
|
"logits/chosen": -1.529234766960144, |
|
"logits/rejected": -1.3274476528167725, |
|
"logps/chosen": -542.2566528320312, |
|
"logps/rejected": -1780.8724365234375, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.5734362602233887, |
|
"rewards/margins": 12.809934616088867, |
|
"rewards/margins_max": 22.373729705810547, |
|
"rewards/margins_min": 3.24613618850708, |
|
"rewards/margins_std": 13.525251388549805, |
|
"rewards/rejected": -15.383371353149414, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.584005963052799e-06, |
|
"logits/chosen": -1.6547048091888428, |
|
"logits/rejected": -1.295466423034668, |
|
"logps/chosen": -469.98529052734375, |
|
"logps/rejected": -1244.710205078125, |
|
"loss": 0.217, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6301162242889404, |
|
"rewards/margins": 7.640155792236328, |
|
"rewards/margins_max": 13.174588203430176, |
|
"rewards/margins_min": 2.1057231426239014, |
|
"rewards/margins_std": 7.826869964599609, |
|
"rewards/rejected": -10.270272254943848, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 4.568690187645839e-06, |
|
"logits/chosen": -1.4834959506988525, |
|
"logits/rejected": -1.2343547344207764, |
|
"logps/chosen": -644.5606689453125, |
|
"logps/rejected": -1773.735107421875, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.305816650390625, |
|
"rewards/margins": 11.198797225952148, |
|
"rewards/margins_max": 19.111894607543945, |
|
"rewards/margins_min": 3.285700559616089, |
|
"rewards/margins_std": 11.19080924987793, |
|
"rewards/rejected": -15.504613876342773, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 12.0, |
|
"learning_rate": 4.553124104420784e-06, |
|
"logits/chosen": -1.4060771465301514, |
|
"logits/rejected": -1.2946099042892456, |
|
"logps/chosen": -648.6322631835938, |
|
"logps/rejected": -2310.56787109375, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.409775257110596, |
|
"rewards/margins": 16.627094268798828, |
|
"rewards/margins_max": 29.272930145263672, |
|
"rewards/margins_min": 3.981257677078247, |
|
"rewards/margins_std": 17.883914947509766, |
|
"rewards/rejected": -21.036869049072266, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 4.537309596845906e-06, |
|
"logits/chosen": -1.5412516593933105, |
|
"logits/rejected": -1.3352028131484985, |
|
"logps/chosen": -582.1204223632812, |
|
"logps/rejected": -2280.7783203125, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.755678653717041, |
|
"rewards/margins": 17.23660659790039, |
|
"rewards/margins_max": 28.5272216796875, |
|
"rewards/margins_min": 5.945991516113281, |
|
"rewards/margins_std": 15.967341423034668, |
|
"rewards/rejected": -20.99228286743164, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 4.521248578448374e-06, |
|
"logits/chosen": -1.396698236465454, |
|
"logits/rejected": -1.3860348463058472, |
|
"logps/chosen": -543.8904418945312, |
|
"logps/rejected": -1417.26904296875, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.744899034500122, |
|
"rewards/margins": 8.08409595489502, |
|
"rewards/margins_max": 12.590652465820312, |
|
"rewards/margins_min": 3.577538251876831, |
|
"rewards/margins_std": 6.373233795166016, |
|
"rewards/rejected": -11.828994750976562, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 4.504942992582732e-06, |
|
"logits/chosen": -1.3976601362228394, |
|
"logits/rejected": -1.242203712463379, |
|
"logps/chosen": -657.7681884765625, |
|
"logps/rejected": -2024.028564453125, |
|
"loss": 0.3438, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.529778480529785, |
|
"rewards/margins": 13.55543327331543, |
|
"rewards/margins_max": 23.787803649902344, |
|
"rewards/margins_min": 3.3230679035186768, |
|
"rewards/margins_std": 14.470751762390137, |
|
"rewards/rejected": -18.085216522216797, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.671875, |
|
"learning_rate": 4.488394812195749e-06, |
|
"logits/chosen": -1.476554036140442, |
|
"logits/rejected": -1.3132295608520508, |
|
"logps/chosen": -531.2057495117188, |
|
"logps/rejected": -1767.5189208984375, |
|
"loss": 0.33, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.5785250663757324, |
|
"rewards/margins": 11.893239974975586, |
|
"rewards/margins_max": 20.438159942626953, |
|
"rewards/margins_min": 3.3483211994171143, |
|
"rewards/margins_std": 12.084342002868652, |
|
"rewards/rejected": -15.471768379211426, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.471606039587696e-06, |
|
"logits/chosen": -1.5740488767623901, |
|
"logits/rejected": -1.4436171054840088, |
|
"logps/chosen": -544.67138671875, |
|
"logps/rejected": -1495.161865234375, |
|
"loss": 0.1441, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.913360118865967, |
|
"rewards/margins": 9.523843765258789, |
|
"rewards/margins_max": 15.644078254699707, |
|
"rewards/margins_min": 3.40360951423645, |
|
"rewards/margins_std": 8.655319213867188, |
|
"rewards/rejected": -12.437203407287598, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 4.454578706170075e-06, |
|
"logits/chosen": -1.5687984228134155, |
|
"logits/rejected": -1.1671384572982788, |
|
"logps/chosen": -530.4217529296875, |
|
"logps/rejected": -1573.7464599609375, |
|
"loss": 0.2936, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.3494606018066406, |
|
"rewards/margins": 10.087861061096191, |
|
"rewards/margins_max": 17.137001037597656, |
|
"rewards/margins_min": 3.0387213230133057, |
|
"rewards/margins_std": 9.968989372253418, |
|
"rewards/rejected": -13.437321662902832, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 4.437314872219819e-06, |
|
"logits/chosen": -1.4016139507293701, |
|
"logits/rejected": -1.1822127103805542, |
|
"logps/chosen": -483.20831298828125, |
|
"logps/rejected": -1944.1021728515625, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7656898498535156, |
|
"rewards/margins": 14.37073802947998, |
|
"rewards/margins_max": 24.48914337158203, |
|
"rewards/margins_min": 4.2523345947265625, |
|
"rewards/margins_std": 14.30958366394043, |
|
"rewards/rejected": -17.136428833007812, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 4.419816626630003e-06, |
|
"logits/chosen": -1.6272413730621338, |
|
"logits/rejected": -1.3043510913848877, |
|
"logps/chosen": -545.1571655273438, |
|
"logps/rejected": -1785.0166015625, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.1751797199249268, |
|
"rewards/margins": 12.30494499206543, |
|
"rewards/margins_max": 19.407062530517578, |
|
"rewards/margins_min": 5.202826499938965, |
|
"rewards/margins_std": 10.043911933898926, |
|
"rewards/rejected": -15.480123519897461, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 4.402086086657093e-06, |
|
"logits/chosen": -1.627579927444458, |
|
"logits/rejected": -1.1320421695709229, |
|
"logps/chosen": -429.28704833984375, |
|
"logps/rejected": -1020.9256591796875, |
|
"loss": 0.2996, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.296780586242676, |
|
"rewards/margins": 5.808542251586914, |
|
"rewards/margins_max": 9.648881912231445, |
|
"rewards/margins_min": 1.968202829360962, |
|
"rewards/margins_std": 5.431060314178467, |
|
"rewards/rejected": -8.10532283782959, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 4.384125397664759e-06, |
|
"logits/chosen": -1.5202645063400269, |
|
"logits/rejected": -1.2545684576034546, |
|
"logps/chosen": -431.06121826171875, |
|
"logps/rejected": -1594.2802734375, |
|
"loss": 0.2789, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.312934398651123, |
|
"rewards/margins": 11.746423721313477, |
|
"rewards/margins_max": 19.13467788696289, |
|
"rewards/margins_min": 4.358170509338379, |
|
"rewards/margins_std": 10.448568344116211, |
|
"rewards/rejected": -14.059358596801758, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.365936732864292e-06, |
|
"logits/chosen": -1.279996395111084, |
|
"logits/rejected": -1.2075783014297485, |
|
"logps/chosen": -534.7310180664062, |
|
"logps/rejected": -1707.2154541015625, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.199549913406372, |
|
"rewards/margins": 11.328302383422852, |
|
"rewards/margins_max": 20.435813903808594, |
|
"rewards/margins_min": 2.2207882404327393, |
|
"rewards/margins_std": 12.879968643188477, |
|
"rewards/rejected": -14.527850151062012, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 4.3475222930516484e-06, |
|
"logits/chosen": -1.5118716955184937, |
|
"logits/rejected": -1.321613073348999, |
|
"logps/chosen": -475.66455078125, |
|
"logps/rejected": -1267.9703369140625, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6406443119049072, |
|
"rewards/margins": 7.942468166351318, |
|
"rewards/margins_max": 13.136529922485352, |
|
"rewards/margins_min": 2.7484066486358643, |
|
"rewards/margins_std": 7.345511436462402, |
|
"rewards/rejected": -10.583112716674805, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 4.3288843063411576e-06, |
|
"logits/chosen": -1.6871601343154907, |
|
"logits/rejected": -1.2926992177963257, |
|
"logps/chosen": -519.3112182617188, |
|
"logps/rejected": -1428.3577880859375, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.0296714305877686, |
|
"rewards/margins": 9.275277137756348, |
|
"rewards/margins_max": 13.191515922546387, |
|
"rewards/margins_min": 5.359038352966309, |
|
"rewards/margins_std": 5.538398742675781, |
|
"rewards/rejected": -12.304948806762695, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.310025027895926e-06, |
|
"logits/chosen": -1.5426689386367798, |
|
"logits/rejected": -1.2699373960494995, |
|
"logps/chosen": -505.8729553222656, |
|
"logps/rejected": -1447.990478515625, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.8665452003479004, |
|
"rewards/margins": 9.338605880737305, |
|
"rewards/margins_max": 14.440671920776367, |
|
"rewards/margins_min": 4.236537456512451, |
|
"rewards/margins_std": 7.215413570404053, |
|
"rewards/rejected": -12.205150604248047, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.8046875, |
|
"learning_rate": 4.290946739654962e-06, |
|
"logits/chosen": -1.4005990028381348, |
|
"logits/rejected": -1.128671407699585, |
|
"logps/chosen": -584.1080322265625, |
|
"logps/rejected": -1965.639404296875, |
|
"loss": 0.1263, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.352668285369873, |
|
"rewards/margins": 14.061731338500977, |
|
"rewards/margins_max": 23.202089309692383, |
|
"rewards/margins_min": 4.921371936798096, |
|
"rewards/margins_std": 12.926417350769043, |
|
"rewards/rejected": -17.414400100708008, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.75, |
|
"learning_rate": 4.271651750057071e-06, |
|
"logits/chosen": -1.5351276397705078, |
|
"logits/rejected": -1.365252137184143, |
|
"logps/chosen": -506.6732482910156, |
|
"logps/rejected": -1338.5911865234375, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.1733031272888184, |
|
"rewards/margins": 8.043719291687012, |
|
"rewards/margins_max": 12.65977954864502, |
|
"rewards/margins_min": 3.427661418914795, |
|
"rewards/margins_std": 6.5280914306640625, |
|
"rewards/rejected": -11.217023849487305, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 4.252142393761534e-06, |
|
"logits/chosen": -1.5365070104599, |
|
"logits/rejected": -1.185517430305481, |
|
"logps/chosen": -580.3465576171875, |
|
"logps/rejected": -1935.516357421875, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2586402893066406, |
|
"rewards/margins": 13.273635864257812, |
|
"rewards/margins_max": 22.62195587158203, |
|
"rewards/margins_min": 3.9253132343292236, |
|
"rewards/margins_std": 13.2205228805542, |
|
"rewards/rejected": -16.532276153564453, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 4.232421031365618e-06, |
|
"logits/chosen": -1.4389902353286743, |
|
"logits/rejected": -1.3525134325027466, |
|
"logps/chosen": -535.2704467773438, |
|
"logps/rejected": -1825.3232421875, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.515831708908081, |
|
"rewards/margins": 12.615926742553711, |
|
"rewards/margins_max": 18.764150619506836, |
|
"rewards/margins_min": 6.467702388763428, |
|
"rewards/margins_std": 8.694902420043945, |
|
"rewards/rejected": -16.131757736206055, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 4.212490049118952e-06, |
|
"logits/chosen": -1.5485631227493286, |
|
"logits/rejected": -1.3416082859039307, |
|
"logps/chosen": -552.5711669921875, |
|
"logps/rejected": -1961.2366943359375, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.507843494415283, |
|
"rewards/margins": 13.728233337402344, |
|
"rewards/margins_max": 22.533226013183594, |
|
"rewards/margins_min": 4.923240661621094, |
|
"rewards/margins_std": 12.452141761779785, |
|
"rewards/rejected": -17.2360782623291, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 4.192351858634792e-06, |
|
"logits/chosen": -1.6128113269805908, |
|
"logits/rejected": -1.2383267879486084, |
|
"logps/chosen": -608.5562133789062, |
|
"logps/rejected": -1741.685546875, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.952550172805786, |
|
"rewards/margins": 11.562326431274414, |
|
"rewards/margins_max": 19.853296279907227, |
|
"rewards/margins_min": 3.271360397338867, |
|
"rewards/margins_std": 11.725197792053223, |
|
"rewards/rejected": -15.514877319335938, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 4.172008896598221e-06, |
|
"logits/chosen": -1.497994303703308, |
|
"logits/rejected": -1.382777452468872, |
|
"logps/chosen": -619.6976928710938, |
|
"logps/rejected": -2020.6917724609375, |
|
"loss": 0.3711, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.149666786193848, |
|
"rewards/margins": 14.204757690429688, |
|
"rewards/margins_max": 22.320579528808594, |
|
"rewards/margins_min": 6.088934898376465, |
|
"rewards/margins_std": 11.477506637573242, |
|
"rewards/rejected": -18.35442352294922, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 4.151463624471314e-06, |
|
"logits/chosen": -1.464698076248169, |
|
"logits/rejected": -1.022950291633606, |
|
"logps/chosen": -577.0154418945312, |
|
"logps/rejected": -2056.116455078125, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.6164112091064453, |
|
"rewards/margins": 15.738482475280762, |
|
"rewards/margins_max": 25.909582138061523, |
|
"rewards/margins_min": 5.567382335662842, |
|
"rewards/margins_std": 14.384109497070312, |
|
"rewards/rejected": -18.354894638061523, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 4.130718528195303e-06, |
|
"logits/chosen": -1.6330950260162354, |
|
"logits/rejected": -1.2724729776382446, |
|
"logps/chosen": -528.8734130859375, |
|
"logps/rejected": -1879.766357421875, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.9474363327026367, |
|
"rewards/margins": 13.620790481567383, |
|
"rewards/margins_max": 21.610565185546875, |
|
"rewards/margins_min": 5.631014823913574, |
|
"rewards/margins_std": 11.299247741699219, |
|
"rewards/rejected": -16.568225860595703, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.109776117889789e-06, |
|
"logits/chosen": -1.4906260967254639, |
|
"logits/rejected": -1.1496328115463257, |
|
"logps/chosen": -598.34228515625, |
|
"logps/rejected": -1853.970458984375, |
|
"loss": 0.2219, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.373750686645508, |
|
"rewards/margins": 12.560392379760742, |
|
"rewards/margins_max": 17.999202728271484, |
|
"rewards/margins_min": 7.121583461761475, |
|
"rewards/margins_std": 7.6916375160217285, |
|
"rewards/rejected": -15.93414306640625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.088638927549017e-06, |
|
"logits/chosen": -1.552268147468567, |
|
"logits/rejected": -1.2409183979034424, |
|
"logps/chosen": -649.3657836914062, |
|
"logps/rejected": -1703.074462890625, |
|
"loss": 0.2693, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.8895363807678223, |
|
"rewards/margins": 10.927635192871094, |
|
"rewards/margins_max": 17.817441940307617, |
|
"rewards/margins_min": 4.03782844543457, |
|
"rewards/margins_std": 9.743658065795898, |
|
"rewards/rejected": -14.817171096801758, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 4.067309514735267e-06, |
|
"logits/chosen": -1.3615977764129639, |
|
"logits/rejected": -1.1248162984848022, |
|
"logps/chosen": -608.7218017578125, |
|
"logps/rejected": -1841.6429443359375, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.523672580718994, |
|
"rewards/margins": 12.779653549194336, |
|
"rewards/margins_max": 18.22634506225586, |
|
"rewards/margins_min": 7.332962989807129, |
|
"rewards/margins_std": 7.702784061431885, |
|
"rewards/rejected": -16.303325653076172, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 4.0457904602693954e-06, |
|
"logits/chosen": -1.4254987239837646, |
|
"logits/rejected": -1.137463927268982, |
|
"logps/chosen": -525.2374267578125, |
|
"logps/rejected": -1585.09326171875, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.995495319366455, |
|
"rewards/margins": 10.838014602661133, |
|
"rewards/margins_max": 18.608234405517578, |
|
"rewards/margins_min": 3.0677988529205322, |
|
"rewards/margins_std": 10.98874568939209, |
|
"rewards/rejected": -13.833511352539062, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 4.0240843679185605e-06, |
|
"logits/chosen": -1.5120861530303955, |
|
"logits/rejected": -1.247182846069336, |
|
"logps/chosen": -617.8936157226562, |
|
"logps/rejected": -2085.58642578125, |
|
"loss": 0.1841, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7910187244415283, |
|
"rewards/margins": 14.714996337890625, |
|
"rewards/margins_max": 24.271373748779297, |
|
"rewards/margins_min": 5.158619403839111, |
|
"rewards/margins_std": 13.51475715637207, |
|
"rewards/rejected": -18.50601577758789, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 4.002193864081172e-06, |
|
"logits/chosen": -1.4501639604568481, |
|
"logits/rejected": -1.2224775552749634, |
|
"logps/chosen": -546.1622924804688, |
|
"logps/rejected": -2053.220703125, |
|
"loss": 0.1854, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.2076709270477295, |
|
"rewards/margins": 13.76832103729248, |
|
"rewards/margins_max": 21.275157928466797, |
|
"rewards/margins_min": 6.261481285095215, |
|
"rewards/margins_std": 10.6162748336792, |
|
"rewards/rejected": -16.97599220275879, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 3.980121597469096e-06, |
|
"logits/chosen": -1.5411275625228882, |
|
"logits/rejected": -1.25248122215271, |
|
"logps/chosen": -565.0699462890625, |
|
"logps/rejected": -1576.759033203125, |
|
"loss": 0.2283, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.4042811393737793, |
|
"rewards/margins": 10.423602104187012, |
|
"rewards/margins_max": 15.0601806640625, |
|
"rewards/margins_min": 5.787021636962891, |
|
"rewards/margins_std": 6.5571136474609375, |
|
"rewards/rejected": -13.827882766723633, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 25.875, |
|
"learning_rate": 3.9578702387871745e-06, |
|
"logits/chosen": -1.6382890939712524, |
|
"logits/rejected": -1.319797396659851, |
|
"logps/chosen": -584.9801025390625, |
|
"logps/rejected": -1850.2626953125, |
|
"loss": 0.2871, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.8053550720214844, |
|
"rewards/margins": 12.886405944824219, |
|
"rewards/margins_max": 21.918596267700195, |
|
"rewards/margins_min": 3.854217052459717, |
|
"rewards/margins_std": 12.773443222045898, |
|
"rewards/rejected": -16.691761016845703, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 3.935442480410065e-06, |
|
"logits/chosen": -1.486549973487854, |
|
"logits/rejected": -1.2807599306106567, |
|
"logps/chosen": -483.4930725097656, |
|
"logps/rejected": -1720.572998046875, |
|
"loss": 0.2895, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.9897453784942627, |
|
"rewards/margins": 11.935190200805664, |
|
"rewards/margins_max": 20.719188690185547, |
|
"rewards/margins_min": 3.151189088821411, |
|
"rewards/margins_std": 12.422451972961426, |
|
"rewards/rejected": -14.924932479858398, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 3.91284103605648e-06, |
|
"logits/chosen": -1.5874106884002686, |
|
"logits/rejected": -1.068820834159851, |
|
"logps/chosen": -526.4561767578125, |
|
"logps/rejected": -1309.350341796875, |
|
"loss": 0.1856, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.838336944580078, |
|
"rewards/margins": 7.996423244476318, |
|
"rewards/margins_max": 12.323980331420898, |
|
"rewards/margins_min": 3.668865919113159, |
|
"rewards/margins_std": 6.120089530944824, |
|
"rewards/rejected": -10.834760665893555, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 3.890068640460818e-06, |
|
"logits/chosen": -1.563246488571167, |
|
"logits/rejected": -1.3833080530166626, |
|
"logps/chosen": -608.7943725585938, |
|
"logps/rejected": -1742.0953369140625, |
|
"loss": 0.2083, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5897440910339355, |
|
"rewards/margins": 11.420188903808594, |
|
"rewards/margins_max": 18.479089736938477, |
|
"rewards/margins_min": 4.361289024353027, |
|
"rewards/margins_std": 9.98279094696045, |
|
"rewards/rejected": -15.009931564331055, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 3.867128049042276e-06, |
|
"logits/chosen": -1.6497488021850586, |
|
"logits/rejected": -1.4546703100204468, |
|
"logps/chosen": -509.16265869140625, |
|
"logps/rejected": -1514.1629638671875, |
|
"loss": 0.1561, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.3210582733154297, |
|
"rewards/margins": 9.69025993347168, |
|
"rewards/margins_max": 15.015596389770508, |
|
"rewards/margins_min": 4.364924907684326, |
|
"rewards/margins_std": 7.531160831451416, |
|
"rewards/rejected": -13.011317253112793, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 3.844022037571444e-06, |
|
"logits/chosen": -1.5822490453720093, |
|
"logits/rejected": -1.1622604131698608, |
|
"logps/chosen": -559.5164184570312, |
|
"logps/rejected": -1303.6251220703125, |
|
"loss": 0.2372, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.63049054145813, |
|
"rewards/margins": 7.552873134613037, |
|
"rewards/margins_max": 11.804709434509277, |
|
"rewards/margins_min": 3.3010354042053223, |
|
"rewards/margins_std": 6.013005256652832, |
|
"rewards/rejected": -11.18336296081543, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.25, |
|
"learning_rate": 3.820753401834444e-06, |
|
"logits/chosen": -1.6436933279037476, |
|
"logits/rejected": -1.4893285036087036, |
|
"logps/chosen": -522.2276611328125, |
|
"logps/rejected": -1459.512451171875, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9450583457946777, |
|
"rewards/margins": 9.521982192993164, |
|
"rewards/margins_max": 13.79808235168457, |
|
"rewards/margins_min": 5.245881080627441, |
|
"rewards/margins_std": 6.047319412231445, |
|
"rewards/rejected": -12.467040061950684, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 3.7973249572946436e-06, |
|
"logits/chosen": -1.621744155883789, |
|
"logits/rejected": -1.3612353801727295, |
|
"logps/chosen": -539.4110107421875, |
|
"logps/rejected": -1551.703857421875, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.3887977600097656, |
|
"rewards/margins": 10.25967788696289, |
|
"rewards/margins_max": 18.161897659301758, |
|
"rewards/margins_min": 2.35745906829834, |
|
"rewards/margins_std": 11.17542552947998, |
|
"rewards/rejected": -13.648475646972656, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 3.7737395387519883e-06, |
|
"logits/chosen": -1.6151530742645264, |
|
"logits/rejected": -1.2519623041152954, |
|
"logps/chosen": -593.7537841796875, |
|
"logps/rejected": -1533.3704833984375, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.4480178356170654, |
|
"rewards/margins": 9.817560195922852, |
|
"rewards/margins_max": 14.997907638549805, |
|
"rewards/margins_min": 4.637210369110107, |
|
"rewards/margins_std": 7.326119422912598, |
|
"rewards/rejected": -13.26557731628418, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.593546748161316, |
|
"logits/rejected": -1.227420449256897, |
|
"logps/chosen": -894.0062255859375, |
|
"logps/rejected": -1717.8193359375, |
|
"loss": 0.281, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.509779930114746, |
|
"rewards/margins": 9.543737411499023, |
|
"rewards/margins_max": 15.239468574523926, |
|
"rewards/margins_min": 3.8480052947998047, |
|
"rewards/margins_std": 8.054980278015137, |
|
"rewards/rejected": -15.05351734161377, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.375, |
|
"learning_rate": 3.7261092134804698e-06, |
|
"logits/chosen": -1.5079495906829834, |
|
"logits/rejected": -1.2428034543991089, |
|
"logps/chosen": -615.8125, |
|
"logps/rejected": -1781.083984375, |
|
"loss": 0.2198, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.0713090896606445, |
|
"rewards/margins": 11.638517379760742, |
|
"rewards/margins_max": 20.66924285888672, |
|
"rewards/margins_min": 2.6077933311462402, |
|
"rewards/margins_std": 12.771371841430664, |
|
"rewards/rejected": -15.709826469421387, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 3.7020700699358984e-06, |
|
"logits/chosen": -1.6448938846588135, |
|
"logits/rejected": -1.2866158485412598, |
|
"logps/chosen": -567.0323486328125, |
|
"logps/rejected": -1882.602294921875, |
|
"loss": 0.2352, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3592464923858643, |
|
"rewards/margins": 13.268638610839844, |
|
"rewards/margins_max": 21.4252872467041, |
|
"rewards/margins_min": 5.111990928649902, |
|
"rewards/margins_std": 11.53524112701416, |
|
"rewards/rejected": -16.627885818481445, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 3.6778854780597218e-06, |
|
"logits/chosen": -1.431904673576355, |
|
"logits/rejected": -1.2583922147750854, |
|
"logps/chosen": -548.6318359375, |
|
"logps/rejected": -1664.658203125, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2328219413757324, |
|
"rewards/margins": 11.606404304504395, |
|
"rewards/margins_max": 17.81925392150879, |
|
"rewards/margins_min": 5.393556594848633, |
|
"rewards/margins_std": 8.786293983459473, |
|
"rewards/rejected": -14.839225769042969, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 3.6535583641443634e-06, |
|
"logits/chosen": -1.6340068578720093, |
|
"logits/rejected": -1.5025604963302612, |
|
"logps/chosen": -582.7081298828125, |
|
"logps/rejected": -1899.066650390625, |
|
"loss": 0.4073, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.9646008014678955, |
|
"rewards/margins": 12.86851978302002, |
|
"rewards/margins_max": 21.045665740966797, |
|
"rewards/margins_min": 4.691373348236084, |
|
"rewards/margins_std": 11.564231872558594, |
|
"rewards/rejected": -16.833120346069336, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 3.6290916717271597e-06, |
|
"logits/chosen": -1.5056756734848022, |
|
"logits/rejected": -1.1574785709381104, |
|
"logps/chosen": -607.2431640625, |
|
"logps/rejected": -1871.325927734375, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.66899037361145, |
|
"rewards/margins": 12.744714736938477, |
|
"rewards/margins_max": 20.19748878479004, |
|
"rewards/margins_min": 5.2919416427612305, |
|
"rewards/margins_std": 10.539812088012695, |
|
"rewards/rejected": -16.41370391845703, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 3.604488361234196e-06, |
|
"logits/chosen": -1.6854197978973389, |
|
"logits/rejected": -1.4968090057373047, |
|
"logps/chosen": -534.645751953125, |
|
"logps/rejected": -1603.8359375, |
|
"loss": 0.1878, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.554718017578125, |
|
"rewards/margins": 10.739810943603516, |
|
"rewards/margins_max": 17.38308334350586, |
|
"rewards/margins_min": 4.096539497375488, |
|
"rewards/margins_std": 9.39500617980957, |
|
"rewards/rejected": -14.294529914855957, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 3.579751409622103e-06, |
|
"logits/chosen": -1.5969122648239136, |
|
"logits/rejected": -1.3476945161819458, |
|
"logps/chosen": -617.2340087890625, |
|
"logps/rejected": -1609.4246826171875, |
|
"loss": 0.2865, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.8023934364318848, |
|
"rewards/margins": 10.177874565124512, |
|
"rewards/margins_max": 17.874292373657227, |
|
"rewards/margins_min": 2.481457471847534, |
|
"rewards/margins_std": 10.884378433227539, |
|
"rewards/rejected": -13.980267524719238, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 3.5548838100178444e-06, |
|
"logits/chosen": -1.456305742263794, |
|
"logits/rejected": -1.257086157798767, |
|
"logps/chosen": -494.32501220703125, |
|
"logps/rejected": -1082.857666015625, |
|
"loss": 0.2634, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0967798233032227, |
|
"rewards/margins": 5.911543846130371, |
|
"rewards/margins_max": 9.54675006866455, |
|
"rewards/margins_min": 2.2763376235961914, |
|
"rewards/margins_std": 5.140958309173584, |
|
"rewards/rejected": -9.008323669433594, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 3.5298885713565615e-06, |
|
"logits/chosen": -1.380829095840454, |
|
"logits/rejected": -1.2045114040374756, |
|
"logps/chosen": -605.8043823242188, |
|
"logps/rejected": -1361.7166748046875, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5279593467712402, |
|
"rewards/margins": 7.775490760803223, |
|
"rewards/margins_max": 13.313535690307617, |
|
"rewards/margins_min": 2.237445116043091, |
|
"rewards/margins_std": 7.831979274749756, |
|
"rewards/rejected": -11.303449630737305, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.5047687180174905e-06, |
|
"logits/chosen": -1.5265122652053833, |
|
"logits/rejected": -1.1738016605377197, |
|
"logps/chosen": -613.3758544921875, |
|
"logps/rejected": -1357.464599609375, |
|
"loss": 0.1996, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.1165761947631836, |
|
"rewards/margins": 8.375921249389648, |
|
"rewards/margins_max": 13.726768493652344, |
|
"rewards/margins_min": 3.0250773429870605, |
|
"rewards/margins_std": 7.567238807678223, |
|
"rewards/rejected": -11.492499351501465, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 3.4795272894580217e-06, |
|
"logits/chosen": -1.4896327257156372, |
|
"logits/rejected": -1.3400466442108154, |
|
"logps/chosen": -531.3592529296875, |
|
"logps/rejected": -1779.3931884765625, |
|
"loss": 0.3124, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.440082550048828, |
|
"rewards/margins": 12.141416549682617, |
|
"rewards/margins_max": 17.760726928710938, |
|
"rewards/margins_min": 6.522104740142822, |
|
"rewards/margins_std": 7.946906089782715, |
|
"rewards/rejected": -15.581499099731445, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 3.454167339845932e-06, |
|
"logits/chosen": -1.4307299852371216, |
|
"logits/rejected": -1.3355329036712646, |
|
"logps/chosen": -553.9991455078125, |
|
"logps/rejected": -1710.6588134765625, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.420407772064209, |
|
"rewards/margins": 11.416155815124512, |
|
"rewards/margins_max": 16.05166244506836, |
|
"rewards/margins_min": 6.780648231506348, |
|
"rewards/margins_std": 6.555596828460693, |
|
"rewards/rejected": -14.836563110351562, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 3.428691937689831e-06, |
|
"logits/chosen": -1.364752173423767, |
|
"logits/rejected": -1.174250841140747, |
|
"logps/chosen": -516.96142578125, |
|
"logps/rejected": -1582.386474609375, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.942873239517212, |
|
"rewards/margins": 10.622710227966309, |
|
"rewards/margins_max": 15.50335693359375, |
|
"rewards/margins_min": 5.742064476013184, |
|
"rewards/margins_std": 6.902276515960693, |
|
"rewards/rejected": -13.565584182739258, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 3.403104165467883e-06, |
|
"logits/chosen": -1.5384275913238525, |
|
"logits/rejected": -1.442461371421814, |
|
"logps/chosen": -617.8716430664062, |
|
"logps/rejected": -2171.876953125, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.375886917114258, |
|
"rewards/margins": 15.94532299041748, |
|
"rewards/margins_max": 27.856884002685547, |
|
"rewards/margins_min": 4.0337605476379395, |
|
"rewards/margins_std": 16.845491409301758, |
|
"rewards/rejected": -19.32120704650879, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 3.377407119254826e-06, |
|
"logits/chosen": -1.3978983163833618, |
|
"logits/rejected": -1.1405658721923828, |
|
"logps/chosen": -584.4593505859375, |
|
"logps/rejected": -1754.9351806640625, |
|
"loss": 0.157, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1924796104431152, |
|
"rewards/margins": 12.185917854309082, |
|
"rewards/margins_max": 18.766172409057617, |
|
"rewards/margins_min": 5.605664253234863, |
|
"rewards/margins_std": 9.305885314941406, |
|
"rewards/rejected": -15.378397941589355, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 3.3516039083473593e-06, |
|
"logits/chosen": -1.5193557739257812, |
|
"logits/rejected": -1.2308270931243896, |
|
"logps/chosen": -676.8104858398438, |
|
"logps/rejected": -1510.12744140625, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.3010759353637695, |
|
"rewards/margins": 8.718791961669922, |
|
"rewards/margins_max": 12.937799453735352, |
|
"rewards/margins_min": 4.49978494644165, |
|
"rewards/margins_std": 5.96657657623291, |
|
"rewards/rejected": -13.019868850708008, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 3.3256976548879183e-06, |
|
"logits/chosen": -1.6087383031845093, |
|
"logits/rejected": -1.409579873085022, |
|
"logps/chosen": -452.558349609375, |
|
"logps/rejected": -1753.7073974609375, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7991509437561035, |
|
"rewards/margins": 12.75914478302002, |
|
"rewards/margins_max": 20.164968490600586, |
|
"rewards/margins_min": 5.3533196449279785, |
|
"rewards/margins_std": 10.473418235778809, |
|
"rewards/rejected": -15.558294296264648, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.375, |
|
"learning_rate": 3.299691493486904e-06, |
|
"logits/chosen": -1.553672194480896, |
|
"logits/rejected": -1.1765081882476807, |
|
"logps/chosen": -532.5276489257812, |
|
"logps/rejected": -1855.286376953125, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.1610066890716553, |
|
"rewards/margins": 12.888555526733398, |
|
"rewards/margins_max": 20.415128707885742, |
|
"rewards/margins_min": 5.361984729766846, |
|
"rewards/margins_std": 10.644180297851562, |
|
"rewards/rejected": -16.049564361572266, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 3.2735885708433997e-06, |
|
"logits/chosen": -1.4375452995300293, |
|
"logits/rejected": -1.0696020126342773, |
|
"logps/chosen": -637.91552734375, |
|
"logps/rejected": -1755.896240234375, |
|
"loss": 0.2119, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.1430463790893555, |
|
"rewards/margins": 11.392419815063477, |
|
"rewards/margins_max": 16.991348266601562, |
|
"rewards/margins_min": 5.793490886688232, |
|
"rewards/margins_std": 7.918080806732178, |
|
"rewards/rejected": -15.5354642868042, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 3.247392045364426e-06, |
|
"logits/chosen": -1.4832890033721924, |
|
"logits/rejected": -1.4127947092056274, |
|
"logps/chosen": -557.3673095703125, |
|
"logps/rejected": -2154.6083984375, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5359015464782715, |
|
"rewards/margins": 15.562028884887695, |
|
"rewards/margins_max": 23.592113494873047, |
|
"rewards/margins_min": 7.531941890716553, |
|
"rewards/margins_std": 11.356256484985352, |
|
"rewards/rejected": -19.097932815551758, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 3.221105086782781e-06, |
|
"logits/chosen": -1.5462195873260498, |
|
"logits/rejected": -1.2428996562957764, |
|
"logps/chosen": -594.3450927734375, |
|
"logps/rejected": -2124.96728515625, |
|
"loss": 0.3074, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.730942964553833, |
|
"rewards/margins": 14.838473320007324, |
|
"rewards/margins_max": 22.921918869018555, |
|
"rewards/margins_min": 6.755026817321777, |
|
"rewards/margins_std": 11.431719779968262, |
|
"rewards/rejected": -18.569416046142578, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 3.194730875773504e-06, |
|
"logits/chosen": -1.4843318462371826, |
|
"logits/rejected": -1.30060613155365, |
|
"logps/chosen": -617.7728271484375, |
|
"logps/rejected": -1552.475341796875, |
|
"loss": 0.2068, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.88134503364563, |
|
"rewards/margins": 9.543024063110352, |
|
"rewards/margins_max": 15.337367057800293, |
|
"rewards/margins_min": 3.7486824989318848, |
|
"rewards/margins_std": 8.194437026977539, |
|
"rewards/rejected": -13.424371719360352, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 3.1682726035690254e-06, |
|
"logits/chosen": -1.5420567989349365, |
|
"logits/rejected": -1.08122718334198, |
|
"logps/chosen": -562.6282348632812, |
|
"logps/rejected": -1668.4222412109375, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0375943183898926, |
|
"rewards/margins": 11.751439094543457, |
|
"rewards/margins_max": 18.029438018798828, |
|
"rewards/margins_min": 5.473437309265137, |
|
"rewards/margins_std": 8.878433227539062, |
|
"rewards/rejected": -14.789031982421875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 3.141733471573026e-06, |
|
"logits/chosen": -1.4196301698684692, |
|
"logits/rejected": -1.1794580221176147, |
|
"logps/chosen": -638.9561767578125, |
|
"logps/rejected": -1795.17578125, |
|
"loss": 0.2731, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.6092400550842285, |
|
"rewards/margins": 12.275900840759277, |
|
"rewards/margins_max": 19.90064239501953, |
|
"rewards/margins_min": 4.651161193847656, |
|
"rewards/margins_std": 10.783010482788086, |
|
"rewards/rejected": -15.885139465332031, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 3.1151166909730814e-06, |
|
"logits/chosen": -1.4170836210250854, |
|
"logits/rejected": -1.3289744853973389, |
|
"logps/chosen": -499.4686584472656, |
|
"logps/rejected": -1666.9635009765625, |
|
"loss": 0.3169, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2561161518096924, |
|
"rewards/margins": 11.558661460876465, |
|
"rewards/margins_max": 19.94423484802246, |
|
"rewards/margins_min": 3.173088550567627, |
|
"rewards/margins_std": 11.858991622924805, |
|
"rewards/rejected": -14.814778327941895, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 3.0884254823521064e-06, |
|
"logits/chosen": -1.4527684450149536, |
|
"logits/rejected": -1.1548287868499756, |
|
"logps/chosen": -428.9183044433594, |
|
"logps/rejected": -1450.4156494140625, |
|
"loss": 0.2553, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.477548599243164, |
|
"rewards/margins": 10.415294647216797, |
|
"rewards/margins_max": 16.25199317932129, |
|
"rewards/margins_min": 4.578597068786621, |
|
"rewards/margins_std": 8.254335403442383, |
|
"rewards/rejected": -12.892842292785645, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.0616630752986755e-06, |
|
"logits/chosen": -1.6659702062606812, |
|
"logits/rejected": -1.3601497411727905, |
|
"logps/chosen": -599.8160400390625, |
|
"logps/rejected": -1705.684326171875, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.4542229175567627, |
|
"rewards/margins": 10.894365310668945, |
|
"rewards/margins_max": 16.569028854370117, |
|
"rewards/margins_min": 5.219702243804932, |
|
"rewards/margins_std": 8.025186538696289, |
|
"rewards/rejected": -14.348589897155762, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3.0348327080162438e-06, |
|
"logits/chosen": -1.650661826133728, |
|
"logits/rejected": -1.2949879169464111, |
|
"logps/chosen": -613.9927978515625, |
|
"logps/rejected": -1550.9993896484375, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.4950804710388184, |
|
"rewards/margins": 9.927111625671387, |
|
"rewards/margins_max": 15.52288818359375, |
|
"rewards/margins_min": 4.331338405609131, |
|
"rewards/margins_std": 7.9136199951171875, |
|
"rewards/rejected": -13.422192573547363, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.25, |
|
"learning_rate": 3.007937626931336e-06, |
|
"logits/chosen": -1.5543906688690186, |
|
"logits/rejected": -1.2956621646881104, |
|
"logps/chosen": -451.51629638671875, |
|
"logps/rejected": -1468.626708984375, |
|
"loss": 0.247, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.4063615798950195, |
|
"rewards/margins": 9.632174491882324, |
|
"rewards/margins_max": 15.11768913269043, |
|
"rewards/margins_min": 4.146659851074219, |
|
"rewards/margins_std": 7.757689476013184, |
|
"rewards/rejected": -12.038535118103027, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.25, |
|
"learning_rate": 2.9809810863007286e-06, |
|
"logits/chosen": -1.581876516342163, |
|
"logits/rejected": -1.253678560256958, |
|
"logps/chosen": -515.3495483398438, |
|
"logps/rejected": -1483.419677734375, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.1044821739196777, |
|
"rewards/margins": 9.645689010620117, |
|
"rewards/margins_max": 15.521951675415039, |
|
"rewards/margins_min": 3.7694268226623535, |
|
"rewards/margins_std": 8.31028938293457, |
|
"rewards/rejected": -12.750170707702637, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 2.953966347817695e-06, |
|
"logits/chosen": -1.4079358577728271, |
|
"logits/rejected": -1.3491824865341187, |
|
"logps/chosen": -496.98394775390625, |
|
"logps/rejected": -1792.646484375, |
|
"loss": 0.1523, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.880263566970825, |
|
"rewards/margins": 12.569089889526367, |
|
"rewards/margins_max": 21.626419067382812, |
|
"rewards/margins_min": 3.5117599964141846, |
|
"rewards/margins_std": 12.808998107910156, |
|
"rewards/rejected": -15.449353218078613, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.9268966802173437e-06, |
|
"logits/chosen": -1.4724808931350708, |
|
"logits/rejected": -1.126155138015747, |
|
"logps/chosen": -572.5735473632812, |
|
"logps/rejected": -1458.450439453125, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.984464406967163, |
|
"rewards/margins": 9.40550422668457, |
|
"rewards/margins_max": 14.413080215454102, |
|
"rewards/margins_min": 4.397928237915039, |
|
"rewards/margins_std": 7.08178186416626, |
|
"rewards/rejected": -12.389968872070312, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 18.25, |
|
"learning_rate": 2.89977535888111e-06, |
|
"logits/chosen": -1.4884029626846313, |
|
"logits/rejected": -1.2525956630706787, |
|
"logps/chosen": -469.20220947265625, |
|
"logps/rejected": -1566.2236328125, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.888575792312622, |
|
"rewards/margins": 10.976606369018555, |
|
"rewards/margins_max": 17.656143188476562, |
|
"rewards/margins_min": 4.297070503234863, |
|
"rewards/margins_std": 9.446290969848633, |
|
"rewards/rejected": -13.865182876586914, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 29.375, |
|
"learning_rate": 2.872605665440436e-06, |
|
"logits/chosen": -1.493099331855774, |
|
"logits/rejected": -1.4659028053283691, |
|
"logps/chosen": -537.7650146484375, |
|
"logps/rejected": -2078.65869140625, |
|
"loss": 0.3169, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.645385265350342, |
|
"rewards/margins": 14.931035041809082, |
|
"rewards/margins_max": 23.03750991821289, |
|
"rewards/margins_min": 6.824557304382324, |
|
"rewards/margins_std": 11.464289665222168, |
|
"rewards/rejected": -18.576419830322266, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 2.845390887379706e-06, |
|
"logits/chosen": -1.6047687530517578, |
|
"logits/rejected": -1.345473051071167, |
|
"logps/chosen": -587.2362670898438, |
|
"logps/rejected": -1573.9320068359375, |
|
"loss": 0.3874, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.5847039222717285, |
|
"rewards/margins": 10.173007011413574, |
|
"rewards/margins_max": 17.029922485351562, |
|
"rewards/margins_min": 3.3160948753356934, |
|
"rewards/margins_std": 9.697138786315918, |
|
"rewards/rejected": -13.757711410522461, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 2.818134317638459e-06, |
|
"logits/chosen": -1.3401492834091187, |
|
"logits/rejected": -1.3071324825286865, |
|
"logps/chosen": -535.4603881835938, |
|
"logps/rejected": -1909.8978271484375, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.384225368499756, |
|
"rewards/margins": 12.386846542358398, |
|
"rewards/margins_max": 18.520631790161133, |
|
"rewards/margins_min": 6.253061771392822, |
|
"rewards/margins_std": 8.674481391906738, |
|
"rewards/rejected": -15.77107048034668, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.125, |
|
"learning_rate": 2.790839254212954e-06, |
|
"logits/chosen": -1.618775725364685, |
|
"logits/rejected": -1.407546043395996, |
|
"logps/chosen": -557.8433837890625, |
|
"logps/rejected": -1905.104248046875, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.2777836322784424, |
|
"rewards/margins": 13.154513359069824, |
|
"rewards/margins_max": 20.147037506103516, |
|
"rewards/margins_min": 6.161989212036133, |
|
"rewards/margins_std": 9.888921737670898, |
|
"rewards/rejected": -16.432294845581055, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 2.7635089997571196e-06, |
|
"logits/chosen": -1.5546470880508423, |
|
"logits/rejected": -1.463314175605774, |
|
"logps/chosen": -512.0051879882812, |
|
"logps/rejected": -1762.7623291015625, |
|
"loss": 0.1786, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9287774562835693, |
|
"rewards/margins": 11.732918739318848, |
|
"rewards/margins_max": 18.208843231201172, |
|
"rewards/margins_min": 5.256992816925049, |
|
"rewards/margins_std": 9.158343315124512, |
|
"rewards/rejected": -14.66169548034668, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 2.736146861182933e-06, |
|
"logits/chosen": -1.6997873783111572, |
|
"logits/rejected": -1.4095062017440796, |
|
"logps/chosen": -528.2429809570312, |
|
"logps/rejected": -1602.039306640625, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.242964506149292, |
|
"rewards/margins": 10.520895004272461, |
|
"rewards/margins_max": 17.078807830810547, |
|
"rewards/margins_min": 3.962982654571533, |
|
"rewards/margins_std": 9.27428913116455, |
|
"rewards/rejected": -13.763860702514648, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.7087561492602927e-06, |
|
"logits/chosen": -1.5607562065124512, |
|
"logits/rejected": -1.209094762802124, |
|
"logps/chosen": -582.0310668945312, |
|
"logps/rejected": -1421.4693603515625, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.435152769088745, |
|
"rewards/margins": 8.758903503417969, |
|
"rewards/margins_max": 13.951889038085938, |
|
"rewards/margins_min": 3.565919876098633, |
|
"rewards/margins_std": 7.34398889541626, |
|
"rewards/rejected": -12.194056510925293, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 2.681340178216423e-06, |
|
"logits/chosen": -1.8068653345108032, |
|
"logits/rejected": -1.5372353792190552, |
|
"logps/chosen": -536.0225830078125, |
|
"logps/rejected": -1841.3037109375, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9493765830993652, |
|
"rewards/margins": 12.962008476257324, |
|
"rewards/margins_max": 18.436702728271484, |
|
"rewards/margins_min": 7.487314701080322, |
|
"rewards/margins_std": 7.742384433746338, |
|
"rewards/rejected": -15.911382675170898, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 2.6539022653348577e-06, |
|
"logits/chosen": -1.4544528722763062, |
|
"logits/rejected": -1.2387598752975464, |
|
"logps/chosen": -541.6769409179688, |
|
"logps/rejected": -2221.091796875, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3415286540985107, |
|
"rewards/margins": 16.246158599853516, |
|
"rewards/margins_max": 25.676050186157227, |
|
"rewards/margins_min": 6.8162641525268555, |
|
"rewards/margins_std": 13.335882186889648, |
|
"rewards/rejected": -19.58768653869629, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 2.6264457305540502e-06, |
|
"logits/chosen": -1.7259023189544678, |
|
"logits/rejected": -1.4031670093536377, |
|
"logps/chosen": -525.0916748046875, |
|
"logps/rejected": -1967.254150390625, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.277066707611084, |
|
"rewards/margins": 14.416677474975586, |
|
"rewards/margins_max": 25.7095947265625, |
|
"rewards/margins_min": 3.1237592697143555, |
|
"rewards/margins_std": 15.970598220825195, |
|
"rewards/rejected": -17.693744659423828, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.625, |
|
"learning_rate": 2.598973896065674e-06, |
|
"logits/chosen": -1.2255192995071411, |
|
"logits/rejected": -1.1902718544006348, |
|
"logps/chosen": -627.9791870117188, |
|
"logps/rejected": -2590.4423828125, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.78948712348938, |
|
"rewards/margins": 19.36321258544922, |
|
"rewards/margins_max": 30.17099952697754, |
|
"rewards/margins_min": 8.555425643920898, |
|
"rewards/margins_std": 15.284518241882324, |
|
"rewards/rejected": -23.152698516845703, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 17.125, |
|
"learning_rate": 2.571490085912638e-06, |
|
"logits/chosen": -1.4462924003601074, |
|
"logits/rejected": -1.1185009479522705, |
|
"logps/chosen": -672.9526977539062, |
|
"logps/rejected": -1587.697021484375, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.46766996383667, |
|
"rewards/margins": 9.219390869140625, |
|
"rewards/margins_max": 12.912025451660156, |
|
"rewards/margins_min": 5.526756763458252, |
|
"rewards/margins_std": 5.222173690795898, |
|
"rewards/rejected": -13.68706226348877, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 2.543997625586885e-06, |
|
"logits/chosen": -1.4518120288848877, |
|
"logits/rejected": -1.2286278009414673, |
|
"logps/chosen": -543.7399291992188, |
|
"logps/rejected": -2093.8134765625, |
|
"loss": 0.1356, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.394313097000122, |
|
"rewards/margins": 14.934762954711914, |
|
"rewards/margins_max": 24.45358657836914, |
|
"rewards/margins_min": 5.415938377380371, |
|
"rewards/margins_std": 13.461651802062988, |
|
"rewards/rejected": -18.329076766967773, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 2.516499841627014e-06, |
|
"logits/chosen": -1.6292340755462646, |
|
"logits/rejected": -1.4900437593460083, |
|
"logps/chosen": -599.104248046875, |
|
"logps/rejected": -1993.777099609375, |
|
"loss": 0.2569, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.7089314460754395, |
|
"rewards/margins": 13.895370483398438, |
|
"rewards/margins_max": 23.25143814086914, |
|
"rewards/margins_min": 4.539303779602051, |
|
"rewards/margins_std": 13.231475830078125, |
|
"rewards/rejected": -17.60430335998535, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 2.4890000612157748e-06, |
|
"logits/chosen": -1.5528570413589478, |
|
"logits/rejected": -1.3459594249725342, |
|
"logps/chosen": -668.9677124023438, |
|
"logps/rejected": -1671.044189453125, |
|
"loss": 0.2611, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.5307769775390625, |
|
"rewards/margins": 10.027362823486328, |
|
"rewards/margins_max": 18.219478607177734, |
|
"rewards/margins_min": 1.835247278213501, |
|
"rewards/margins_std": 11.585399627685547, |
|
"rewards/rejected": -14.558140754699707, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 2.4615016117774836e-06, |
|
"logits/chosen": -1.4520705938339233, |
|
"logits/rejected": -1.242058515548706, |
|
"logps/chosen": -517.4442138671875, |
|
"logps/rejected": -1405.398681640625, |
|
"loss": 0.1668, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.164559841156006, |
|
"rewards/margins": 8.761804580688477, |
|
"rewards/margins_max": 14.644865036010742, |
|
"rewards/margins_min": 2.8787448406219482, |
|
"rewards/margins_std": 8.319904327392578, |
|
"rewards/rejected": -11.926366806030273, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 2.43400782057541e-06, |
|
"logits/chosen": -1.5988481044769287, |
|
"logits/rejected": -1.2858158349990845, |
|
"logps/chosen": -585.8101196289062, |
|
"logps/rejected": -1589.328125, |
|
"loss": 0.1983, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.538374423980713, |
|
"rewards/margins": 9.93020248413086, |
|
"rewards/margins_max": 15.531498908996582, |
|
"rewards/margins_min": 4.328904628753662, |
|
"rewards/margins_std": 7.921429634094238, |
|
"rewards/rejected": -13.46857738494873, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 2.4065220143091863e-06, |
|
"logits/chosen": -1.4877874851226807, |
|
"logits/rejected": -1.2298099994659424, |
|
"logps/chosen": -494.2522888183594, |
|
"logps/rejected": -1277.85107421875, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7383546829223633, |
|
"rewards/margins": 7.873937129974365, |
|
"rewards/margins_max": 11.741680145263672, |
|
"rewards/margins_min": 4.006192684173584, |
|
"rewards/margins_std": 5.469815731048584, |
|
"rewards/rejected": -10.61229133605957, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 2.3790475187122838e-06, |
|
"logits/chosen": -1.488873839378357, |
|
"logits/rejected": -1.2716490030288696, |
|
"logps/chosen": -497.75762939453125, |
|
"logps/rejected": -1593.458984375, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9146790504455566, |
|
"rewards/margins": 11.19676399230957, |
|
"rewards/margins_max": 18.476064682006836, |
|
"rewards/margins_min": 3.9174625873565674, |
|
"rewards/margins_std": 10.294486045837402, |
|
"rewards/rejected": -14.111444473266602, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 2.3515876581495983e-06, |
|
"logits/chosen": -1.5976613759994507, |
|
"logits/rejected": -1.1677883863449097, |
|
"logps/chosen": -589.4625244140625, |
|
"logps/rejected": -1531.7314453125, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.779262065887451, |
|
"rewards/margins": 9.616241455078125, |
|
"rewards/margins_max": 15.82532787322998, |
|
"rewards/margins_min": 3.407156467437744, |
|
"rewards/margins_std": 8.780973434448242, |
|
"rewards/rejected": -12.395503044128418, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 2.3241457552152188e-06, |
|
"logits/chosen": -1.4134392738342285, |
|
"logits/rejected": -1.0529563426971436, |
|
"logps/chosen": -578.8505249023438, |
|
"logps/rejected": -1768.5361328125, |
|
"loss": 0.1572, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.193676471710205, |
|
"rewards/margins": 12.612680435180664, |
|
"rewards/margins_max": 20.445871353149414, |
|
"rewards/margins_min": 4.779488563537598, |
|
"rewards/margins_std": 11.077805519104004, |
|
"rewards/rejected": -15.806355476379395, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 2.2967251303303876e-06, |
|
"logits/chosen": -1.4060922861099243, |
|
"logits/rejected": -1.2433264255523682, |
|
"logps/chosen": -471.23370361328125, |
|
"logps/rejected": -1373.26708984375, |
|
"loss": 0.3607, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.941974639892578, |
|
"rewards/margins": 8.82459831237793, |
|
"rewards/margins_max": 14.054105758666992, |
|
"rewards/margins_min": 3.59509015083313, |
|
"rewards/margins_std": 7.395641326904297, |
|
"rewards/rejected": -11.766572952270508, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 2.269329101341745e-06, |
|
"logits/chosen": -1.555955171585083, |
|
"logits/rejected": -1.3775891065597534, |
|
"logps/chosen": -484.1917419433594, |
|
"logps/rejected": -1392.3919677734375, |
|
"loss": 0.1535, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.837075710296631, |
|
"rewards/margins": 8.895866394042969, |
|
"rewards/margins_max": 12.957498550415039, |
|
"rewards/margins_min": 4.834234237670898, |
|
"rewards/margins_std": 5.744015216827393, |
|
"rewards/rejected": -11.732942581176758, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 2.2419609831198695e-06, |
|
"logits/chosen": -1.4280954599380493, |
|
"logits/rejected": -1.3863157033920288, |
|
"logps/chosen": -489.63812255859375, |
|
"logps/rejected": -2068.576904296875, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8380837440490723, |
|
"rewards/margins": 15.058830261230469, |
|
"rewards/margins_max": 23.17379379272461, |
|
"rewards/margins_min": 6.943869113922119, |
|
"rewards/margins_std": 11.476289749145508, |
|
"rewards/rejected": -17.896915435791016, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 2.214624087158188e-06, |
|
"logits/chosen": -1.5892161130905151, |
|
"logits/rejected": -1.3412600755691528, |
|
"logps/chosen": -600.9816284179688, |
|
"logps/rejected": -1814.3232421875, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3865344524383545, |
|
"rewards/margins": 11.768966674804688, |
|
"rewards/margins_max": 18.325162887573242, |
|
"rewards/margins_min": 5.212772846221924, |
|
"rewards/margins_std": 9.271859169006348, |
|
"rewards/rejected": -15.155502319335938, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 2.1873217211722883e-06, |
|
"logits/chosen": -1.41153883934021, |
|
"logits/rejected": -1.1877845525741577, |
|
"logps/chosen": -477.3059997558594, |
|
"logps/rejected": -1664.2574462890625, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7079248428344727, |
|
"rewards/margins": 12.086627006530762, |
|
"rewards/margins_max": 20.110389709472656, |
|
"rewards/margins_min": 4.062866687774658, |
|
"rewards/margins_std": 11.347312927246094, |
|
"rewards/rejected": -14.794553756713867, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 2.1600571886996937e-06, |
|
"logits/chosen": -1.5755198001861572, |
|
"logits/rejected": -1.1792980432510376, |
|
"logps/chosen": -515.0977172851562, |
|
"logps/rejected": -1678.0728759765625, |
|
"loss": 0.1313, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.564347982406616, |
|
"rewards/margins": 11.869641304016113, |
|
"rewards/margins_max": 18.997394561767578, |
|
"rewards/margins_min": 4.74188756942749, |
|
"rewards/margins_std": 10.080163955688477, |
|
"rewards/rejected": -14.433990478515625, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 2.1328337887001387e-06, |
|
"logits/chosen": -1.5203434228897095, |
|
"logits/rejected": -1.1894242763519287, |
|
"logps/chosen": -681.9379272460938, |
|
"logps/rejected": -1993.7366943359375, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.278992652893066, |
|
"rewards/margins": 13.553190231323242, |
|
"rewards/margins_max": 20.062623977661133, |
|
"rewards/margins_min": 7.043754577636719, |
|
"rewards/margins_std": 9.205732345581055, |
|
"rewards/rejected": -17.832181930541992, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 2.1056548151564064e-06, |
|
"logits/chosen": -1.4046363830566406, |
|
"logits/rejected": -1.1018089056015015, |
|
"logps/chosen": -512.6920776367188, |
|
"logps/rejected": -1592.6275634765625, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.973428726196289, |
|
"rewards/margins": 10.557977676391602, |
|
"rewards/margins_max": 15.843744277954102, |
|
"rewards/margins_min": 5.272213935852051, |
|
"rewards/margins_std": 7.4751996994018555, |
|
"rewards/rejected": -13.531407356262207, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 2.078523556675752e-06, |
|
"logits/chosen": -1.666265845298767, |
|
"logits/rejected": -1.2846992015838623, |
|
"logps/chosen": -642.2943725585938, |
|
"logps/rejected": -1613.908203125, |
|
"loss": 0.2526, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.648852825164795, |
|
"rewards/margins": 9.809070587158203, |
|
"rewards/margins_max": 15.258506774902344, |
|
"rewards/margins_min": 4.359635353088379, |
|
"rewards/margins_std": 7.706665992736816, |
|
"rewards/rejected": -13.457923889160156, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 2.051443296091998e-06, |
|
"logits/chosen": -1.4331845045089722, |
|
"logits/rejected": -1.0446020364761353, |
|
"logps/chosen": -584.4759521484375, |
|
"logps/rejected": -1261.5855712890625, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.0501112937927246, |
|
"rewards/margins": 7.305967807769775, |
|
"rewards/margins_max": 10.653976440429688, |
|
"rewards/margins_min": 3.957958698272705, |
|
"rewards/margins_std": 4.734799385070801, |
|
"rewards/rejected": -10.3560791015625, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 2.0244173100683093e-06, |
|
"logits/chosen": -1.473258376121521, |
|
"logits/rejected": -1.2557618618011475, |
|
"logps/chosen": -553.75048828125, |
|
"logps/rejected": -1824.484375, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.070702075958252, |
|
"rewards/margins": 12.991300582885742, |
|
"rewards/margins_max": 20.931209564208984, |
|
"rewards/margins_min": 5.051392555236816, |
|
"rewards/margins_std": 11.228726387023926, |
|
"rewards/rejected": -16.062002182006836, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.9974488687007274e-06, |
|
"logits/chosen": -1.3898913860321045, |
|
"logits/rejected": -1.1323245763778687, |
|
"logps/chosen": -500.98419189453125, |
|
"logps/rejected": -1475.350341796875, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.0772461891174316, |
|
"rewards/margins": 9.603334426879883, |
|
"rewards/margins_max": 14.657646179199219, |
|
"rewards/margins_min": 4.549025058746338, |
|
"rewards/margins_std": 7.147873878479004, |
|
"rewards/rejected": -12.680582046508789, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 1.970541235122494e-06, |
|
"logits/chosen": -1.4766249656677246, |
|
"logits/rejected": -1.251630187034607, |
|
"logps/chosen": -529.0836181640625, |
|
"logps/rejected": -1698.4908447265625, |
|
"loss": 0.2151, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6294097900390625, |
|
"rewards/margins": 12.28985595703125, |
|
"rewards/margins_max": 19.14556121826172, |
|
"rewards/margins_min": 5.434152603149414, |
|
"rewards/margins_std": 9.695430755615234, |
|
"rewards/rejected": -14.919267654418945, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 1.9436976651092143e-06, |
|
"logits/chosen": -1.5825145244598389, |
|
"logits/rejected": -1.2321441173553467, |
|
"logps/chosen": -616.10888671875, |
|
"logps/rejected": -1734.438232421875, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8859734535217285, |
|
"rewards/margins": 11.892366409301758, |
|
"rewards/margins_max": 16.552751541137695, |
|
"rewards/margins_min": 7.231983184814453, |
|
"rewards/margins_std": 6.590777397155762, |
|
"rewards/rejected": -14.778340339660645, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 14.125, |
|
"learning_rate": 1.91692140668492e-06, |
|
"logits/chosen": -1.4896900653839111, |
|
"logits/rejected": -1.1990612745285034, |
|
"logps/chosen": -513.3895263671875, |
|
"logps/rejected": -1140.274169921875, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.0260584354400635, |
|
"rewards/margins": 6.219557285308838, |
|
"rewards/margins_max": 10.312917709350586, |
|
"rewards/margins_min": 2.126194477081299, |
|
"rewards/margins_std": 5.788887977600098, |
|
"rewards/rejected": -9.245615005493164, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 1.8902156997290571e-06, |
|
"logits/chosen": -1.4913126230239868, |
|
"logits/rejected": -1.1569576263427734, |
|
"logps/chosen": -536.8547973632812, |
|
"logps/rejected": -1584.836669921875, |
|
"loss": 0.1117, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1256766319274902, |
|
"rewards/margins": 10.562490463256836, |
|
"rewards/margins_max": 15.560125350952148, |
|
"rewards/margins_min": 5.564853668212891, |
|
"rewards/margins_std": 7.067723751068115, |
|
"rewards/rejected": -13.6881685256958, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 1.8635837755844739e-06, |
|
"logits/chosen": -1.7155044078826904, |
|
"logits/rejected": -1.3497909307479858, |
|
"logps/chosen": -462.90716552734375, |
|
"logps/rejected": -1425.9698486328125, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.666769027709961, |
|
"rewards/margins": 9.71937084197998, |
|
"rewards/margins_max": 15.215265274047852, |
|
"rewards/margins_min": 4.223477363586426, |
|
"rewards/margins_std": 7.77236795425415, |
|
"rewards/rejected": -12.386140823364258, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7890625, |
|
"learning_rate": 1.8370288566664263e-06, |
|
"logits/chosen": -1.5542620420455933, |
|
"logits/rejected": -1.303271770477295, |
|
"logps/chosen": -538.37255859375, |
|
"logps/rejected": -1543.0211181640625, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1137211322784424, |
|
"rewards/margins": 10.367592811584473, |
|
"rewards/margins_max": 16.86190414428711, |
|
"rewards/margins_min": 3.873281478881836, |
|
"rewards/margins_std": 9.184343338012695, |
|
"rewards/rejected": -13.481313705444336, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 1.8105541560726786e-06, |
|
"logits/chosen": -1.6579090356826782, |
|
"logits/rejected": -1.2486917972564697, |
|
"logps/chosen": -563.3258666992188, |
|
"logps/rejected": -1781.8521728515625, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.4339842796325684, |
|
"rewards/margins": 12.411375045776367, |
|
"rewards/margins_max": 17.969219207763672, |
|
"rewards/margins_min": 6.8535261154174805, |
|
"rewards/margins_std": 7.859982967376709, |
|
"rewards/rejected": -15.845357894897461, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.859375, |
|
"learning_rate": 1.784162877194719e-06, |
|
"logits/chosen": -1.5229161977767944, |
|
"logits/rejected": -1.1513035297393799, |
|
"logps/chosen": -524.3636474609375, |
|
"logps/rejected": -1594.617919921875, |
|
"loss": 0.1073, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.86541748046875, |
|
"rewards/margins": 11.076706886291504, |
|
"rewards/margins_max": 16.567943572998047, |
|
"rewards/margins_min": 5.585470199584961, |
|
"rewards/margins_std": 7.765781402587891, |
|
"rewards/rejected": -13.94212532043457, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.25, |
|
"learning_rate": 1.7578582133301572e-06, |
|
"logits/chosen": -1.3213260173797607, |
|
"logits/rejected": -1.2071130275726318, |
|
"logps/chosen": -654.8223266601562, |
|
"logps/rejected": -2169.09228515625, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.209962368011475, |
|
"rewards/margins": 14.692832946777344, |
|
"rewards/margins_max": 24.164161682128906, |
|
"rewards/margins_min": 5.221506118774414, |
|
"rewards/margins_std": 13.394479751586914, |
|
"rewards/rejected": -18.90279769897461, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 1.7316433472963428e-06, |
|
"logits/chosen": -1.656837821006775, |
|
"logits/rejected": -1.4358789920806885, |
|
"logps/chosen": -638.5806884765625, |
|
"logps/rejected": -1915.6451416015625, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.5313782691955566, |
|
"rewards/margins": 13.209005355834961, |
|
"rewards/margins_max": 20.33796501159668, |
|
"rewards/margins_min": 6.080048561096191, |
|
"rewards/margins_std": 10.081869125366211, |
|
"rewards/rejected": -16.74038314819336, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 1.7055214510452462e-06, |
|
"logits/chosen": -1.490957498550415, |
|
"logits/rejected": -1.1374667882919312, |
|
"logps/chosen": -700.1129760742188, |
|
"logps/rejected": -2816.78173828125, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6439292430877686, |
|
"rewards/margins": 21.763607025146484, |
|
"rewards/margins_max": 35.701961517333984, |
|
"rewards/margins_min": 7.825249671936035, |
|
"rewards/margins_std": 19.711811065673828, |
|
"rewards/rejected": -25.407535552978516, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1.6794956852796618e-06, |
|
"logits/chosen": -1.5615451335906982, |
|
"logits/rejected": -1.3411719799041748, |
|
"logps/chosen": -565.6097412109375, |
|
"logps/rejected": -1728.943359375, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4684576988220215, |
|
"rewards/margins": 11.626744270324707, |
|
"rewards/margins_max": 18.29702377319336, |
|
"rewards/margins_min": 4.956464767456055, |
|
"rewards/margins_std": 9.43320083618164, |
|
"rewards/rejected": -15.095202445983887, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 1.6535691990707642e-06, |
|
"logits/chosen": -1.5745326280593872, |
|
"logits/rejected": -1.2651867866516113, |
|
"logps/chosen": -564.30224609375, |
|
"logps/rejected": -2381.66162109375, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.5344910621643066, |
|
"rewards/margins": 17.981985092163086, |
|
"rewards/margins_max": 29.834253311157227, |
|
"rewards/margins_min": 6.129714488983154, |
|
"rewards/margins_std": 16.761640548706055, |
|
"rewards/rejected": -21.5164737701416, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 1.6277451294770835e-06, |
|
"logits/chosen": -1.5891001224517822, |
|
"logits/rejected": -1.2852892875671387, |
|
"logps/chosen": -548.130615234375, |
|
"logps/rejected": -1428.930419921875, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.710313320159912, |
|
"rewards/margins": 8.998636245727539, |
|
"rewards/margins_max": 14.026883125305176, |
|
"rewards/margins_min": 3.9703896045684814, |
|
"rewards/margins_std": 7.111013889312744, |
|
"rewards/rejected": -12.708948135375977, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 1.6020266011649176e-06, |
|
"logits/chosen": -1.4778506755828857, |
|
"logits/rejected": -1.1584922075271606, |
|
"logps/chosen": -663.0747680664062, |
|
"logps/rejected": -1830.345947265625, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.136785507202148, |
|
"rewards/margins": 11.866098403930664, |
|
"rewards/margins_max": 20.245691299438477, |
|
"rewards/margins_min": 3.486506223678589, |
|
"rewards/margins_std": 11.850533485412598, |
|
"rewards/rejected": -16.002885818481445, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.01287841796875, |
|
"learning_rate": 1.576416726030261e-06, |
|
"logits/chosen": -1.3806267976760864, |
|
"logits/rejected": -1.3429162502288818, |
|
"logps/chosen": -581.0066528320312, |
|
"logps/rejected": -2020.9459228515625, |
|
"loss": 0.13, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.6570827960968018, |
|
"rewards/margins": 13.965600967407227, |
|
"rewards/margins_max": 22.282276153564453, |
|
"rewards/margins_min": 5.648922920227051, |
|
"rewards/margins_std": 11.761558532714844, |
|
"rewards/rejected": -17.622684478759766, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.5509186028222657e-06, |
|
"logits/chosen": -1.4945251941680908, |
|
"logits/rejected": -1.123812198638916, |
|
"logps/chosen": -578.8905639648438, |
|
"logps/rejected": -2480.041259765625, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3431968688964844, |
|
"rewards/margins": 19.43141746520996, |
|
"rewards/margins_max": 33.1664924621582, |
|
"rewards/margins_min": 5.696345806121826, |
|
"rewards/margins_std": 19.424325942993164, |
|
"rewards/rejected": -22.774616241455078, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 16.125, |
|
"learning_rate": 1.5255353167683017e-06, |
|
"logits/chosen": -1.5955666303634644, |
|
"logits/rejected": -1.2716028690338135, |
|
"logps/chosen": -535.6474609375, |
|
"logps/rejected": -1721.0478515625, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3446204662323, |
|
"rewards/margins": 11.946348190307617, |
|
"rewards/margins_max": 19.05233383178711, |
|
"rewards/margins_min": 4.840361595153809, |
|
"rewards/margins_std": 10.049383163452148, |
|
"rewards/rejected": -15.290969848632812, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.500269939200648e-06, |
|
"logits/chosen": -1.5924263000488281, |
|
"logits/rejected": -1.3786189556121826, |
|
"logps/chosen": -581.8662109375, |
|
"logps/rejected": -1422.6884765625, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.9787490367889404, |
|
"rewards/margins": 8.33303165435791, |
|
"rewards/margins_max": 13.738202095031738, |
|
"rewards/margins_min": 2.9278597831726074, |
|
"rewards/margins_std": 7.644065856933594, |
|
"rewards/rejected": -12.31178092956543, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.4751255271848665e-06, |
|
"logits/chosen": -1.540938138961792, |
|
"logits/rejected": -1.3453645706176758, |
|
"logps/chosen": -561.5753173828125, |
|
"logps/rejected": -1571.74169921875, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.668487071990967, |
|
"rewards/margins": 9.975430488586426, |
|
"rewards/margins_max": 15.654937744140625, |
|
"rewards/margins_min": 4.29592227935791, |
|
"rewards/margins_std": 8.032035827636719, |
|
"rewards/rejected": -13.643916130065918, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.4501051231499042e-06, |
|
"logits/chosen": -1.518028974533081, |
|
"logits/rejected": -1.1507012844085693, |
|
"logps/chosen": -533.4281005859375, |
|
"logps/rejected": -1911.723388671875, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.922112464904785, |
|
"rewards/margins": 13.367526054382324, |
|
"rewards/margins_max": 23.11964225769043, |
|
"rewards/margins_min": 3.615410566329956, |
|
"rewards/margins_std": 13.79157543182373, |
|
"rewards/rejected": -16.28963851928711, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.4252117545199639e-06, |
|
"logits/chosen": -1.3974970579147339, |
|
"logits/rejected": -1.500832438468933, |
|
"logps/chosen": -433.35089111328125, |
|
"logps/rejected": -1680.6429443359375, |
|
"loss": 0.274, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.0182032585144043, |
|
"rewards/margins": 11.945356369018555, |
|
"rewards/margins_max": 20.45294761657715, |
|
"rewards/margins_min": 3.4377639293670654, |
|
"rewards/margins_std": 12.0315523147583, |
|
"rewards/rejected": -14.9635591506958, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0078125, |
|
"learning_rate": 1.4004484333481911e-06, |
|
"logits/chosen": -1.5064579248428345, |
|
"logits/rejected": -1.2587788105010986, |
|
"logps/chosen": -525.6350708007812, |
|
"logps/rejected": -1384.8411865234375, |
|
"loss": 0.2889, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.4120688438415527, |
|
"rewards/margins": 8.54577922821045, |
|
"rewards/margins_max": 13.886482238769531, |
|
"rewards/margins_min": 3.205078125, |
|
"rewards/margins_std": 7.552893161773682, |
|
"rewards/rejected": -11.957849502563477, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.875, |
|
"learning_rate": 1.375818155952222e-06, |
|
"logits/chosen": -1.511816382408142, |
|
"logits/rejected": -1.2979012727737427, |
|
"logps/chosen": -462.0762634277344, |
|
"logps/rejected": -1462.7767333984375, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.6236302852630615, |
|
"rewards/margins": 9.779306411743164, |
|
"rewards/margins_max": 16.093631744384766, |
|
"rewards/margins_min": 3.4649829864501953, |
|
"rewards/margins_std": 8.929801940917969, |
|
"rewards/rejected": -12.402936935424805, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 11.125, |
|
"learning_rate": 1.3513239025516312e-06, |
|
"logits/chosen": -1.5850474834442139, |
|
"logits/rejected": -1.4049034118652344, |
|
"logps/chosen": -554.8724975585938, |
|
"logps/rejected": -1975.818359375, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6299610137939453, |
|
"rewards/margins": 14.073150634765625, |
|
"rewards/margins_max": 23.436885833740234, |
|
"rewards/margins_min": 4.709414482116699, |
|
"rewards/margins_std": 13.24232292175293, |
|
"rewards/rejected": -17.703113555908203, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.3269686369073348e-06, |
|
"logits/chosen": -1.547577142715454, |
|
"logits/rejected": -1.209017276763916, |
|
"logps/chosen": -588.4271850585938, |
|
"logps/rejected": -2137.411376953125, |
|
"loss": 0.1547, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2934679985046387, |
|
"rewards/margins": 15.912363052368164, |
|
"rewards/margins_max": 25.526660919189453, |
|
"rewards/margins_min": 6.298065185546875, |
|
"rewards/margins_std": 13.596672058105469, |
|
"rewards/rejected": -19.20583152770996, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 1.3027553059629778e-06, |
|
"logits/chosen": -1.4048335552215576, |
|
"logits/rejected": -1.1918952465057373, |
|
"logps/chosen": -498.7955627441406, |
|
"logps/rejected": -2174.997802734375, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.917642116546631, |
|
"rewards/margins": 16.495616912841797, |
|
"rewards/margins_max": 23.698152542114258, |
|
"rewards/margins_min": 9.293082237243652, |
|
"rewards/margins_std": 10.185922622680664, |
|
"rewards/rejected": -19.41326141357422, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 1.2786868394883617e-06, |
|
"logits/chosen": -1.5030672550201416, |
|
"logits/rejected": -1.0825165510177612, |
|
"logps/chosen": -578.2425537109375, |
|
"logps/rejected": -1289.3734130859375, |
|
"loss": 0.2603, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.369215488433838, |
|
"rewards/margins": 7.822723388671875, |
|
"rewards/margins_max": 13.195724487304688, |
|
"rewards/margins_min": 2.4497230052948, |
|
"rewards/margins_std": 7.598570346832275, |
|
"rewards/rejected": -11.191938400268555, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.2547661497249424e-06, |
|
"logits/chosen": -1.641648530960083, |
|
"logits/rejected": -1.2975002527236938, |
|
"logps/chosen": -525.0863037109375, |
|
"logps/rejected": -1528.902099609375, |
|
"loss": 0.1054, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.701660633087158, |
|
"rewards/margins": 10.769542694091797, |
|
"rewards/margins_max": 18.1401309967041, |
|
"rewards/margins_min": 3.3989555835723877, |
|
"rewards/margins_std": 10.423584938049316, |
|
"rewards/rejected": -13.471203804016113, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.625, |
|
"learning_rate": 1.230996131033461e-06, |
|
"logits/chosen": -1.5188440084457397, |
|
"logits/rejected": -1.2162914276123047, |
|
"logps/chosen": -498.5997619628906, |
|
"logps/rejected": -1448.3214111328125, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.8506922721862793, |
|
"rewards/margins": 9.72119140625, |
|
"rewards/margins_max": 14.393712997436523, |
|
"rewards/margins_min": 5.04866886138916, |
|
"rewards/margins_std": 6.607945442199707, |
|
"rewards/rejected": -12.571883201599121, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 1.2073796595437262e-06, |
|
"logits/chosen": -1.6625356674194336, |
|
"logits/rejected": -1.2526956796646118, |
|
"logps/chosen": -525.0848388671875, |
|
"logps/rejected": -1451.570556640625, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.856308698654175, |
|
"rewards/margins": 9.767531394958496, |
|
"rewards/margins_max": 15.09489917755127, |
|
"rewards/margins_min": 4.440166473388672, |
|
"rewards/margins_std": 7.534033298492432, |
|
"rewards/rejected": -12.623841285705566, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 1.1839195928066101e-06, |
|
"logits/chosen": -1.7187540531158447, |
|
"logits/rejected": -1.3239184617996216, |
|
"logps/chosen": -541.1563720703125, |
|
"logps/rejected": -1764.908203125, |
|
"loss": 0.2656, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0002570152282715, |
|
"rewards/margins": 12.637992858886719, |
|
"rewards/margins_max": 20.210735321044922, |
|
"rewards/margins_min": 5.065249919891357, |
|
"rewards/margins_std": 10.709476470947266, |
|
"rewards/rejected": -15.638249397277832, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 1.1606187694482896e-06, |
|
"logits/chosen": -1.4333114624023438, |
|
"logits/rejected": -1.2259962558746338, |
|
"logps/chosen": -660.5386962890625, |
|
"logps/rejected": -2046.311767578125, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.156102180480957, |
|
"rewards/margins": 14.356404304504395, |
|
"rewards/margins_max": 21.486778259277344, |
|
"rewards/margins_min": 7.226031303405762, |
|
"rewards/margins_std": 10.083871841430664, |
|
"rewards/rejected": -17.512508392333984, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 1.1374800088267768e-06, |
|
"logits/chosen": -1.4923628568649292, |
|
"logits/rejected": -1.036879539489746, |
|
"logps/chosen": -591.9725952148438, |
|
"logps/rejected": -1538.219970703125, |
|
"loss": 0.1484, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.318856716156006, |
|
"rewards/margins": 10.050863265991211, |
|
"rewards/margins_max": 16.637432098388672, |
|
"rewards/margins_min": 3.4642951488494873, |
|
"rewards/margins_std": 9.314813613891602, |
|
"rewards/rejected": -13.369720458984375, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 1.1145061106907804e-06, |
|
"logits/chosen": -1.5123519897460938, |
|
"logits/rejected": -1.381981611251831, |
|
"logps/chosen": -510.7267150878906, |
|
"logps/rejected": -2111.06396484375, |
|
"loss": 0.1678, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9295575618743896, |
|
"rewards/margins": 15.46514892578125, |
|
"rewards/margins_max": 24.086877822875977, |
|
"rewards/margins_min": 6.843419551849365, |
|
"rewards/margins_std": 12.192965507507324, |
|
"rewards/rejected": -18.39470672607422, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.875, |
|
"learning_rate": 1.0916998548409449e-06, |
|
"logits/chosen": -1.4227778911590576, |
|
"logits/rejected": -1.327239751815796, |
|
"logps/chosen": -492.4085998535156, |
|
"logps/rejected": -2177.32177734375, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.802885055541992, |
|
"rewards/margins": 16.447601318359375, |
|
"rewards/margins_max": 25.75341796875, |
|
"rewards/margins_min": 7.141783237457275, |
|
"rewards/margins_std": 13.160412788391113, |
|
"rewards/rejected": -19.250484466552734, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 1.069064000793498e-06, |
|
"logits/chosen": -1.5011472702026367, |
|
"logits/rejected": -1.0257246494293213, |
|
"logps/chosen": -586.6893310546875, |
|
"logps/rejected": -1570.8746337890625, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.1940226554870605, |
|
"rewards/margins": 10.318269729614258, |
|
"rewards/margins_max": 16.737285614013672, |
|
"rewards/margins_min": 3.8992531299591064, |
|
"rewards/margins_std": 9.077858924865723, |
|
"rewards/rejected": -13.512290954589844, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 17.625, |
|
"learning_rate": 1.0466012874463508e-06, |
|
"logits/chosen": -1.4119497537612915, |
|
"logits/rejected": -1.2268279790878296, |
|
"logps/chosen": -589.5607299804688, |
|
"logps/rejected": -1788.9788818359375, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1833550930023193, |
|
"rewards/margins": 12.280914306640625, |
|
"rewards/margins_max": 18.877552032470703, |
|
"rewards/margins_min": 5.684277534484863, |
|
"rewards/margins_std": 9.32905387878418, |
|
"rewards/rejected": -15.464271545410156, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.0243144327477015e-06, |
|
"logits/chosen": -1.6249080896377563, |
|
"logits/rejected": -1.1828008890151978, |
|
"logps/chosen": -485.2342224121094, |
|
"logps/rejected": -1421.8602294921875, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.5701119899749756, |
|
"rewards/margins": 9.575872421264648, |
|
"rewards/margins_max": 14.847213745117188, |
|
"rewards/margins_min": 4.304533004760742, |
|
"rewards/margins_std": 7.454801082611084, |
|
"rewards/rejected": -12.145986557006836, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 1.0022061333671649e-06, |
|
"logits/chosen": -1.4596678018569946, |
|
"logits/rejected": -1.163874864578247, |
|
"logps/chosen": -494.34228515625, |
|
"logps/rejected": -1985.4769287109375, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6837120056152344, |
|
"rewards/margins": 15.144342422485352, |
|
"rewards/margins_max": 24.748046875, |
|
"rewards/margins_min": 5.540638446807861, |
|
"rewards/margins_std": 13.581687927246094, |
|
"rewards/rejected": -17.828052520751953, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 26.25, |
|
"learning_rate": 9.802790643694817e-07, |
|
"logits/chosen": -1.541244626045227, |
|
"logits/rejected": -1.4795812368392944, |
|
"logps/chosen": -583.5311889648438, |
|
"logps/rejected": -1613.3099365234375, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.8347973823547363, |
|
"rewards/margins": 10.287752151489258, |
|
"rewards/margins_max": 16.781322479248047, |
|
"rewards/margins_min": 3.7941813468933105, |
|
"rewards/margins_std": 9.183296203613281, |
|
"rewards/rejected": -14.122549057006836, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 9.585358788908395e-07, |
|
"logits/chosen": -1.5370924472808838, |
|
"logits/rejected": -1.3602590560913086, |
|
"logps/chosen": -525.8290405273438, |
|
"logps/rejected": -2051.8203125, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9414329528808594, |
|
"rewards/margins": 15.10389232635498, |
|
"rewards/margins_max": 26.046443939208984, |
|
"rewards/margins_min": 4.161340236663818, |
|
"rewards/margins_std": 15.475103378295898, |
|
"rewards/rejected": -18.045324325561523, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.734375, |
|
"learning_rate": 9.369792078178491e-07, |
|
"logits/chosen": -1.6488637924194336, |
|
"logits/rejected": -1.4893319606781006, |
|
"logps/chosen": -568.359375, |
|
"logps/rejected": -2058.3525390625, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.25250506401062, |
|
"rewards/margins": 15.005627632141113, |
|
"rewards/margins_max": 20.63724708557129, |
|
"rewards/margins_min": 9.374009132385254, |
|
"rewards/margins_std": 7.964312553405762, |
|
"rewards/rejected": -18.258129119873047, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 19.125, |
|
"learning_rate": 9.156116594692097e-07, |
|
"logits/chosen": -1.618363618850708, |
|
"logits/rejected": -1.1712907552719116, |
|
"logps/chosen": -550.4625244140625, |
|
"logps/rejected": -1597.6376953125, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.1480445861816406, |
|
"rewards/margins": 10.727932929992676, |
|
"rewards/margins_max": 17.022151947021484, |
|
"rewards/margins_min": 4.433710098266602, |
|
"rewards/margins_std": 8.901372909545898, |
|
"rewards/rejected": -13.8759765625, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 8.944358192801103e-07, |
|
"logits/chosen": -1.6205739974975586, |
|
"logits/rejected": -1.2058194875717163, |
|
"logps/chosen": -495.60015869140625, |
|
"logps/rejected": -1743.065673828125, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.684584379196167, |
|
"rewards/margins": 12.865442276000977, |
|
"rewards/margins_max": 19.519107818603516, |
|
"rewards/margins_min": 6.211775302886963, |
|
"rewards/margins_std": 9.409704208374023, |
|
"rewards/rejected": -15.550024032592773, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 8.734542494893955e-07, |
|
"logits/chosen": -1.6140270233154297, |
|
"logits/rejected": -1.5208282470703125, |
|
"logps/chosen": -522.6798095703125, |
|
"logps/rejected": -1974.721923828125, |
|
"loss": 0.2237, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.987030506134033, |
|
"rewards/margins": 14.091033935546875, |
|
"rewards/margins_max": 23.622156143188477, |
|
"rewards/margins_min": 4.55991268157959, |
|
"rewards/margins_std": 13.479040145874023, |
|
"rewards/rejected": -17.07806396484375, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 8.526694888295356e-07, |
|
"logits/chosen": -1.53464674949646, |
|
"logits/rejected": -1.3867006301879883, |
|
"logps/chosen": -515.68994140625, |
|
"logps/rejected": -1828.5380859375, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.8848297595977783, |
|
"rewards/margins": 13.052827835083008, |
|
"rewards/margins_max": 18.821996688842773, |
|
"rewards/margins_min": 7.283656120300293, |
|
"rewards/margins_std": 8.158838272094727, |
|
"rewards/rejected": -15.937657356262207, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 8.320840522194507e-07, |
|
"logits/chosen": -1.4935245513916016, |
|
"logits/rejected": -1.3408067226409912, |
|
"logps/chosen": -532.34765625, |
|
"logps/rejected": -1617.862548828125, |
|
"loss": 0.161, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.95365834236145, |
|
"rewards/margins": 10.888130187988281, |
|
"rewards/margins_max": 17.86824607849121, |
|
"rewards/margins_min": 3.9080147743225098, |
|
"rewards/margins_std": 9.871374130249023, |
|
"rewards/rejected": -13.841787338256836, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 8.117004304602052e-07, |
|
"logits/chosen": -1.562991976737976, |
|
"logits/rejected": -1.2025073766708374, |
|
"logps/chosen": -648.80029296875, |
|
"logps/rejected": -1497.7991943359375, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.700782060623169, |
|
"rewards/margins": 9.072282791137695, |
|
"rewards/margins_max": 14.986943244934082, |
|
"rewards/margins_min": 3.157620906829834, |
|
"rewards/margins_std": 8.364594459533691, |
|
"rewards/rejected": -12.773064613342285, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 7.915210899336284e-07, |
|
"logits/chosen": -1.7214120626449585, |
|
"logits/rejected": -1.458532691001892, |
|
"logps/chosen": -510.141845703125, |
|
"logps/rejected": -1868.0325927734375, |
|
"loss": 0.1117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.908207416534424, |
|
"rewards/margins": 13.193037033081055, |
|
"rewards/margins_max": 20.153043746948242, |
|
"rewards/margins_min": 6.2330322265625, |
|
"rewards/margins_std": 9.842935562133789, |
|
"rewards/rejected": -16.101245880126953, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 7.715484723038838e-07, |
|
"logits/chosen": -1.3019468784332275, |
|
"logits/rejected": -1.161091923713684, |
|
"logps/chosen": -487.67724609375, |
|
"logps/rejected": -1867.3681640625, |
|
"loss": 0.2212, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.646726608276367, |
|
"rewards/margins": 13.512460708618164, |
|
"rewards/margins_max": 21.958669662475586, |
|
"rewards/margins_min": 5.066250324249268, |
|
"rewards/margins_std": 11.944744110107422, |
|
"rewards/rejected": -16.1591854095459, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 7.517849942220348e-07, |
|
"logits/chosen": -1.38827645778656, |
|
"logits/rejected": -1.091671109199524, |
|
"logps/chosen": -601.8095092773438, |
|
"logps/rejected": -1584.5819091796875, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.90714693069458, |
|
"rewards/margins": 9.80831241607666, |
|
"rewards/margins_max": 14.877908706665039, |
|
"rewards/margins_min": 4.738718032836914, |
|
"rewards/margins_std": 7.169489860534668, |
|
"rewards/rejected": -13.715458869934082, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 7.322330470336314e-07, |
|
"logits/chosen": -1.420436143875122, |
|
"logits/rejected": -1.3260561227798462, |
|
"logps/chosen": -494.6438903808594, |
|
"logps/rejected": -1588.2587890625, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.879929304122925, |
|
"rewards/margins": 11.048105239868164, |
|
"rewards/margins_max": 16.661272048950195, |
|
"rewards/margins_min": 5.434937953948975, |
|
"rewards/margins_std": 7.938216209411621, |
|
"rewards/rejected": -13.928033828735352, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 7.128949964893648e-07, |
|
"logits/chosen": -1.5485414266586304, |
|
"logits/rejected": -1.2311375141143799, |
|
"logps/chosen": -573.132080078125, |
|
"logps/rejected": -1600.142822265625, |
|
"loss": 0.2518, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2259268760681152, |
|
"rewards/margins": 10.486474990844727, |
|
"rewards/margins_max": 15.399114608764648, |
|
"rewards/margins_min": 5.5738325119018555, |
|
"rewards/margins_std": 6.947523593902588, |
|
"rewards/rejected": -13.712400436401367, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 6.937731824588143e-07, |
|
"logits/chosen": -1.4667555093765259, |
|
"logits/rejected": -1.3924285173416138, |
|
"logps/chosen": -531.763916015625, |
|
"logps/rejected": -1142.96142578125, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.6806187629699707, |
|
"rewards/margins": 6.147883892059326, |
|
"rewards/margins_max": 9.388947486877441, |
|
"rewards/margins_min": 2.9068212509155273, |
|
"rewards/margins_std": 4.583555698394775, |
|
"rewards/rejected": -9.828502655029297, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 6.74869918647325e-07, |
|
"logits/chosen": -1.3583132028579712, |
|
"logits/rejected": -1.1237436532974243, |
|
"logps/chosen": -545.1788940429688, |
|
"logps/rejected": -1591.46044921875, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9877545833587646, |
|
"rewards/margins": 10.712733268737793, |
|
"rewards/margins_max": 15.584787368774414, |
|
"rewards/margins_min": 5.840681076049805, |
|
"rewards/margins_std": 6.890122890472412, |
|
"rewards/rejected": -13.700488090515137, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 6.561874923160591e-07, |
|
"logits/chosen": -1.5355894565582275, |
|
"logits/rejected": -1.1774029731750488, |
|
"logps/chosen": -546.0108642578125, |
|
"logps/rejected": -1723.448974609375, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2266223430633545, |
|
"rewards/margins": 12.48703670501709, |
|
"rewards/margins_max": 18.832082748413086, |
|
"rewards/margins_min": 6.141989231109619, |
|
"rewards/margins_std": 8.97325325012207, |
|
"rewards/rejected": -15.713659286499023, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 6.377281640052358e-07, |
|
"logits/chosen": -1.7051270008087158, |
|
"logits/rejected": -1.4254144430160522, |
|
"logps/chosen": -469.79913330078125, |
|
"logps/rejected": -1844.483642578125, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.7268261909484863, |
|
"rewards/margins": 13.270780563354492, |
|
"rewards/margins_max": 20.773021697998047, |
|
"rewards/margins_min": 5.7685441970825195, |
|
"rewards/margins_std": 10.60976791381836, |
|
"rewards/rejected": -15.99760913848877, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.194941672606131e-07, |
|
"logits/chosen": -1.5394561290740967, |
|
"logits/rejected": -1.3444119691848755, |
|
"logps/chosen": -449.86639404296875, |
|
"logps/rejected": -1816.393310546875, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6189093589782715, |
|
"rewards/margins": 13.65289306640625, |
|
"rewards/margins_max": 22.08847999572754, |
|
"rewards/margins_min": 5.217305660247803, |
|
"rewards/margins_std": 11.92972183227539, |
|
"rewards/rejected": -16.271804809570312, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.10498046875, |
|
"learning_rate": 6.01487708363232e-07, |
|
"logits/chosen": -1.5326162576675415, |
|
"logits/rejected": -1.2514145374298096, |
|
"logps/chosen": -559.52685546875, |
|
"logps/rejected": -1785.101806640625, |
|
"loss": 0.1695, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.237330675125122, |
|
"rewards/margins": 12.128488540649414, |
|
"rewards/margins_max": 17.351224899291992, |
|
"rewards/margins_min": 6.905752658843994, |
|
"rewards/margins_std": 7.386063575744629, |
|
"rewards/rejected": -15.365817070007324, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 5.837109660624607e-07, |
|
"logits/chosen": -1.5613453388214111, |
|
"logits/rejected": -1.2829174995422363, |
|
"logps/chosen": -627.0020141601562, |
|
"logps/rejected": -1759.3873291015625, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.9718761444091797, |
|
"rewards/margins": 11.263439178466797, |
|
"rewards/margins_max": 16.407276153564453, |
|
"rewards/margins_min": 6.119601249694824, |
|
"rewards/margins_std": 7.274483680725098, |
|
"rewards/rejected": -15.235315322875977, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 5.661660913123673e-07, |
|
"logits/chosen": -1.7146743535995483, |
|
"logits/rejected": -1.536327600479126, |
|
"logps/chosen": -526.9888305664062, |
|
"logps/rejected": -1644.693603515625, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.136125087738037, |
|
"rewards/margins": 11.309582710266113, |
|
"rewards/margins_max": 17.983318328857422, |
|
"rewards/margins_min": 4.63584566116333, |
|
"rewards/margins_std": 9.438088417053223, |
|
"rewards/rejected": -14.445707321166992, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 5.488552070114633e-07, |
|
"logits/chosen": -1.413991928100586, |
|
"logits/rejected": -1.246726155281067, |
|
"logps/chosen": -629.1475219726562, |
|
"logps/rejected": -2350.851318359375, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.112383842468262, |
|
"rewards/margins": 17.097023010253906, |
|
"rewards/margins_max": 28.80392837524414, |
|
"rewards/margins_min": 5.390118598937988, |
|
"rewards/margins_std": 16.556058883666992, |
|
"rewards/rejected": -21.209407806396484, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.75, |
|
"learning_rate": 5.317804077458324e-07, |
|
"logits/chosen": -1.5566442012786865, |
|
"logits/rejected": -1.168034315109253, |
|
"logps/chosen": -623.1751708984375, |
|
"logps/rejected": -1723.6129150390625, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.3873374462127686, |
|
"rewards/margins": 11.160648345947266, |
|
"rewards/margins_max": 18.534343719482422, |
|
"rewards/margins_min": 3.7869505882263184, |
|
"rewards/margins_std": 10.427982330322266, |
|
"rewards/rejected": -14.54798698425293, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 5.149437595356902e-07, |
|
"logits/chosen": -1.4799290895462036, |
|
"logits/rejected": -1.1635984182357788, |
|
"logps/chosen": -605.4791259765625, |
|
"logps/rejected": -1606.4259033203125, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5745937824249268, |
|
"rewards/margins": 10.339719772338867, |
|
"rewards/margins_max": 16.66402816772461, |
|
"rewards/margins_min": 4.015409469604492, |
|
"rewards/margins_std": 8.943924903869629, |
|
"rewards/rejected": -13.914312362670898, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 4.983472995854003e-07, |
|
"logits/chosen": -1.4681260585784912, |
|
"logits/rejected": -1.147685170173645, |
|
"logps/chosen": -592.9346923828125, |
|
"logps/rejected": -1304.0279541015625, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.336982250213623, |
|
"rewards/margins": 7.98565673828125, |
|
"rewards/margins_max": 14.44367504119873, |
|
"rewards/margins_min": 1.5276384353637695, |
|
"rewards/margins_std": 9.133016586303711, |
|
"rewards/rejected": -11.322638511657715, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 4.819930360369762e-07, |
|
"logits/chosen": -1.5900928974151611, |
|
"logits/rejected": -1.4449065923690796, |
|
"logps/chosen": -561.0192260742188, |
|
"logps/rejected": -1929.9560546875, |
|
"loss": 0.1423, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.454263687133789, |
|
"rewards/margins": 13.343027114868164, |
|
"rewards/margins_max": 23.15623664855957, |
|
"rewards/margins_min": 3.529816150665283, |
|
"rewards/margins_std": 13.877975463867188, |
|
"rewards/rejected": -16.797290802001953, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.75, |
|
"learning_rate": 4.658829477270996e-07, |
|
"logits/chosen": -1.6144227981567383, |
|
"logits/rejected": -1.315969467163086, |
|
"logps/chosen": -544.9019775390625, |
|
"logps/rejected": -1778.4368896484375, |
|
"loss": 0.2357, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.350193738937378, |
|
"rewards/margins": 11.645623207092285, |
|
"rewards/margins_max": 17.396743774414062, |
|
"rewards/margins_min": 5.89450216293335, |
|
"rewards/margins_std": 8.133313179016113, |
|
"rewards/rejected": -14.995816230773926, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 4.5001898394768346e-07, |
|
"logits/chosen": -1.5682995319366455, |
|
"logits/rejected": -1.4007831811904907, |
|
"logps/chosen": -547.2261352539062, |
|
"logps/rejected": -1444.755126953125, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3274428844451904, |
|
"rewards/margins": 9.016876220703125, |
|
"rewards/margins_max": 14.168905258178711, |
|
"rewards/margins_min": 3.8648476600646973, |
|
"rewards/margins_std": 7.286067962646484, |
|
"rewards/rejected": -12.344320297241211, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.344030642100133e-07, |
|
"logits/chosen": -1.7077690362930298, |
|
"logits/rejected": -1.598008632659912, |
|
"logps/chosen": -611.0703125, |
|
"logps/rejected": -2115.15673828125, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.431548595428467, |
|
"rewards/margins": 15.340169906616211, |
|
"rewards/margins_max": 25.29729461669922, |
|
"rewards/margins_min": 5.383046627044678, |
|
"rewards/margins_std": 14.081500053405762, |
|
"rewards/rejected": -18.77172088623047, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.190370780124864e-07, |
|
"logits/chosen": -1.393789291381836, |
|
"logits/rejected": -1.2228848934173584, |
|
"logps/chosen": -481.9505310058594, |
|
"logps/rejected": -1663.0035400390625, |
|
"loss": 0.1675, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.922569513320923, |
|
"rewards/margins": 11.292458534240723, |
|
"rewards/margins_max": 18.119579315185547, |
|
"rewards/margins_min": 4.46533727645874, |
|
"rewards/margins_std": 9.655006408691406, |
|
"rewards/rejected": -14.215028762817383, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 4.0392288461199053e-07, |
|
"logits/chosen": -1.3990025520324707, |
|
"logits/rejected": -1.3038866519927979, |
|
"logps/chosen": -567.4427490234375, |
|
"logps/rejected": -2022.3857421875, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3987483978271484, |
|
"rewards/margins": 14.682683944702148, |
|
"rewards/margins_max": 23.64312744140625, |
|
"rewards/margins_min": 5.722237586975098, |
|
"rewards/margins_std": 12.671981811523438, |
|
"rewards/rejected": -18.081430435180664, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 5.625, |
|
"learning_rate": 3.8906231279893433e-07, |
|
"logits/chosen": -1.3695005178451538, |
|
"logits/rejected": -1.2840726375579834, |
|
"logps/chosen": -545.4678955078125, |
|
"logps/rejected": -2054.255126953125, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.088050365447998, |
|
"rewards/margins": 15.608365058898926, |
|
"rewards/margins_max": 25.990718841552734, |
|
"rewards/margins_min": 5.226010322570801, |
|
"rewards/margins_std": 14.682866096496582, |
|
"rewards/rejected": -18.696414947509766, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 3.7445716067596506e-07, |
|
"logits/chosen": -1.4935780763626099, |
|
"logits/rejected": -1.2695616483688354, |
|
"logps/chosen": -499.6849060058594, |
|
"logps/rejected": -1571.749755859375, |
|
"loss": 0.1548, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.89444637298584, |
|
"rewards/margins": 10.862527847290039, |
|
"rewards/margins_max": 18.293880462646484, |
|
"rewards/margins_min": 3.4311795234680176, |
|
"rewards/margins_std": 10.509515762329102, |
|
"rewards/rejected": -13.756976127624512, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 3.601091954404062e-07, |
|
"logits/chosen": -1.3411470651626587, |
|
"logits/rejected": -1.1427286863327026, |
|
"logps/chosen": -603.7765502929688, |
|
"logps/rejected": -1911.207763671875, |
|
"loss": 0.1676, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6270432472229004, |
|
"rewards/margins": 13.066546440124512, |
|
"rewards/margins_max": 19.609230041503906, |
|
"rewards/margins_min": 6.523860931396484, |
|
"rewards/margins_std": 9.252754211425781, |
|
"rewards/rejected": -16.693588256835938, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.460201531704263e-07, |
|
"logits/chosen": -1.5328198671340942, |
|
"logits/rejected": -1.062266230583191, |
|
"logps/chosen": -700.7471313476562, |
|
"logps/rejected": -1610.164794921875, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.032205104827881, |
|
"rewards/margins": 10.639944076538086, |
|
"rewards/margins_max": 14.988049507141113, |
|
"rewards/margins_min": 6.29184103012085, |
|
"rewards/margins_std": 6.149147987365723, |
|
"rewards/rejected": -13.672149658203125, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 3.321917386149773e-07, |
|
"logits/chosen": -1.56075119972229, |
|
"logits/rejected": -1.2586215734481812, |
|
"logps/chosen": -492.829345703125, |
|
"logps/rejected": -1485.7462158203125, |
|
"loss": 0.1458, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.7940220832824707, |
|
"rewards/margins": 9.927345275878906, |
|
"rewards/margins_max": 17.084228515625, |
|
"rewards/margins_min": 2.7704620361328125, |
|
"rewards/margins_std": 10.121360778808594, |
|
"rewards/rejected": -12.721366882324219, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 3.186256249875236e-07, |
|
"logits/chosen": -1.5891391038894653, |
|
"logits/rejected": -1.3650810718536377, |
|
"logps/chosen": -464.7984313964844, |
|
"logps/rejected": -1371.1409912109375, |
|
"loss": 0.3338, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.6813507080078125, |
|
"rewards/margins": 8.95352554321289, |
|
"rewards/margins_max": 13.174215316772461, |
|
"rewards/margins_min": 4.732832908630371, |
|
"rewards/margins_std": 5.968959808349609, |
|
"rewards/rejected": -11.634876251220703, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 3.0532345376358577e-07, |
|
"logits/chosen": -1.654157280921936, |
|
"logits/rejected": -1.3361161947250366, |
|
"logps/chosen": -496.3033752441406, |
|
"logps/rejected": -1795.9544677734375, |
|
"loss": 0.1439, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.100607395172119, |
|
"rewards/margins": 12.40422248840332, |
|
"rewards/margins_max": 21.095714569091797, |
|
"rewards/margins_min": 3.7127292156219482, |
|
"rewards/margins_std": 12.291626930236816, |
|
"rewards/rejected": -15.504827499389648, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 2.922868344821236e-07, |
|
"logits/chosen": -1.454085350036621, |
|
"logits/rejected": -1.0795207023620605, |
|
"logps/chosen": -527.0432739257812, |
|
"logps/rejected": -1645.1402587890625, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0249111652374268, |
|
"rewards/margins": 11.555765151977539, |
|
"rewards/margins_max": 18.297351837158203, |
|
"rewards/margins_min": 4.814180850982666, |
|
"rewards/margins_std": 9.534041404724121, |
|
"rewards/rejected": -14.580678939819336, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 2.795173445507879e-07, |
|
"logits/chosen": -1.640484094619751, |
|
"logits/rejected": -1.2281348705291748, |
|
"logps/chosen": -674.6544799804688, |
|
"logps/rejected": -2076.06591796875, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.167102813720703, |
|
"rewards/margins": 13.986814498901367, |
|
"rewards/margins_max": 21.08889389038086, |
|
"rewards/margins_min": 6.884737968444824, |
|
"rewards/margins_std": 10.043853759765625, |
|
"rewards/rejected": -18.153919219970703, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 2.670165290550544e-07, |
|
"logits/chosen": -1.5252046585083008, |
|
"logits/rejected": -1.1835720539093018, |
|
"logps/chosen": -551.7774658203125, |
|
"logps/rejected": -1764.0374755859375, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3922817707061768, |
|
"rewards/margins": 12.13778305053711, |
|
"rewards/margins_max": 18.606740951538086, |
|
"rewards/margins_min": 5.668824195861816, |
|
"rewards/margins_std": 9.148488998413086, |
|
"rewards/rejected": -15.530064582824707, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 2.547859005712727e-07, |
|
"logits/chosen": -1.5535533428192139, |
|
"logits/rejected": -1.2417479753494263, |
|
"logps/chosen": -521.0472412109375, |
|
"logps/rejected": -1648.343505859375, |
|
"loss": 0.1712, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0611438751220703, |
|
"rewards/margins": 11.504739761352539, |
|
"rewards/margins_max": 19.227418899536133, |
|
"rewards/margins_min": 3.7820606231689453, |
|
"rewards/margins_std": 10.921515464782715, |
|
"rewards/rejected": -14.565884590148926, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 2.4282693898364435e-07, |
|
"logits/chosen": -1.5507018566131592, |
|
"logits/rejected": -1.2115910053253174, |
|
"logps/chosen": -471.463134765625, |
|
"logps/rejected": -1789.766357421875, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9074912071228027, |
|
"rewards/margins": 13.214210510253906, |
|
"rewards/margins_max": 19.92336654663086, |
|
"rewards/margins_min": 6.505056858062744, |
|
"rewards/margins_std": 9.488176345825195, |
|
"rewards/rejected": -16.121702194213867, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 2.3114109130516427e-07, |
|
"logits/chosen": -1.488516092300415, |
|
"logits/rejected": -1.2162216901779175, |
|
"logps/chosen": -559.1824340820312, |
|
"logps/rejected": -1675.4085693359375, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.735757350921631, |
|
"rewards/margins": 10.952620506286621, |
|
"rewards/margins_max": 17.202655792236328, |
|
"rewards/margins_min": 4.702585220336914, |
|
"rewards/margins_std": 8.838884353637695, |
|
"rewards/rejected": -14.688379287719727, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 2.1972977150253066e-07, |
|
"logits/chosen": -1.6375281810760498, |
|
"logits/rejected": -1.2000789642333984, |
|
"logps/chosen": -563.3726806640625, |
|
"logps/rejected": -2449.58251953125, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.116568088531494, |
|
"rewards/margins": 18.522953033447266, |
|
"rewards/margins_max": 31.61623191833496, |
|
"rewards/margins_min": 5.429670810699463, |
|
"rewards/margins_std": 18.516695022583008, |
|
"rewards/rejected": -21.63951873779297, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.0859436032505954e-07, |
|
"logits/chosen": -1.5669145584106445, |
|
"logits/rejected": -1.4212372303009033, |
|
"logps/chosen": -529.2255859375, |
|
"logps/rejected": -2067.349853515625, |
|
"loss": 0.2644, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3644447326660156, |
|
"rewards/margins": 15.288610458374023, |
|
"rewards/margins_max": 23.236772537231445, |
|
"rewards/margins_min": 7.340449333190918, |
|
"rewards/margins_std": 11.240400314331055, |
|
"rewards/rejected": -18.65305519104004, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.9773620513761582e-07, |
|
"logits/chosen": -1.544468641281128, |
|
"logits/rejected": -1.2507742643356323, |
|
"logps/chosen": -479.28717041015625, |
|
"logps/rejected": -1413.136474609375, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.6804263591766357, |
|
"rewards/margins": 9.630419731140137, |
|
"rewards/margins_max": 16.187856674194336, |
|
"rewards/margins_min": 3.072981834411621, |
|
"rewards/margins_std": 9.273618698120117, |
|
"rewards/rejected": -12.310846328735352, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 5.375, |
|
"learning_rate": 1.8715661975758524e-07, |
|
"logits/chosen": -1.3692967891693115, |
|
"logits/rejected": -1.2908586263656616, |
|
"logps/chosen": -471.1861877441406, |
|
"logps/rejected": -1936.43359375, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0035228729248047, |
|
"rewards/margins": 13.9976167678833, |
|
"rewards/margins_max": 23.008455276489258, |
|
"rewards/margins_min": 4.9867753982543945, |
|
"rewards/margins_std": 12.74325180053711, |
|
"rewards/rejected": -17.001136779785156, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 1.768568842959037e-07, |
|
"logits/chosen": -1.56259286403656, |
|
"logits/rejected": -1.225875735282898, |
|
"logps/chosen": -666.2752685546875, |
|
"logps/rejected": -2096.91845703125, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.032459735870361, |
|
"rewards/margins": 14.605703353881836, |
|
"rewards/margins_max": 19.857891082763672, |
|
"rewards/margins_min": 9.353516578674316, |
|
"rewards/margins_std": 7.4277143478393555, |
|
"rewards/rejected": -18.63816261291504, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 1.6683824500216662e-07, |
|
"logits/chosen": -1.4535073041915894, |
|
"logits/rejected": -1.2454532384872437, |
|
"logps/chosen": -480.98455810546875, |
|
"logps/rejected": -1676.016845703125, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.708672285079956, |
|
"rewards/margins": 12.352299690246582, |
|
"rewards/margins_max": 20.519662857055664, |
|
"rewards/margins_min": 4.184937477111816, |
|
"rewards/margins_std": 11.550395011901855, |
|
"rewards/rejected": -15.060972213745117, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 1.5710191411383663e-07, |
|
"logits/chosen": -1.5043429136276245, |
|
"logits/rejected": -1.3213595151901245, |
|
"logps/chosen": -480.7269592285156, |
|
"logps/rejected": -1419.52197265625, |
|
"loss": 0.2321, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0613503456115723, |
|
"rewards/margins": 9.34998607635498, |
|
"rewards/margins_max": 15.409167289733887, |
|
"rewards/margins_min": 3.2908051013946533, |
|
"rewards/margins_std": 8.568977355957031, |
|
"rewards/rejected": -12.411336898803711, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 1.4764906970956145e-07, |
|
"logits/chosen": -1.5083320140838623, |
|
"logits/rejected": -1.259227991104126, |
|
"logps/chosen": -497.8409118652344, |
|
"logps/rejected": -1558.669677734375, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.007861375808716, |
|
"rewards/margins": 10.634368896484375, |
|
"rewards/margins_max": 15.329747200012207, |
|
"rewards/margins_min": 5.938992977142334, |
|
"rewards/margins_std": 6.640265464782715, |
|
"rewards/rejected": -13.642231941223145, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.38480855566632e-07, |
|
"logits/chosen": -1.4352874755859375, |
|
"logits/rejected": -1.1588048934936523, |
|
"logps/chosen": -626.794677734375, |
|
"logps/rejected": -1690.177490234375, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.555635452270508, |
|
"rewards/margins": 11.343018531799316, |
|
"rewards/margins_max": 18.7399845123291, |
|
"rewards/margins_min": 3.9460530281066895, |
|
"rewards/margins_std": 10.460888862609863, |
|
"rewards/rejected": -14.898653030395508, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 11.125, |
|
"learning_rate": 1.2959838102258537e-07, |
|
"logits/chosen": -1.5202867984771729, |
|
"logits/rejected": -1.2198843955993652, |
|
"logps/chosen": -686.660888671875, |
|
"logps/rejected": -1677.017333984375, |
|
"loss": 0.2712, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.959693431854248, |
|
"rewards/margins": 10.280670166015625, |
|
"rewards/margins_max": 15.942033767700195, |
|
"rewards/margins_min": 4.619304656982422, |
|
"rewards/margins_std": 8.006379127502441, |
|
"rewards/rejected": -14.240364074707031, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.210027208409778e-07, |
|
"logits/chosen": -1.4056593179702759, |
|
"logits/rejected": -1.3017997741699219, |
|
"logps/chosen": -444.84088134765625, |
|
"logps/rejected": -2359.341796875, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.568777084350586, |
|
"rewards/margins": 18.554290771484375, |
|
"rewards/margins_max": 29.505813598632812, |
|
"rewards/margins_min": 7.6027679443359375, |
|
"rewards/margins_std": 15.48779296875, |
|
"rewards/rejected": -21.123065948486328, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.1269491508133945e-07, |
|
"logits/chosen": -1.6786344051361084, |
|
"logits/rejected": -1.1672778129577637, |
|
"logps/chosen": -641.666259765625, |
|
"logps/rejected": -1821.4290771484375, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2502269744873047, |
|
"rewards/margins": 12.766157150268555, |
|
"rewards/margins_max": 18.502321243286133, |
|
"rewards/margins_min": 7.029994010925293, |
|
"rewards/margins_std": 8.112160682678223, |
|
"rewards/rejected": -16.01638412475586, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.046759689733301e-07, |
|
"logits/chosen": -1.5015394687652588, |
|
"logits/rejected": -1.077661156654358, |
|
"logps/chosen": -610.1318359375, |
|
"logps/rejected": -1724.026123046875, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.742875337600708, |
|
"rewards/margins": 11.29780101776123, |
|
"rewards/margins_max": 18.355480194091797, |
|
"rewards/margins_min": 4.240123748779297, |
|
"rewards/margins_std": 9.981063842773438, |
|
"rewards/rejected": -15.040678024291992, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 9.694685279510674e-08, |
|
"logits/chosen": -1.4848477840423584, |
|
"logits/rejected": -1.453749656677246, |
|
"logps/chosen": -521.6805419921875, |
|
"logps/rejected": -1747.800537109375, |
|
"loss": 0.2553, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3396506309509277, |
|
"rewards/margins": 11.84017276763916, |
|
"rewards/margins_max": 18.36767578125, |
|
"rewards/margins_min": 5.312668800354004, |
|
"rewards/margins_std": 9.231285095214844, |
|
"rewards/rejected": -15.179824829101562, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 8.950850175592329e-08, |
|
"logits/chosen": -1.5855342149734497, |
|
"logits/rejected": -1.3711344003677368, |
|
"logps/chosen": -523.5225219726562, |
|
"logps/rejected": -1761.386962890625, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.866356134414673, |
|
"rewards/margins": 12.065530776977539, |
|
"rewards/margins_max": 19.174335479736328, |
|
"rewards/margins_min": 4.956724166870117, |
|
"rewards/margins_std": 10.053369522094727, |
|
"rewards/rejected": -14.931884765625, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 8.236181588297115e-08, |
|
"logits/chosen": -1.4377137422561646, |
|
"logits/rejected": -1.2142530679702759, |
|
"logps/chosen": -591.1087646484375, |
|
"logps/rejected": -2312.2509765625, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.312687397003174, |
|
"rewards/margins": 16.7128963470459, |
|
"rewards/margins_max": 25.945785522460938, |
|
"rewards/margins_min": 7.480005741119385, |
|
"rewards/margins_std": 13.057278633117676, |
|
"rewards/rejected": -20.025583267211914, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 7.550765991247655e-08, |
|
"logits/chosen": -1.5337202548980713, |
|
"logits/rejected": -1.193544626235962, |
|
"logps/chosen": -589.0224609375, |
|
"logps/rejected": -1560.7672119140625, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.4798824787139893, |
|
"rewards/margins": 9.967602729797363, |
|
"rewards/margins_max": 15.469675064086914, |
|
"rewards/margins_min": 4.465529918670654, |
|
"rewards/margins_std": 7.781105041503906, |
|
"rewards/rejected": -13.447484970092773, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 6.894686318507066e-08, |
|
"logits/chosen": -1.5093646049499512, |
|
"logits/rejected": -1.3388113975524902, |
|
"logps/chosen": -513.3501586914062, |
|
"logps/rejected": -1921.223388671875, |
|
"loss": 0.1717, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.021483898162842, |
|
"rewards/margins": 13.66453742980957, |
|
"rewards/margins_max": 22.030460357666016, |
|
"rewards/margins_min": 5.2986159324646, |
|
"rewards/margins_std": 11.831199645996094, |
|
"rewards/rejected": -16.68602180480957, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 6.268021954544095e-08, |
|
"logits/chosen": -1.2859686613082886, |
|
"logits/rejected": -1.2988886833190918, |
|
"logps/chosen": -521.9822387695312, |
|
"logps/rejected": -2064.39013671875, |
|
"loss": 0.3054, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2040743827819824, |
|
"rewards/margins": 14.57214069366455, |
|
"rewards/margins_max": 24.986371994018555, |
|
"rewards/margins_min": 4.15791130065918, |
|
"rewards/margins_std": 14.727948188781738, |
|
"rewards/rejected": -17.776216506958008, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.5, |
|
"learning_rate": 5.670848724627531e-08, |
|
"logits/chosen": -1.6210033893585205, |
|
"logits/rejected": -1.280139684677124, |
|
"logps/chosen": -725.0421142578125, |
|
"logps/rejected": -1452.880859375, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.209847450256348, |
|
"rewards/margins": 8.341692924499512, |
|
"rewards/margins_max": 12.187960624694824, |
|
"rewards/margins_min": 4.495427131652832, |
|
"rewards/margins_std": 5.4394426345825195, |
|
"rewards/rejected": -12.55154037475586, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.765625, |
|
"learning_rate": 5.103238885651618e-08, |
|
"logits/chosen": -1.5707718133926392, |
|
"logits/rejected": -1.2055985927581787, |
|
"logps/chosen": -536.3211059570312, |
|
"logps/rejected": -1798.921142578125, |
|
"loss": 0.1463, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.944525718688965, |
|
"rewards/margins": 12.846099853515625, |
|
"rewards/margins_max": 19.72370147705078, |
|
"rewards/margins_min": 5.968496322631836, |
|
"rewards/margins_std": 9.726399421691895, |
|
"rewards/rejected": -15.790626525878906, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.125, |
|
"learning_rate": 4.5652611173932495e-08, |
|
"logits/chosen": -1.715071439743042, |
|
"logits/rejected": -1.393758773803711, |
|
"logps/chosen": -574.81689453125, |
|
"logps/rejected": -1423.7811279296875, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.3229072093963623, |
|
"rewards/margins": 8.942388534545898, |
|
"rewards/margins_max": 14.589694023132324, |
|
"rewards/margins_min": 3.2950832843780518, |
|
"rewards/margins_std": 7.986494541168213, |
|
"rewards/rejected": -12.265295028686523, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 4.0569805142014476e-08, |
|
"logits/chosen": -1.424754023551941, |
|
"logits/rejected": -1.2524802684783936, |
|
"logps/chosen": -531.279296875, |
|
"logps/rejected": -1886.6539306640625, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.2407214641571045, |
|
"rewards/margins": 13.494546890258789, |
|
"rewards/margins_max": 21.47197723388672, |
|
"rewards/margins_min": 5.517115116119385, |
|
"rewards/margins_std": 11.281791687011719, |
|
"rewards/rejected": -16.735267639160156, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 3.578458577121524e-08, |
|
"logits/chosen": -1.4714146852493286, |
|
"logits/rejected": -1.2563064098358154, |
|
"logps/chosen": -528.0980224609375, |
|
"logps/rejected": -1789.8597412109375, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.484973192214966, |
|
"rewards/margins": 12.645235061645508, |
|
"rewards/margins_max": 18.291662216186523, |
|
"rewards/margins_min": 6.998806953430176, |
|
"rewards/margins_std": 7.985255241394043, |
|
"rewards/rejected": -16.13020896911621, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 3.129753206453201e-08, |
|
"logits/chosen": -1.6428391933441162, |
|
"logits/rejected": -1.2820537090301514, |
|
"logps/chosen": -585.875, |
|
"logps/rejected": -1616.100830078125, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.469578981399536, |
|
"rewards/margins": 10.344371795654297, |
|
"rewards/margins_max": 15.5939359664917, |
|
"rewards/margins_min": 5.094809532165527, |
|
"rewards/margins_std": 7.424003601074219, |
|
"rewards/rejected": -13.81395149230957, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.710918694744935e-08, |
|
"logits/chosen": -1.6528517007827759, |
|
"logits/rejected": -1.471823811531067, |
|
"logps/chosen": -563.698486328125, |
|
"logps/rejected": -1610.182373046875, |
|
"loss": 0.2088, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.7492880821228027, |
|
"rewards/margins": 10.301797866821289, |
|
"rewards/margins_max": 16.012958526611328, |
|
"rewards/margins_min": 4.59063720703125, |
|
"rewards/margins_std": 8.076800346374512, |
|
"rewards/rejected": -14.05108642578125, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 2.3220057202246183e-08, |
|
"logits/chosen": -1.333348035812378, |
|
"logits/rejected": -1.0714836120605469, |
|
"logps/chosen": -471.2120666503906, |
|
"logps/rejected": -1886.3255615234375, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9049346446990967, |
|
"rewards/margins": 13.638298034667969, |
|
"rewards/margins_max": 22.545673370361328, |
|
"rewards/margins_min": 4.730922698974609, |
|
"rewards/margins_std": 12.596931457519531, |
|
"rewards/rejected": -16.543231964111328, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.963061340667677e-08, |
|
"logits/chosen": -1.5092668533325195, |
|
"logits/rejected": -1.361816167831421, |
|
"logps/chosen": -466.8147888183594, |
|
"logps/rejected": -1187.789794921875, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5971789360046387, |
|
"rewards/margins": 7.532545566558838, |
|
"rewards/margins_max": 11.385088920593262, |
|
"rewards/margins_min": 3.680001735687256, |
|
"rewards/margins_std": 5.448319435119629, |
|
"rewards/rejected": -10.129724502563477, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.6341289877028488e-08, |
|
"logits/chosen": -1.329237699508667, |
|
"logits/rejected": -1.1558626890182495, |
|
"logps/chosen": -491.35076904296875, |
|
"logps/rejected": -2207.8818359375, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.6653168201446533, |
|
"rewards/margins": 17.251855850219727, |
|
"rewards/margins_max": 26.020004272460938, |
|
"rewards/margins_min": 8.483704566955566, |
|
"rewards/margins_std": 12.400036811828613, |
|
"rewards/rejected": -19.917171478271484, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 1.3352484615574701e-08, |
|
"logits/chosen": -1.7151950597763062, |
|
"logits/rejected": -1.3515779972076416, |
|
"logps/chosen": -546.0389404296875, |
|
"logps/rejected": -1565.0789794921875, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.7018885612487793, |
|
"rewards/margins": 10.648792266845703, |
|
"rewards/margins_max": 16.55207061767578, |
|
"rewards/margins_min": 4.745510101318359, |
|
"rewards/margins_std": 8.348501205444336, |
|
"rewards/rejected": -13.350680351257324, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.0664559262413831e-08, |
|
"logits/chosen": -1.4656840562820435, |
|
"logits/rejected": -1.2670657634735107, |
|
"logps/chosen": -474.76611328125, |
|
"logps/rejected": -1649.747802734375, |
|
"loss": 0.1544, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5341649055480957, |
|
"rewards/margins": 12.128092765808105, |
|
"rewards/margins_max": 20.278675079345703, |
|
"rewards/margins_min": 3.977510452270508, |
|
"rewards/margins_std": 11.526663780212402, |
|
"rewards/rejected": -14.662257194519043, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 8.2778390517127e-09, |
|
"logits/chosen": -1.4516102075576782, |
|
"logits/rejected": -1.1890254020690918, |
|
"logps/chosen": -617.524658203125, |
|
"logps/rejected": -1620.347412109375, |
|
"loss": 0.096, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.6001343727111816, |
|
"rewards/margins": 10.086786270141602, |
|
"rewards/margins_max": 14.390159606933594, |
|
"rewards/margins_min": 5.783412933349609, |
|
"rewards/margins_std": 6.085890293121338, |
|
"rewards/rejected": -13.686922073364258, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.625, |
|
"learning_rate": 6.192612772354944e-09, |
|
"logits/chosen": -1.4516470432281494, |
|
"logits/rejected": -1.2507392168045044, |
|
"logps/chosen": -566.8466186523438, |
|
"logps/rejected": -2050.584228515625, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1200942993164062, |
|
"rewards/margins": 14.864712715148926, |
|
"rewards/margins_max": 24.433439254760742, |
|
"rewards/margins_min": 5.295987129211426, |
|
"rewards/margins_std": 13.532221794128418, |
|
"rewards/rejected": -17.984806060791016, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 4.409132732995647e-09, |
|
"logits/chosen": -1.5317922830581665, |
|
"logits/rejected": -1.300173282623291, |
|
"logps/chosen": -554.97509765625, |
|
"logps/rejected": -1691.675537109375, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6252989768981934, |
|
"rewards/margins": 11.502375602722168, |
|
"rewards/margins_max": 19.262727737426758, |
|
"rewards/margins_min": 3.7420215606689453, |
|
"rewards/margins_std": 10.9747953414917, |
|
"rewards/rejected": -15.127673149108887, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 2.927614731534356e-09, |
|
"logits/chosen": -1.5343540906906128, |
|
"logits/rejected": -1.3594461679458618, |
|
"logps/chosen": -524.830078125, |
|
"logps/rejected": -1884.637939453125, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.0702600479125977, |
|
"rewards/margins": 12.865870475769043, |
|
"rewards/margins_max": 19.372177124023438, |
|
"rewards/margins_min": 6.359560966491699, |
|
"rewards/margins_std": 9.201309204101562, |
|
"rewards/rejected": -15.936129570007324, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.7482380290034795e-09, |
|
"logits/chosen": -1.6508004665374756, |
|
"logits/rejected": -1.2689622640609741, |
|
"logps/chosen": -478.16522216796875, |
|
"logps/rejected": -1635.46044921875, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.865492343902588, |
|
"rewards/margins": 11.581151962280273, |
|
"rewards/margins_max": 17.636213302612305, |
|
"rewards/margins_min": 5.526092529296875, |
|
"rewards/margins_std": 8.563149452209473, |
|
"rewards/rejected": -14.44664478302002, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 8.711453278778537e-10, |
|
"logits/chosen": -1.439031720161438, |
|
"logits/rejected": -1.0773530006408691, |
|
"logps/chosen": -565.1245727539062, |
|
"logps/rejected": -1736.3665771484375, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.194706439971924, |
|
"rewards/margins": 12.026373863220215, |
|
"rewards/margins_max": 17.58086395263672, |
|
"rewards/margins_min": 6.471883296966553, |
|
"rewards/margins_std": 7.855236053466797, |
|
"rewards/rejected": -15.22107982635498, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 2.964427548077242e-10, |
|
"logits/chosen": -1.5736453533172607, |
|
"logits/rejected": -1.3564589023590088, |
|
"logps/chosen": -545.9297485351562, |
|
"logps/rejected": -1428.1328125, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3369319438934326, |
|
"rewards/margins": 9.009324073791504, |
|
"rewards/margins_max": 14.16156005859375, |
|
"rewards/margins_min": 3.857085704803467, |
|
"rewards/margins_std": 7.2863640785217285, |
|
"rewards/rejected": -12.3462553024292, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.25, |
|
"learning_rate": 2.419984777790596e-11, |
|
"logits/chosen": -1.4611544609069824, |
|
"logits/rejected": -1.0981907844543457, |
|
"logps/chosen": -558.7552490234375, |
|
"logps/rejected": -1721.875732421875, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.2622387409210205, |
|
"rewards/margins": 11.596755027770996, |
|
"rewards/margins_max": 18.492107391357422, |
|
"rewards/margins_min": 4.701401710510254, |
|
"rewards/margins_std": 9.751501083374023, |
|
"rewards/rejected": -14.858993530273438, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.1338366270065308, |
|
"eval_logits/rejected": -1.0143229961395264, |
|
"eval_logps/chosen": -640.5291137695312, |
|
"eval_logps/rejected": -746.0303955078125, |
|
"eval_loss": 0.935178279876709, |
|
"eval_rewards/accuracies": 0.6134999990463257, |
|
"eval_rewards/chosen": -3.155653715133667, |
|
"eval_rewards/margins": 1.1480075120925903, |
|
"eval_rewards/margins_max": 6.733797073364258, |
|
"eval_rewards/margins_min": -3.165346622467041, |
|
"eval_rewards/margins_std": 3.2318670749664307, |
|
"eval_rewards/rejected": -4.303661346435547, |
|
"eval_runtime": 1443.1499, |
|
"eval_samples_per_second": 2.772, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3174, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2596737848525537, |
|
"train_runtime": 26816.1062, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3174, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|