|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 400, |
|
"global_step": 234, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004273504273504274, |
|
"grad_norm": 13.34490065414622, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -0.0994097888469696, |
|
"logits/rejected": -0.05551636964082718, |
|
"logps/chosen": -0.3415659964084625, |
|
"logps/rejected": -0.47305911779403687, |
|
"loss": 1.6161, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8539150357246399, |
|
"rewards/margins": 0.32873278856277466, |
|
"rewards/rejected": -1.1826478242874146, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.021367521367521368, |
|
"grad_norm": 13.589601768721613, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.1462363749742508, |
|
"logits/rejected": -0.14229728281497955, |
|
"logps/chosen": -0.4933474361896515, |
|
"logps/rejected": -0.5222159624099731, |
|
"loss": 1.6602, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2333686351776123, |
|
"rewards/margins": 0.07217135280370712, |
|
"rewards/rejected": -1.3055399656295776, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.042735042735042736, |
|
"grad_norm": 8.87565340921273, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.06335792690515518, |
|
"logits/rejected": -0.02190782129764557, |
|
"logps/chosen": -0.42108288407325745, |
|
"logps/rejected": -0.4607582688331604, |
|
"loss": 1.6154, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.052707314491272, |
|
"rewards/margins": 0.09918837249279022, |
|
"rewards/rejected": -1.151895523071289, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 8.660979149745721, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -0.03177808225154877, |
|
"logits/rejected": -0.046066295355558395, |
|
"logps/chosen": -0.4381338953971863, |
|
"logps/rejected": -0.4405650198459625, |
|
"loss": 1.6068, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -1.095334768295288, |
|
"rewards/margins": 0.006077909376472235, |
|
"rewards/rejected": -1.1014125347137451, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 11.952248546593465, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.15588010847568512, |
|
"logits/rejected": -0.1342618316411972, |
|
"logps/chosen": -0.4571867883205414, |
|
"logps/rejected": -0.4777159094810486, |
|
"loss": 1.614, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1429669857025146, |
|
"rewards/margins": 0.0513228178024292, |
|
"rewards/rejected": -1.1942898035049438, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10683760683760683, |
|
"grad_norm": 7.754135247761182, |
|
"learning_rate": 9.999440509051367e-07, |
|
"logits/chosen": -0.11927574872970581, |
|
"logits/rejected": -0.09945164620876312, |
|
"logps/chosen": -0.4351424276828766, |
|
"logps/rejected": -0.4545895457267761, |
|
"loss": 1.578, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.0878560543060303, |
|
"rewards/margins": 0.048617832362651825, |
|
"rewards/rejected": -1.1364738941192627, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 7.538242001814111, |
|
"learning_rate": 9.979871469976195e-07, |
|
"logits/chosen": -0.10869207233190536, |
|
"logits/rejected": -0.10381748527288437, |
|
"logps/chosen": -0.34100422263145447, |
|
"logps/rejected": -0.3819103240966797, |
|
"loss": 1.5753, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8525105714797974, |
|
"rewards/margins": 0.10226528346538544, |
|
"rewards/rejected": -0.9547758102416992, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14957264957264957, |
|
"grad_norm": 12.195052142813049, |
|
"learning_rate": 9.932452969617607e-07, |
|
"logits/chosen": -0.1797598898410797, |
|
"logits/rejected": -0.08878383785486221, |
|
"logps/chosen": -0.39731845259666443, |
|
"logps/rejected": -0.4476427435874939, |
|
"loss": 1.5794, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.9932962656021118, |
|
"rewards/margins": 0.12581071257591248, |
|
"rewards/rejected": -1.1191068887710571, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 9.951467872149747, |
|
"learning_rate": 9.857450191464337e-07, |
|
"logits/chosen": -0.1144944578409195, |
|
"logits/rejected": -0.09615515172481537, |
|
"logps/chosen": -0.4008924067020416, |
|
"logps/rejected": -0.4632578492164612, |
|
"loss": 1.5425, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0022308826446533, |
|
"rewards/margins": 0.1559135764837265, |
|
"rewards/rejected": -1.158144474029541, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 9.233295836736795, |
|
"learning_rate": 9.755282581475767e-07, |
|
"logits/chosen": -0.13462205231189728, |
|
"logits/rejected": -0.11864318698644638, |
|
"logps/chosen": -0.38421568274497986, |
|
"logps/rejected": -0.4823899269104004, |
|
"loss": 1.563, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.9605391621589661, |
|
"rewards/margins": 0.24543562531471252, |
|
"rewards/rejected": -1.205974817276001, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.21367521367521367, |
|
"grad_norm": 8.910807590586288, |
|
"learning_rate": 9.626521502369983e-07, |
|
"logits/chosen": -0.19716130197048187, |
|
"logits/rejected": -0.15895314514636993, |
|
"logps/chosen": -0.37459030747413635, |
|
"logps/rejected": -0.4011107385158539, |
|
"loss": 1.5687, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9364757537841797, |
|
"rewards/margins": 0.06630120426416397, |
|
"rewards/rejected": -1.002776861190796, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23504273504273504, |
|
"grad_norm": 8.741144982685217, |
|
"learning_rate": 9.471887038331684e-07, |
|
"logits/chosen": -0.07297103852033615, |
|
"logits/rejected": -0.045126646757125854, |
|
"logps/chosen": -0.3764435052871704, |
|
"logps/rejected": -0.4278687834739685, |
|
"loss": 1.5595, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9411088228225708, |
|
"rewards/margins": 0.12856322526931763, |
|
"rewards/rejected": -1.0696719884872437, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 9.65813041875177, |
|
"learning_rate": 9.29224396800933e-07, |
|
"logits/chosen": -0.00632941210642457, |
|
"logits/rejected": -0.04972558468580246, |
|
"logps/chosen": -0.3871188163757324, |
|
"logps/rejected": -0.42081761360168457, |
|
"loss": 1.5433, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9677971005439758, |
|
"rewards/margins": 0.08424701541662216, |
|
"rewards/rejected": -1.052044153213501, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 9.22020584708003, |
|
"learning_rate": 9.088596928322157e-07, |
|
"logits/chosen": 0.052268654108047485, |
|
"logits/rejected": 0.07172085344791412, |
|
"logps/chosen": -0.5376943945884705, |
|
"logps/rejected": -0.6326268911361694, |
|
"loss": 1.5646, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.3442360162734985, |
|
"rewards/margins": 0.23733112215995789, |
|
"rewards/rejected": -1.5815670490264893, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.29914529914529914, |
|
"grad_norm": 13.413709386986495, |
|
"learning_rate": 8.862084796122997e-07, |
|
"logits/chosen": -0.014689329080283642, |
|
"logits/rejected": 0.023155853152275085, |
|
"logps/chosen": -0.4990696310997009, |
|
"logps/rejected": -0.5445122718811035, |
|
"loss": 1.5438, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2476739883422852, |
|
"rewards/margins": 0.11360664665699005, |
|
"rewards/rejected": -1.3612806797027588, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 11.427386040917398, |
|
"learning_rate": 8.613974319136957e-07, |
|
"logits/chosen": -0.13034021854400635, |
|
"logits/rejected": -0.12927956879138947, |
|
"logps/chosen": -0.41988080739974976, |
|
"logps/rejected": -0.5410174131393433, |
|
"loss": 1.5502, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.0497019290924072, |
|
"rewards/margins": 0.3028416335582733, |
|
"rewards/rejected": -1.352543592453003, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 8.049393765401897, |
|
"learning_rate": 8.34565303179429e-07, |
|
"logits/chosen": -0.028346195816993713, |
|
"logits/rejected": -0.03619036823511124, |
|
"logps/chosen": -0.45006662607192993, |
|
"logps/rejected": -0.5116376876831055, |
|
"loss": 1.5312, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.125166416168213, |
|
"rewards/margins": 0.15392769873142242, |
|
"rewards/rejected": -1.2790940999984741, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36324786324786323, |
|
"grad_norm": 10.027579862082366, |
|
"learning_rate": 8.058621495575031e-07, |
|
"logits/chosen": -0.14632529020309448, |
|
"logits/rejected": -0.09482773393392563, |
|
"logps/chosen": -0.4204605221748352, |
|
"logps/rejected": -0.5197293758392334, |
|
"loss": 1.5235, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.051151156425476, |
|
"rewards/margins": 0.24817219376564026, |
|
"rewards/rejected": -1.299323320388794, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 11.778153989300435, |
|
"learning_rate": 7.754484907260512e-07, |
|
"logits/chosen": -0.03493024781346321, |
|
"logits/rejected": -0.005648002028465271, |
|
"logps/chosen": -0.48695850372314453, |
|
"logps/rejected": -0.6510589718818665, |
|
"loss": 1.4999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2173962593078613, |
|
"rewards/margins": 0.4102511405944824, |
|
"rewards/rejected": -1.6276471614837646, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.405982905982906, |
|
"grad_norm": 9.302069462672513, |
|
"learning_rate": 7.434944122021836e-07, |
|
"logits/chosen": -0.03053552471101284, |
|
"logits/rejected": 0.008784343488514423, |
|
"logps/chosen": -0.48170527815818787, |
|
"logps/rejected": -0.7114425897598267, |
|
"loss": 1.4935, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2042630910873413, |
|
"rewards/margins": 0.5743432641029358, |
|
"rewards/rejected": -1.7786064147949219, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 15.155125489577449, |
|
"learning_rate": 7.101786141547828e-07, |
|
"logits/chosen": -0.04211338609457016, |
|
"logits/rejected": -0.033037807792425156, |
|
"logps/chosen": -0.5078177452087402, |
|
"logps/rejected": -0.5776672959327698, |
|
"loss": 1.515, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2695444822311401, |
|
"rewards/margins": 0.17462393641471863, |
|
"rewards/rejected": -1.4441683292388916, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44871794871794873, |
|
"grad_norm": 16.110970936557738, |
|
"learning_rate": 6.756874120406714e-07, |
|
"logits/chosen": -0.09111454337835312, |
|
"logits/rejected": -0.05689894035458565, |
|
"logps/chosen": -0.53609699010849, |
|
"logps/rejected": -0.6224747896194458, |
|
"loss": 1.5115, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.3402423858642578, |
|
"rewards/margins": 0.21594436466693878, |
|
"rewards/rejected": -1.5561867952346802, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4700854700854701, |
|
"grad_norm": 15.393371096801042, |
|
"learning_rate": 6.402136946530014e-07, |
|
"logits/chosen": -0.03849278762936592, |
|
"logits/rejected": 0.008776647970080376, |
|
"logps/chosen": -0.6499117016792297, |
|
"logps/rejected": -0.800051212310791, |
|
"loss": 1.5033, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6247793436050415, |
|
"rewards/margins": 0.3753485679626465, |
|
"rewards/rejected": -2.0001277923583984, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.49145299145299143, |
|
"grad_norm": 9.808767922675623, |
|
"learning_rate": 6.039558454088795e-07, |
|
"logits/chosen": -0.03172523155808449, |
|
"logits/rejected": -0.04225381836295128, |
|
"logps/chosen": -0.5940315127372742, |
|
"logps/rejected": -0.7107864618301392, |
|
"loss": 1.5234, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.4850788116455078, |
|
"rewards/margins": 0.29188722372055054, |
|
"rewards/rejected": -1.7769660949707031, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 11.007927364769593, |
|
"learning_rate": 5.671166329088277e-07, |
|
"logits/chosen": -0.15589627623558044, |
|
"logits/rejected": -0.14328333735466003, |
|
"logps/chosen": -0.5461645126342773, |
|
"logps/rejected": -0.6048396825790405, |
|
"loss": 1.4873, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.3654112815856934, |
|
"rewards/margins": 0.14668798446655273, |
|
"rewards/rejected": -1.512099027633667, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5341880341880342, |
|
"grad_norm": 13.40581329613937, |
|
"learning_rate": 5.299020769725171e-07, |
|
"logits/chosen": -0.1017662063241005, |
|
"logits/rejected": -0.05507459491491318, |
|
"logps/chosen": -0.5289615392684937, |
|
"logps/rejected": -0.6923493146896362, |
|
"loss": 1.5008, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.322403907775879, |
|
"rewards/margins": 0.4084695875644684, |
|
"rewards/rejected": -1.730873465538025, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 10.393815491734465, |
|
"learning_rate": 4.925202964923683e-07, |
|
"logits/chosen": -0.01592491939663887, |
|
"logits/rejected": 0.10016925632953644, |
|
"logps/chosen": -0.545333981513977, |
|
"logps/rejected": -0.6946025490760803, |
|
"loss": 1.4829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3633348941802979, |
|
"rewards/margins": 0.3731713891029358, |
|
"rewards/rejected": -1.7365062236785889, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 12.796199500434565, |
|
"learning_rate": 4.5518034554828327e-07, |
|
"logits/chosen": -0.15349504351615906, |
|
"logits/rejected": -0.0872931182384491, |
|
"logps/chosen": -0.5930777788162231, |
|
"logps/rejected": -0.7436810731887817, |
|
"loss": 1.5284, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.4826946258544922, |
|
"rewards/margins": 0.3765079379081726, |
|
"rewards/rejected": -1.8592026233673096, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5982905982905983, |
|
"grad_norm": 12.725430343830277, |
|
"learning_rate": 4.180910442924311e-07, |
|
"logits/chosen": -0.07290254533290863, |
|
"logits/rejected": -0.041871629655361176, |
|
"logps/chosen": -0.6430836915969849, |
|
"logps/rejected": -0.8283632397651672, |
|
"loss": 1.4923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.607709288597107, |
|
"rewards/margins": 0.4631989598274231, |
|
"rewards/rejected": -2.070908308029175, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6196581196581197, |
|
"grad_norm": 11.626811297979486, |
|
"learning_rate": 3.814598111422513e-07, |
|
"logits/chosen": -0.11089099943637848, |
|
"logits/rejected": -0.08382478356361389, |
|
"logps/chosen": -0.6920641660690308, |
|
"logps/rejected": -0.7230005264282227, |
|
"loss": 1.5009, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.7301604747772217, |
|
"rewards/margins": 0.07734106481075287, |
|
"rewards/rejected": -1.8075014352798462, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 10.775765508549604, |
|
"learning_rate": 3.454915028125263e-07, |
|
"logits/chosen": -0.045754365622997284, |
|
"logits/rejected": -0.01993427611887455, |
|
"logps/chosen": -0.6500915288925171, |
|
"logps/rejected": -0.7409160733222961, |
|
"loss": 1.4998, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.6252286434173584, |
|
"rewards/margins": 0.22706131637096405, |
|
"rewards/rejected": -1.8522899150848389, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6623931623931624, |
|
"grad_norm": 14.344149642351613, |
|
"learning_rate": 3.1038726867353583e-07, |
|
"logits/chosen": -0.06552598625421524, |
|
"logits/rejected": -0.047451216727495193, |
|
"logps/chosen": -0.5619519948959351, |
|
"logps/rejected": -0.7450312972068787, |
|
"loss": 1.5001, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4048798084259033, |
|
"rewards/margins": 0.4576982855796814, |
|
"rewards/rejected": -1.8625783920288086, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 15.714808233921314, |
|
"learning_rate": 2.763434258421836e-07, |
|
"logits/chosen": -0.033187855035066605, |
|
"logits/rejected": 0.04786054790019989, |
|
"logps/chosen": -0.5716092586517334, |
|
"logps/rejected": -0.7472088932991028, |
|
"loss": 1.4866, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4290231466293335, |
|
"rewards/margins": 0.4389989972114563, |
|
"rewards/rejected": -1.8680222034454346, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7051282051282052, |
|
"grad_norm": 12.685566519530033, |
|
"learning_rate": 2.4355036129704696e-07, |
|
"logits/chosen": 0.05670114606618881, |
|
"logits/rejected": 0.08839456737041473, |
|
"logps/chosen": -0.5581383109092712, |
|
"logps/rejected": -0.7727221250534058, |
|
"loss": 1.4701, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.395345687866211, |
|
"rewards/margins": 0.5364596247673035, |
|
"rewards/rejected": -1.9318053722381592, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7264957264957265, |
|
"grad_norm": 15.872452807830859, |
|
"learning_rate": 2.121914671571633e-07, |
|
"logits/chosen": 0.03589984029531479, |
|
"logits/rejected": 0.07213737815618515, |
|
"logps/chosen": -0.5885176658630371, |
|
"logps/rejected": -0.7095993757247925, |
|
"loss": 1.4749, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4712941646575928, |
|
"rewards/margins": 0.3027040958404541, |
|
"rewards/rejected": -1.773998498916626, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7478632478632479, |
|
"grad_norm": 10.782809870588457, |
|
"learning_rate": 1.824421150789106e-07, |
|
"logits/chosen": 0.08967064321041107, |
|
"logits/rejected": 0.07906897366046906, |
|
"logps/chosen": -0.7075825929641724, |
|
"logps/rejected": -0.8367185592651367, |
|
"loss": 1.4649, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.7689564228057861, |
|
"rewards/margins": 0.32283997535705566, |
|
"rewards/rejected": -2.091796398162842, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 19.45895258920461, |
|
"learning_rate": 1.5446867550656767e-07, |
|
"logits/chosen": 0.09190338104963303, |
|
"logits/rejected": 0.11659294366836548, |
|
"logps/chosen": -0.6805117130279541, |
|
"logps/rejected": -0.8247642517089844, |
|
"loss": 1.472, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.7012790441513062, |
|
"rewards/margins": 0.3606313467025757, |
|
"rewards/rejected": -2.061910629272461, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7905982905982906, |
|
"grad_norm": 12.591384251278834, |
|
"learning_rate": 1.284275872613028e-07, |
|
"logits/chosen": 0.09435538947582245, |
|
"logits/rejected": 0.0583331473171711, |
|
"logps/chosen": -0.6257596015930176, |
|
"logps/rejected": -0.7002542018890381, |
|
"loss": 1.4664, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.5643991231918335, |
|
"rewards/margins": 0.18623651564121246, |
|
"rewards/rejected": -1.7506357431411743, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.811965811965812, |
|
"grad_norm": 11.80568242770119, |
|
"learning_rate": 1.044644826718295e-07, |
|
"logits/chosen": 0.08234812319278717, |
|
"logits/rejected": 0.08470721542835236, |
|
"logps/chosen": -0.6872076988220215, |
|
"logps/rejected": -0.8852859735488892, |
|
"loss": 1.4845, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.7180191278457642, |
|
"rewards/margins": 0.4951957166194916, |
|
"rewards/rejected": -2.213214635848999, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 10.82000720224868, |
|
"learning_rate": 8.271337313934867e-08, |
|
"logits/chosen": 0.09420228004455566, |
|
"logits/rejected": 0.07983645796775818, |
|
"logps/chosen": -0.6325824856758118, |
|
"logps/rejected": -0.799116313457489, |
|
"loss": 1.4378, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.5814563035964966, |
|
"rewards/margins": 0.41633448004722595, |
|
"rewards/rejected": -1.9977906942367554, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 10.37262835016834, |
|
"learning_rate": 6.329589969143517e-08, |
|
"logits/chosen": -0.02971937693655491, |
|
"logits/rejected": -0.009283095598220825, |
|
"logps/chosen": -0.6813124418258667, |
|
"logps/rejected": -0.7548044323921204, |
|
"loss": 1.4691, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.703281044960022, |
|
"rewards/margins": 0.18373003602027893, |
|
"rewards/rejected": -1.887010931968689, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8760683760683761, |
|
"grad_norm": 16.81161049195176, |
|
"learning_rate": 4.6320652716067555e-08, |
|
"logits/chosen": 0.057117123156785965, |
|
"logits/rejected": 0.033341288566589355, |
|
"logps/chosen": -0.5831505656242371, |
|
"logps/rejected": -0.7539544701576233, |
|
"loss": 1.4888, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.457876443862915, |
|
"rewards/margins": 0.4270097315311432, |
|
"rewards/rejected": -1.8848861455917358, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 10.666184736871127, |
|
"learning_rate": 3.188256468013139e-08, |
|
"logits/chosen": 0.05842505767941475, |
|
"logits/rejected": 0.09373210370540619, |
|
"logps/chosen": -0.6092408299446106, |
|
"logps/rejected": -0.7288607954978943, |
|
"loss": 1.4636, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.523101806640625, |
|
"rewards/margins": 0.2990500330924988, |
|
"rewards/rejected": -1.8221518993377686, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9188034188034188, |
|
"grad_norm": 13.64833883057287, |
|
"learning_rate": 2.0062379228555525e-08, |
|
"logits/chosen": -0.03809903562068939, |
|
"logits/rejected": -0.026596253737807274, |
|
"logps/chosen": -0.6271125078201294, |
|
"logps/rejected": -0.6994240880012512, |
|
"loss": 1.4417, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.5677810907363892, |
|
"rewards/margins": 0.18077896535396576, |
|
"rewards/rejected": -1.7485601902008057, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9401709401709402, |
|
"grad_norm": 11.81936042932564, |
|
"learning_rate": 1.0926199633097154e-08, |
|
"logits/chosen": 0.06771688163280487, |
|
"logits/rejected": 0.06887342035770416, |
|
"logps/chosen": -0.6525846123695374, |
|
"logps/rejected": -0.7262920141220093, |
|
"loss": 1.4542, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.631461501121521, |
|
"rewards/margins": 0.18426847457885742, |
|
"rewards/rejected": -1.815730094909668, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 13.440051766867484, |
|
"learning_rate": 4.5251191160326495e-09, |
|
"logits/chosen": -0.03224249184131622, |
|
"logits/rejected": 0.010183418169617653, |
|
"logps/chosen": -0.6054807901382446, |
|
"logps/rejected": -0.7111561894416809, |
|
"loss": 1.4562, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.513702154159546, |
|
"rewards/margins": 0.26418834924697876, |
|
"rewards/rejected": -1.7778904438018799, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9829059829059829, |
|
"grad_norm": 11.45030232204693, |
|
"learning_rate": 8.949351161324225e-10, |
|
"logits/chosen": 0.15793287754058838, |
|
"logits/rejected": 0.19319342076778412, |
|
"logps/chosen": -0.8483369946479797, |
|
"logps/rejected": -0.9106278419494629, |
|
"loss": 1.4913, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.120842456817627, |
|
"rewards/margins": 0.1557270735502243, |
|
"rewards/rejected": -2.276569366455078, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 234, |
|
"total_flos": 0.0, |
|
"train_loss": 1.5168422256779468, |
|
"train_runtime": 9435.3453, |
|
"train_samples_per_second": 6.346, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 234, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|