phi3m0128-cds-0.8-kendall-onof-ofif-corr-max-2-simpo-max1500-default
/
checkpoint-600
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.5156854318865493, | |
"eval_steps": 50, | |
"global_step": 600, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.008594757198109154, | |
"grad_norm": 0.05167795345187187, | |
"learning_rate": 4.999451708687114e-06, | |
"logits/chosen": 15.084823608398438, | |
"logits/rejected": 15.218259811401367, | |
"logps/chosen": -0.3124043345451355, | |
"logps/rejected": -0.31854626536369324, | |
"loss": 0.9405, | |
"rewards/accuracies": 0.4375, | |
"rewards/chosen": -0.46860653162002563, | |
"rewards/margins": 0.009212849661707878, | |
"rewards/rejected": -0.47781938314437866, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.017189514396218308, | |
"grad_norm": 0.06444549560546875, | |
"learning_rate": 4.997807075247147e-06, | |
"logits/chosen": 14.565855026245117, | |
"logits/rejected": 14.914319038391113, | |
"logps/chosen": -0.28220412135124207, | |
"logps/rejected": -0.3605547249317169, | |
"loss": 0.9294, | |
"rewards/accuracies": 0.6000000238418579, | |
"rewards/chosen": -0.4233061671257019, | |
"rewards/margins": 0.11752591282129288, | |
"rewards/rejected": -0.5408320426940918, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.02578427159432746, | |
"grad_norm": 0.059900399297475815, | |
"learning_rate": 4.9950668210706795e-06, | |
"logits/chosen": 14.878230094909668, | |
"logits/rejected": 15.334558486938477, | |
"logps/chosen": -0.2837519347667694, | |
"logps/rejected": -0.320808470249176, | |
"loss": 0.9338, | |
"rewards/accuracies": 0.4625000059604645, | |
"rewards/chosen": -0.4256278872489929, | |
"rewards/margins": 0.05558476969599724, | |
"rewards/rejected": -0.48121267557144165, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.034379028792436615, | |
"grad_norm": 0.05459418520331383, | |
"learning_rate": 4.9912321481237616e-06, | |
"logits/chosen": 14.800946235656738, | |
"logits/rejected": 15.134121894836426, | |
"logps/chosen": -0.2971518635749817, | |
"logps/rejected": -0.3476788401603699, | |
"loss": 0.9202, | |
"rewards/accuracies": 0.4625000059604645, | |
"rewards/chosen": -0.4457278251647949, | |
"rewards/margins": 0.07579050213098526, | |
"rewards/rejected": -0.521518349647522, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"grad_norm": 0.05792691186070442, | |
"learning_rate": 4.986304738420684e-06, | |
"logits/chosen": 14.62980842590332, | |
"logits/rejected": 14.848493576049805, | |
"logps/chosen": -0.27511823177337646, | |
"logps/rejected": -0.32557612657546997, | |
"loss": 0.9213, | |
"rewards/accuracies": 0.550000011920929, | |
"rewards/chosen": -0.4126773774623871, | |
"rewards/margins": 0.07568677514791489, | |
"rewards/rejected": -0.48836421966552734, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"eval_logits/chosen": 14.195974349975586, | |
"eval_logits/rejected": 15.046167373657227, | |
"eval_logps/chosen": -0.27934810519218445, | |
"eval_logps/rejected": -0.3643363118171692, | |
"eval_loss": 0.9250189065933228, | |
"eval_rewards/accuracies": 0.557894766330719, | |
"eval_rewards/chosen": -0.4190221428871155, | |
"eval_rewards/margins": 0.1274823397397995, | |
"eval_rewards/rejected": -0.5465044379234314, | |
"eval_runtime": 26.0506, | |
"eval_samples_per_second": 28.905, | |
"eval_steps_per_second": 3.647, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.05156854318865492, | |
"grad_norm": 0.08806851506233215, | |
"learning_rate": 4.980286753286196e-06, | |
"logits/chosen": 14.311370849609375, | |
"logits/rejected": 15.19476318359375, | |
"logps/chosen": -0.26153135299682617, | |
"logps/rejected": -0.34108471870422363, | |
"loss": 0.9255, | |
"rewards/accuracies": 0.512499988079071, | |
"rewards/chosen": -0.39229699969291687, | |
"rewards/margins": 0.11933007091283798, | |
"rewards/rejected": -0.5116270780563354, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.060163300386764075, | |
"grad_norm": 0.10536951571702957, | |
"learning_rate": 4.973180832407471e-06, | |
"logits/chosen": 14.646909713745117, | |
"logits/rejected": 15.134190559387207, | |
"logps/chosen": -0.2928832173347473, | |
"logps/rejected": -0.37275972962379456, | |
"loss": 0.9155, | |
"rewards/accuracies": 0.512499988079071, | |
"rewards/chosen": -0.4393247961997986, | |
"rewards/margins": 0.11981481313705444, | |
"rewards/rejected": -0.559139609336853, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.06875805758487323, | |
"grad_norm": 0.07452531903982162, | |
"learning_rate": 4.964990092676263e-06, | |
"logits/chosen": 14.383807182312012, | |
"logits/rejected": 14.806958198547363, | |
"logps/chosen": -0.2724239230155945, | |
"logps/rejected": -0.33048146963119507, | |
"loss": 0.9191, | |
"rewards/accuracies": 0.48750001192092896, | |
"rewards/chosen": -0.4086359143257141, | |
"rewards/margins": 0.08708634227514267, | |
"rewards/rejected": -0.495722234249115, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.07735281478298238, | |
"grad_norm": 0.06996195018291473, | |
"learning_rate": 4.9557181268217225e-06, | |
"logits/chosen": 14.557902336120605, | |
"logits/rejected": 15.043550491333008, | |
"logps/chosen": -0.3053165078163147, | |
"logps/rejected": -0.36941051483154297, | |
"loss": 0.9255, | |
"rewards/accuracies": 0.5249999761581421, | |
"rewards/chosen": -0.45797473192214966, | |
"rewards/margins": 0.0961410254240036, | |
"rewards/rejected": -0.5541157126426697, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.08594757198109153, | |
"grad_norm": 0.09053988754749298, | |
"learning_rate": 4.9453690018345144e-06, | |
"logits/chosen": 13.747509956359863, | |
"logits/rejected": 14.678106307983398, | |
"logps/chosen": -0.2453141212463379, | |
"logps/rejected": -0.36430835723876953, | |
"loss": 0.9022, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.36797118186950684, | |
"rewards/margins": 0.17849135398864746, | |
"rewards/rejected": -0.5464625358581543, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.08594757198109153, | |
"eval_logits/chosen": 14.017444610595703, | |
"eval_logits/rejected": 14.885564804077148, | |
"eval_logps/chosen": -0.2685285806655884, | |
"eval_logps/rejected": -0.3654690384864807, | |
"eval_loss": 0.9166209697723389, | |
"eval_rewards/accuracies": 0.557894766330719, | |
"eval_rewards/chosen": -0.4027928411960602, | |
"eval_rewards/margins": 0.14541073143482208, | |
"eval_rewards/rejected": -0.5482036471366882, | |
"eval_runtime": 26.0431, | |
"eval_samples_per_second": 28.914, | |
"eval_steps_per_second": 3.648, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.09454232917920069, | |
"grad_norm": 0.07788874208927155, | |
"learning_rate": 4.933947257182901e-06, | |
"logits/chosen": 14.805160522460938, | |
"logits/rejected": 14.767298698425293, | |
"logps/chosen": -0.30586495995521545, | |
"logps/rejected": -0.3159794211387634, | |
"loss": 0.9128, | |
"rewards/accuracies": 0.42500001192092896, | |
"rewards/chosen": -0.45879751443862915, | |
"rewards/margins": 0.015171671286225319, | |
"rewards/rejected": -0.47396916151046753, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.10313708637730984, | |
"grad_norm": 0.07691823691129684, | |
"learning_rate": 4.921457902821578e-06, | |
"logits/chosen": 13.761972427368164, | |
"logits/rejected": 14.64726448059082, | |
"logps/chosen": -0.2784760296344757, | |
"logps/rejected": -0.34076255559921265, | |
"loss": 0.9179, | |
"rewards/accuracies": 0.5249999761581421, | |
"rewards/chosen": -0.41771402955055237, | |
"rewards/margins": 0.09342982620000839, | |
"rewards/rejected": -0.5111438632011414, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.11173184357541899, | |
"grad_norm": 0.08534488826990128, | |
"learning_rate": 4.907906416994146e-06, | |
"logits/chosen": 13.837780952453613, | |
"logits/rejected": 14.767657279968262, | |
"logps/chosen": -0.26367664337158203, | |
"logps/rejected": -0.3845904469490051, | |
"loss": 0.8978, | |
"rewards/accuracies": 0.550000011920929, | |
"rewards/chosen": -0.39551490545272827, | |
"rewards/margins": 0.18137072026729584, | |
"rewards/rejected": -0.5768855810165405, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.12032660077352815, | |
"grad_norm": 0.08117899298667908, | |
"learning_rate": 4.893298743830168e-06, | |
"logits/chosen": 13.270025253295898, | |
"logits/rejected": 14.128207206726074, | |
"logps/chosen": -0.24728116393089294, | |
"logps/rejected": -0.3510771095752716, | |
"loss": 0.9117, | |
"rewards/accuracies": 0.5874999761581421, | |
"rewards/chosen": -0.370921790599823, | |
"rewards/margins": 0.1556939035654068, | |
"rewards/rejected": -0.5266156196594238, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.1289213579716373, | |
"grad_norm": 0.1263500601053238, | |
"learning_rate": 4.8776412907378845e-06, | |
"logits/chosen": 13.525009155273438, | |
"logits/rejected": 14.163309097290039, | |
"logps/chosen": -0.24874648451805115, | |
"logps/rejected": -0.38132259249687195, | |
"loss": 0.9007, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.3731197714805603, | |
"rewards/margins": 0.1988641768693924, | |
"rewards/rejected": -0.5719839334487915, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.1289213579716373, | |
"eval_logits/chosen": 12.438652992248535, | |
"eval_logits/rejected": 13.519843101501465, | |
"eval_logps/chosen": -0.2689361274242401, | |
"eval_logps/rejected": -0.3897271454334259, | |
"eval_loss": 0.8991575241088867, | |
"eval_rewards/accuracies": 0.5894736647605896, | |
"eval_rewards/chosen": -0.40340420603752136, | |
"eval_rewards/margins": 0.1811865121126175, | |
"eval_rewards/rejected": -0.5845907330513, | |
"eval_runtime": 26.0482, | |
"eval_samples_per_second": 28.908, | |
"eval_steps_per_second": 3.647, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.13751611516974646, | |
"grad_norm": 0.11390316486358643, | |
"learning_rate": 4.860940925593703e-06, | |
"logits/chosen": 12.494891166687012, | |
"logits/rejected": 13.346384048461914, | |
"logps/chosen": -0.26858460903167725, | |
"logps/rejected": -0.4170496463775635, | |
"loss": 0.8854, | |
"rewards/accuracies": 0.5625, | |
"rewards/chosen": -0.4028768539428711, | |
"rewards/margins": 0.22269758582115173, | |
"rewards/rejected": -0.6255744695663452, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.1461108723678556, | |
"grad_norm": 0.14250700175762177, | |
"learning_rate": 4.84320497372973e-06, | |
"logits/chosen": 11.637483596801758, | |
"logits/rejected": 12.72177505493164, | |
"logps/chosen": -0.2967775762081146, | |
"logps/rejected": -0.440357506275177, | |
"loss": 0.8884, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.4451664090156555, | |
"rewards/margins": 0.21536986529827118, | |
"rewards/rejected": -0.6605362892150879, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.15470562956596476, | |
"grad_norm": 0.174351766705513, | |
"learning_rate": 4.824441214720629e-06, | |
"logits/chosen": 11.577589988708496, | |
"logits/rejected": 12.179681777954102, | |
"logps/chosen": -0.29397666454315186, | |
"logps/rejected": -0.4009665548801422, | |
"loss": 0.8756, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -0.44096502661705017, | |
"rewards/margins": 0.16048480570316315, | |
"rewards/rejected": -0.6014498472213745, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.1633003867640739, | |
"grad_norm": 0.22877676784992218, | |
"learning_rate": 4.804657878971252e-06, | |
"logits/chosen": 9.352752685546875, | |
"logits/rejected": 10.27645206451416, | |
"logps/chosen": -0.30452457070350647, | |
"logps/rejected": -0.4765443205833435, | |
"loss": 0.8781, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.4567868113517761, | |
"rewards/margins": 0.25802966952323914, | |
"rewards/rejected": -0.7148164510726929, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.17189514396218306, | |
"grad_norm": 0.2517675459384918, | |
"learning_rate": 4.783863644106502e-06, | |
"logits/chosen": 8.136419296264648, | |
"logits/rejected": 9.26432991027832, | |
"logps/chosen": -0.3416380286216736, | |
"logps/rejected": -0.4680122435092926, | |
"loss": 0.8531, | |
"rewards/accuracies": 0.6000000238418579, | |
"rewards/chosen": -0.5124570727348328, | |
"rewards/margins": 0.18956127762794495, | |
"rewards/rejected": -0.7020183801651001, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.17189514396218306, | |
"eval_logits/chosen": 7.26609992980957, | |
"eval_logits/rejected": 8.391904830932617, | |
"eval_logps/chosen": -0.31862083077430725, | |
"eval_logps/rejected": -0.5189473032951355, | |
"eval_loss": 0.8484573364257812, | |
"eval_rewards/accuracies": 0.6315789222717285, | |
"eval_rewards/chosen": -0.47793126106262207, | |
"eval_rewards/margins": 0.30048972368240356, | |
"eval_rewards/rejected": -0.7784210443496704, | |
"eval_runtime": 26.0496, | |
"eval_samples_per_second": 28.906, | |
"eval_steps_per_second": 3.647, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.18048990116029223, | |
"grad_norm": 0.28971683979034424, | |
"learning_rate": 4.762067631165049e-06, | |
"logits/chosen": 7.321592807769775, | |
"logits/rejected": 7.871228218078613, | |
"logps/chosen": -0.3311695158481598, | |
"logps/rejected": -0.4879254400730133, | |
"loss": 0.8211, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.4967542588710785, | |
"rewards/margins": 0.23513388633728027, | |
"rewards/rejected": -0.7318881750106812, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.18908465835840138, | |
"grad_norm": 0.568050742149353, | |
"learning_rate": 4.7392794005985324e-06, | |
"logits/chosen": 5.077876091003418, | |
"logits/rejected": 5.706583499908447, | |
"logps/chosen": -0.3127230405807495, | |
"logps/rejected": -0.5744297504425049, | |
"loss": 0.8331, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -0.46908459067344666, | |
"rewards/margins": 0.39256006479263306, | |
"rewards/rejected": -0.8616446256637573, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.19767941555651053, | |
"grad_norm": 0.32453760504722595, | |
"learning_rate": 4.715508948078037e-06, | |
"logits/chosen": 4.265925407409668, | |
"logits/rejected": 4.2006964683532715, | |
"logps/chosen": -0.4032830595970154, | |
"logps/rejected": -0.6459742784500122, | |
"loss": 0.7986, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -0.6049246191978455, | |
"rewards/margins": 0.3640367388725281, | |
"rewards/rejected": -0.9689614176750183, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.20627417275461968, | |
"grad_norm": 0.448809951543808, | |
"learning_rate": 4.690766700109659e-06, | |
"logits/chosen": 3.3534884452819824, | |
"logits/rejected": 3.4250903129577637, | |
"logps/chosen": -0.3817242383956909, | |
"logps/rejected": -0.7190496921539307, | |
"loss": 0.7708, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -0.5725863575935364, | |
"rewards/margins": 0.5059882402420044, | |
"rewards/rejected": -1.078574538230896, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.21486892995272883, | |
"grad_norm": 0.4277574419975281, | |
"learning_rate": 4.665063509461098e-06, | |
"logits/chosen": 3.151397228240967, | |
"logits/rejected": 2.8183228969573975, | |
"logps/chosen": -0.44173598289489746, | |
"logps/rejected": -0.8323748707771301, | |
"loss": 0.7722, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.6626039743423462, | |
"rewards/margins": 0.5859583616256714, | |
"rewards/rejected": -1.248562216758728, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.21486892995272883, | |
"eval_logits/chosen": 2.520007848739624, | |
"eval_logits/rejected": 1.9197090864181519, | |
"eval_logps/chosen": -0.4703753888607025, | |
"eval_logps/rejected": -0.90553879737854, | |
"eval_loss": 0.7410055994987488, | |
"eval_rewards/accuracies": 0.6631578803062439, | |
"eval_rewards/chosen": -0.7055630087852478, | |
"eval_rewards/margins": 0.6527453064918518, | |
"eval_rewards/rejected": -1.3583083152770996, | |
"eval_runtime": 26.0441, | |
"eval_samples_per_second": 28.912, | |
"eval_steps_per_second": 3.648, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.22346368715083798, | |
"grad_norm": 0.5626497268676758, | |
"learning_rate": 4.638410650401267e-06, | |
"logits/chosen": 1.2351257801055908, | |
"logits/rejected": 0.5925868153572083, | |
"logps/chosen": -0.46581563353538513, | |
"logps/rejected": -0.9673674702644348, | |
"loss": 0.6933, | |
"rewards/accuracies": 0.75, | |
"rewards/chosen": -0.6987233757972717, | |
"rewards/margins": 0.7523276209831238, | |
"rewards/rejected": -1.451051115989685, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.23205844434894715, | |
"grad_norm": 0.7433231472969055, | |
"learning_rate": 4.610819813755038e-06, | |
"logits/chosen": 3.1690659523010254, | |
"logits/rejected": 2.0423803329467773, | |
"logps/chosen": -0.506645679473877, | |
"logps/rejected": -1.0180162191390991, | |
"loss": 0.7265, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -0.7599684596061707, | |
"rewards/margins": 0.767055869102478, | |
"rewards/rejected": -1.527024507522583, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.2406532015470563, | |
"grad_norm": 1.4220589399337769, | |
"learning_rate": 4.582303101775249e-06, | |
"logits/chosen": 2.8173985481262207, | |
"logits/rejected": 1.5537467002868652, | |
"logps/chosen": -0.5869659185409546, | |
"logps/rejected": -1.1085975170135498, | |
"loss": 0.6725, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -0.8804486989974976, | |
"rewards/margins": 0.7824474573135376, | |
"rewards/rejected": -1.6628963947296143, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.24924795874516545, | |
"grad_norm": 0.6397098898887634, | |
"learning_rate": 4.55287302283426e-06, | |
"logits/chosen": 2.734229564666748, | |
"logits/rejected": 1.9948323965072632, | |
"logps/chosen": -0.6540845036506653, | |
"logps/rejected": -1.451608419418335, | |
"loss": 0.571, | |
"rewards/accuracies": 0.6000000238418579, | |
"rewards/chosen": -0.9811266660690308, | |
"rewards/margins": 1.1962860822677612, | |
"rewards/rejected": -2.177412748336792, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.2578427159432746, | |
"grad_norm": 0.4591177701950073, | |
"learning_rate": 4.522542485937369e-06, | |
"logits/chosen": 2.2491040229797363, | |
"logits/rejected": 1.345014214515686, | |
"logps/chosen": -0.6877793073654175, | |
"logps/rejected": -1.6054528951644897, | |
"loss": 0.5782, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -1.0316689014434814, | |
"rewards/margins": 1.3765103816986084, | |
"rewards/rejected": -2.408179521560669, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2578427159432746, | |
"eval_logits/chosen": 1.661840796470642, | |
"eval_logits/rejected": 0.6246702671051025, | |
"eval_logps/chosen": -0.7322248816490173, | |
"eval_logps/rejected": -2.272771120071411, | |
"eval_loss": 0.563686728477478, | |
"eval_rewards/accuracies": 0.7157894968986511, | |
"eval_rewards/chosen": -1.0983372926712036, | |
"eval_rewards/margins": 2.310819387435913, | |
"eval_rewards/rejected": -3.409156560897827, | |
"eval_runtime": 26.0455, | |
"eval_samples_per_second": 28.911, | |
"eval_steps_per_second": 3.647, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2664374731413838, | |
"grad_norm": 0.786809504032135, | |
"learning_rate": 4.491324795060491e-06, | |
"logits/chosen": 1.3445788621902466, | |
"logits/rejected": 0.4989510178565979, | |
"logps/chosen": -0.7276264429092407, | |
"logps/rejected": -2.3235878944396973, | |
"loss": 0.5253, | |
"rewards/accuracies": 0.75, | |
"rewards/chosen": -1.0914397239685059, | |
"rewards/margins": 2.393942356109619, | |
"rewards/rejected": -3.485382080078125, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.2750322303394929, | |
"grad_norm": 0.3913320004940033, | |
"learning_rate": 4.4592336433146e-06, | |
"logits/chosen": 2.61965012550354, | |
"logits/rejected": 1.9477211236953735, | |
"logps/chosen": -0.7146936655044556, | |
"logps/rejected": -1.9647115468978882, | |
"loss": 0.5294, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.0720404386520386, | |
"rewards/margins": 1.8750267028808594, | |
"rewards/rejected": -2.9470672607421875, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.28362698753760207, | |
"grad_norm": 0.4867005944252014, | |
"learning_rate": 4.426283106939474e-06, | |
"logits/chosen": 2.500439167022705, | |
"logits/rejected": 1.6413562297821045, | |
"logps/chosen": -0.8710287809371948, | |
"logps/rejected": -2.36894154548645, | |
"loss": 0.548, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -1.306543231010437, | |
"rewards/margins": 2.246868848800659, | |
"rewards/rejected": -3.5534119606018066, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.2922217447357112, | |
"grad_norm": 0.8009849786758423, | |
"learning_rate": 4.3924876391293915e-06, | |
"logits/chosen": 1.3847177028656006, | |
"logits/rejected": 0.8994542360305786, | |
"logps/chosen": -0.8447234034538269, | |
"logps/rejected": -2.800283908843994, | |
"loss": 0.4797, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.2670851945877075, | |
"rewards/margins": 2.9333412647247314, | |
"rewards/rejected": -4.2004265785217285, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.30081650193382037, | |
"grad_norm": 2.0202796459198, | |
"learning_rate": 4.357862063693486e-06, | |
"logits/chosen": 2.3197357654571533, | |
"logits/rejected": 1.37326180934906, | |
"logps/chosen": -0.8590717315673828, | |
"logps/rejected": -2.1532845497131348, | |
"loss": 0.5126, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -1.2886077165603638, | |
"rewards/margins": 1.941319465637207, | |
"rewards/rejected": -3.2299270629882812, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.30081650193382037, | |
"eval_logits/chosen": 2.0864102840423584, | |
"eval_logits/rejected": 1.2036340236663818, | |
"eval_logps/chosen": -0.9554746150970459, | |
"eval_logps/rejected": -3.0601954460144043, | |
"eval_loss": 0.5108997821807861, | |
"eval_rewards/accuracies": 0.7368420958518982, | |
"eval_rewards/chosen": -1.4332119226455688, | |
"eval_rewards/margins": 3.15708065032959, | |
"eval_rewards/rejected": -4.590292930603027, | |
"eval_runtime": 26.0503, | |
"eval_samples_per_second": 28.906, | |
"eval_steps_per_second": 3.647, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.3094112591319295, | |
"grad_norm": 1.0668681859970093, | |
"learning_rate": 4.322421568553529e-06, | |
"logits/chosen": 1.6770871877670288, | |
"logits/rejected": 1.073407530784607, | |
"logps/chosen": -1.1393296718597412, | |
"logps/rejected": -2.886169910430908, | |
"loss": 0.5031, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -1.7089945077896118, | |
"rewards/margins": 2.620260238647461, | |
"rewards/rejected": -4.329255104064941, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.31800601633003867, | |
"grad_norm": 0.5015287399291992, | |
"learning_rate": 4.286181699082008e-06, | |
"logits/chosen": 2.156587600708008, | |
"logits/rejected": 1.371209979057312, | |
"logps/chosen": -0.9851818084716797, | |
"logps/rejected": -3.2286324501037598, | |
"loss": 0.4662, | |
"rewards/accuracies": 0.7875000238418579, | |
"rewards/chosen": -1.47777259349823, | |
"rewards/margins": 3.3651764392852783, | |
"rewards/rejected": -4.842948913574219, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.3266007735281478, | |
"grad_norm": 0.9893808960914612, | |
"learning_rate": 4.249158351283414e-06, | |
"logits/chosen": 2.6184191703796387, | |
"logits/rejected": 2.212998390197754, | |
"logps/chosen": -0.9414733052253723, | |
"logps/rejected": -2.940886974334717, | |
"loss": 0.4829, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -1.4122098684310913, | |
"rewards/margins": 2.9991202354431152, | |
"rewards/rejected": -4.411330223083496, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.33519553072625696, | |
"grad_norm": 0.7588702440261841, | |
"learning_rate": 4.211367764821722e-06, | |
"logits/chosen": 3.257941484451294, | |
"logits/rejected": 2.5362088680267334, | |
"logps/chosen": -1.182255744934082, | |
"logps/rejected": -2.8621151447296143, | |
"loss": 0.4538, | |
"rewards/accuracies": 0.637499988079071, | |
"rewards/chosen": -1.7733834981918335, | |
"rewards/margins": 2.5197887420654297, | |
"rewards/rejected": -4.293172359466553, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.3437902879243661, | |
"grad_norm": 0.6317985653877258, | |
"learning_rate": 4.172826515897146e-06, | |
"logits/chosen": 3.057791233062744, | |
"logits/rejected": 2.4121367931365967, | |
"logps/chosen": -1.0847463607788086, | |
"logps/rejected": -3.3152599334716797, | |
"loss": 0.4847, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.6271196603775024, | |
"rewards/margins": 3.3457705974578857, | |
"rewards/rejected": -4.9728899002075195, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.3437902879243661, | |
"eval_logits/chosen": 2.9584426879882812, | |
"eval_logits/rejected": 2.292771577835083, | |
"eval_logps/chosen": -1.202886939048767, | |
"eval_logps/rejected": -3.6770312786102295, | |
"eval_loss": 0.47303518652915955, | |
"eval_rewards/accuracies": 0.7473683953285217, | |
"eval_rewards/chosen": -1.8043304681777954, | |
"eval_rewards/margins": 3.711216688156128, | |
"eval_rewards/rejected": -5.515547275543213, | |
"eval_runtime": 26.0247, | |
"eval_samples_per_second": 28.934, | |
"eval_steps_per_second": 3.65, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.3523850451224753, | |
"grad_norm": 1.0523916482925415, | |
"learning_rate": 4.133551509975264e-06, | |
"logits/chosen": 2.9360365867614746, | |
"logits/rejected": 2.330521583557129, | |
"logps/chosen": -1.3002166748046875, | |
"logps/rejected": -3.2887542247772217, | |
"loss": 0.4398, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.9503250122070312, | |
"rewards/margins": 2.9828057289123535, | |
"rewards/rejected": -4.933130741119385, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.36097980232058446, | |
"grad_norm": 0.6079875826835632, | |
"learning_rate": 4.093559974371725e-06, | |
"logits/chosen": 3.1500794887542725, | |
"logits/rejected": 2.329282283782959, | |
"logps/chosen": -1.23466157913208, | |
"logps/rejected": -3.291548252105713, | |
"loss": 0.4774, | |
"rewards/accuracies": 0.7124999761581421, | |
"rewards/chosen": -1.8519923686981201, | |
"rewards/margins": 3.085329532623291, | |
"rewards/rejected": -4.93732213973999, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.3695745595186936, | |
"grad_norm": 1.3175437450408936, | |
"learning_rate": 4.052869450695776e-06, | |
"logits/chosen": 3.4488296508789062, | |
"logits/rejected": 2.6282899379730225, | |
"logps/chosen": -1.380877137184143, | |
"logps/rejected": -4.005017280578613, | |
"loss": 0.4158, | |
"rewards/accuracies": 0.7749999761581421, | |
"rewards/chosen": -2.0713157653808594, | |
"rewards/margins": 3.9362099170684814, | |
"rewards/rejected": -6.007525444030762, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.37816931671680276, | |
"grad_norm": 3.7249863147735596, | |
"learning_rate": 4.011497787155938e-06, | |
"logits/chosen": 2.5173678398132324, | |
"logits/rejected": 1.943926215171814, | |
"logps/chosen": -1.7800304889678955, | |
"logps/rejected": -4.422289848327637, | |
"loss": 0.3916, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -2.6700453758239746, | |
"rewards/margins": 3.9633898735046387, | |
"rewards/rejected": -6.633435249328613, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.3867640739149119, | |
"grad_norm": 2.9776103496551514, | |
"learning_rate": 3.969463130731183e-06, | |
"logits/chosen": 3.2318034172058105, | |
"logits/rejected": 2.5253517627716064, | |
"logps/chosen": -2.309701442718506, | |
"logps/rejected": -4.725776672363281, | |
"loss": 0.368, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -3.464552640914917, | |
"rewards/margins": 3.624112606048584, | |
"rewards/rejected": -7.0886640548706055, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.3867640739149119, | |
"eval_logits/chosen": 2.397157907485962, | |
"eval_logits/rejected": 2.0492196083068848, | |
"eval_logps/chosen": -2.6244213581085205, | |
"eval_logps/rejected": -5.247391700744629, | |
"eval_loss": 0.3982011079788208, | |
"eval_rewards/accuracies": 0.8842105269432068, | |
"eval_rewards/chosen": -3.936631917953491, | |
"eval_rewards/margins": 3.934455633163452, | |
"eval_rewards/rejected": -7.87108850479126, | |
"eval_runtime": 26.0501, | |
"eval_samples_per_second": 28.906, | |
"eval_steps_per_second": 3.647, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.39535883111302106, | |
"grad_norm": 2.3925623893737793, | |
"learning_rate": 3.92678391921108e-06, | |
"logits/chosen": 3.0329971313476562, | |
"logits/rejected": 2.67683482170105, | |
"logps/chosen": -2.4644994735717773, | |
"logps/rejected": -4.755246162414551, | |
"loss": 0.3584, | |
"rewards/accuracies": 0.8125, | |
"rewards/chosen": -3.696749210357666, | |
"rewards/margins": 3.436119794845581, | |
"rewards/rejected": -7.132868766784668, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.4039535883111302, | |
"grad_norm": 3.1981327533721924, | |
"learning_rate": 3.88347887310836e-06, | |
"logits/chosen": 2.219741106033325, | |
"logits/rejected": 1.8649622201919556, | |
"logps/chosen": -2.2890329360961914, | |
"logps/rejected": -5.124932289123535, | |
"loss": 0.3709, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -3.433549404144287, | |
"rewards/margins": 4.253849029541016, | |
"rewards/rejected": -7.687398433685303, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.41254834550923936, | |
"grad_norm": 2.0272741317749023, | |
"learning_rate": 3.839566987447492e-06, | |
"logits/chosen": 3.6659038066864014, | |
"logits/rejected": 3.202749252319336, | |
"logps/chosen": -2.5729193687438965, | |
"logps/rejected": -4.992354393005371, | |
"loss": 0.3837, | |
"rewards/accuracies": 0.8374999761581421, | |
"rewards/chosen": -3.859379529953003, | |
"rewards/margins": 3.629152297973633, | |
"rewards/rejected": -7.488531589508057, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.4211431027073485, | |
"grad_norm": 2.5182268619537354, | |
"learning_rate": 3.795067523432826e-06, | |
"logits/chosen": 3.327012538909912, | |
"logits/rejected": 3.1205530166625977, | |
"logps/chosen": -3.016247510910034, | |
"logps/rejected": -5.566779136657715, | |
"loss": 0.3112, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.524371147155762, | |
"rewards/margins": 3.8257980346679688, | |
"rewards/rejected": -8.35016918182373, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.42973785990545765, | |
"grad_norm": 2.990694046020508, | |
"learning_rate": 3.7500000000000005e-06, | |
"logits/chosen": 2.7793381214141846, | |
"logits/rejected": 2.7330098152160645, | |
"logps/chosen": -2.7836732864379883, | |
"logps/rejected": -5.60109806060791, | |
"loss": 0.3069, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.175509929656982, | |
"rewards/margins": 4.226136684417725, | |
"rewards/rejected": -8.401647567749023, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.42973785990545765, | |
"eval_logits/chosen": 2.5767242908477783, | |
"eval_logits/rejected": 2.1918540000915527, | |
"eval_logps/chosen": -3.1751770973205566, | |
"eval_logps/rejected": -6.361191749572754, | |
"eval_loss": 0.35469338297843933, | |
"eval_rewards/accuracies": 0.9157894849777222, | |
"eval_rewards/chosen": -4.762764930725098, | |
"eval_rewards/margins": 4.779022693634033, | |
"eval_rewards/rejected": -9.541787147521973, | |
"eval_runtime": 26.0483, | |
"eval_samples_per_second": 28.908, | |
"eval_steps_per_second": 3.647, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.4383326171035668, | |
"grad_norm": 3.1177096366882324, | |
"learning_rate": 3.7043841852542884e-06, | |
"logits/chosen": 3.4840216636657715, | |
"logits/rejected": 2.871774196624756, | |
"logps/chosen": -2.739344596862793, | |
"logps/rejected": -5.363945960998535, | |
"loss": 0.3468, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -4.1090168952941895, | |
"rewards/margins": 3.9369025230407715, | |
"rewards/rejected": -8.045918464660645, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.44692737430167595, | |
"grad_norm": 2.212597131729126, | |
"learning_rate": 3.658240087799655e-06, | |
"logits/chosen": 2.8667449951171875, | |
"logits/rejected": 2.463776111602783, | |
"logps/chosen": -3.17940092086792, | |
"logps/rejected": -6.375420570373535, | |
"loss": 0.3092, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -4.769101619720459, | |
"rewards/margins": 4.794029235839844, | |
"rewards/rejected": -9.563131332397461, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.45552213149978515, | |
"grad_norm": 4.475163459777832, | |
"learning_rate": 3.611587947962319e-06, | |
"logits/chosen": 3.234764814376831, | |
"logits/rejected": 2.6656813621520996, | |
"logps/chosen": -3.0503814220428467, | |
"logps/rejected": -5.525468826293945, | |
"loss": 0.3044, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -4.5755720138549805, | |
"rewards/margins": 3.7126305103302, | |
"rewards/rejected": -8.288202285766602, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.4641168886978943, | |
"grad_norm": 1.8678548336029053, | |
"learning_rate": 3.564448228912682e-06, | |
"logits/chosen": 2.1433145999908447, | |
"logits/rejected": 2.1412692070007324, | |
"logps/chosen": -2.6177189350128174, | |
"logps/rejected": -5.8179192543029785, | |
"loss": 0.3376, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -3.9265785217285156, | |
"rewards/margins": 4.800299644470215, | |
"rewards/rejected": -8.72687816619873, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.47271164589600345, | |
"grad_norm": 2.3289716243743896, | |
"learning_rate": 3.516841607689501e-06, | |
"logits/chosen": 2.7216885089874268, | |
"logits/rejected": 2.549870729446411, | |
"logps/chosen": -2.7370285987854004, | |
"logps/rejected": -5.929703712463379, | |
"loss": 0.2937, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -4.1055426597595215, | |
"rewards/margins": 4.7890119552612305, | |
"rewards/rejected": -8.894556045532227, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.47271164589600345, | |
"eval_logits/chosen": 2.7431576251983643, | |
"eval_logits/rejected": 2.386326789855957, | |
"eval_logps/chosen": -3.3791866302490234, | |
"eval_logps/rejected": -6.955687999725342, | |
"eval_loss": 0.33076339960098267, | |
"eval_rewards/accuracies": 0.9157894849777222, | |
"eval_rewards/chosen": -5.068779945373535, | |
"eval_rewards/margins": 5.364751815795898, | |
"eval_rewards/rejected": -10.433531761169434, | |
"eval_runtime": 26.0558, | |
"eval_samples_per_second": 28.899, | |
"eval_steps_per_second": 3.646, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.4813064030941126, | |
"grad_norm": 2.7705740928649902, | |
"learning_rate": 3.4687889661302577e-06, | |
"logits/chosen": 2.2392983436584473, | |
"logits/rejected": 1.9859422445297241, | |
"logps/chosen": -3.14917254447937, | |
"logps/rejected": -6.809067726135254, | |
"loss": 0.2983, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.723758697509766, | |
"rewards/margins": 5.489841938018799, | |
"rewards/rejected": -10.213602066040039, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.48990116029222175, | |
"grad_norm": 2.1203205585479736, | |
"learning_rate": 3.4203113817116955e-06, | |
"logits/chosen": 2.5817489624023438, | |
"logits/rejected": 2.54498291015625, | |
"logps/chosen": -3.4195308685302734, | |
"logps/rejected": -7.411266326904297, | |
"loss": 0.3014, | |
"rewards/accuracies": 0.987500011920929, | |
"rewards/chosen": -5.129295349121094, | |
"rewards/margins": 5.987602710723877, | |
"rewards/rejected": -11.116899490356445, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.4984959174903309, | |
"grad_norm": 1.7489718198776245, | |
"learning_rate": 3.3714301183045382e-06, | |
"logits/chosen": 2.1257646083831787, | |
"logits/rejected": 2.1210994720458984, | |
"logps/chosen": -2.9680445194244385, | |
"logps/rejected": -6.824588775634766, | |
"loss": 0.2752, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -4.452066898345947, | |
"rewards/margins": 5.784815788269043, | |
"rewards/rejected": -10.236883163452148, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.50709067468844, | |
"grad_norm": 2.1680099964141846, | |
"learning_rate": 3.3221666168464584e-06, | |
"logits/chosen": 2.5764970779418945, | |
"logits/rejected": 2.2523038387298584, | |
"logps/chosen": -3.667435884475708, | |
"logps/rejected": -7.162708282470703, | |
"loss": 0.2968, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.501153945922852, | |
"rewards/margins": 5.242908954620361, | |
"rewards/rejected": -10.744061470031738, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"grad_norm": 1.7536494731903076, | |
"learning_rate": 3.272542485937369e-06, | |
"logits/chosen": 2.2658116817474365, | |
"logits/rejected": 1.980126142501831, | |
"logps/chosen": -3.5995922088623047, | |
"logps/rejected": -7.158552646636963, | |
"loss": 0.2971, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.399388313293457, | |
"rewards/margins": 5.338440418243408, | |
"rewards/rejected": -10.737829208374023, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"eval_logits/chosen": 2.6781415939331055, | |
"eval_logits/rejected": 2.508939027786255, | |
"eval_logps/chosen": -3.80741548538208, | |
"eval_logps/rejected": -7.577634334564209, | |
"eval_loss": 0.3210188150405884, | |
"eval_rewards/accuracies": 0.9368420839309692, | |
"eval_rewards/chosen": -5.711122989654541, | |
"eval_rewards/margins": 5.655328273773193, | |
"eval_rewards/rejected": -11.366451263427734, | |
"eval_runtime": 26.0494, | |
"eval_samples_per_second": 28.907, | |
"eval_steps_per_second": 3.647, | |
"step": 600 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.4077101809126605e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |