|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998766954377312, |
|
"eval_steps": 1000, |
|
"global_step": 405, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2195121951219512e-08, |
|
"logits/chosen": -2.8088459968566895, |
|
"logits/rejected": -2.7595884799957275, |
|
"logps/chosen": -368.90777587890625, |
|
"logps/rejected": -133.10202026367188, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.219512195121951e-07, |
|
"logits/chosen": -2.838653802871704, |
|
"logits/rejected": -2.824901819229126, |
|
"logps/chosen": -433.81378173828125, |
|
"logps/rejected": -114.705810546875, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0011021953541785479, |
|
"rewards/margins": 0.0018466737819835544, |
|
"rewards/rejected": -0.0007444784860126674, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.439024390243902e-07, |
|
"logits/chosen": -2.797428607940674, |
|
"logits/rejected": -2.7644600868225098, |
|
"logps/chosen": -436.551025390625, |
|
"logps/rejected": -109.42466735839844, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.02190575934946537, |
|
"rewards/margins": 0.03880878537893295, |
|
"rewards/rejected": -0.01690302975475788, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6585365853658536e-07, |
|
"logits/chosen": -2.7145814895629883, |
|
"logits/rejected": -2.6879427433013916, |
|
"logps/chosen": -422.17218017578125, |
|
"logps/rejected": -128.7976531982422, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07188864052295685, |
|
"rewards/margins": 0.20736002922058105, |
|
"rewards/rejected": -0.1354713886976242, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.878048780487804e-07, |
|
"logits/chosen": -2.588033676147461, |
|
"logits/rejected": -2.5705184936523438, |
|
"logps/chosen": -397.0795593261719, |
|
"logps/rejected": -140.2168426513672, |
|
"loss": 0.3735, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.016153398901224136, |
|
"rewards/margins": 0.42458558082580566, |
|
"rewards/rejected": -0.40843215584754944, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992461696250783e-07, |
|
"logits/chosen": -2.4239001274108887, |
|
"logits/rejected": -2.3979756832122803, |
|
"logps/chosen": -445.3406677246094, |
|
"logps/rejected": -201.51806640625, |
|
"loss": 0.2783, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.030274126678705215, |
|
"rewards/margins": 0.8344534635543823, |
|
"rewards/rejected": -0.8647276163101196, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966461721767899e-07, |
|
"logits/chosen": -2.383493661880493, |
|
"logits/rejected": -2.3332314491271973, |
|
"logps/chosen": -423.5155334472656, |
|
"logps/rejected": -253.60073852539062, |
|
"loss": 0.2228, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.36428865790367126, |
|
"rewards/margins": 0.920581042766571, |
|
"rewards/rejected": -1.2848697900772095, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.922100518015975e-07, |
|
"logits/chosen": -2.4096710681915283, |
|
"logits/rejected": -2.364241123199463, |
|
"logps/chosen": -422.70513916015625, |
|
"logps/rejected": -279.1031188964844, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3884132504463196, |
|
"rewards/margins": 1.2157753705978394, |
|
"rewards/rejected": -1.6041886806488037, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.859708325770919e-07, |
|
"logits/chosen": -2.38008451461792, |
|
"logits/rejected": -2.344496011734009, |
|
"logps/chosen": -455.79339599609375, |
|
"logps/rejected": -303.01690673828125, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.647363543510437, |
|
"rewards/margins": 1.3260728120803833, |
|
"rewards/rejected": -1.9734363555908203, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779749614980225e-07, |
|
"logits/chosen": -2.374379873275757, |
|
"logits/rejected": -2.3444790840148926, |
|
"logps/chosen": -532.1400146484375, |
|
"logps/rejected": -380.60955810546875, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.560777485370636, |
|
"rewards/margins": 1.9465181827545166, |
|
"rewards/rejected": -2.507295846939087, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.682819627081427e-07, |
|
"logits/chosen": -2.3339896202087402, |
|
"logits/rejected": -2.2830748558044434, |
|
"logps/chosen": -467.65374755859375, |
|
"logps/rejected": -348.66156005859375, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.54865962266922, |
|
"rewards/margins": 1.753379464149475, |
|
"rewards/rejected": -2.30203914642334, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.569639943810477e-07, |
|
"logits/chosen": -2.3383262157440186, |
|
"logits/rejected": -2.2902047634124756, |
|
"logps/chosen": -484.0919494628906, |
|
"logps/rejected": -367.74505615234375, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.769287645816803, |
|
"rewards/margins": 1.7386033535003662, |
|
"rewards/rejected": -2.5078909397125244, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4410531154874543e-07, |
|
"logits/chosen": -2.3726840019226074, |
|
"logits/rejected": -2.317364454269409, |
|
"logps/chosen": -516.4107666015625, |
|
"logps/rejected": -385.29571533203125, |
|
"loss": 0.1169, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6457246541976929, |
|
"rewards/margins": 1.9019176959991455, |
|
"rewards/rejected": -2.547642469406128, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298016388768561e-07, |
|
"logits/chosen": -2.413625717163086, |
|
"logits/rejected": -2.3727028369903564, |
|
"logps/chosen": -501.69561767578125, |
|
"logps/rejected": -364.65997314453125, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.36493119597435, |
|
"rewards/margins": 2.0498671531677246, |
|
"rewards/rejected": -2.4147982597351074, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.1415945805573005e-07, |
|
"logits/chosen": -2.3137855529785156, |
|
"logits/rejected": -2.2661328315734863, |
|
"logps/chosen": -494.91546630859375, |
|
"logps/rejected": -381.90924072265625, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7394388914108276, |
|
"rewards/margins": 1.7537353038787842, |
|
"rewards/rejected": -2.4931740760803223, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.972952151123984e-07, |
|
"logits/chosen": -2.3150975704193115, |
|
"logits/rejected": -2.2541096210479736, |
|
"logps/chosen": -429.13299560546875, |
|
"logps/rejected": -321.36676025390625, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5441495180130005, |
|
"rewards/margins": 1.660264253616333, |
|
"rewards/rejected": -2.204413890838623, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.793344535444142e-07, |
|
"logits/chosen": -2.2927441596984863, |
|
"logits/rejected": -2.235689401626587, |
|
"logps/chosen": -517.9212646484375, |
|
"logps/rejected": -355.93096923828125, |
|
"loss": 0.1116, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.5073868036270142, |
|
"rewards/margins": 1.9403215646743774, |
|
"rewards/rejected": -2.4477083683013916, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604108797288461e-07, |
|
"logits/chosen": -2.2924787998199463, |
|
"logits/rejected": -2.2269301414489746, |
|
"logps/chosen": -528.6913452148438, |
|
"logps/rejected": -407.18780517578125, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.8971579670906067, |
|
"rewards/margins": 2.0700173377990723, |
|
"rewards/rejected": -2.9671754837036133, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.40665367563858e-07, |
|
"logits/chosen": -2.288649320602417, |
|
"logits/rejected": -2.230454683303833, |
|
"logps/chosen": -500.1395568847656, |
|
"logps/rejected": -393.0097961425781, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9004061818122864, |
|
"rewards/margins": 1.8742806911468506, |
|
"rewards/rejected": -2.774686813354492, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.202449097526798e-07, |
|
"logits/chosen": -2.3139257431030273, |
|
"logits/rejected": -2.2718071937561035, |
|
"logps/chosen": -465.964111328125, |
|
"logps/rejected": -354.6561584472656, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.6382254362106323, |
|
"rewards/margins": 1.8348891735076904, |
|
"rewards/rejected": -2.4731147289276123, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.993015235369905e-07, |
|
"logits/chosen": -2.2759037017822266, |
|
"logits/rejected": -2.2043704986572266, |
|
"logps/chosen": -518.3172607421875, |
|
"logps/rejected": -417.5638122558594, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.7904411554336548, |
|
"rewards/margins": 2.2067065238952637, |
|
"rewards/rejected": -2.997147798538208, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7799111902582693e-07, |
|
"logits/chosen": -2.2700018882751465, |
|
"logits/rejected": -2.207021951675415, |
|
"logps/chosen": -489.39801025390625, |
|
"logps/rejected": -369.2752685546875, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.889120876789093, |
|
"rewards/margins": 1.7826087474822998, |
|
"rewards/rejected": -2.671729803085327, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.564723385445869e-07, |
|
"logits/chosen": -2.324565887451172, |
|
"logits/rejected": -2.267853260040283, |
|
"logps/chosen": -490.59130859375, |
|
"logps/rejected": -388.76690673828125, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.7297108769416809, |
|
"rewards/margins": 1.9148566722869873, |
|
"rewards/rejected": -2.6445674896240234, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3490537564442845e-07, |
|
"logits/chosen": -2.267577648162842, |
|
"logits/rejected": -2.186471462249756, |
|
"logps/chosen": -497.12335205078125, |
|
"logps/rejected": -387.3985900878906, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.051777958869934, |
|
"rewards/margins": 1.7568168640136719, |
|
"rewards/rejected": -2.8085949420928955, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1345078256378801e-07, |
|
"logits/chosen": -2.311931610107422, |
|
"logits/rejected": -2.2356672286987305, |
|
"logps/chosen": -485.4923400878906, |
|
"logps/rejected": -404.1107177734375, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7667996287345886, |
|
"rewards/margins": 2.0932888984680176, |
|
"rewards/rejected": -2.860088586807251, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9226827501969865e-07, |
|
"logits/chosen": -2.326636552810669, |
|
"logits/rejected": -2.2703440189361572, |
|
"logps/chosen": -509.9071350097656, |
|
"logps/rejected": -414.39874267578125, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6723321676254272, |
|
"rewards/margins": 2.289646625518799, |
|
"rewards/rejected": -2.9619784355163574, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.715155432264775e-07, |
|
"logits/chosen": -2.3231379985809326, |
|
"logits/rejected": -2.2714104652404785, |
|
"logps/chosen": -511.625732421875, |
|
"logps/rejected": -397.1949157714844, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.7959054708480835, |
|
"rewards/margins": 2.012528896331787, |
|
"rewards/rejected": -2.80843448638916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.51347077992983e-07, |
|
"logits/chosen": -2.3125240802764893, |
|
"logits/rejected": -2.269193410873413, |
|
"logps/chosen": -496.95001220703125, |
|
"logps/rejected": -402.70782470703125, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9325113296508789, |
|
"rewards/margins": 1.8711084127426147, |
|
"rewards/rejected": -2.803619861602783, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3191302063739906e-07, |
|
"logits/chosen": -2.282691717147827, |
|
"logits/rejected": -2.233623743057251, |
|
"logps/chosen": -469.02630615234375, |
|
"logps/rejected": -393.0565185546875, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.8983050584793091, |
|
"rewards/margins": 1.9013340473175049, |
|
"rewards/rejected": -2.7996389865875244, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1335804528119475e-07, |
|
"logits/chosen": -2.3497660160064697, |
|
"logits/rejected": -2.2719693183898926, |
|
"logps/chosen": -515.9114990234375, |
|
"logps/rejected": -393.408935546875, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8015244603157043, |
|
"rewards/margins": 2.1254332065582275, |
|
"rewards/rejected": -2.926957607269287, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.582028184286423e-08, |
|
"logits/chosen": -2.2509076595306396, |
|
"logits/rejected": -2.2074227333068848, |
|
"logps/chosen": -466.32305908203125, |
|
"logps/rejected": -419.3622131347656, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0646774768829346, |
|
"rewards/margins": 1.9130761623382568, |
|
"rewards/rejected": -2.9777536392211914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.943028774907065e-08, |
|
"logits/chosen": -2.250230312347412, |
|
"logits/rejected": -2.195244789123535, |
|
"logps/chosen": -476.79998779296875, |
|
"logps/rejected": -408.64068603515625, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.923287034034729, |
|
"rewards/margins": 1.9779703617095947, |
|
"rewards/rejected": -2.901257276535034, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.431007601814637e-08, |
|
"logits/chosen": -2.2714295387268066, |
|
"logits/rejected": -2.2263479232788086, |
|
"logps/chosen": -431.47601318359375, |
|
"logps/rejected": -397.78521728515625, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.0377047061920166, |
|
"rewards/margins": 1.8672853708267212, |
|
"rewards/rejected": -2.9049899578094482, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.0572206951246e-08, |
|
"logits/chosen": -2.243610382080078, |
|
"logits/rejected": -2.1739163398742676, |
|
"logps/chosen": -469.3233337402344, |
|
"logps/rejected": -400.6234130859375, |
|
"loss": 0.1024, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0177555084228516, |
|
"rewards/margins": 1.9254471063613892, |
|
"rewards/rejected": -2.943202495574951, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.831895019292897e-08, |
|
"logits/chosen": -2.302597761154175, |
|
"logits/rejected": -2.245525360107422, |
|
"logps/chosen": -527.4336547851562, |
|
"logps/rejected": -453.01116943359375, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.9302037954330444, |
|
"rewards/margins": 2.4225857257843018, |
|
"rewards/rejected": -3.3527894020080566, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764152339909756e-08, |
|
"logits/chosen": -2.2584733963012695, |
|
"logits/rejected": -2.1962692737579346, |
|
"logps/chosen": -499.47576904296875, |
|
"logps/rejected": -363.5857238769531, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7420647144317627, |
|
"rewards/margins": 1.9064794778823853, |
|
"rewards/rejected": -2.6485438346862793, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.861941317991664e-08, |
|
"logits/chosen": -2.300192356109619, |
|
"logits/rejected": -2.2127695083618164, |
|
"logps/chosen": -531.5907592773438, |
|
"logps/rejected": -412.7438049316406, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.7529584765434265, |
|
"rewards/margins": 2.228832960128784, |
|
"rewards/rejected": -2.9817919731140137, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.13197833728636e-08, |
|
"logits/chosen": -2.25828218460083, |
|
"logits/rejected": -2.198098659515381, |
|
"logps/chosen": -485.60052490234375, |
|
"logps/rejected": -423.1136779785156, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.8292403221130371, |
|
"rewards/margins": 2.2823524475097656, |
|
"rewards/rejected": -3.1115927696228027, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.79697505093521e-09, |
|
"logits/chosen": -2.2520506381988525, |
|
"logits/rejected": -2.1905932426452637, |
|
"logps/chosen": -501.0511169433594, |
|
"logps/rejected": -402.864501953125, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9875413775444031, |
|
"rewards/margins": 1.987672209739685, |
|
"rewards/rejected": -2.9752135276794434, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.092101988131256e-09, |
|
"logits/chosen": -2.2959237098693848, |
|
"logits/rejected": -2.1943726539611816, |
|
"logps/chosen": -542.3916015625, |
|
"logps/rejected": -426.2535095214844, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.7925306558609009, |
|
"rewards/margins": 2.3787803649902344, |
|
"rewards/rejected": -3.1713109016418457, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.327445937151673e-10, |
|
"logits/chosen": -2.2838008403778076, |
|
"logits/rejected": -2.2221412658691406, |
|
"logps/chosen": -538.8756103515625, |
|
"logps/rejected": -445.8970642089844, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9025734066963196, |
|
"rewards/margins": 2.2714860439300537, |
|
"rewards/rejected": -3.1740598678588867, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 405, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1562451661368947, |
|
"train_runtime": 3209.2418, |
|
"train_samples_per_second": 16.17, |
|
"train_steps_per_second": 0.126 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 405, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|