{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998766954377312, "eval_steps": 1000, "global_step": 405, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2195121951219512e-08, "logits/chosen": -2.8088459968566895, "logits/rejected": -2.7595884799957275, "logps/chosen": -368.90777587890625, "logps/rejected": -133.10202026367188, "loss": 0.5469, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.219512195121951e-07, "logits/chosen": -2.838653802871704, "logits/rejected": -2.824901819229126, "logps/chosen": -433.81378173828125, "logps/rejected": -114.705810546875, "loss": 0.55, "rewards/accuracies": 0.5, "rewards/chosen": 0.0011021953541785479, "rewards/margins": 0.0018466737819835544, "rewards/rejected": -0.0007444784860126674, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.439024390243902e-07, "logits/chosen": -2.797428607940674, "logits/rejected": -2.7644600868225098, "logps/chosen": -436.551025390625, "logps/rejected": -109.42466735839844, "loss": 0.5381, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.02190575934946537, "rewards/margins": 0.03880878537893295, "rewards/rejected": -0.01690302975475788, "step": 20 }, { "epoch": 0.07, "learning_rate": 3.6585365853658536e-07, "logits/chosen": -2.7145814895629883, "logits/rejected": -2.6879427433013916, "logps/chosen": -422.17218017578125, "logps/rejected": -128.7976531982422, "loss": 0.4635, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07188864052295685, "rewards/margins": 0.20736002922058105, "rewards/rejected": -0.1354713886976242, "step": 30 }, { "epoch": 0.1, "learning_rate": 4.878048780487804e-07, "logits/chosen": -2.588033676147461, "logits/rejected": -2.5705184936523438, "logps/chosen": -397.0795593261719, "logps/rejected": -140.2168426513672, "loss": 0.3735, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.016153398901224136, "rewards/margins": 0.42458558082580566, "rewards/rejected": -0.40843215584754944, "step": 40 }, { "epoch": 0.12, "learning_rate": 4.992461696250783e-07, "logits/chosen": -2.4239001274108887, "logits/rejected": -2.3979756832122803, "logps/chosen": -445.3406677246094, "logps/rejected": -201.51806640625, "loss": 0.2783, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.030274126678705215, "rewards/margins": 0.8344534635543823, "rewards/rejected": -0.8647276163101196, "step": 50 }, { "epoch": 0.15, "learning_rate": 4.966461721767899e-07, "logits/chosen": -2.383493661880493, "logits/rejected": -2.3332314491271973, "logps/chosen": -423.5155334472656, "logps/rejected": -253.60073852539062, "loss": 0.2228, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.36428865790367126, "rewards/margins": 0.920581042766571, "rewards/rejected": -1.2848697900772095, "step": 60 }, { "epoch": 0.17, "learning_rate": 4.922100518015975e-07, "logits/chosen": -2.4096710681915283, "logits/rejected": -2.364241123199463, "logps/chosen": -422.70513916015625, "logps/rejected": -279.1031188964844, "loss": 0.1891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3884132504463196, "rewards/margins": 1.2157753705978394, "rewards/rejected": -1.6041886806488037, "step": 70 }, { "epoch": 0.2, "learning_rate": 4.859708325770919e-07, "logits/chosen": -2.38008451461792, "logits/rejected": -2.344496011734009, "logps/chosen": -455.79339599609375, "logps/rejected": -303.01690673828125, "loss": 0.1269, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.647363543510437, "rewards/margins": 1.3260728120803833, "rewards/rejected": -1.9734363555908203, "step": 80 }, { "epoch": 0.22, "learning_rate": 4.779749614980225e-07, "logits/chosen": -2.374379873275757, "logits/rejected": -2.3444790840148926, "logps/chosen": -532.1400146484375, "logps/rejected": -380.60955810546875, "loss": 0.1095, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.560777485370636, "rewards/margins": 1.9465181827545166, "rewards/rejected": -2.507295846939087, "step": 90 }, { "epoch": 0.25, "learning_rate": 4.682819627081427e-07, "logits/chosen": -2.3339896202087402, "logits/rejected": -2.2830748558044434, "logps/chosen": -467.65374755859375, "logps/rejected": -348.66156005859375, "loss": 0.1477, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.54865962266922, "rewards/margins": 1.753379464149475, "rewards/rejected": -2.30203914642334, "step": 100 }, { "epoch": 0.27, "learning_rate": 4.569639943810477e-07, "logits/chosen": -2.3383262157440186, "logits/rejected": -2.2902047634124756, "logps/chosen": -484.0919494628906, "logps/rejected": -367.74505615234375, "loss": 0.1193, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.769287645816803, "rewards/margins": 1.7386033535003662, "rewards/rejected": -2.5078909397125244, "step": 110 }, { "epoch": 0.3, "learning_rate": 4.4410531154874543e-07, "logits/chosen": -2.3726840019226074, "logits/rejected": -2.317364454269409, "logps/chosen": -516.4107666015625, "logps/rejected": -385.29571533203125, "loss": 0.1169, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6457246541976929, "rewards/margins": 1.9019176959991455, "rewards/rejected": -2.547642469406128, "step": 120 }, { "epoch": 0.32, "learning_rate": 4.298016388768561e-07, "logits/chosen": -2.413625717163086, "logits/rejected": -2.3727028369903564, "logps/chosen": -501.69561767578125, "logps/rejected": -364.65997314453125, "loss": 0.1158, "rewards/accuracies": 0.875, "rewards/chosen": -0.36493119597435, "rewards/margins": 2.0498671531677246, "rewards/rejected": -2.4147982597351074, "step": 130 }, { "epoch": 0.35, "learning_rate": 4.1415945805573005e-07, "logits/chosen": -2.3137855529785156, "logits/rejected": -2.2661328315734863, "logps/chosen": -494.91546630859375, "logps/rejected": -381.90924072265625, "loss": 0.1181, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7394388914108276, "rewards/margins": 1.7537353038787842, "rewards/rejected": -2.4931740760803223, "step": 140 }, { "epoch": 0.37, "learning_rate": 3.972952151123984e-07, "logits/chosen": -2.3150975704193115, "logits/rejected": -2.2541096210479736, "logps/chosen": -429.13299560546875, "logps/rejected": -321.36676025390625, "loss": 0.1415, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5441495180130005, "rewards/margins": 1.660264253616333, "rewards/rejected": -2.204413890838623, "step": 150 }, { "epoch": 0.39, "learning_rate": 3.793344535444142e-07, "logits/chosen": -2.2927441596984863, "logits/rejected": -2.235689401626587, "logps/chosen": -517.9212646484375, "logps/rejected": -355.93096923828125, "loss": 0.1116, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.5073868036270142, "rewards/margins": 1.9403215646743774, "rewards/rejected": -2.4477083683013916, "step": 160 }, { "epoch": 0.42, "learning_rate": 3.604108797288461e-07, "logits/chosen": -2.2924787998199463, "logits/rejected": -2.2269301414489746, "logps/chosen": -528.6913452148438, "logps/rejected": -407.18780517578125, "loss": 0.0933, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.8971579670906067, "rewards/margins": 2.0700173377990723, "rewards/rejected": -2.9671754837036133, "step": 170 }, { "epoch": 0.44, "learning_rate": 3.40665367563858e-07, "logits/chosen": -2.288649320602417, "logits/rejected": -2.230454683303833, "logps/chosen": -500.1395568847656, "logps/rejected": -393.0097961425781, "loss": 0.0995, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9004061818122864, "rewards/margins": 1.8742806911468506, "rewards/rejected": -2.774686813354492, "step": 180 }, { "epoch": 0.47, "learning_rate": 3.202449097526798e-07, "logits/chosen": -2.3139257431030273, "logits/rejected": -2.2718071937561035, "logps/chosen": -465.964111328125, "logps/rejected": -354.6561584472656, "loss": 0.1167, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.6382254362106323, "rewards/margins": 1.8348891735076904, "rewards/rejected": -2.4731147289276123, "step": 190 }, { "epoch": 0.49, "learning_rate": 2.993015235369905e-07, "logits/chosen": -2.2759037017822266, "logits/rejected": -2.2043704986572266, "logps/chosen": -518.3172607421875, "logps/rejected": -417.5638122558594, "loss": 0.0996, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.7904411554336548, "rewards/margins": 2.2067065238952637, "rewards/rejected": -2.997147798538208, "step": 200 }, { "epoch": 0.52, "learning_rate": 2.7799111902582693e-07, "logits/chosen": -2.2700018882751465, "logits/rejected": -2.207021951675415, "logps/chosen": -489.39801025390625, "logps/rejected": -369.2752685546875, "loss": 0.1109, "rewards/accuracies": 0.78125, "rewards/chosen": -0.889120876789093, "rewards/margins": 1.7826087474822998, "rewards/rejected": -2.671729803085327, "step": 210 }, { "epoch": 0.54, "learning_rate": 2.564723385445869e-07, "logits/chosen": -2.324565887451172, "logits/rejected": -2.267853260040283, "logps/chosen": -490.59130859375, "logps/rejected": -388.76690673828125, "loss": 0.1348, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.7297108769416809, "rewards/margins": 1.9148566722869873, "rewards/rejected": -2.6445674896240234, "step": 220 }, { "epoch": 0.57, "learning_rate": 2.3490537564442845e-07, "logits/chosen": -2.267577648162842, "logits/rejected": -2.186471462249756, "logps/chosen": -497.12335205078125, "logps/rejected": -387.3985900878906, "loss": 0.1239, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.051777958869934, "rewards/margins": 1.7568168640136719, "rewards/rejected": -2.8085949420928955, "step": 230 }, { "epoch": 0.59, "learning_rate": 2.1345078256378801e-07, "logits/chosen": -2.311931610107422, "logits/rejected": -2.2356672286987305, "logps/chosen": -485.4923400878906, "logps/rejected": -404.1107177734375, "loss": 0.1251, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7667996287345886, "rewards/margins": 2.0932888984680176, "rewards/rejected": -2.860088586807251, "step": 240 }, { "epoch": 0.62, "learning_rate": 1.9226827501969865e-07, "logits/chosen": -2.326636552810669, "logits/rejected": -2.2703440189361572, "logps/chosen": -509.9071350097656, "logps/rejected": -414.39874267578125, "loss": 0.1196, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6723321676254272, "rewards/margins": 2.289646625518799, "rewards/rejected": -2.9619784355163574, "step": 250 }, { "epoch": 0.64, "learning_rate": 1.715155432264775e-07, "logits/chosen": -2.3231379985809326, "logits/rejected": -2.2714104652404785, "logps/chosen": -511.625732421875, "logps/rejected": -397.1949157714844, "loss": 0.104, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.7959054708480835, "rewards/margins": 2.012528896331787, "rewards/rejected": -2.80843448638916, "step": 260 }, { "epoch": 0.67, "learning_rate": 1.51347077992983e-07, "logits/chosen": -2.3125240802764893, "logits/rejected": -2.269193410873413, "logps/chosen": -496.95001220703125, "logps/rejected": -402.70782470703125, "loss": 0.0976, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9325113296508789, "rewards/margins": 1.8711084127426147, "rewards/rejected": -2.803619861602783, "step": 270 }, { "epoch": 0.69, "learning_rate": 1.3191302063739906e-07, "logits/chosen": -2.282691717147827, "logits/rejected": -2.233623743057251, "logps/chosen": -469.02630615234375, "logps/rejected": -393.0565185546875, "loss": 0.0978, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.8983050584793091, "rewards/margins": 1.9013340473175049, "rewards/rejected": -2.7996389865875244, "step": 280 }, { "epoch": 0.72, "learning_rate": 1.1335804528119475e-07, "logits/chosen": -2.3497660160064697, "logits/rejected": -2.2719693183898926, "logps/chosen": -515.9114990234375, "logps/rejected": -393.408935546875, "loss": 0.1062, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8015244603157043, "rewards/margins": 2.1254332065582275, "rewards/rejected": -2.926957607269287, "step": 290 }, { "epoch": 0.74, "learning_rate": 9.582028184286423e-08, "logits/chosen": -2.2509076595306396, "logits/rejected": -2.2074227333068848, "logps/chosen": -466.32305908203125, "logps/rejected": -419.3622131347656, "loss": 0.1048, "rewards/accuracies": 0.75, "rewards/chosen": -1.0646774768829346, "rewards/margins": 1.9130761623382568, "rewards/rejected": -2.9777536392211914, "step": 300 }, { "epoch": 0.76, "learning_rate": 7.943028774907065e-08, "logits/chosen": -2.250230312347412, "logits/rejected": -2.195244789123535, "logps/chosen": -476.79998779296875, "logps/rejected": -408.64068603515625, "loss": 0.1044, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.923287034034729, "rewards/margins": 1.9779703617095947, "rewards/rejected": -2.901257276535034, "step": 310 }, { "epoch": 0.79, "learning_rate": 6.431007601814637e-08, "logits/chosen": -2.2714295387268066, "logits/rejected": -2.2263479232788086, "logps/chosen": -431.47601318359375, "logps/rejected": -397.78521728515625, "loss": 0.0946, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0377047061920166, "rewards/margins": 1.8672853708267212, "rewards/rejected": -2.9049899578094482, "step": 320 }, { "epoch": 0.81, "learning_rate": 5.0572206951246e-08, "logits/chosen": -2.243610382080078, "logits/rejected": -2.1739163398742676, "logps/chosen": -469.3233337402344, "logps/rejected": -400.6234130859375, "loss": 0.1024, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0177555084228516, "rewards/margins": 1.9254471063613892, "rewards/rejected": -2.943202495574951, "step": 330 }, { "epoch": 0.84, "learning_rate": 3.831895019292897e-08, "logits/chosen": -2.302597761154175, "logits/rejected": -2.245525360107422, "logps/chosen": -527.4336547851562, "logps/rejected": -453.01116943359375, "loss": 0.1089, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9302037954330444, "rewards/margins": 2.4225857257843018, "rewards/rejected": -3.3527894020080566, "step": 340 }, { "epoch": 0.86, "learning_rate": 2.764152339909756e-08, "logits/chosen": -2.2584733963012695, "logits/rejected": -2.1962692737579346, "logps/chosen": -499.47576904296875, "logps/rejected": -363.5857238769531, "loss": 0.1036, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7420647144317627, "rewards/margins": 1.9064794778823853, "rewards/rejected": -2.6485438346862793, "step": 350 }, { "epoch": 0.89, "learning_rate": 1.861941317991664e-08, "logits/chosen": -2.300192356109619, "logits/rejected": -2.2127695083618164, "logps/chosen": -531.5907592773438, "logps/rejected": -412.7438049316406, "loss": 0.1013, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.7529584765434265, "rewards/margins": 2.228832960128784, "rewards/rejected": -2.9817919731140137, "step": 360 }, { "epoch": 0.91, "learning_rate": 1.13197833728636e-08, "logits/chosen": -2.25828218460083, "logits/rejected": -2.198098659515381, "logps/chosen": -485.60052490234375, "logps/rejected": -423.1136779785156, "loss": 0.095, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.8292403221130371, "rewards/margins": 2.2823524475097656, "rewards/rejected": -3.1115927696228027, "step": 370 }, { "epoch": 0.94, "learning_rate": 5.79697505093521e-09, "logits/chosen": -2.2520506381988525, "logits/rejected": -2.1905932426452637, "logps/chosen": -501.0511169433594, "logps/rejected": -402.864501953125, "loss": 0.1084, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9875413775444031, "rewards/margins": 1.987672209739685, "rewards/rejected": -2.9752135276794434, "step": 380 }, { "epoch": 0.96, "learning_rate": 2.092101988131256e-09, "logits/chosen": -2.2959237098693848, "logits/rejected": -2.1943726539611816, "logps/chosen": -542.3916015625, "logps/rejected": -426.2535095214844, "loss": 0.092, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.7925306558609009, "rewards/margins": 2.3787803649902344, "rewards/rejected": -3.1713109016418457, "step": 390 }, { "epoch": 0.99, "learning_rate": 2.327445937151673e-10, "logits/chosen": -2.2838008403778076, "logits/rejected": -2.2221412658691406, "logps/chosen": -538.8756103515625, "logps/rejected": -445.8970642089844, "loss": 0.0998, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9025734066963196, "rewards/margins": 2.2714860439300537, "rewards/rejected": -3.1740598678588867, "step": 400 }, { "epoch": 1.0, "step": 405, "total_flos": 0.0, "train_loss": 0.1562451661368947, "train_runtime": 3209.2418, "train_samples_per_second": 16.17, "train_steps_per_second": 0.126 } ], "logging_steps": 10, "max_steps": 405, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "trial_name": null, "trial_params": null }