{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4e-08, "logits/chosen": -1.9152824878692627, "logits/rejected": -1.9152824878692627, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3334, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -2.035064220428467, "logits/rejected": -2.035064220428467, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3197, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 10 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -2.0738348960876465, "logits/rejected": -2.0738348960876465, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.9596742391586304, "logits/rejected": -1.9596742391586304, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3334, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.9157626628875732, "logits/rejected": -1.9157626628875732, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 40 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.9507601261138916, "logits/rejected": -1.9507601261138916, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3285, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 50 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.9532804489135742, "logits/rejected": -1.9532804489135742, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 60 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.8357080221176147, "logits/rejected": -1.8357080221176147, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2717, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 70 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.8322147130966187, "logits/rejected": -1.8322147130966187, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3457, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 80 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.8425270318984985, "logits/rejected": -1.8425270318984985, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3902, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 90 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.8621313571929932, "logits/rejected": -1.8621313571929932, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3754, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 100 }, { "epoch": 0.08, "eval_logits/chosen": -1.9617642164230347, "eval_logits/rejected": -1.8066532611846924, "eval_logps/chosen": -266.6976013183594, "eval_logps/rejected": -254.9398193359375, "eval_loss": 0.053734518587589264, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 727.871, "eval_samples_per_second": 2.748, "eval_steps_per_second": 1.374, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.909868597984314, "logits/rejected": -1.909868597984314, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3754, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 110 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.7610801458358765, "logits/rejected": -1.7610801458358765, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3062, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 120 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -2.119793653488159, "logits/rejected": -2.119793653488159, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.326, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 130 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.986210584640503, "logits/rejected": -1.986210584640503, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3334, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 140 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -2.0813775062561035, "logits/rejected": -2.0813775062561035, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 150 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -2.0440070629119873, "logits/rejected": -2.0440070629119873, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 160 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.6871118545532227, "logits/rejected": -1.6871118545532227, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3235, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 170 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.906797170639038, "logits/rejected": -1.906797170639038, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 180 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.7077922821044922, "logits/rejected": -1.7077922821044922, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 190 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.7370048761367798, "logits/rejected": -1.7370048761367798, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 200 }, { "epoch": 0.16, "eval_logits/chosen": -1.9617642164230347, "eval_logits/rejected": -1.8066532611846924, "eval_logps/chosen": -266.6976013183594, "eval_logps/rejected": -254.9398193359375, "eval_loss": 0.053734518587589264, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 730.7556, "eval_samples_per_second": 2.737, "eval_steps_per_second": 1.368, "step": 200 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.7182317972183228, "logits/rejected": -1.7182317972183228, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 210 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.905134916305542, "logits/rejected": -1.905134916305542, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3457, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 220 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -2.071720600128174, "logits/rejected": -2.071720600128174, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 230 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -2.025371789932251, "logits/rejected": -2.025371789932251, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 240 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.8689197301864624, "logits/rejected": -1.8689197301864624, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.363, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 250 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.937535285949707, "logits/rejected": -1.937535285949707, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 260 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -2.058104991912842, "logits/rejected": -2.058104991912842, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3581, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 270 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.8560088872909546, "logits/rejected": -1.8560088872909546, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3902, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 280 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.8305753469467163, "logits/rejected": -1.8305753469467163, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.363, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 290 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.7557750940322876, "logits/rejected": -1.7557750940322876, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 300 }, { "epoch": 0.24, "eval_logits/chosen": -1.9617642164230347, "eval_logits/rejected": -1.8066532611846924, "eval_logps/chosen": -266.6976013183594, "eval_logps/rejected": -254.9398193359375, "eval_loss": 0.053734518587589264, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 730.4441, "eval_samples_per_second": 2.738, "eval_steps_per_second": 1.369, "step": 300 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -2.111258029937744, "logits/rejected": -2.111258029937744, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3655, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 310 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.8872848749160767, "logits/rejected": -1.8872848749160767, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3112, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 320 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.9492517709732056, "logits/rejected": -1.9492517709732056, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 330 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.8537830114364624, "logits/rejected": -1.8537830114364624, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3704, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 340 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -2.1256401538848877, "logits/rejected": -2.1256401538848877, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3285, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 350 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.9888694286346436, "logits/rejected": -1.9888694286346436, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 360 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -2.064541816711426, "logits/rejected": -2.064541816711426, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3136, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 370 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -2.1313929557800293, "logits/rejected": -2.1313929557800293, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 380 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -2.0341124534606934, "logits/rejected": -2.0341124534606934, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 390 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.7964853048324585, "logits/rejected": -1.7964853048324585, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 400 }, { "epoch": 0.32, "eval_logits/chosen": -1.9617642164230347, "eval_logits/rejected": -1.8066532611846924, "eval_logps/chosen": -266.6976013183594, "eval_logps/rejected": -254.9398193359375, "eval_loss": 0.053734518587589264, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 725.8132, "eval_samples_per_second": 2.756, "eval_steps_per_second": 1.378, "step": 400 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.9106441736221313, "logits/rejected": -1.9106441736221313, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3359, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 410 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.9656860828399658, "logits/rejected": -1.9656860828399658, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3729, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 420 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.9669355154037476, "logits/rejected": -1.9669355154037476, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3211, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 430 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.8838989734649658, "logits/rejected": -1.8838989734649658, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3976, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 440 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.890795111656189, "logits/rejected": -1.890795111656189, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3828, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 450 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.9264202117919922, "logits/rejected": -1.9264202117919922, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3531, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 460 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.8948800563812256, "logits/rejected": -1.8948800563812256, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3211, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 470 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.9177709817886353, "logits/rejected": -1.8989362716674805, "logps/chosen": -10.277307510375977, "logps/rejected": -4.643392562866211, "loss": 0.3284, "rewards/accuracies": 0.02500000037252903, "rewards/chosen": 9.182358189718798e-05, "rewards/margins": 2.3801421775715426e-05, "rewards/rejected": 6.802215648349375e-05, "step": 480 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.9777803421020508, "logits/rejected": -1.9777803421020508, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3359, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.8519232273101807, "logits/rejected": -1.8519232273101807, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 500 }, { "epoch": 0.4, "eval_logits/chosen": -1.9201807975769043, "eval_logits/rejected": -1.7627112865447998, "eval_logps/chosen": -300.9842834472656, "eval_logps/rejected": -281.2868957519531, "eval_loss": 0.058566804975271225, "eval_rewards/accuracies": 0.3125, "eval_rewards/chosen": -0.034286707639694214, "eval_rewards/margins": -0.00793963111937046, "eval_rewards/rejected": -0.026347076520323753, "eval_runtime": 729.5568, "eval_samples_per_second": 2.741, "eval_steps_per_second": 1.371, "step": 500 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.7138468027114868, "logits/rejected": -1.7138468027114868, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3211, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 510 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.802970290184021, "logits/rejected": -1.802970290184021, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2643, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 520 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.6650207042694092, "logits/rejected": -1.6650207042694092, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 530 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.8073577880859375, "logits/rejected": -1.8073577880859375, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3186, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 540 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.7098474502563477, "logits/rejected": -1.7098474502563477, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 550 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.8214161396026611, "logits/rejected": -1.8214161396026611, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 560 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.8090301752090454, "logits/rejected": -1.8090301752090454, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.4001, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 570 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.6492881774902344, "logits/rejected": -1.6492881774902344, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3704, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 580 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.7389822006225586, "logits/rejected": -1.7389822006225586, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3334, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 590 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.6927127838134766, "logits/rejected": -1.6927127838134766, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3408, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 600 }, { "epoch": 0.48, "eval_logits/chosen": -1.8917298316955566, "eval_logits/rejected": -1.7360574007034302, "eval_logps/chosen": -305.4412841796875, "eval_logps/rejected": -285.37774658203125, "eval_loss": 0.058740295469760895, "eval_rewards/accuracies": 0.31200000643730164, "eval_rewards/chosen": -0.038743674755096436, "eval_rewards/margins": -0.008305751718580723, "eval_rewards/rejected": -0.030437923967838287, "eval_runtime": 730.0666, "eval_samples_per_second": 2.739, "eval_steps_per_second": 1.37, "step": 600 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.760236382484436, "logits/rejected": -1.760236382484436, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3581, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 610 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.7868738174438477, "logits/rejected": -1.7868738174438477, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3112, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 620 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.7506424188613892, "logits/rejected": -1.7506424188613892, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 630 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.6889375448226929, "logits/rejected": -1.6889375448226929, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3334, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 640 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.7860199213027954, "logits/rejected": -1.7860199213027954, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 650 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.8066167831420898, "logits/rejected": -1.8066167831420898, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 660 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.998725175857544, "logits/rejected": -1.998725175857544, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2939, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 670 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.7736581563949585, "logits/rejected": -1.7736581563949585, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.4025, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 680 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.7235050201416016, "logits/rejected": -1.7235050201416016, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2865, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 690 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.740098237991333, "logits/rejected": -1.740098237991333, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3359, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 700 }, { "epoch": 0.56, "eval_logits/chosen": -1.8920072317123413, "eval_logits/rejected": -1.7363018989562988, "eval_logps/chosen": -305.3719787597656, "eval_logps/rejected": -285.3293762207031, "eval_loss": 0.05872537940740585, "eval_rewards/accuracies": 0.3095000088214874, "eval_rewards/chosen": -0.038674384355545044, "eval_rewards/margins": -0.008284823969006538, "eval_rewards/rejected": -0.030389558523893356, "eval_runtime": 729.5557, "eval_samples_per_second": 2.741, "eval_steps_per_second": 1.371, "step": 700 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.9098726511001587, "logits/rejected": -1.9098726511001587, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 710 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.7153629064559937, "logits/rejected": -1.7153629064559937, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3828, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 720 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.611203908920288, "logits/rejected": -1.611203908920288, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 730 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.7782968282699585, "logits/rejected": -1.7782968282699585, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3112, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 740 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.8602126836776733, "logits/rejected": -1.8602126836776733, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3877, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 750 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.7727556228637695, "logits/rejected": -1.7727556228637695, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3309, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 760 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.8260834217071533, "logits/rejected": -1.8260834217071533, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 770 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.7438442707061768, "logits/rejected": -1.7438442707061768, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2988, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 780 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.9674087762832642, "logits/rejected": -1.9656404256820679, "logps/chosen": -3.2285637855529785, "logps/rejected": -2.4141287803649902, "loss": 0.3516, "rewards/accuracies": 0.0, "rewards/chosen": -0.001752424635924399, "rewards/margins": -0.0005892428453080356, "rewards/rejected": -0.0011631817324087024, "step": 790 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.8979122638702393, "logits/rejected": -1.8979122638702393, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 800 }, { "epoch": 0.64, "eval_logits/chosen": -1.9784096479415894, "eval_logits/rejected": -1.8172101974487305, "eval_logps/chosen": -291.8357238769531, "eval_logps/rejected": -274.8357238769531, "eval_loss": 0.05687940865755081, "eval_rewards/accuracies": 0.3215000033378601, "eval_rewards/chosen": -0.02513808384537697, "eval_rewards/margins": -0.005242162849754095, "eval_rewards/rejected": -0.019895924255251884, "eval_runtime": 728.993, "eval_samples_per_second": 2.744, "eval_steps_per_second": 1.372, "step": 800 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.7325031757354736, "logits/rejected": -1.7325031757354736, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3803, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 810 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.9031715393066406, "logits/rejected": -1.9031715393066406, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 820 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.878941297531128, "logits/rejected": -1.8776565790176392, "logps/chosen": -4.952629089355469, "logps/rejected": -4.525970458984375, "loss": 0.3682, "rewards/accuracies": 0.0, "rewards/chosen": -0.000667310378048569, "rewards/margins": -0.0001743949978845194, "rewards/rejected": -0.0004929153947159648, "step": 830 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.8778276443481445, "logits/rejected": -1.8778276443481445, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 840 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.8605072498321533, "logits/rejected": -1.8605072498321533, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.289, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 850 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.521748423576355, "logits/rejected": -1.521748423576355, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3211, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 860 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.753326416015625, "logits/rejected": -1.7436802387237549, "logps/chosen": -11.578987121582031, "logps/rejected": -9.436164855957031, "loss": 0.3037, "rewards/accuracies": 0.02500000037252903, "rewards/chosen": -0.0006309890886768699, "rewards/margins": 4.580840322887525e-05, "rewards/rejected": -0.0006767975282855332, "step": 870 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.9987977743148804, "logits/rejected": -1.9987977743148804, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 880 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.9310076236724854, "logits/rejected": -1.9310076236724854, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3531, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 890 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.7657368183135986, "logits/rejected": -1.7657368183135986, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3926, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 900 }, { "epoch": 0.72, "eval_logits/chosen": -1.95650053024292, "eval_logits/rejected": -1.7981593608856201, "eval_logps/chosen": -291.2066650390625, "eval_logps/rejected": -277.3841552734375, "eval_loss": 0.05500446632504463, "eval_rewards/accuracies": 0.3840000033378601, "eval_rewards/chosen": -0.024509064853191376, "eval_rewards/margins": -0.002064692322164774, "eval_rewards/rejected": -0.02244437113404274, "eval_runtime": 726.2446, "eval_samples_per_second": 2.754, "eval_steps_per_second": 1.377, "step": 900 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.853788137435913, "logits/rejected": -1.853788137435913, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3926, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 910 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.7569034099578857, "logits/rejected": -1.7553638219833374, "logps/chosen": -3.4119091033935547, "logps/rejected": -3.395718812942505, "loss": 0.335, "rewards/accuracies": 0.02500000037252903, "rewards/chosen": -0.0012354147620499134, "rewards/margins": 0.0007618310628458858, "rewards/rejected": -0.001997245941311121, "step": 920 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.8945032358169556, "logits/rejected": -1.8945032358169556, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 930 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.8704814910888672, "logits/rejected": -1.8704814910888672, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3581, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 940 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.9085773229599, "logits/rejected": -1.9085773229599, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3457, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 950 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.8419487476348877, "logits/rejected": -1.8419487476348877, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 960 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.9136149883270264, "logits/rejected": -1.9136149883270264, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 970 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.899609923362732, "logits/rejected": -1.899609923362732, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 980 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.7761380672454834, "logits/rejected": -1.7761380672454834, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3062, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.7907581329345703, "logits/rejected": -1.7907581329345703, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3655, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1000 }, { "epoch": 0.8, "eval_logits/chosen": -1.948217511177063, "eval_logits/rejected": -1.7905058860778809, "eval_logps/chosen": -292.09368896484375, "eval_logps/rejected": -278.4593505859375, "eval_loss": 0.05491425469517708, "eval_rewards/accuracies": 0.38600000739097595, "eval_rewards/chosen": -0.025396063923835754, "eval_rewards/margins": -0.001876543858088553, "eval_rewards/rejected": -0.023519521579146385, "eval_runtime": 727.6638, "eval_samples_per_second": 2.749, "eval_steps_per_second": 1.374, "step": 1000 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.9497054815292358, "logits/rejected": -1.9497054815292358, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3408, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1010 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.6699402332305908, "logits/rejected": -1.6699402332305908, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3186, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1020 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.782236099243164, "logits/rejected": -1.782236099243164, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1030 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.8598238229751587, "logits/rejected": -1.8598238229751587, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3136, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1040 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -2.01692795753479, "logits/rejected": -2.01692795753479, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3383, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1050 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.8278007507324219, "logits/rejected": -1.8278007507324219, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.326, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1060 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -2.0067131519317627, "logits/rejected": -2.0067131519317627, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3408, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1070 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.8435783386230469, "logits/rejected": -1.8435783386230469, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3433, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1080 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.865335464477539, "logits/rejected": -1.865335464477539, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3803, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1090 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.829878807067871, "logits/rejected": -1.8288015127182007, "logps/chosen": -3.383352756500244, "logps/rejected": -2.7885169982910156, "loss": 0.3682, "rewards/accuracies": 0.0, "rewards/chosen": -0.0015257485210895538, "rewards/margins": -0.00039975493564270437, "rewards/rejected": -0.001125993556343019, "step": 1100 }, { "epoch": 0.88, "eval_logits/chosen": -1.9496779441833496, "eval_logits/rejected": -1.791935920715332, "eval_logps/chosen": -292.0441589355469, "eval_logps/rejected": -278.3316955566406, "eval_loss": 0.054946158081293106, "eval_rewards/accuracies": 0.38499999046325684, "eval_rewards/chosen": -0.025346575304865837, "eval_rewards/margins": -0.0019547012634575367, "eval_rewards/rejected": -0.02339187264442444, "eval_runtime": 727.7574, "eval_samples_per_second": 2.748, "eval_steps_per_second": 1.374, "step": 1100 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.9723230600357056, "logits/rejected": -1.9723230600357056, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3828, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1110 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.8381221294403076, "logits/rejected": -1.8381221294403076, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3877, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1120 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.6961113214492798, "logits/rejected": -1.6961113214492798, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.363, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1130 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.796628713607788, "logits/rejected": -1.796628713607788, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3778, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1140 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.8420875072479248, "logits/rejected": -1.8420875072479248, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1150 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.8557026386260986, "logits/rejected": -1.8557026386260986, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1160 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.9385162591934204, "logits/rejected": -1.9385162591934204, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3655, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1170 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.9021384716033936, "logits/rejected": -1.9021384716033936, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.363, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1180 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.8466148376464844, "logits/rejected": -1.8466148376464844, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3087, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1190 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.7545801401138306, "logits/rejected": -1.7545801401138306, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3531, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1200 }, { "epoch": 0.96, "eval_logits/chosen": -1.949344515800476, "eval_logits/rejected": -1.7915234565734863, "eval_logps/chosen": -291.8377990722656, "eval_logps/rejected": -278.0787048339844, "eval_loss": 0.0550062395632267, "eval_rewards/accuracies": 0.39100000262260437, "eval_rewards/chosen": -0.02514021284878254, "eval_rewards/margins": -0.0020012950990349054, "eval_rewards/rejected": -0.02313891611993313, "eval_runtime": 729.194, "eval_samples_per_second": 2.743, "eval_steps_per_second": 1.371, "step": 1200 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.6881771087646484, "logits/rejected": -1.6881771087646484, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3087, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1210 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.9288533926010132, "logits/rejected": -1.9288533926010132, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1220 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.8713302612304688, "logits/rejected": -1.8713302612304688, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.3606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1230 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.8912433385849, "logits/rejected": -1.8912433385849, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.2791, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1240 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.8090438842773438, "logits/rejected": -1.800258994102478, "logps/chosen": -5.058285713195801, "logps/rejected": -3.845151901245117, "loss": 0.3094, "rewards/accuracies": 0.0, "rewards/chosen": -0.002232617698609829, "rewards/margins": -0.0004155985952820629, "rewards/rejected": -0.0018170190742239356, "step": 1250 }, { "epoch": 1.0, "step": 1250, "total_flos": 0.0, "train_loss": 0.3462253446340561, "train_runtime": 13267.124, "train_samples_per_second": 0.377, "train_steps_per_second": 0.094 } ], "logging_steps": 10, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }