{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1427, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 7.579595964430724, "learning_rate": 3.4965034965034967e-08, "log_odds_chosen": 0.4541015625, "log_odds_ratio": -0.677050769329071, "logits/chosen": -2.015625, "logits/rejected": -2.015625, "logps/chosen": -1.8671875, "logps/rejected": -2.265625, "loss": 1.5655, "nll_loss": 1.5, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.1865234375, "rewards/margins": 0.0400390625, "rewards/rejected": -0.2265625, "step": 10 }, { "epoch": 0.01, "grad_norm": 7.5249840340758425, "learning_rate": 6.993006993006993e-08, "log_odds_chosen": 0.22894287109375, "log_odds_ratio": -0.7715820074081421, "logits/chosen": -2.03125, "logits/rejected": -2.0625, "logps/chosen": -1.9921875, "logps/rejected": -2.1875, "loss": 1.5773, "nll_loss": 1.4921875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.19921875, "rewards/margins": 0.01953125, "rewards/rejected": -0.21875, "step": 20 }, { "epoch": 0.02, "grad_norm": 5.9119698758492865, "learning_rate": 1.0489510489510489e-07, "log_odds_chosen": 0.28911131620407104, "log_odds_ratio": -0.8036133050918579, "logits/chosen": -2.015625, "logits/rejected": -2.046875, "logps/chosen": -2.109375, "logps/rejected": -2.34375, "loss": 1.5587, "nll_loss": 1.515625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2109375, "rewards/margins": 0.0242919921875, "rewards/rejected": -0.2353515625, "step": 30 }, { "epoch": 0.03, "grad_norm": 6.999607089742296, "learning_rate": 1.3986013986013987e-07, "log_odds_chosen": 0.25310057401657104, "log_odds_ratio": -0.762402355670929, "logits/chosen": -2.03125, "logits/rejected": -2.015625, "logps/chosen": -1.921875, "logps/rejected": -2.125, "loss": 1.5862, "nll_loss": 1.4765625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1923828125, "rewards/margins": 0.0208740234375, "rewards/rejected": -0.212890625, "step": 40 }, { "epoch": 0.04, "grad_norm": 5.309681737493818, "learning_rate": 1.7482517482517481e-07, "log_odds_chosen": 0.25770264863967896, "log_odds_ratio": -0.697558581829071, "logits/chosen": -2.109375, "logits/rejected": -2.0625, "logps/chosen": -1.921875, "logps/rejected": -2.140625, "loss": 1.5736, "nll_loss": 1.546875, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1923828125, "rewards/margins": 0.0218505859375, "rewards/rejected": -0.2138671875, "step": 50 }, { "epoch": 0.04, "grad_norm": 5.569709247368732, "learning_rate": 2.0979020979020979e-07, "log_odds_chosen": 0.311614990234375, "log_odds_ratio": -0.695605456829071, "logits/chosen": -2.109375, "logits/rejected": -2.15625, "logps/chosen": -1.96875, "logps/rejected": -2.25, "loss": 1.5668, "nll_loss": 1.484375, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.197265625, "rewards/margins": 0.02734375, "rewards/rejected": -0.224609375, "step": 60 }, { "epoch": 0.05, "grad_norm": 5.773237000535201, "learning_rate": 2.4475524475524473e-07, "log_odds_chosen": 0.14488525688648224, "log_odds_ratio": -0.786425769329071, "logits/chosen": -2.171875, "logits/rejected": -2.125, "logps/chosen": -2.0, "logps/rejected": -2.125, "loss": 1.5723, "nll_loss": 1.5234375, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.2001953125, "rewards/margins": 0.01287841796875, "rewards/rejected": -0.212890625, "step": 70 }, { "epoch": 0.06, "grad_norm": 5.5756538810115925, "learning_rate": 2.7972027972027973e-07, "log_odds_chosen": 0.2967529296875, "log_odds_ratio": -0.7289062738418579, "logits/chosen": -2.109375, "logits/rejected": -2.140625, "logps/chosen": -1.8125, "logps/rejected": -2.046875, "loss": 1.4668, "nll_loss": 1.3515625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.181640625, "rewards/margins": 0.023681640625, "rewards/rejected": -0.205078125, "step": 80 }, { "epoch": 0.06, "grad_norm": 4.2269586617524535, "learning_rate": 3.146853146853147e-07, "log_odds_chosen": 0.2103271484375, "log_odds_ratio": -0.73095703125, "logits/chosen": -2.171875, "logits/rejected": -2.140625, "logps/chosen": -1.8515625, "logps/rejected": -2.03125, "loss": 1.5357, "nll_loss": 1.4921875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.185546875, "rewards/margins": 0.0186767578125, "rewards/rejected": -0.2041015625, "step": 90 }, { "epoch": 0.07, "grad_norm": 7.493002974943704, "learning_rate": 3.4965034965034963e-07, "log_odds_chosen": 0.14136353135108948, "log_odds_ratio": -0.710644543170929, "logits/chosen": -2.171875, "logits/rejected": -2.1875, "logps/chosen": -1.921875, "logps/rejected": -2.03125, "loss": 1.5193, "nll_loss": 1.4375, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.19140625, "rewards/margins": 0.010498046875, "rewards/rejected": -0.2021484375, "step": 100 }, { "epoch": 0.08, "grad_norm": 6.173840413359453, "learning_rate": 3.8461538461538463e-07, "log_odds_chosen": 0.18465575575828552, "log_odds_ratio": -0.7144531011581421, "logits/chosen": -2.265625, "logits/rejected": -2.296875, "logps/chosen": -1.734375, "logps/rejected": -1.890625, "loss": 1.5043, "nll_loss": 1.421875, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.173828125, "rewards/margins": 0.01556396484375, "rewards/rejected": -0.189453125, "step": 110 }, { "epoch": 0.08, "grad_norm": 3.9756802927191024, "learning_rate": 4.1958041958041957e-07, "log_odds_chosen": 0.22193603217601776, "log_odds_ratio": -0.7044922113418579, "logits/chosen": -2.296875, "logits/rejected": -2.359375, "logps/chosen": -1.4921875, "logps/rejected": -1.703125, "loss": 1.4443, "nll_loss": 1.3125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1494140625, "rewards/margins": 0.0208740234375, "rewards/rejected": -0.169921875, "step": 120 }, { "epoch": 0.09, "grad_norm": 3.49115617326949, "learning_rate": 4.545454545454545e-07, "log_odds_chosen": 0.07476196438074112, "log_odds_ratio": -0.741992175579071, "logits/chosen": -2.28125, "logits/rejected": -2.359375, "logps/chosen": -1.5703125, "logps/rejected": -1.640625, "loss": 1.4535, "nll_loss": 1.40625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1572265625, "rewards/margins": 0.006622314453125, "rewards/rejected": -0.1640625, "step": 130 }, { "epoch": 0.1, "grad_norm": 3.2300418693285047, "learning_rate": 4.895104895104895e-07, "log_odds_chosen": 0.23361817002296448, "log_odds_ratio": -0.694628894329071, "logits/chosen": -2.265625, "logits/rejected": -2.375, "logps/chosen": -1.5, "logps/rejected": -1.703125, "loss": 1.4352, "nll_loss": 1.3828125, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.150390625, "rewards/margins": 0.019775390625, "rewards/rejected": -0.169921875, "step": 140 }, { "epoch": 0.11, "grad_norm": 3.733581414979026, "learning_rate": 4.999633338614865e-07, "log_odds_chosen": 0.0267333984375, "log_odds_ratio": -0.739453136920929, "logits/chosen": -2.296875, "logits/rejected": -2.375, "logps/chosen": -1.5234375, "logps/rejected": -1.546875, "loss": 1.4385, "nll_loss": 1.3671875, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.15234375, "rewards/margins": 0.0021209716796875, "rewards/rejected": -0.154296875, "step": 150 }, { "epoch": 0.11, "grad_norm": 4.063546439288504, "learning_rate": 4.997837705025723e-07, "log_odds_chosen": 0.14241942763328552, "log_odds_ratio": -0.7054687738418579, "logits/chosen": -2.203125, "logits/rejected": -2.3125, "logps/chosen": -1.3828125, "logps/rejected": -1.4921875, "loss": 1.3762, "nll_loss": 1.2734375, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.138671875, "rewards/margins": 0.0108642578125, "rewards/rejected": -0.1494140625, "step": 160 }, { "epoch": 0.12, "grad_norm": 2.8382728420037524, "learning_rate": 4.994546826814266e-07, "log_odds_chosen": 0.19875487685203552, "log_odds_ratio": -0.658203125, "logits/chosen": -2.375, "logits/rejected": -2.46875, "logps/chosen": -1.3359375, "logps/rejected": -1.484375, "loss": 1.3645, "nll_loss": 1.3046875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1337890625, "rewards/margins": 0.0146484375, "rewards/rejected": -0.1484375, "step": 170 }, { "epoch": 0.13, "grad_norm": 2.5209932782786, "learning_rate": 4.989762673951533e-07, "log_odds_chosen": 0.16008301079273224, "log_odds_ratio": -0.6830078363418579, "logits/chosen": -2.359375, "logits/rejected": -2.515625, "logps/chosen": -1.2890625, "logps/rejected": -1.40625, "loss": 1.3044, "nll_loss": 1.2265625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.12890625, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.140625, "step": 180 }, { "epoch": 0.13, "grad_norm": 2.7624176664717623, "learning_rate": 4.983488110306074e-07, "log_odds_chosen": 0.08121337741613388, "log_odds_ratio": -0.7232421636581421, "logits/chosen": -2.375, "logits/rejected": -2.4375, "logps/chosen": -1.34375, "logps/rejected": -1.3984375, "loss": 1.3099, "nll_loss": 1.265625, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.1337890625, "rewards/margins": 0.005584716796875, "rewards/rejected": -0.1396484375, "step": 190 }, { "epoch": 0.14, "grad_norm": 2.5951453466247916, "learning_rate": 4.975726891929584e-07, "log_odds_chosen": 0.22237548232078552, "log_odds_ratio": -0.66455078125, "logits/chosen": -2.359375, "logits/rejected": -2.46875, "logps/chosen": -1.2421875, "logps/rejected": -1.40625, "loss": 1.3293, "nll_loss": 1.1953125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1240234375, "rewards/margins": 0.0169677734375, "rewards/rejected": -0.140625, "step": 200 }, { "epoch": 0.15, "grad_norm": 2.745043363887144, "learning_rate": 4.966483664808476e-07, "log_odds_chosen": 0.13164062798023224, "log_odds_ratio": -0.7242187261581421, "logits/chosen": -2.21875, "logits/rejected": -2.21875, "logps/chosen": -1.21875, "logps/rejected": -1.3203125, "loss": 1.3097, "nll_loss": 1.1875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.12158203125, "rewards/margins": 0.010498046875, "rewards/rejected": -0.1318359375, "step": 210 }, { "epoch": 0.15, "grad_norm": 2.544921558378996, "learning_rate": 4.955763962082722e-07, "log_odds_chosen": 0.2619995176792145, "log_odds_ratio": -0.673046886920929, "logits/chosen": -2.4375, "logits/rejected": -2.453125, "logps/chosen": -1.265625, "logps/rejected": -1.46875, "loss": 1.299, "nll_loss": 1.2734375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.126953125, "rewards/margins": 0.0205078125, "rewards/rejected": -0.1474609375, "step": 220 }, { "epoch": 0.16, "grad_norm": 2.7422926363251467, "learning_rate": 4.943574200733625e-07, "log_odds_chosen": 0.197174072265625, "log_odds_ratio": -0.6680663824081421, "logits/chosen": -2.296875, "logits/rejected": -2.390625, "logps/chosen": -1.234375, "logps/rejected": -1.375, "loss": 1.3089, "nll_loss": 1.234375, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.123046875, "rewards/margins": 0.0142822265625, "rewards/rejected": -0.1376953125, "step": 230 }, { "epoch": 0.17, "grad_norm": 2.5904882528928845, "learning_rate": 4.929921677742516e-07, "log_odds_chosen": 0.24537964165210724, "log_odds_ratio": -0.657031238079071, "logits/chosen": -2.328125, "logits/rejected": -2.421875, "logps/chosen": -1.2734375, "logps/rejected": -1.4453125, "loss": 1.3289, "nll_loss": 1.2421875, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1279296875, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.14453125, "step": 240 }, { "epoch": 0.18, "grad_norm": 2.57374464764083, "learning_rate": 4.91481456572267e-07, "log_odds_chosen": 0.1439208984375, "log_odds_ratio": -0.7132812738418579, "logits/chosen": -2.296875, "logits/rejected": -2.328125, "logps/chosen": -1.21875, "logps/rejected": -1.3203125, "loss": 1.2807, "nll_loss": 1.2109375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1220703125, "rewards/margins": 0.01019287109375, "rewards/rejected": -0.1318359375, "step": 250 }, { "epoch": 0.18, "grad_norm": 2.5866184628438873, "learning_rate": 4.898261908027049e-07, "log_odds_chosen": 0.20936889946460724, "log_odds_ratio": -0.6578124761581421, "logits/chosen": -2.28125, "logits/rejected": -2.359375, "logps/chosen": -1.2421875, "logps/rejected": -1.40625, "loss": 1.2753, "nll_loss": 1.21875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1240234375, "rewards/margins": 0.01611328125, "rewards/rejected": -0.140625, "step": 260 }, { "epoch": 0.19, "grad_norm": 3.7405175636819643, "learning_rate": 4.880273613334809e-07, "log_odds_chosen": 0.22337035834789276, "log_odds_ratio": -0.7001953125, "logits/chosen": -2.328125, "logits/rejected": -2.4375, "logps/chosen": -1.2265625, "logps/rejected": -1.40625, "loss": 1.2719, "nll_loss": 1.203125, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.123046875, "rewards/margins": 0.0179443359375, "rewards/rejected": -0.140625, "step": 270 }, { "epoch": 0.2, "grad_norm": 2.7574742318554923, "learning_rate": 4.86086044971981e-07, "log_odds_chosen": 0.15998534858226776, "log_odds_ratio": -0.677539050579071, "logits/chosen": -2.25, "logits/rejected": -2.328125, "logps/chosen": -1.2578125, "logps/rejected": -1.3671875, "loss": 1.3146, "nll_loss": 1.28125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1259765625, "rewards/margins": 0.0103759765625, "rewards/rejected": -0.13671875, "step": 280 }, { "epoch": 0.2, "grad_norm": 2.3017737036995194, "learning_rate": 4.840034038204686e-07, "log_odds_chosen": 0.244415283203125, "log_odds_ratio": -0.649121105670929, "logits/chosen": -2.328125, "logits/rejected": -2.4375, "logps/chosen": -1.1171875, "logps/rejected": -1.2734375, "loss": 1.2796, "nll_loss": 1.1640625, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.111328125, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.1279296875, "step": 290 }, { "epoch": 0.21, "grad_norm": 2.4440053287886823, "learning_rate": 4.817806845804308e-07, "log_odds_chosen": 0.03922118991613388, "log_odds_ratio": -0.7237304449081421, "logits/chosen": -2.28125, "logits/rejected": -2.296875, "logps/chosen": -1.25, "logps/rejected": -1.2734375, "loss": 1.3285, "nll_loss": 1.25, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.125, "rewards/margins": 0.002655029296875, "rewards/rejected": -0.1279296875, "step": 300 }, { "epoch": 0.22, "grad_norm": 2.404175576750603, "learning_rate": 4.794192178062845e-07, "log_odds_chosen": 0.07508544623851776, "log_odds_ratio": -0.7403320074081421, "logits/chosen": -2.265625, "logits/rejected": -2.265625, "logps/chosen": -1.2265625, "logps/rejected": -1.2890625, "loss": 1.2926, "nll_loss": 1.2265625, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.123046875, "rewards/margins": 0.006195068359375, "rewards/rejected": -0.12890625, "step": 310 }, { "epoch": 0.22, "grad_norm": 2.4237084015053982, "learning_rate": 4.769204171088849e-07, "log_odds_chosen": 0.21917724609375, "log_odds_ratio": -0.671875, "logits/chosen": -2.359375, "logits/rejected": -2.4375, "logps/chosen": -1.203125, "logps/rejected": -1.34375, "loss": 1.3217, "nll_loss": 1.2578125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1201171875, "rewards/margins": 0.0147705078125, "rewards/rejected": -0.134765625, "step": 320 }, { "epoch": 0.23, "grad_norm": 2.684798684422195, "learning_rate": 4.742857783093166e-07, "log_odds_chosen": 0.278564453125, "log_odds_ratio": -0.6395508050918579, "logits/chosen": -2.203125, "logits/rejected": -2.328125, "logps/chosen": -1.140625, "logps/rejected": -1.3359375, "loss": 1.2549, "nll_loss": 1.1640625, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.11376953125, "rewards/margins": 0.0201416015625, "rewards/rejected": -0.1337890625, "step": 330 }, { "epoch": 0.24, "grad_norm": 3.6492535837021225, "learning_rate": 4.7151687854347157e-07, "log_odds_chosen": 0.1578369140625, "log_odds_ratio": -0.69482421875, "logits/chosen": -2.25, "logits/rejected": -2.34375, "logps/chosen": -1.1640625, "logps/rejected": -1.2734375, "loss": 1.2529, "nll_loss": 1.1640625, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11669921875, "rewards/margins": 0.0106201171875, "rewards/rejected": -0.126953125, "step": 340 }, { "epoch": 0.25, "grad_norm": 2.8702543670932075, "learning_rate": 4.6861537531795094e-07, "log_odds_chosen": 0.20824584364891052, "log_odds_ratio": -0.66015625, "logits/chosen": -2.265625, "logits/rejected": -2.359375, "logps/chosen": -1.21875, "logps/rejected": -1.3828125, "loss": 1.2876, "nll_loss": 1.25, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1220703125, "rewards/margins": 0.016357421875, "rewards/rejected": -0.138671875, "step": 350 }, { "epoch": 0.25, "grad_norm": 2.847118271789134, "learning_rate": 4.655830055178557e-07, "log_odds_chosen": 0.23935547471046448, "log_odds_ratio": -0.6680663824081421, "logits/chosen": -2.1875, "logits/rejected": -2.328125, "logps/chosen": -1.1328125, "logps/rejected": -1.3046875, "loss": 1.2766, "nll_loss": 1.171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11328125, "rewards/margins": 0.016845703125, "rewards/rejected": -0.130859375, "step": 360 }, { "epoch": 0.26, "grad_norm": 2.510359096280682, "learning_rate": 4.6242158436706015e-07, "log_odds_chosen": 0.16656494140625, "log_odds_ratio": -0.706347644329071, "logits/chosen": -2.296875, "logits/rejected": -2.359375, "logps/chosen": -1.1875, "logps/rejected": -1.3046875, "loss": 1.2963, "nll_loss": 1.2265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.119140625, "rewards/margins": 0.011474609375, "rewards/rejected": -0.130859375, "step": 370 }, { "epoch": 0.27, "grad_norm": 2.2404018809724606, "learning_rate": 4.5913300434159087e-07, "log_odds_chosen": 0.2242431640625, "log_odds_ratio": -0.6768554449081421, "logits/chosen": -2.28125, "logits/rejected": -2.359375, "logps/chosen": -1.21875, "logps/rejected": -1.390625, "loss": 1.293, "nll_loss": 1.21875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1220703125, "rewards/margins": 0.016845703125, "rewards/rejected": -0.138671875, "step": 380 }, { "epoch": 0.27, "grad_norm": 2.3810451499627434, "learning_rate": 4.5571923403676047e-07, "log_odds_chosen": 0.30363768339157104, "log_odds_ratio": -0.6507812738418579, "logits/chosen": -2.1875, "logits/rejected": -2.265625, "logps/chosen": -1.1328125, "logps/rejected": -1.3359375, "loss": 1.2838, "nll_loss": 1.140625, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11328125, "rewards/margins": 0.0201416015625, "rewards/rejected": -0.1328125, "step": 390 }, { "epoch": 0.28, "grad_norm": 2.671734360059129, "learning_rate": 4.5218231698873627e-07, "log_odds_chosen": 0.0892333984375, "log_odds_ratio": -0.735058605670929, "logits/chosen": -2.296875, "logits/rejected": -2.3125, "logps/chosen": -1.234375, "logps/rejected": -1.3046875, "loss": 1.3283, "nll_loss": 1.296875, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.12353515625, "rewards/margins": 0.0067138671875, "rewards/rejected": -0.1298828125, "step": 400 }, { "epoch": 0.29, "grad_norm": 2.5118185799490043, "learning_rate": 4.4852437045124735e-07, "log_odds_chosen": 0.08201904594898224, "log_odds_ratio": -0.7183593511581421, "logits/chosen": -2.1875, "logits/rejected": -2.3125, "logps/chosen": -1.21875, "logps/rejected": -1.265625, "loss": 1.2777, "nll_loss": 1.25, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.12158203125, "rewards/margins": 0.0047607421875, "rewards/rejected": -0.1259765625, "step": 410 }, { "epoch": 0.29, "grad_norm": 2.5063550386397604, "learning_rate": 4.447475841281635e-07, "log_odds_chosen": 0.2621704041957855, "log_odds_ratio": -0.6456054449081421, "logits/chosen": -2.203125, "logits/rejected": -2.296875, "logps/chosen": -1.1796875, "logps/rejected": -1.3515625, "loss": 1.256, "nll_loss": 1.25, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11767578125, "rewards/margins": 0.01708984375, "rewards/rejected": -0.134765625, "step": 420 }, { "epoch": 0.3, "grad_norm": 2.3631407616001243, "learning_rate": 4.408542188627044e-07, "log_odds_chosen": 0.12116088718175888, "log_odds_ratio": -0.7083984613418579, "logits/chosen": -2.3125, "logits/rejected": -2.328125, "logps/chosen": -1.1796875, "logps/rejected": -1.265625, "loss": 1.2792, "nll_loss": 1.1953125, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.1181640625, "rewards/margins": 0.00860595703125, "rewards/rejected": -0.126953125, "step": 430 }, { "epoch": 0.31, "grad_norm": 2.6616497499686393, "learning_rate": 4.368466052840636e-07, "log_odds_chosen": 0.17690429091453552, "log_odds_ratio": -0.6680663824081421, "logits/chosen": -2.1875, "logits/rejected": -2.3125, "logps/chosen": -1.171875, "logps/rejected": -1.296875, "loss": 1.2943, "nll_loss": 1.2109375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.11669921875, "rewards/margins": 0.01300048828125, "rewards/rejected": -0.1298828125, "step": 440 }, { "epoch": 0.32, "grad_norm": 2.5349037092042406, "learning_rate": 4.327271424122573e-07, "log_odds_chosen": 0.2679077088832855, "log_odds_ratio": -0.6576172113418579, "logits/chosen": -2.1875, "logits/rejected": -2.28125, "logps/chosen": -1.1328125, "logps/rejected": -1.3359375, "loss": 1.2823, "nll_loss": 1.1796875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11328125, "rewards/margins": 0.019775390625, "rewards/rejected": -0.1328125, "step": 450 }, { "epoch": 0.32, "grad_norm": 2.485655487242489, "learning_rate": 4.284982962220337e-07, "log_odds_chosen": 0.212890625, "log_odds_ratio": -0.680371105670929, "logits/chosen": -2.15625, "logits/rejected": -2.203125, "logps/chosen": -1.15625, "logps/rejected": -1.296875, "loss": 1.2641, "nll_loss": 1.171875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.115234375, "rewards/margins": 0.0140380859375, "rewards/rejected": -0.12890625, "step": 460 }, { "epoch": 0.33, "grad_norm": 2.569720946707249, "learning_rate": 4.241625981667023e-07, "log_odds_chosen": 0.20276489853858948, "log_odds_ratio": -0.681933581829071, "logits/chosen": -2.25, "logits/rejected": -2.28125, "logps/chosen": -1.1875, "logps/rejected": -1.3125, "loss": 1.2584, "nll_loss": 1.21875, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11865234375, "rewards/margins": 0.01312255859375, "rewards/rejected": -0.1318359375, "step": 470 }, { "epoch": 0.34, "grad_norm": 2.6019962569531323, "learning_rate": 4.19722643662766e-07, "log_odds_chosen": 0.2719970643520355, "log_odds_ratio": -0.64111328125, "logits/chosen": -2.203125, "logits/rejected": -2.28125, "logps/chosen": -1.1328125, "logps/rejected": -1.328125, "loss": 1.2457, "nll_loss": 1.15625, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11279296875, "rewards/margins": 0.01953125, "rewards/rejected": -0.1328125, "step": 480 }, { "epoch": 0.34, "grad_norm": 2.378793105209263, "learning_rate": 4.151810905362643e-07, "log_odds_chosen": 0.15797118842601776, "log_odds_ratio": -0.710156261920929, "logits/chosen": -2.109375, "logits/rejected": -2.25, "logps/chosen": -1.1484375, "logps/rejected": -1.25, "loss": 1.2691, "nll_loss": 1.1640625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11474609375, "rewards/margins": 0.01019287109375, "rewards/rejected": -0.125, "step": 490 }, { "epoch": 0.35, "grad_norm": 2.461627110160161, "learning_rate": 4.105406574317578e-07, "log_odds_chosen": 0.16668701171875, "log_odds_ratio": -0.723925769329071, "logits/chosen": -2.09375, "logits/rejected": -2.171875, "logps/chosen": -1.2265625, "logps/rejected": -1.34375, "loss": 1.2486, "nll_loss": 1.25, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.1220703125, "rewards/margins": 0.01226806640625, "rewards/rejected": -0.134765625, "step": 500 }, { "epoch": 0.36, "grad_norm": 2.3991709623693485, "learning_rate": 4.0580412218490424e-07, "log_odds_chosen": 0.3194335997104645, "log_odds_ratio": -0.61669921875, "logits/chosen": -2.15625, "logits/rejected": -2.25, "logps/chosen": -1.125, "logps/rejected": -1.34375, "loss": 1.261, "nll_loss": 1.203125, "rewards/accuracies": 0.625, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0218505859375, "rewards/rejected": -0.134765625, "step": 510 }, { "epoch": 0.36, "grad_norm": 2.420136778893825, "learning_rate": 4.009743201596034e-07, "log_odds_chosen": 0.32293701171875, "log_odds_ratio": -0.66357421875, "logits/chosen": -2.203125, "logits/rejected": -2.296875, "logps/chosen": -1.140625, "logps/rejected": -1.3671875, "loss": 1.279, "nll_loss": 1.203125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0223388671875, "rewards/rejected": -0.13671875, "step": 520 }, { "epoch": 0.37, "grad_norm": 2.576899517162626, "learning_rate": 3.960541425507039e-07, "log_odds_chosen": 0.18942871689796448, "log_odds_ratio": -0.6888672113418579, "logits/chosen": -2.171875, "logits/rejected": -2.296875, "logps/chosen": -1.171875, "logps/rejected": -1.3203125, "loss": 1.3049, "nll_loss": 1.2265625, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.1171875, "rewards/margins": 0.01458740234375, "rewards/rejected": -0.1318359375, "step": 530 }, { "epoch": 0.38, "grad_norm": 2.7625972509957144, "learning_rate": 3.9104653465328853e-07, "log_odds_chosen": 0.09678955376148224, "log_odds_ratio": -0.721875011920929, "logits/chosen": -2.109375, "logits/rejected": -2.1875, "logps/chosen": -1.203125, "logps/rejected": -1.265625, "loss": 1.2931, "nll_loss": 1.234375, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1201171875, "rewards/margins": 0.006103515625, "rewards/rejected": -0.1259765625, "step": 540 }, { "epoch": 0.39, "grad_norm": 2.45849449078066, "learning_rate": 3.8595449409957516e-07, "log_odds_chosen": 0.20249633491039276, "log_odds_ratio": -0.668749988079071, "logits/chosen": -1.9921875, "logits/rejected": -2.109375, "logps/chosen": -1.1015625, "logps/rejected": -1.2421875, "loss": 1.2424, "nll_loss": 1.125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1103515625, "rewards/margins": 0.01397705078125, "rewards/rejected": -0.1240234375, "step": 550 }, { "epoch": 0.39, "grad_norm": 2.336077999068494, "learning_rate": 3.807810690644868e-07, "log_odds_chosen": 0.15464477241039276, "log_odds_ratio": -0.6904296875, "logits/chosen": -2.0625, "logits/rejected": -2.1875, "logps/chosen": -1.1875, "logps/rejected": -1.3046875, "loss": 1.2639, "nll_loss": 1.2109375, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.119140625, "rewards/margins": 0.011474609375, "rewards/rejected": -0.1298828125, "step": 560 }, { "epoch": 0.4, "grad_norm": 2.8019381749527845, "learning_rate": 3.75529356440967e-07, "log_odds_chosen": -0.002349853515625, "log_odds_ratio": -0.763867199420929, "logits/chosen": -2.140625, "logits/rejected": -2.1875, "logps/chosen": -1.1875, "logps/rejected": -1.203125, "loss": 1.2792, "nll_loss": 1.21875, "rewards/accuracies": 0.5, "rewards/chosen": -0.119140625, "rewards/margins": 0.0014190673828125, "rewards/rejected": -0.12060546875, "step": 570 }, { "epoch": 0.41, "grad_norm": 2.904460591649426, "learning_rate": 3.702024999861312e-07, "log_odds_chosen": 0.20991210639476776, "log_odds_ratio": -0.690234363079071, "logits/chosen": -2.140625, "logits/rejected": -2.203125, "logps/chosen": -1.1484375, "logps/rejected": -1.3125, "loss": 1.2689, "nll_loss": 1.21875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.115234375, "rewards/margins": 0.0157470703125, "rewards/rejected": -0.130859375, "step": 580 }, { "epoch": 0.41, "grad_norm": 2.839022537148172, "learning_rate": 3.648036884393646e-07, "log_odds_chosen": 0.11424560844898224, "log_odds_ratio": -0.715136706829071, "logits/chosen": -2.078125, "logits/rejected": -2.234375, "logps/chosen": -1.1953125, "logps/rejected": -1.28125, "loss": 1.2823, "nll_loss": 1.234375, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.119140625, "rewards/margins": 0.008544921875, "rewards/rejected": -0.1279296875, "step": 590 }, { "epoch": 0.42, "grad_norm": 2.6089925230689737, "learning_rate": 3.593361536134931e-07, "log_odds_chosen": 0.2776428163051605, "log_odds_ratio": -0.6419922113418579, "logits/chosen": -1.9765625, "logits/rejected": -2.109375, "logps/chosen": -1.0859375, "logps/rejected": -1.2734375, "loss": 1.2798, "nll_loss": 1.1640625, "rewards/accuracies": 0.625, "rewards/chosen": -0.1083984375, "rewards/margins": 0.0189208984375, "rewards/rejected": -0.126953125, "step": 600 }, { "epoch": 0.43, "grad_norm": 2.603291469222202, "learning_rate": 3.538031684601698e-07, "log_odds_chosen": 0.06999512016773224, "log_odds_ratio": -0.740429699420929, "logits/chosen": -2.125, "logits/rejected": -2.234375, "logps/chosen": -1.171875, "logps/rejected": -1.234375, "loss": 1.2504, "nll_loss": 1.1796875, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.1171875, "rewards/margins": 0.006103515625, "rewards/rejected": -0.123046875, "step": 610 }, { "epoch": 0.43, "grad_norm": 2.524850145022778, "learning_rate": 3.4820804511063496e-07, "log_odds_chosen": 0.2641845643520355, "log_odds_ratio": -0.642285168170929, "logits/chosen": -2.015625, "logits/rejected": -2.09375, "logps/chosen": -1.09375, "logps/rejected": -1.25, "loss": 1.2574, "nll_loss": 1.140625, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0157470703125, "rewards/rejected": -0.125, "step": 620 }, { "epoch": 0.44, "grad_norm": 2.821337451607177, "learning_rate": 3.425541328930226e-07, "log_odds_chosen": 0.26170653104782104, "log_odds_ratio": -0.6304687261581421, "logits/chosen": -2.046875, "logits/rejected": -2.15625, "logps/chosen": -1.125, "logps/rejected": -1.3125, "loss": 1.2199, "nll_loss": 1.15625, "rewards/accuracies": 0.625, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0184326171875, "rewards/rejected": -0.130859375, "step": 630 }, { "epoch": 0.45, "grad_norm": 2.5621913998689734, "learning_rate": 3.3684481632740025e-07, "log_odds_chosen": 0.131927490234375, "log_odds_ratio": -0.692578136920929, "logits/chosen": -2.1875, "logits/rejected": -2.21875, "logps/chosen": -1.1875, "logps/rejected": -1.2734375, "loss": 1.2889, "nll_loss": 1.2421875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.119140625, "rewards/margins": 0.0086669921875, "rewards/rejected": -0.1279296875, "step": 640 }, { "epoch": 0.46, "grad_norm": 2.556936502980912, "learning_rate": 3.310835130997428e-07, "log_odds_chosen": 0.18841552734375, "log_odds_ratio": -0.67431640625, "logits/chosen": -2.125, "logits/rejected": -2.234375, "logps/chosen": -1.1953125, "logps/rejected": -1.3359375, "loss": 1.3018, "nll_loss": 1.2578125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11962890625, "rewards/margins": 0.01348876953125, "rewards/rejected": -0.1337890625, "step": 650 }, { "epoch": 0.46, "grad_norm": 2.509648126252326, "learning_rate": 3.2527367201605215e-07, "log_odds_chosen": 0.28242188692092896, "log_odds_ratio": -0.6371093988418579, "logits/chosen": -2.0625, "logits/rejected": -2.15625, "logps/chosen": -1.15625, "logps/rejected": -1.3359375, "loss": 1.2715, "nll_loss": 1.2109375, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.115234375, "rewards/margins": 0.01806640625, "rewards/rejected": -0.1337890625, "step": 660 }, { "epoch": 0.47, "grad_norm": 2.572686228637093, "learning_rate": 3.194187709378479e-07, "log_odds_chosen": 0.10871581733226776, "log_odds_ratio": -0.723828136920929, "logits/chosen": -2.09375, "logits/rejected": -2.171875, "logps/chosen": -1.171875, "logps/rejected": -1.2421875, "loss": 1.2564, "nll_loss": 1.2421875, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.11669921875, "rewards/margins": 0.00714111328125, "rewards/rejected": -0.1240234375, "step": 670 }, { "epoch": 0.48, "grad_norm": 2.48714277601033, "learning_rate": 3.135223147002658e-07, "log_odds_chosen": 0.17313842475414276, "log_odds_ratio": -0.698437511920929, "logits/chosen": -2.15625, "logits/rejected": -2.21875, "logps/chosen": -1.1328125, "logps/rejected": -1.2578125, "loss": 1.2525, "nll_loss": 1.1875, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11328125, "rewards/margins": 0.012451171875, "rewards/rejected": -0.1259765625, "step": 680 }, { "epoch": 0.48, "grad_norm": 2.5499221997702164, "learning_rate": 3.075878330140079e-07, "log_odds_chosen": 0.22934570908546448, "log_odds_ratio": -0.6927734613418579, "logits/chosen": -2.109375, "logits/rejected": -2.1875, "logps/chosen": -1.203125, "logps/rejected": -1.3515625, "loss": 1.3172, "nll_loss": 1.25, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12060546875, "rewards/margins": 0.01470947265625, "rewards/rejected": -0.134765625, "step": 690 }, { "epoch": 0.49, "grad_norm": 2.4807391943530206, "learning_rate": 3.0161887835240353e-07, "log_odds_chosen": 0.16926269233226776, "log_odds_ratio": -0.692578136920929, "logits/chosen": -2.078125, "logits/rejected": -2.140625, "logps/chosen": -1.1640625, "logps/rejected": -1.28125, "loss": 1.2796, "nll_loss": 1.21875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11669921875, "rewards/margins": 0.0113525390625, "rewards/rejected": -0.1279296875, "step": 700 }, { "epoch": 0.5, "grad_norm": 2.789334396425448, "learning_rate": 2.956190238248425e-07, "log_odds_chosen": 0.1761474609375, "log_odds_ratio": -0.71728515625, "logits/chosen": -2.0625, "logits/rejected": -2.125, "logps/chosen": -1.15625, "logps/rejected": -1.28125, "loss": 1.2759, "nll_loss": 1.265625, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11572265625, "rewards/margins": 0.0126953125, "rewards/rejected": -0.1279296875, "step": 710 }, { "epoch": 0.5, "grad_norm": 2.4358280165677817, "learning_rate": 2.8959186103785694e-07, "log_odds_chosen": 0.3479553163051605, "log_odds_ratio": -0.630664050579071, "logits/chosen": -2.0, "logits/rejected": -2.078125, "logps/chosen": -1.0625, "logps/rejected": -1.3046875, "loss": 1.2165, "nll_loss": 1.125, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10595703125, "rewards/margins": 0.0245361328125, "rewards/rejected": -0.130859375, "step": 720 }, { "epoch": 0.51, "grad_norm": 2.88074218743752, "learning_rate": 2.8354099794512876e-07, "log_odds_chosen": 0.14354248344898224, "log_odds_ratio": -0.6961914300918579, "logits/chosen": -2.078125, "logits/rejected": -2.21875, "logps/chosen": -1.109375, "logps/rejected": -1.1953125, "loss": 1.277, "nll_loss": 1.1640625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.111328125, "rewards/margins": 0.00830078125, "rewards/rejected": -0.11962890625, "step": 730 }, { "epoch": 0.52, "grad_norm": 2.511351318161689, "learning_rate": 2.774700566877129e-07, "log_odds_chosen": 0.21488037705421448, "log_odds_ratio": -0.669921875, "logits/chosen": -2.046875, "logits/rejected": -2.109375, "logps/chosen": -1.140625, "logps/rejected": -1.3046875, "loss": 1.2247, "nll_loss": 1.1796875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0164794921875, "rewards/rejected": -0.130859375, "step": 740 }, { "epoch": 0.53, "grad_norm": 3.010290770764348, "learning_rate": 2.713826714257669e-07, "log_odds_chosen": 0.19861450791358948, "log_odds_ratio": -0.6786133050918579, "logits/chosen": -2.109375, "logits/rejected": -2.109375, "logps/chosen": -1.1796875, "logps/rejected": -1.3046875, "loss": 1.236, "nll_loss": 1.2109375, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.11767578125, "rewards/margins": 0.01239013671875, "rewards/rejected": -0.1298828125, "step": 750 }, { "epoch": 0.53, "grad_norm": 2.27093551536218, "learning_rate": 2.6528248616308595e-07, "log_odds_chosen": 0.31080931425094604, "log_odds_ratio": -0.6333984136581421, "logits/chosen": -2.0625, "logits/rejected": -2.140625, "logps/chosen": -1.1484375, "logps/rejected": -1.3671875, "loss": 1.2256, "nll_loss": 1.1953125, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.115234375, "rewards/margins": 0.0216064453125, "rewards/rejected": -0.13671875, "step": 760 }, { "epoch": 0.54, "grad_norm": 2.5700766425464, "learning_rate": 2.591731525657454e-07, "log_odds_chosen": 0.2696166932582855, "log_odds_ratio": -0.6527343988418579, "logits/chosen": -2.046875, "logits/rejected": -2.140625, "logps/chosen": -1.1171875, "logps/rejected": -1.296875, "loss": 1.2227, "nll_loss": 1.1484375, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0177001953125, "rewards/rejected": -0.1298828125, "step": 770 }, { "epoch": 0.55, "grad_norm": 3.3277253612496933, "learning_rate": 2.5305832777615644e-07, "log_odds_chosen": 0.19566650688648224, "log_odds_ratio": -0.688183605670929, "logits/chosen": -2.109375, "logits/rejected": -2.1875, "logps/chosen": -1.1640625, "logps/rejected": -1.296875, "loss": 1.2602, "nll_loss": 1.25, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1162109375, "rewards/margins": 0.01287841796875, "rewards/rejected": -0.12890625, "step": 780 }, { "epoch": 0.55, "grad_norm": 2.847511378794556, "learning_rate": 2.469416722238436e-07, "log_odds_chosen": 0.1658935546875, "log_odds_ratio": -0.706738293170929, "logits/chosen": -2.078125, "logits/rejected": -2.203125, "logps/chosen": -1.1875, "logps/rejected": -1.296875, "loss": 1.2591, "nll_loss": 1.21875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.119140625, "rewards/margins": 0.01080322265625, "rewards/rejected": -0.1298828125, "step": 790 }, { "epoch": 0.56, "grad_norm": 2.6549295176096672, "learning_rate": 2.4082684743425454e-07, "log_odds_chosen": 0.08632812649011612, "log_odds_ratio": -0.722851574420929, "logits/chosen": -2.0625, "logits/rejected": -2.15625, "logps/chosen": -1.140625, "logps/rejected": -1.203125, "loss": 1.2585, "nll_loss": 1.1875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11376953125, "rewards/margins": 0.0067138671875, "rewards/rejected": -0.12060546875, "step": 800 }, { "epoch": 0.57, "grad_norm": 2.743059460411326, "learning_rate": 2.3471751383691403e-07, "log_odds_chosen": 0.22829589247703552, "log_odds_ratio": -0.6942383050918579, "logits/chosen": -2.046875, "logits/rejected": -2.125, "logps/chosen": -1.140625, "logps/rejected": -1.2890625, "loss": 1.2288, "nll_loss": 1.1875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1142578125, "rewards/margins": 0.014892578125, "rewards/rejected": -0.12890625, "step": 810 }, { "epoch": 0.57, "grad_norm": 2.5772075525222253, "learning_rate": 2.2861732857423306e-07, "log_odds_chosen": 0.27777099609375, "log_odds_ratio": -0.6568359136581421, "logits/chosen": -1.9609375, "logits/rejected": -2.015625, "logps/chosen": -1.0859375, "logps/rejected": -1.28125, "loss": 1.2301, "nll_loss": 1.140625, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.1083984375, "rewards/margins": 0.02001953125, "rewards/rejected": -0.12890625, "step": 820 }, { "epoch": 0.58, "grad_norm": 2.9548400351876465, "learning_rate": 2.225299433122871e-07, "log_odds_chosen": 0.19624023139476776, "log_odds_ratio": -0.6937500238418579, "logits/chosen": -2.046875, "logits/rejected": -2.109375, "logps/chosen": -1.21875, "logps/rejected": -1.3828125, "loss": 1.2425, "nll_loss": 1.21875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1220703125, "rewards/margins": 0.015869140625, "rewards/rejected": -0.1376953125, "step": 830 }, { "epoch": 0.59, "grad_norm": 3.0054061279771362, "learning_rate": 2.1645900205487122e-07, "log_odds_chosen": 0.21121826767921448, "log_odds_ratio": -0.654492199420929, "logits/chosen": -2.03125, "logits/rejected": -2.0625, "logps/chosen": -1.09375, "logps/rejected": -1.234375, "loss": 1.2646, "nll_loss": 1.140625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10888671875, "rewards/margins": 0.014892578125, "rewards/rejected": -0.12353515625, "step": 840 }, { "epoch": 0.6, "grad_norm": 2.6164133362622484, "learning_rate": 2.1040813896214307e-07, "log_odds_chosen": 0.150909423828125, "log_odds_ratio": -0.6998046636581421, "logits/chosen": -2.0, "logits/rejected": -2.078125, "logps/chosen": -1.1171875, "logps/rejected": -1.234375, "loss": 1.263, "nll_loss": 1.15625, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0118408203125, "rewards/rejected": -0.12353515625, "step": 850 }, { "epoch": 0.6, "grad_norm": 2.507875262223547, "learning_rate": 2.0438097617515741e-07, "log_odds_chosen": 0.241455078125, "log_odds_ratio": -0.6556640863418579, "logits/chosen": -1.890625, "logits/rejected": -2.015625, "logps/chosen": -1.0859375, "logps/rejected": -1.2265625, "loss": 1.2638, "nll_loss": 1.1484375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.10888671875, "rewards/margins": 0.01409912109375, "rewards/rejected": -0.123046875, "step": 860 }, { "epoch": 0.61, "grad_norm": 2.507656226813357, "learning_rate": 1.9838112164759652e-07, "log_odds_chosen": 0.12969970703125, "log_odds_ratio": -0.7134765386581421, "logits/chosen": -2.046875, "logits/rejected": -2.15625, "logps/chosen": -1.1328125, "logps/rejected": -1.234375, "loss": 1.2807, "nll_loss": 1.15625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0103759765625, "rewards/rejected": -0.12353515625, "step": 870 }, { "epoch": 0.62, "grad_norm": 2.4386366953549508, "learning_rate": 1.9241216698599202e-07, "log_odds_chosen": 0.16312256455421448, "log_odds_ratio": -0.6949218511581421, "logits/chosen": -2.125, "logits/rejected": -2.171875, "logps/chosen": -1.171875, "logps/rejected": -1.2890625, "loss": 1.2391, "nll_loss": 1.234375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11669921875, "rewards/margins": 0.01214599609375, "rewards/rejected": -0.12890625, "step": 880 }, { "epoch": 0.62, "grad_norm": 2.4804086256409157, "learning_rate": 1.8647768529973423e-07, "log_odds_chosen": 0.17501220107078552, "log_odds_ratio": -0.6958984136581421, "logits/chosen": -2.015625, "logits/rejected": -2.109375, "logps/chosen": -1.0625, "logps/rejected": -1.203125, "loss": 1.227, "nll_loss": 1.125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1064453125, "rewards/margins": 0.0135498046875, "rewards/rejected": -0.1201171875, "step": 890 }, { "epoch": 0.63, "grad_norm": 2.4623419233165986, "learning_rate": 1.80581229062152e-07, "log_odds_chosen": 0.17165526747703552, "log_odds_ratio": -0.6900390386581421, "logits/chosen": -2.03125, "logits/rejected": -2.078125, "logps/chosen": -1.1640625, "logps/rejected": -1.2734375, "loss": 1.2806, "nll_loss": 1.234375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1162109375, "rewards/margins": 0.0111083984375, "rewards/rejected": -0.126953125, "step": 900 }, { "epoch": 0.64, "grad_norm": 2.797106319585485, "learning_rate": 1.7472632798394788e-07, "log_odds_chosen": 0.102783203125, "log_odds_ratio": -0.738574206829071, "logits/chosen": -2.03125, "logits/rejected": -2.125, "logps/chosen": -1.125, "logps/rejected": -1.1796875, "loss": 1.2623, "nll_loss": 1.203125, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.1123046875, "rewards/margins": 0.0057373046875, "rewards/rejected": -0.1181640625, "step": 910 }, { "epoch": 0.64, "grad_norm": 2.502768284944301, "learning_rate": 1.6891648690025718e-07, "log_odds_chosen": 0.20386962592601776, "log_odds_ratio": -0.687695324420929, "logits/chosen": -2.046875, "logits/rejected": -2.09375, "logps/chosen": -1.09375, "logps/rejected": -1.2421875, "loss": 1.2348, "nll_loss": 1.1640625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.109375, "rewards/margins": 0.01507568359375, "rewards/rejected": -0.12451171875, "step": 920 }, { "epoch": 0.65, "grad_norm": 2.656181511294093, "learning_rate": 1.6315518367259978e-07, "log_odds_chosen": 0.27000731229782104, "log_odds_ratio": -0.647265613079071, "logits/chosen": -1.9765625, "logits/rejected": -2.0625, "logps/chosen": -1.1015625, "logps/rejected": -1.2890625, "loss": 1.2463, "nll_loss": 1.140625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0185546875, "rewards/rejected": -0.12890625, "step": 930 }, { "epoch": 0.66, "grad_norm": 3.0809025740755227, "learning_rate": 1.574458671069774e-07, "log_odds_chosen": 0.10646972805261612, "log_odds_ratio": -0.731249988079071, "logits/chosen": -1.9609375, "logits/rejected": -2.09375, "logps/chosen": -1.1328125, "logps/rejected": -1.2109375, "loss": 1.243, "nll_loss": 1.171875, "rewards/accuracies": 0.46875, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0078125, "rewards/rejected": -0.12060546875, "step": 940 }, { "epoch": 0.67, "grad_norm": 2.5307625663876787, "learning_rate": 1.5179195488936504e-07, "log_odds_chosen": 0.22603759169578552, "log_odds_ratio": -0.6683593988418579, "logits/chosen": -2.046875, "logits/rejected": -2.109375, "logps/chosen": -1.109375, "logps/rejected": -1.265625, "loss": 1.224, "nll_loss": 1.1796875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0162353515625, "rewards/rejected": -0.126953125, "step": 950 }, { "epoch": 0.67, "grad_norm": 2.587513457015136, "learning_rate": 1.4619683153983016e-07, "log_odds_chosen": 0.15230712294578552, "log_odds_ratio": -0.70263671875, "logits/chosen": -2.0625, "logits/rejected": -2.171875, "logps/chosen": -1.1484375, "logps/rejected": -1.2421875, "loss": 1.2417, "nll_loss": 1.1953125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11474609375, "rewards/margins": 0.00909423828125, "rewards/rejected": -0.1240234375, "step": 960 }, { "epoch": 0.68, "grad_norm": 2.468276881587768, "learning_rate": 1.4066384638650687e-07, "log_odds_chosen": 0.26385498046875, "log_odds_ratio": -0.650195300579071, "logits/chosen": -2.015625, "logits/rejected": -2.046875, "logps/chosen": -1.15625, "logps/rejected": -1.3359375, "loss": 1.2174, "nll_loss": 1.1796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.11572265625, "rewards/margins": 0.0177001953125, "rewards/rejected": -0.1337890625, "step": 970 }, { "epoch": 0.69, "grad_norm": 2.5705148701270537, "learning_rate": 1.351963115606354e-07, "log_odds_chosen": 0.18297119438648224, "log_odds_ratio": -0.689746081829071, "logits/chosen": -2.03125, "logits/rejected": -2.171875, "logps/chosen": -1.109375, "logps/rejected": -1.2265625, "loss": 1.2253, "nll_loss": 1.1640625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11083984375, "rewards/margins": 0.011962890625, "rewards/rejected": -0.123046875, "step": 980 }, { "epoch": 0.69, "grad_norm": 2.5100478432366558, "learning_rate": 1.297975000138688e-07, "log_odds_chosen": 0.10567627102136612, "log_odds_ratio": -0.7080078125, "logits/chosen": -2.03125, "logits/rejected": -2.109375, "logps/chosen": -1.15625, "logps/rejected": -1.234375, "loss": 1.257, "nll_loss": 1.2109375, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11572265625, "rewards/margins": 0.00787353515625, "rewards/rejected": -0.12353515625, "step": 990 }, { "epoch": 0.7, "grad_norm": 2.6847113433850924, "learning_rate": 1.24470643559033e-07, "log_odds_chosen": 0.27998048067092896, "log_odds_ratio": -0.650585949420929, "logits/chosen": -2.0, "logits/rejected": -2.109375, "logps/chosen": -1.125, "logps/rejected": -1.3125, "loss": 1.2519, "nll_loss": 1.1875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11279296875, "rewards/margins": 0.018798828125, "rewards/rejected": -0.1318359375, "step": 1000 }, { "epoch": 0.71, "grad_norm": 2.695150048132808, "learning_rate": 1.1921893093551323e-07, "log_odds_chosen": 0.21813353896141052, "log_odds_ratio": -0.673144519329071, "logits/chosen": -1.9921875, "logits/rejected": -2.0625, "logps/chosen": -1.1171875, "logps/rejected": -1.265625, "loss": 1.2468, "nll_loss": 1.1171875, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0147705078125, "rewards/rejected": -0.1259765625, "step": 1010 }, { "epoch": 0.71, "grad_norm": 2.575139032183112, "learning_rate": 1.140455059004248e-07, "log_odds_chosen": 0.33759766817092896, "log_odds_ratio": -0.63525390625, "logits/chosen": -2.078125, "logits/rejected": -2.203125, "logps/chosen": -1.1171875, "logps/rejected": -1.359375, "loss": 1.2473, "nll_loss": 1.1796875, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0244140625, "rewards/rejected": -0.13671875, "step": 1020 }, { "epoch": 0.72, "grad_norm": 2.3412172556068858, "learning_rate": 1.0895346534671146e-07, "log_odds_chosen": 0.28547364473342896, "log_odds_ratio": -0.6714843511581421, "logits/chosen": -2.0625, "logits/rejected": -2.171875, "logps/chosen": -1.109375, "logps/rejected": -1.296875, "loss": 1.223, "nll_loss": 1.1484375, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.111328125, "rewards/margins": 0.018798828125, "rewards/rejected": -0.1298828125, "step": 1030 }, { "epoch": 0.73, "grad_norm": 2.440451621766773, "learning_rate": 1.0394585744929605e-07, "log_odds_chosen": 0.09172363579273224, "log_odds_ratio": -0.706250011920929, "logits/chosen": -2.03125, "logits/rejected": -2.078125, "logps/chosen": -1.1796875, "logps/rejected": -1.25, "loss": 1.2244, "nll_loss": 1.203125, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1181640625, "rewards/margins": 0.0067138671875, "rewards/rejected": -0.125, "step": 1040 }, { "epoch": 0.74, "grad_norm": 2.7872635135771486, "learning_rate": 9.902567984039659e-08, "log_odds_chosen": 0.10517577826976776, "log_odds_ratio": -0.718554675579071, "logits/chosen": -2.015625, "logits/rejected": -2.109375, "logps/chosen": -1.1796875, "logps/rejected": -1.25, "loss": 1.2433, "nll_loss": 1.234375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1181640625, "rewards/margins": 0.0067138671875, "rewards/rejected": -0.125, "step": 1050 }, { "epoch": 0.74, "grad_norm": 2.6062059656472667, "learning_rate": 9.419587781509572e-08, "log_odds_chosen": 0.21335449814796448, "log_odds_ratio": -0.6875, "logits/chosen": -1.9296875, "logits/rejected": -2.046875, "logps/chosen": -1.0859375, "logps/rejected": -1.2421875, "loss": 1.2339, "nll_loss": 1.109375, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1083984375, "rewards/margins": 0.016357421875, "rewards/rejected": -0.12451171875, "step": 1060 }, { "epoch": 0.75, "grad_norm": 2.387458644437053, "learning_rate": 8.945934256824217e-08, "log_odds_chosen": 0.19075927138328552, "log_odds_ratio": -0.67236328125, "logits/chosen": -2.0, "logits/rejected": -2.0625, "logps/chosen": -1.125, "logps/rejected": -1.2578125, "loss": 1.2428, "nll_loss": 1.171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1123046875, "rewards/margins": 0.0137939453125, "rewards/rejected": -0.1259765625, "step": 1070 }, { "epoch": 0.76, "grad_norm": 2.7020254464929536, "learning_rate": 8.481890946373562e-08, "log_odds_chosen": 0.16676025092601776, "log_odds_ratio": -0.6944335699081421, "logits/chosen": -1.9921875, "logits/rejected": -2.09375, "logps/chosen": -1.1484375, "logps/rejected": -1.265625, "loss": 1.2652, "nll_loss": 1.1953125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.115234375, "rewards/margins": 0.01165771484375, "rewards/rejected": -0.126953125, "step": 1080 }, { "epoch": 0.76, "grad_norm": 2.5506342978080365, "learning_rate": 8.027735633723401e-08, "log_odds_chosen": 0.19404907524585724, "log_odds_ratio": -0.678906261920929, "logits/chosen": -2.015625, "logits/rejected": -2.0625, "logps/chosen": -1.15625, "logps/rejected": -1.2890625, "loss": 1.2371, "nll_loss": 1.1640625, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11572265625, "rewards/margins": 0.01336669921875, "rewards/rejected": -0.12890625, "step": 1090 }, { "epoch": 0.77, "grad_norm": 2.9131209890812215, "learning_rate": 7.583740183329768e-08, "log_odds_chosen": 0.25482177734375, "log_odds_ratio": -0.654589831829071, "logits/chosen": -2.078125, "logits/rejected": -2.1875, "logps/chosen": -1.1328125, "logps/rejected": -1.3125, "loss": 1.2099, "nll_loss": 1.2109375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11376953125, "rewards/margins": 0.017578125, "rewards/rejected": -0.130859375, "step": 1100 }, { "epoch": 0.78, "grad_norm": 2.4151795293745417, "learning_rate": 7.150170377796627e-08, "log_odds_chosen": 0.09471435844898224, "log_odds_ratio": -0.719921886920929, "logits/chosen": -2.015625, "logits/rejected": -2.125, "logps/chosen": -1.15625, "logps/rejected": -1.2109375, "loss": 1.2681, "nll_loss": 1.1875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.11572265625, "rewards/margins": 0.0057373046875, "rewards/rejected": -0.12109375, "step": 1110 }, { "epoch": 0.78, "grad_norm": 2.9894142206703074, "learning_rate": 6.727285758774276e-08, "log_odds_chosen": 0.20228271186351776, "log_odds_ratio": -0.686718761920929, "logits/chosen": -1.9296875, "logits/rejected": -2.0625, "logps/chosen": -1.078125, "logps/rejected": -1.2265625, "loss": 1.2048, "nll_loss": 1.1171875, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.107421875, "rewards/margins": 0.01513671875, "rewards/rejected": -0.12255859375, "step": 1120 }, { "epoch": 0.79, "grad_norm": 2.292724337045773, "learning_rate": 6.315339471593645e-08, "log_odds_chosen": 0.2957824766635895, "log_odds_ratio": -0.6419922113418579, "logits/chosen": -1.9921875, "logits/rejected": -2.171875, "logps/chosen": -1.125, "logps/rejected": -1.328125, "loss": 1.2562, "nll_loss": 1.1640625, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0201416015625, "rewards/rejected": -0.1328125, "step": 1130 }, { "epoch": 0.8, "grad_norm": 2.4858379206596783, "learning_rate": 5.9145781137295596e-08, "log_odds_chosen": 0.11273193359375, "log_odds_ratio": -0.712109386920929, "logits/chosen": -1.9765625, "logits/rejected": -2.109375, "logps/chosen": -1.1640625, "logps/rejected": -1.25, "loss": 1.2469, "nll_loss": 1.1796875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11669921875, "rewards/margins": 0.00823974609375, "rewards/rejected": -0.125, "step": 1140 }, { "epoch": 0.81, "grad_norm": 2.9554773310288303, "learning_rate": 5.5252415871836514e-08, "log_odds_chosen": 0.19149169325828552, "log_odds_ratio": -0.6700195074081421, "logits/chosen": -2.078125, "logits/rejected": -2.125, "logps/chosen": -1.140625, "logps/rejected": -1.265625, "loss": 1.2691, "nll_loss": 1.1796875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11376953125, "rewards/margins": 0.01287841796875, "rewards/rejected": -0.126953125, "step": 1150 }, { "epoch": 0.81, "grad_norm": 2.455900615151912, "learning_rate": 5.147562954875267e-08, "log_odds_chosen": 0.25535887479782104, "log_odds_ratio": -0.662304699420929, "logits/chosen": -1.9765625, "logits/rejected": -2.0625, "logps/chosen": -1.0625, "logps/rejected": -1.2265625, "loss": 1.2103, "nll_loss": 1.078125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.01611328125, "rewards/rejected": -0.12255859375, "step": 1160 }, { "epoch": 0.82, "grad_norm": 2.6012002451079055, "learning_rate": 4.781768301126374e-08, "log_odds_chosen": 0.10560913383960724, "log_odds_ratio": -0.7105468511581421, "logits/chosen": -2.0625, "logits/rejected": -2.09375, "logps/chosen": -1.171875, "logps/rejected": -1.2578125, "loss": 1.2439, "nll_loss": 1.1875, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11767578125, "rewards/margins": 0.00823974609375, "rewards/rejected": -0.1259765625, "step": 1170 }, { "epoch": 0.83, "grad_norm": 2.2529151127172526, "learning_rate": 4.428076596323954e-08, "log_odds_chosen": 0.21270751953125, "log_odds_ratio": -0.6869140863418579, "logits/chosen": -1.953125, "logits/rejected": -2.078125, "logps/chosen": -1.125, "logps/rejected": -1.28125, "loss": 1.2745, "nll_loss": 1.2265625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11279296875, "rewards/margins": 0.01513671875, "rewards/rejected": -0.1279296875, "step": 1180 }, { "epoch": 0.83, "grad_norm": 2.4154203157415512, "learning_rate": 4.0866995658409145e-08, "log_odds_chosen": 0.25830078125, "log_odds_ratio": -0.65966796875, "logits/chosen": -1.9921875, "logits/rejected": -2.125, "logps/chosen": -1.09375, "logps/rejected": -1.265625, "loss": 1.2827, "nll_loss": 1.1484375, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.0174560546875, "rewards/rejected": -0.126953125, "step": 1190 }, { "epoch": 0.84, "grad_norm": 2.447937914506964, "learning_rate": 3.757841563293987e-08, "log_odds_chosen": 0.24925537407398224, "log_odds_ratio": -0.6611328125, "logits/chosen": -2.03125, "logits/rejected": -2.09375, "logps/chosen": -1.125, "logps/rejected": -1.296875, "loss": 1.2345, "nll_loss": 1.1640625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1123046875, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.12890625, "step": 1200 }, { "epoch": 0.85, "grad_norm": 2.5349608625933375, "learning_rate": 3.441699448214433e-08, "log_odds_chosen": 0.20963135361671448, "log_odds_ratio": -0.689257800579071, "logits/chosen": -2.015625, "logits/rejected": -2.109375, "logps/chosen": -1.140625, "logps/rejected": -1.28125, "loss": 1.2633, "nll_loss": 1.1875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11376953125, "rewards/margins": 0.01409912109375, "rewards/rejected": -0.1279296875, "step": 1210 }, { "epoch": 0.85, "grad_norm": 2.6135424725635987, "learning_rate": 3.138462468204914e-08, "log_odds_chosen": 0.25098878145217896, "log_odds_ratio": -0.660449206829071, "logits/chosen": -2.015625, "logits/rejected": -2.03125, "logps/chosen": -1.078125, "logps/rejected": -1.25, "loss": 1.2263, "nll_loss": 1.15625, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10791015625, "rewards/margins": 0.01708984375, "rewards/rejected": -0.125, "step": 1220 }, { "epoch": 0.86, "grad_norm": 2.450199821395702, "learning_rate": 2.8483121456528454e-08, "log_odds_chosen": 0.24144287407398224, "log_odds_ratio": -0.661816418170929, "logits/chosen": -2.046875, "logits/rejected": -2.125, "logps/chosen": -1.171875, "logps/rejected": -1.34375, "loss": 1.2541, "nll_loss": 1.2421875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1171875, "rewards/margins": 0.016845703125, "rewards/rejected": -0.1337890625, "step": 1230 }, { "epoch": 0.87, "grad_norm": 2.4363743869256904, "learning_rate": 2.5714221690683462e-08, "log_odds_chosen": 0.122314453125, "log_odds_ratio": -0.7119140625, "logits/chosen": -1.9609375, "logits/rejected": -2.046875, "logps/chosen": -1.1484375, "logps/rejected": -1.2265625, "loss": 1.3106, "nll_loss": 1.1953125, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.115234375, "rewards/margins": 0.0079345703125, "rewards/rejected": -0.123046875, "step": 1240 }, { "epoch": 0.88, "grad_norm": 2.5756303193348327, "learning_rate": 2.307958289111514e-08, "log_odds_chosen": 0.19765624403953552, "log_odds_ratio": -0.6796875, "logits/chosen": -2.0, "logits/rejected": -2.03125, "logps/chosen": -1.15625, "logps/rejected": -1.296875, "loss": 1.2333, "nll_loss": 1.1875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11572265625, "rewards/margins": 0.01397705078125, "rewards/rejected": -0.1298828125, "step": 1250 }, { "epoch": 0.88, "grad_norm": 2.553964198420633, "learning_rate": 2.0580782193715506e-08, "log_odds_chosen": 0.15519408881664276, "log_odds_ratio": -0.704882800579071, "logits/chosen": -1.9296875, "logits/rejected": -2.0, "logps/chosen": -1.15625, "logps/rejected": -1.2578125, "loss": 1.2388, "nll_loss": 1.1640625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11572265625, "rewards/margins": 0.01031494140625, "rewards/rejected": -0.1259765625, "step": 1260 }, { "epoch": 0.89, "grad_norm": 2.469702033641708, "learning_rate": 1.821931541956914e-08, "log_odds_chosen": 0.4207519590854645, "log_odds_ratio": -0.582226574420929, "logits/chosen": -2.03125, "logits/rejected": -2.125, "logps/chosen": -1.09375, "logps/rejected": -1.359375, "loss": 1.2354, "nll_loss": 1.1484375, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.109375, "rewards/margins": 0.027099609375, "rewards/rejected": -0.1357421875, "step": 1270 }, { "epoch": 0.9, "grad_norm": 2.4889196799977786, "learning_rate": 1.5996596179531364e-08, "log_odds_chosen": 0.12117920070886612, "log_odds_ratio": -0.710644543170929, "logits/chosen": -2.015625, "logits/rejected": -2.046875, "logps/chosen": -1.1484375, "logps/rejected": -1.234375, "loss": 1.2454, "nll_loss": 1.21875, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.115234375, "rewards/margins": 0.0084228515625, "rewards/rejected": -0.12353515625, "step": 1280 }, { "epoch": 0.9, "grad_norm": 2.6921341795857088, "learning_rate": 1.3913955028018925e-08, "log_odds_chosen": 0.0946044921875, "log_odds_ratio": -0.734375, "logits/chosen": -1.9921875, "logits/rejected": -2.046875, "logps/chosen": -1.15625, "logps/rejected": -1.21875, "loss": 1.2338, "nll_loss": 1.203125, "rewards/accuracies": 0.5, "rewards/chosen": -0.115234375, "rewards/margins": 0.00634765625, "rewards/rejected": -0.12158203125, "step": 1290 }, { "epoch": 0.91, "grad_norm": 2.9234303938288444, "learning_rate": 1.1972638666519153e-08, "log_odds_chosen": 0.30915528535842896, "log_odds_ratio": -0.6298828125, "logits/chosen": -1.9453125, "logits/rejected": -2.046875, "logps/chosen": -1.09375, "logps/rejected": -1.328125, "loss": 1.2258, "nll_loss": 1.140625, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.109375, "rewards/margins": 0.0230712890625, "rewards/rejected": -0.1328125, "step": 1300 }, { "epoch": 0.92, "grad_norm": 2.488742998417297, "learning_rate": 1.0173809197295074e-08, "log_odds_chosen": 0.1483154296875, "log_odds_ratio": -0.6932617425918579, "logits/chosen": -2.015625, "logits/rejected": -2.078125, "logps/chosen": -1.171875, "logps/rejected": -1.28125, "loss": 1.2868, "nll_loss": 1.234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11767578125, "rewards/margins": 0.01080322265625, "rewards/rejected": -0.12890625, "step": 1310 }, { "epoch": 0.93, "grad_norm": 2.930123126278221, "learning_rate": 8.518543427732949e-09, "log_odds_chosen": 0.2968383729457855, "log_odds_ratio": -0.6597656011581421, "logits/chosen": -1.9609375, "logits/rejected": -2.0, "logps/chosen": -1.1015625, "logps/rejected": -1.3359375, "loss": 1.2515, "nll_loss": 1.1484375, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0230712890625, "rewards/rejected": -0.1337890625, "step": 1320 }, { "epoch": 0.93, "grad_norm": 2.4910759658116306, "learning_rate": 7.007832225748356e-09, "log_odds_chosen": 0.3276123106479645, "log_odds_ratio": -0.623339831829071, "logits/chosen": -1.8359375, "logits/rejected": -1.953125, "logps/chosen": -1.03125, "logps/rejected": -1.25, "loss": 1.2122, "nll_loss": 1.1015625, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.1025390625, "rewards/margins": 0.0220947265625, "rewards/rejected": -0.125, "step": 1330 }, { "epoch": 0.94, "grad_norm": 2.6952772741810462, "learning_rate": 5.642579926637553e-09, "log_odds_chosen": 0.102996826171875, "log_odds_ratio": -0.718554675579071, "logits/chosen": -1.953125, "logits/rejected": -1.984375, "logps/chosen": -1.1875, "logps/rejected": -1.265625, "loss": 1.2679, "nll_loss": 1.2421875, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.11865234375, "rewards/margins": 0.007659912109375, "rewards/rejected": -0.1259765625, "step": 1340 }, { "epoch": 0.95, "grad_norm": 2.564277781118033, "learning_rate": 4.4236037917277644e-09, "log_odds_chosen": 0.17406006157398224, "log_odds_ratio": -0.697949230670929, "logits/chosen": -2.078125, "logits/rejected": -2.171875, "logps/chosen": -1.1875, "logps/rejected": -1.3125, "loss": 1.2736, "nll_loss": 1.2265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11865234375, "rewards/margins": 0.012939453125, "rewards/rejected": -0.1318359375, "step": 1350 }, { "epoch": 0.95, "grad_norm": 2.6044066137402764, "learning_rate": 3.3516335191523528e-09, "log_odds_chosen": 0.22431640326976776, "log_odds_ratio": -0.6830078363418579, "logits/chosen": -1.9765625, "logits/rejected": -2.078125, "logps/chosen": -1.15625, "logps/rejected": -1.3046875, "loss": 1.221, "nll_loss": 1.1640625, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.115234375, "rewards/margins": 0.01483154296875, "rewards/rejected": -0.1298828125, "step": 1360 }, { "epoch": 0.96, "grad_norm": 2.569982717697751, "learning_rate": 2.4273108070415605e-09, "log_odds_chosen": 0.24323730170726776, "log_odds_ratio": -0.675000011920929, "logits/chosen": -1.984375, "logits/rejected": -2.078125, "logps/chosen": -1.125, "logps/rejected": -1.3046875, "loss": 1.2342, "nll_loss": 1.1953125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1123046875, "rewards/margins": 0.017822265625, "rewards/rejected": -0.130859375, "step": 1370 }, { "epoch": 0.97, "grad_norm": 2.9474176384005117, "learning_rate": 1.651188969392564e-09, "log_odds_chosen": 0.18778076767921448, "log_odds_ratio": -0.675000011920929, "logits/chosen": -2.03125, "logits/rejected": -2.171875, "logps/chosen": -1.1484375, "logps/rejected": -1.2734375, "loss": 1.2187, "nll_loss": 1.1796875, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.11474609375, "rewards/margins": 0.012939453125, "rewards/rejected": -0.1279296875, "step": 1380 }, { "epoch": 0.97, "grad_norm": 2.5311989884303747, "learning_rate": 1.0237326048466443e-09, "log_odds_chosen": 0.2604003846645355, "log_odds_ratio": -0.6591796875, "logits/chosen": -1.9296875, "logits/rejected": -1.96875, "logps/chosen": -1.1328125, "logps/rejected": -1.3125, "loss": 1.2465, "nll_loss": 1.1875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11328125, "rewards/margins": 0.0185546875, "rewards/rejected": -0.1318359375, "step": 1390 }, { "epoch": 0.98, "grad_norm": 2.4286187387911005, "learning_rate": 5.453173185734073e-10, "log_odds_chosen": 0.158447265625, "log_odds_ratio": -0.71044921875, "logits/chosen": -2.0, "logits/rejected": -2.09375, "logps/chosen": -1.15625, "logps/rejected": -1.2734375, "loss": 1.2469, "nll_loss": 1.2265625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1162109375, "rewards/margins": 0.01123046875, "rewards/rejected": -0.126953125, "step": 1400 }, { "epoch": 0.99, "grad_norm": 2.7786302717518256, "learning_rate": 2.1622949742761133e-10, "log_odds_chosen": 0.21599121391773224, "log_odds_ratio": -0.6719726324081421, "logits/chosen": -1.9609375, "logits/rejected": -2.0625, "logps/chosen": -1.09375, "logps/rejected": -1.2265625, "loss": 1.2478, "nll_loss": 1.1796875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.0133056640625, "rewards/rejected": -0.123046875, "step": 1410 }, { "epoch": 1.0, "grad_norm": 2.2980939714984134, "learning_rate": 3.666613851355249e-11, "log_odds_chosen": 0.22458496689796448, "log_odds_ratio": -0.666210949420929, "logits/chosen": -2.078125, "logits/rejected": -2.203125, "logps/chosen": -1.1640625, "logps/rejected": -1.34375, "loss": 1.2281, "nll_loss": 1.2109375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1162109375, "rewards/margins": 0.017822265625, "rewards/rejected": -0.1337890625, "step": 1420 }, { "epoch": 1.0, "step": 1427, "total_flos": 0.0, "train_loss": 1.2897131164005442, "train_runtime": 9812.389, "train_samples_per_second": 6.979, "train_steps_per_second": 0.145 } ], "logging_steps": 10, "max_steps": 1427, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }