diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4584 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9992122883024814, + "eval_steps": 500, + "global_step": 2538, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 6.010698237247525, + "learning_rate": 1.968503937007874e-08, + "log_odds_chosen": 0.27912598848342896, + "log_odds_ratio": -0.7284179925918579, + "logits/chosen": -2.015625, + "logits/rejected": -2.046875, + "logps/chosen": -2.03125, + "logps/rejected": -2.28125, + "loss": 1.5763, + "nll_loss": 1.4375, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.2041015625, + "rewards/margins": 0.023681640625, + "rewards/rejected": -0.2275390625, + "step": 10 + }, + { + "epoch": 0.02, + "grad_norm": 5.135758173057069, + "learning_rate": 3.937007874015748e-08, + "log_odds_chosen": 0.36018067598342896, + "log_odds_ratio": -0.750683605670929, + "logits/chosen": -2.109375, + "logits/rejected": -2.140625, + "logps/chosen": -1.9765625, + "logps/rejected": -2.296875, + "loss": 1.5927, + "nll_loss": 1.5, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.197265625, + "rewards/margins": 0.0322265625, + "rewards/rejected": -0.2294921875, + "step": 20 + }, + { + "epoch": 0.02, + "grad_norm": 6.927967353705024, + "learning_rate": 5.9055118110236216e-08, + "log_odds_chosen": 0.17539063096046448, + "log_odds_ratio": -0.812207043170929, + "logits/chosen": -1.9375, + "logits/rejected": -2.03125, + "logps/chosen": -2.0625, + "logps/rejected": -2.1875, + "loss": 1.5598, + "nll_loss": 1.5, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.2060546875, + "rewards/margins": 0.013427734375, + "rewards/rejected": -0.2197265625, + "step": 30 + }, + { + "epoch": 0.03, + "grad_norm": 9.514842032339935, + "learning_rate": 7.874015748031496e-08, + "log_odds_chosen": 0.3271545469760895, + "log_odds_ratio": -0.705859363079071, + "logits/chosen": -2.03125, + "logits/rejected": -2.03125, + "logps/chosen": -1.9921875, + "logps/rejected": -2.296875, + "loss": 1.6148, + "nll_loss": 1.546875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.19921875, + "rewards/margins": 0.031005859375, + "rewards/rejected": -0.23046875, + "step": 40 + }, + { + "epoch": 0.04, + "grad_norm": 5.878873460342644, + "learning_rate": 9.84251968503937e-08, + "log_odds_chosen": 0.1710205078125, + "log_odds_ratio": -0.77587890625, + "logits/chosen": -2.03125, + "logits/rejected": -2.0, + "logps/chosen": -1.9609375, + "logps/rejected": -2.09375, + "loss": 1.5496, + "nll_loss": 1.484375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1962890625, + "rewards/margins": 0.0137939453125, + "rewards/rejected": -0.2099609375, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 6.390918916154321, + "learning_rate": 1.1811023622047243e-07, + "log_odds_chosen": 0.21818237006664276, + "log_odds_ratio": -0.7723633050918579, + "logits/chosen": -2.046875, + "logits/rejected": -2.03125, + "logps/chosen": -2.0625, + "logps/rejected": -2.25, + "loss": 1.6078, + "nll_loss": 1.5625, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.2060546875, + "rewards/margins": 0.0185546875, + "rewards/rejected": -0.224609375, + "step": 60 + }, + { + "epoch": 0.06, + "grad_norm": 7.289138170054862, + "learning_rate": 1.3779527559055117e-07, + "log_odds_chosen": 0.34990233182907104, + "log_odds_ratio": -0.7339843511581421, + "logits/chosen": -2.015625, + "logits/rejected": -2.046875, + "logps/chosen": -2.0625, + "logps/rejected": -2.375, + "loss": 1.5634, + "nll_loss": 1.5078125, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.20703125, + "rewards/margins": 0.0311279296875, + "rewards/rejected": -0.23828125, + "step": 70 + }, + { + "epoch": 0.06, + "grad_norm": 6.612859448964533, + "learning_rate": 1.5748031496062992e-07, + "log_odds_chosen": 0.4515624940395355, + "log_odds_ratio": -0.640917956829071, + "logits/chosen": -1.9609375, + "logits/rejected": -2.015625, + "logps/chosen": -1.7734375, + "logps/rejected": -2.171875, + "loss": 1.532, + "nll_loss": 1.421875, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.177734375, + "rewards/margins": 0.039794921875, + "rewards/rejected": -0.2177734375, + "step": 80 + }, + { + "epoch": 0.07, + "grad_norm": 8.397857351399276, + "learning_rate": 1.7716535433070863e-07, + "log_odds_chosen": 0.17760619521141052, + "log_odds_ratio": -0.7791992425918579, + "logits/chosen": -2.0625, + "logits/rejected": -2.03125, + "logps/chosen": -2.0625, + "logps/rejected": -2.21875, + "loss": 1.5648, + "nll_loss": 1.546875, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.2060546875, + "rewards/margins": 0.0166015625, + "rewards/rejected": -0.22265625, + "step": 90 + }, + { + "epoch": 0.08, + "grad_norm": 7.586832559599352, + "learning_rate": 1.968503937007874e-07, + "log_odds_chosen": 0.2771240174770355, + "log_odds_ratio": -0.6792968511581421, + "logits/chosen": -2.140625, + "logits/rejected": -2.171875, + "logps/chosen": -1.890625, + "logps/rejected": -2.125, + "loss": 1.5286, + "nll_loss": 1.484375, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.189453125, + "rewards/margins": 0.02392578125, + "rewards/rejected": -0.212890625, + "step": 100 + }, + { + "epoch": 0.09, + "grad_norm": 5.07791601254699, + "learning_rate": 2.1653543307086615e-07, + "log_odds_chosen": 0.214080810546875, + "log_odds_ratio": -0.7261718511581421, + "logits/chosen": -2.09375, + "logits/rejected": -2.15625, + "logps/chosen": -1.890625, + "logps/rejected": -2.0625, + "loss": 1.4891, + "nll_loss": 1.4140625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.189453125, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.2060546875, + "step": 110 + }, + { + "epoch": 0.09, + "grad_norm": 5.712206786453907, + "learning_rate": 2.3622047244094486e-07, + "log_odds_chosen": 0.14680786430835724, + "log_odds_ratio": -0.7562500238418579, + "logits/chosen": -2.078125, + "logits/rejected": -2.15625, + "logps/chosen": -1.9375, + "logps/rejected": -2.03125, + "loss": 1.4901, + "nll_loss": 1.34375, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.193359375, + "rewards/margins": 0.00994873046875, + "rewards/rejected": -0.2041015625, + "step": 120 + }, + { + "epoch": 0.1, + "grad_norm": 5.959474046553222, + "learning_rate": 2.559055118110236e-07, + "log_odds_chosen": 0.2553772032260895, + "log_odds_ratio": -0.6973632574081421, + "logits/chosen": -2.15625, + "logits/rejected": -2.21875, + "logps/chosen": -1.7890625, + "logps/rejected": -1.9921875, + "loss": 1.497, + "nll_loss": 1.3828125, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1787109375, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.19921875, + "step": 130 + }, + { + "epoch": 0.11, + "grad_norm": 5.960730803694658, + "learning_rate": 2.7559055118110235e-07, + "log_odds_chosen": 0.16423340141773224, + "log_odds_ratio": -0.718457043170929, + "logits/chosen": -2.15625, + "logits/rejected": -2.21875, + "logps/chosen": -1.765625, + "logps/rejected": -1.8984375, + "loss": 1.5228, + "nll_loss": 1.453125, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1767578125, + "rewards/margins": 0.0133056640625, + "rewards/rejected": -0.189453125, + "step": 140 + }, + { + "epoch": 0.12, + "grad_norm": 4.558212884083645, + "learning_rate": 2.9527559055118104e-07, + "log_odds_chosen": 0.24582520127296448, + "log_odds_ratio": -0.671191394329071, + "logits/chosen": -2.125, + "logits/rejected": -2.1875, + "logps/chosen": -1.546875, + "logps/rejected": -1.7421875, + "loss": 1.4124, + "nll_loss": 1.3125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.154296875, + "rewards/margins": 0.0191650390625, + "rewards/rejected": -0.173828125, + "step": 150 + }, + { + "epoch": 0.13, + "grad_norm": 3.4831822067708686, + "learning_rate": 3.1496062992125984e-07, + "log_odds_chosen": 0.17824706435203552, + "log_odds_ratio": -0.69384765625, + "logits/chosen": -2.25, + "logits/rejected": -2.28125, + "logps/chosen": -1.671875, + "logps/rejected": -1.8203125, + "loss": 1.4297, + "nll_loss": 1.390625, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1669921875, + "rewards/margins": 0.0145263671875, + "rewards/rejected": -0.181640625, + "step": 160 + }, + { + "epoch": 0.13, + "grad_norm": 4.034138231637428, + "learning_rate": 3.346456692913386e-07, + "log_odds_chosen": 0.12167968600988388, + "log_odds_ratio": -0.7186523675918579, + "logits/chosen": -2.1875, + "logits/rejected": -2.296875, + "logps/chosen": -1.5625, + "logps/rejected": -1.6640625, + "loss": 1.3835, + "nll_loss": 1.3203125, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.15625, + "rewards/margins": 0.01007080078125, + "rewards/rejected": -0.166015625, + "step": 170 + }, + { + "epoch": 0.14, + "grad_norm": 4.146506823609489, + "learning_rate": 3.5433070866141727e-07, + "log_odds_chosen": 0.13695068657398224, + "log_odds_ratio": -0.719433605670929, + "logits/chosen": -2.28125, + "logits/rejected": -2.40625, + "logps/chosen": -1.5546875, + "logps/rejected": -1.671875, + "loss": 1.4352, + "nll_loss": 1.3828125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.1552734375, + "rewards/margins": 0.0115966796875, + "rewards/rejected": -0.1669921875, + "step": 180 + }, + { + "epoch": 0.15, + "grad_norm": 3.5266117677319087, + "learning_rate": 3.7401574803149606e-07, + "log_odds_chosen": 0.17273560166358948, + "log_odds_ratio": -0.681640625, + "logits/chosen": -2.3125, + "logits/rejected": -2.4375, + "logps/chosen": -1.4765625, + "logps/rejected": -1.625, + "loss": 1.3599, + "nll_loss": 1.3046875, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1474609375, + "rewards/margins": 0.01495361328125, + "rewards/rejected": -0.162109375, + "step": 190 + }, + { + "epoch": 0.16, + "grad_norm": 3.4327683095111072, + "learning_rate": 3.937007874015748e-07, + "log_odds_chosen": 0.11888428032398224, + "log_odds_ratio": -0.711230456829071, + "logits/chosen": -2.296875, + "logits/rejected": -2.40625, + "logps/chosen": -1.3828125, + "logps/rejected": -1.484375, + "loss": 1.3592, + "nll_loss": 1.28125, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.138671875, + "rewards/margins": 0.0093994140625, + "rewards/rejected": -0.1484375, + "step": 200 + }, + { + "epoch": 0.17, + "grad_norm": 2.811761812417915, + "learning_rate": 4.133858267716535e-07, + "log_odds_chosen": 0.13620606064796448, + "log_odds_ratio": -0.692089855670929, + "logits/chosen": -2.3125, + "logits/rejected": -2.4375, + "logps/chosen": -1.359375, + "logps/rejected": -1.453125, + "loss": 1.3822, + "nll_loss": 1.2421875, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1357421875, + "rewards/margins": 0.010009765625, + "rewards/rejected": -0.1455078125, + "step": 210 + }, + { + "epoch": 0.17, + "grad_norm": 3.1783123040584775, + "learning_rate": 4.330708661417323e-07, + "log_odds_chosen": 0.23652343451976776, + "log_odds_ratio": -0.656054675579071, + "logits/chosen": -2.3125, + "logits/rejected": -2.4375, + "logps/chosen": -1.296875, + "logps/rejected": -1.46875, + "loss": 1.3022, + "nll_loss": 1.2265625, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12890625, + "rewards/margins": 0.017578125, + "rewards/rejected": -0.146484375, + "step": 220 + }, + { + "epoch": 0.18, + "grad_norm": 3.398094645144472, + "learning_rate": 4.52755905511811e-07, + "log_odds_chosen": 0.10042724758386612, + "log_odds_ratio": -0.737500011920929, + "logits/chosen": -2.421875, + "logits/rejected": -2.546875, + "logps/chosen": -1.265625, + "logps/rejected": -1.3359375, + "loss": 1.3118, + "nll_loss": 1.21875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.126953125, + "rewards/margins": 0.007049560546875, + "rewards/rejected": -0.1337890625, + "step": 230 + }, + { + "epoch": 0.19, + "grad_norm": 3.0602456337138735, + "learning_rate": 4.7244094488188973e-07, + "log_odds_chosen": 0.05767212063074112, + "log_odds_ratio": -0.7372070550918579, + "logits/chosen": -2.328125, + "logits/rejected": -2.4375, + "logps/chosen": -1.359375, + "logps/rejected": -1.40625, + "loss": 1.3639, + "nll_loss": 1.3359375, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1357421875, + "rewards/margins": 0.0050048828125, + "rewards/rejected": -0.140625, + "step": 240 + }, + { + "epoch": 0.2, + "grad_norm": 3.1390537608793543, + "learning_rate": 4.921259842519685e-07, + "log_odds_chosen": 0.16054077446460724, + "log_odds_ratio": -0.6732422113418579, + "logits/chosen": -2.34375, + "logits/rejected": -2.40625, + "logps/chosen": -1.2421875, + "logps/rejected": -1.3515625, + "loss": 1.3024, + "nll_loss": 1.234375, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1240234375, + "rewards/margins": 0.01104736328125, + "rewards/rejected": -0.134765625, + "step": 250 + }, + { + "epoch": 0.2, + "grad_norm": 2.518997318792438, + "learning_rate": 4.999914863146575e-07, + "log_odds_chosen": 0.16718749701976776, + "log_odds_ratio": -0.6884765625, + "logits/chosen": -2.359375, + "logits/rejected": -2.5, + "logps/chosen": -1.234375, + "logps/rejected": -1.3359375, + "loss": 1.3314, + "nll_loss": 1.234375, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.12353515625, + "rewards/margins": 0.01019287109375, + "rewards/rejected": -0.1337890625, + "step": 260 + }, + { + "epoch": 0.21, + "grad_norm": 2.798537540317038, + "learning_rate": 4.999394603374641e-07, + "log_odds_chosen": 0.19011840224266052, + "log_odds_ratio": -0.676562488079071, + "logits/chosen": -2.359375, + "logits/rejected": -2.46875, + "logps/chosen": -1.203125, + "logps/rejected": -1.3515625, + "loss": 1.2872, + "nll_loss": 1.1875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.12060546875, + "rewards/margins": 0.01422119140625, + "rewards/rejected": -0.134765625, + "step": 270 + }, + { + "epoch": 0.22, + "grad_norm": 2.8758259093431437, + "learning_rate": 4.99840148039188e-07, + "log_odds_chosen": 0.29682618379592896, + "log_odds_ratio": -0.637890636920929, + "logits/chosen": -2.3125, + "logits/rejected": -2.5, + "logps/chosen": -1.203125, + "logps/rejected": -1.4140625, + "loss": 1.2201, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1201171875, + "rewards/margins": 0.021484375, + "rewards/rejected": -0.1416015625, + "step": 280 + }, + { + "epoch": 0.23, + "grad_norm": 2.695897803134525, + "learning_rate": 4.996935682088318e-07, + "log_odds_chosen": 0.22941894829273224, + "log_odds_ratio": -0.6490234136581421, + "logits/chosen": -2.359375, + "logits/rejected": -2.46875, + "logps/chosen": -1.21875, + "logps/rejected": -1.375, + "loss": 1.2819, + "nll_loss": 1.1875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1220703125, + "rewards/margins": 0.015380859375, + "rewards/rejected": -0.1376953125, + "step": 290 + }, + { + "epoch": 0.24, + "grad_norm": 2.8982419601141585, + "learning_rate": 4.994997485779947e-07, + "log_odds_chosen": 0.23259887099266052, + "log_odds_ratio": -0.666796863079071, + "logits/chosen": -2.25, + "logits/rejected": -2.375, + "logps/chosen": -1.1796875, + "logps/rejected": -1.34375, + "loss": 1.2759, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.1181640625, + "rewards/margins": 0.015869140625, + "rewards/rejected": -0.1337890625, + "step": 300 + }, + { + "epoch": 0.24, + "grad_norm": 2.9056748531961585, + "learning_rate": 4.992587258156258e-07, + "log_odds_chosen": 0.17786864936351776, + "log_odds_ratio": -0.6656249761581421, + "logits/chosen": -2.265625, + "logits/rejected": -2.359375, + "logps/chosen": -1.21875, + "logps/rejected": -1.34375, + "loss": 1.2812, + "nll_loss": 1.2109375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.12158203125, + "rewards/margins": 0.01324462890625, + "rewards/rejected": -0.134765625, + "step": 310 + }, + { + "epoch": 0.25, + "grad_norm": 2.596709437423632, + "learning_rate": 4.989705455210862e-07, + "log_odds_chosen": 0.21816405653953552, + "log_odds_ratio": -0.662890613079071, + "logits/chosen": -2.3125, + "logits/rejected": -2.421875, + "logps/chosen": -1.1875, + "logps/rejected": -1.3515625, + "loss": 1.2184, + "nll_loss": 1.109375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.11865234375, + "rewards/margins": 0.016357421875, + "rewards/rejected": -0.134765625, + "step": 320 + }, + { + "epoch": 0.26, + "grad_norm": 2.476759669633908, + "learning_rate": 4.986352622155222e-07, + "log_odds_chosen": 0.17100830376148224, + "log_odds_ratio": -0.6903320550918579, + "logits/chosen": -2.359375, + "logits/rejected": -2.46875, + "logps/chosen": -1.1953125, + "logps/rejected": -1.3046875, + "loss": 1.2865, + "nll_loss": 1.21875, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.119140625, + "rewards/margins": 0.01129150390625, + "rewards/rejected": -0.130859375, + "step": 330 + }, + { + "epoch": 0.27, + "grad_norm": 2.628189970289334, + "learning_rate": 4.98252939331551e-07, + "log_odds_chosen": 0.17416992783546448, + "log_odds_ratio": -0.697070300579071, + "logits/chosen": -2.328125, + "logits/rejected": -2.4375, + "logps/chosen": -1.21875, + "logps/rejected": -1.34375, + "loss": 1.2797, + "nll_loss": 1.2109375, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.12158203125, + "rewards/margins": 0.01318359375, + "rewards/rejected": -0.134765625, + "step": 340 + }, + { + "epoch": 0.28, + "grad_norm": 2.6295659881126943, + "learning_rate": 4.978236492012589e-07, + "log_odds_chosen": 0.02346191368997097, + "log_odds_ratio": -0.755078136920929, + "logits/chosen": -2.421875, + "logits/rejected": -2.421875, + "logps/chosen": -1.2578125, + "logps/rejected": -1.2734375, + "loss": 1.3077, + "nll_loss": 1.25, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1259765625, + "rewards/margins": 0.00180816650390625, + "rewards/rejected": -0.126953125, + "step": 350 + }, + { + "epoch": 0.28, + "grad_norm": 2.959773704803729, + "learning_rate": 4.973474730425173e-07, + "log_odds_chosen": 0.19099120795726776, + "log_odds_ratio": -0.6749023199081421, + "logits/chosen": -2.25, + "logits/rejected": -2.359375, + "logps/chosen": -1.1875, + "logps/rejected": -1.3125, + "loss": 1.2568, + "nll_loss": 1.171875, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11865234375, + "rewards/margins": 0.0125732421875, + "rewards/rejected": -0.1318359375, + "step": 360 + }, + { + "epoch": 0.29, + "grad_norm": 2.613182586833654, + "learning_rate": 4.968245009436167e-07, + "log_odds_chosen": 0.10064697265625, + "log_odds_ratio": -0.72216796875, + "logits/chosen": -2.375, + "logits/rejected": -2.4375, + "logps/chosen": -1.28125, + "logps/rejected": -1.3671875, + "loss": 1.2944, + "nll_loss": 1.2890625, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.1279296875, + "rewards/margins": 0.00860595703125, + "rewards/rejected": -0.13671875, + "step": 370 + }, + { + "epoch": 0.3, + "grad_norm": 2.8683189618015126, + "learning_rate": 4.962548318462231e-07, + "log_odds_chosen": 0.19755859673023224, + "log_odds_ratio": -0.6724609136581421, + "logits/chosen": -2.296875, + "logits/rejected": -2.359375, + "logps/chosen": -1.171875, + "logps/rejected": -1.3046875, + "loss": 1.2778, + "nll_loss": 1.2265625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.11669921875, + "rewards/margins": 0.01373291015625, + "rewards/rejected": -0.130859375, + "step": 380 + }, + { + "epoch": 0.31, + "grad_norm": 2.9807597681868305, + "learning_rate": 4.95638573526659e-07, + "log_odds_chosen": 0.14707031846046448, + "log_odds_ratio": -0.7005859613418579, + "logits/chosen": -2.28125, + "logits/rejected": -2.453125, + "logps/chosen": -1.171875, + "logps/rejected": -1.2734375, + "loss": 1.2844, + "nll_loss": 1.203125, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.11767578125, + "rewards/margins": 0.0098876953125, + "rewards/rejected": -0.126953125, + "step": 390 + }, + { + "epoch": 0.32, + "grad_norm": 2.655124275329291, + "learning_rate": 4.949758425755127e-07, + "log_odds_chosen": 0.10791015625, + "log_odds_ratio": -0.7230468988418579, + "logits/chosen": -2.1875, + "logits/rejected": -2.28125, + "logps/chosen": -1.203125, + "logps/rejected": -1.28125, + "loss": 1.2902, + "nll_loss": 1.1875, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1201171875, + "rewards/margins": 0.00762939453125, + "rewards/rejected": -0.1279296875, + "step": 400 + }, + { + "epoch": 0.32, + "grad_norm": 2.4371959032830293, + "learning_rate": 4.94266764375581e-07, + "log_odds_chosen": 0.20887450873851776, + "log_odds_ratio": -0.6651366949081421, + "logits/chosen": -2.21875, + "logits/rejected": -2.34375, + "logps/chosen": -1.1328125, + "logps/rejected": -1.2890625, + "loss": 1.2674, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.01544189453125, + "rewards/rejected": -0.12890625, + "step": 410 + }, + { + "epoch": 0.33, + "grad_norm": 2.729848906556158, + "learning_rate": 4.935114730781475e-07, + "log_odds_chosen": 0.27691650390625, + "log_odds_ratio": -0.6527343988418579, + "logits/chosen": -2.140625, + "logits/rejected": -2.296875, + "logps/chosen": -1.1953125, + "logps/rejected": -1.3828125, + "loss": 1.2544, + "nll_loss": 1.1875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11962890625, + "rewards/margins": 0.018798828125, + "rewards/rejected": -0.138671875, + "step": 420 + }, + { + "epoch": 0.34, + "grad_norm": 2.50393555238819, + "learning_rate": 4.927101115776026e-07, + "log_odds_chosen": 0.14921875298023224, + "log_odds_ratio": -0.702343761920929, + "logits/chosen": -2.28125, + "logits/rejected": -2.34375, + "logps/chosen": -1.203125, + "logps/rejected": -1.3203125, + "loss": 1.2471, + "nll_loss": 1.2265625, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.12060546875, + "rewards/margins": 0.01129150390625, + "rewards/rejected": -0.1318359375, + "step": 430 + }, + { + "epoch": 0.35, + "grad_norm": 2.7483117165130744, + "learning_rate": 4.918628314844088e-07, + "log_odds_chosen": 0.04735717922449112, + "log_odds_ratio": -0.749218761920929, + "logits/chosen": -2.265625, + "logits/rejected": -2.359375, + "logps/chosen": -1.2109375, + "logps/rejected": -1.25, + "loss": 1.2351, + "nll_loss": 1.21875, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.12060546875, + "rewards/margins": 0.0040283203125, + "rewards/rejected": -0.12451171875, + "step": 440 + }, + { + "epoch": 0.35, + "grad_norm": 2.9881422727710887, + "learning_rate": 4.909697930964179e-07, + "log_odds_chosen": 0.16976317763328552, + "log_odds_ratio": -0.6986328363418579, + "logits/chosen": -2.25, + "logits/rejected": -2.40625, + "logps/chosen": -1.1953125, + "logps/rejected": -1.3125, + "loss": 1.2467, + "nll_loss": 1.2421875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11962890625, + "rewards/margins": 0.01153564453125, + "rewards/rejected": -0.130859375, + "step": 450 + }, + { + "epoch": 0.36, + "grad_norm": 2.6175879243996363, + "learning_rate": 4.900311653685437e-07, + "log_odds_chosen": 0.20297852158546448, + "log_odds_ratio": -0.669140636920929, + "logits/chosen": -2.296875, + "logits/rejected": -2.359375, + "logps/chosen": -1.1640625, + "logps/rejected": -1.3125, + "loss": 1.2102, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1162109375, + "rewards/margins": 0.01513671875, + "rewards/rejected": -0.1318359375, + "step": 460 + }, + { + "epoch": 0.37, + "grad_norm": 2.2300783745527317, + "learning_rate": 4.890471258807968e-07, + "log_odds_chosen": 0.19609375298023224, + "log_odds_ratio": -0.6773437261581421, + "logits/chosen": -2.1875, + "logits/rejected": -2.265625, + "logps/chosen": -1.1875, + "logps/rejected": -1.296875, + "loss": 1.2387, + "nll_loss": 1.203125, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.11865234375, + "rewards/margins": 0.01116943359375, + "rewards/rejected": -0.1298828125, + "step": 470 + }, + { + "epoch": 0.38, + "grad_norm": 2.9471805049826094, + "learning_rate": 4.880178608046894e-07, + "log_odds_chosen": 0.14970703423023224, + "log_odds_ratio": -0.7040039300918579, + "logits/chosen": -2.171875, + "logits/rejected": -2.203125, + "logps/chosen": -1.140625, + "logps/rejected": -1.25, + "loss": 1.2675, + "nll_loss": 1.2109375, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.1142578125, + "rewards/margins": 0.0108642578125, + "rewards/rejected": -0.125, + "step": 480 + }, + { + "epoch": 0.39, + "grad_norm": 2.7162956655728623, + "learning_rate": 4.869435648680116e-07, + "log_odds_chosen": 0.15129394829273224, + "log_odds_ratio": -0.6802734136581421, + "logits/chosen": -2.1875, + "logits/rejected": -2.28125, + "logps/chosen": -1.1171875, + "logps/rejected": -1.2265625, + "loss": 1.1889, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.01043701171875, + "rewards/rejected": -0.12255859375, + "step": 490 + }, + { + "epoch": 0.39, + "grad_norm": 4.459550453771863, + "learning_rate": 4.858244413179923e-07, + "log_odds_chosen": 0.2993102967739105, + "log_odds_ratio": -0.64111328125, + "logits/chosen": -2.171875, + "logits/rejected": -2.25, + "logps/chosen": -1.1171875, + "logps/rejected": -1.3125, + "loss": 1.2095, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.0198974609375, + "rewards/rejected": -0.1318359375, + "step": 500 + }, + { + "epoch": 0.4, + "grad_norm": 3.2619422051015836, + "learning_rate": 4.846607018828449e-07, + "log_odds_chosen": 0.210205078125, + "log_odds_ratio": -0.673632800579071, + "logits/chosen": -2.21875, + "logits/rejected": -2.421875, + "logps/chosen": -1.1875, + "logps/rejected": -1.3203125, + "loss": 1.2653, + "nll_loss": 1.21875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.119140625, + "rewards/margins": 0.013427734375, + "rewards/rejected": -0.1328125, + "step": 510 + }, + { + "epoch": 0.41, + "grad_norm": 2.7715133106941576, + "learning_rate": 4.834525667317121e-07, + "log_odds_chosen": 0.22309570014476776, + "log_odds_ratio": -0.66357421875, + "logits/chosen": -2.0625, + "logits/rejected": -2.234375, + "logps/chosen": -1.15625, + "logps/rejected": -1.3046875, + "loss": 1.2614, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.115234375, + "rewards/margins": 0.0150146484375, + "rewards/rejected": -0.1298828125, + "step": 520 + }, + { + "epoch": 0.42, + "grad_norm": 2.4609772540284593, + "learning_rate": 4.822002644330101e-07, + "log_odds_chosen": 0.19017334282398224, + "log_odds_ratio": -0.711718738079071, + "logits/chosen": -2.171875, + "logits/rejected": -2.296875, + "logps/chosen": -1.21875, + "logps/rejected": -1.3515625, + "loss": 1.2548, + "nll_loss": 1.2421875, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.12158203125, + "rewards/margins": 0.0137939453125, + "rewards/rejected": -0.1357421875, + "step": 530 + }, + { + "epoch": 0.43, + "grad_norm": 2.645873831673924, + "learning_rate": 4.809040319111865e-07, + "log_odds_chosen": 0.12646484375, + "log_odds_ratio": -0.713671863079071, + "logits/chosen": -2.09375, + "logits/rejected": -2.25, + "logps/chosen": -1.1484375, + "logps/rejected": -1.2421875, + "loss": 1.2402, + "nll_loss": 1.15625, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.11474609375, + "rewards/margins": 0.0096435546875, + "rewards/rejected": -0.12451171875, + "step": 540 + }, + { + "epoch": 0.43, + "grad_norm": 2.45398616162251, + "learning_rate": 4.795641144018965e-07, + "log_odds_chosen": 0.09213867038488388, + "log_odds_ratio": -0.74267578125, + "logits/chosen": -2.203125, + "logits/rejected": -2.203125, + "logps/chosen": -1.2109375, + "logps/rejected": -1.28125, + "loss": 1.2755, + "nll_loss": 1.25, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12109375, + "rewards/margins": 0.006805419921875, + "rewards/rejected": -0.1279296875, + "step": 550 + }, + { + "epoch": 0.44, + "grad_norm": 2.5512768402172683, + "learning_rate": 4.781807654056053e-07, + "log_odds_chosen": 0.214599609375, + "log_odds_ratio": -0.692675769329071, + "logits/chosen": -2.109375, + "logits/rejected": -2.21875, + "logps/chosen": -1.125, + "logps/rejected": -1.2890625, + "loss": 1.2303, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.11279296875, + "rewards/margins": 0.0159912109375, + "rewards/rejected": -0.12890625, + "step": 560 + }, + { + "epoch": 0.45, + "grad_norm": 2.411346604585139, + "learning_rate": 4.7675424663962933e-07, + "log_odds_chosen": 0.165435791015625, + "log_odds_ratio": -0.700390636920929, + "logits/chosen": -2.140625, + "logits/rejected": -2.21875, + "logps/chosen": -1.2109375, + "logps/rejected": -1.3125, + "loss": 1.2571, + "nll_loss": 1.234375, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12109375, + "rewards/margins": 0.010498046875, + "rewards/rejected": -0.1318359375, + "step": 570 + }, + { + "epoch": 0.46, + "grad_norm": 2.48077566767054, + "learning_rate": 4.752848279886212e-07, + "log_odds_chosen": 0.207489013671875, + "log_odds_ratio": -0.66748046875, + "logits/chosen": -2.0625, + "logits/rejected": -2.234375, + "logps/chosen": -1.1015625, + "logps/rejected": -1.2421875, + "loss": 1.1978, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.0145263671875, + "rewards/rejected": -0.12451171875, + "step": 580 + }, + { + "epoch": 0.46, + "grad_norm": 2.7897141919738786, + "learning_rate": 4.7377278745350984e-07, + "log_odds_chosen": 0.2662353515625, + "log_odds_ratio": -0.663867175579071, + "logits/chosen": -2.03125, + "logits/rejected": -2.15625, + "logps/chosen": -1.046875, + "logps/rejected": -1.1875, + "loss": 1.2309, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.0147705078125, + "rewards/rejected": -0.119140625, + "step": 590 + }, + { + "epoch": 0.47, + "grad_norm": 2.758617809500896, + "learning_rate": 4.7221841109890506e-07, + "log_odds_chosen": 0.22445067763328552, + "log_odds_ratio": -0.695117175579071, + "logits/chosen": -2.0625, + "logits/rejected": -2.140625, + "logps/chosen": -1.1015625, + "logps/rejected": -1.2578125, + "loss": 1.2281, + "nll_loss": 1.125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.0159912109375, + "rewards/rejected": -0.1259765625, + "step": 600 + }, + { + "epoch": 0.48, + "grad_norm": 2.6514784587831204, + "learning_rate": 4.706219929989771e-07, + "log_odds_chosen": 0.2147216796875, + "log_odds_ratio": -0.660937488079071, + "logits/chosen": -2.078125, + "logits/rejected": -2.203125, + "logps/chosen": -1.09375, + "logps/rejected": -1.234375, + "loss": 1.2094, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.0140380859375, + "rewards/rejected": -0.12353515625, + "step": 610 + }, + { + "epoch": 0.49, + "grad_norm": 2.630207221232529, + "learning_rate": 4.6898383518182007e-07, + "log_odds_chosen": 0.19202271103858948, + "log_odds_ratio": -0.6786133050918579, + "logits/chosen": -2.046875, + "logits/rejected": -2.125, + "logps/chosen": -1.0859375, + "logps/rejected": -1.2265625, + "loss": 1.2307, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.014404296875, + "rewards/rejected": -0.12255859375, + "step": 620 + }, + { + "epoch": 0.5, + "grad_norm": 2.9225816829730427, + "learning_rate": 4.67304247572311e-07, + "log_odds_chosen": 0.2799316346645355, + "log_odds_ratio": -0.659960925579071, + "logits/chosen": -2.09375, + "logits/rejected": -2.1875, + "logps/chosen": -1.1171875, + "logps/rejected": -1.3046875, + "loss": 1.2257, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.018310546875, + "rewards/rejected": -0.130859375, + "step": 630 + }, + { + "epoch": 0.5, + "grad_norm": 2.331606515139032, + "learning_rate": 4.65583547933475e-07, + "log_odds_chosen": 0.16041259467601776, + "log_odds_ratio": -0.713085949420929, + "logits/chosen": -2.0, + "logits/rejected": -2.140625, + "logps/chosen": -1.1171875, + "logps/rejected": -1.21875, + "loss": 1.21, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.01031494140625, + "rewards/rejected": -0.1220703125, + "step": 640 + }, + { + "epoch": 0.51, + "grad_norm": 2.541498557632385, + "learning_rate": 4.6382206180636705e-07, + "log_odds_chosen": 0.12631836533546448, + "log_odds_ratio": -0.7256835699081421, + "logits/chosen": -2.0625, + "logits/rejected": -2.1875, + "logps/chosen": -1.140625, + "logps/rejected": -1.25, + "loss": 1.2675, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1142578125, + "rewards/margins": 0.01019287109375, + "rewards/rejected": -0.12451171875, + "step": 650 + }, + { + "epoch": 0.52, + "grad_norm": 3.2675127960880586, + "learning_rate": 4.620201224484827e-07, + "log_odds_chosen": 0.2113037109375, + "log_odds_ratio": -0.658496081829071, + "logits/chosen": -2.0, + "logits/rejected": -2.078125, + "logps/chosen": -1.078125, + "logps/rejected": -1.2265625, + "loss": 1.209, + "nll_loss": 1.0703125, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.01470947265625, + "rewards/rejected": -0.12255859375, + "step": 660 + }, + { + "epoch": 0.53, + "grad_norm": 2.5735852092457248, + "learning_rate": 4.601780707707087e-07, + "log_odds_chosen": 0.25184327363967896, + "log_odds_ratio": -0.679394543170929, + "logits/chosen": -2.015625, + "logits/rejected": -2.109375, + "logps/chosen": -1.109375, + "logps/rejected": -1.28125, + "loss": 1.1888, + "nll_loss": 1.15625, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.1279296875, + "step": 670 + }, + { + "epoch": 0.54, + "grad_norm": 2.678233631526468, + "learning_rate": 4.5829625527282554e-07, + "log_odds_chosen": 0.15609130263328552, + "log_odds_ratio": -0.700488269329071, + "logits/chosen": -2.03125, + "logits/rejected": -2.125, + "logps/chosen": -1.125, + "logps/rejected": -1.234375, + "loss": 1.2431, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11279296875, + "rewards/margins": 0.01080322265625, + "rewards/rejected": -0.12353515625, + "step": 680 + }, + { + "epoch": 0.54, + "grad_norm": 2.423777152319806, + "learning_rate": 4.5637503197757474e-07, + "log_odds_chosen": 0.089111328125, + "log_odds_ratio": -0.746874988079071, + "logits/chosen": -1.890625, + "logits/rejected": -2.015625, + "logps/chosen": -1.140625, + "logps/rejected": -1.2109375, + "loss": 1.1964, + "nll_loss": 1.140625, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.007568359375, + "rewards/rejected": -0.12158203125, + "step": 690 + }, + { + "epoch": 0.55, + "grad_norm": 3.0765189053391633, + "learning_rate": 4.5441476436330204e-07, + "log_odds_chosen": 0.27679443359375, + "log_odds_ratio": -0.677929699420929, + "logits/chosen": -2.078125, + "logits/rejected": -2.21875, + "logps/chosen": -1.109375, + "logps/rejected": -1.296875, + "loss": 1.2492, + "nll_loss": 1.171875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.018310546875, + "rewards/rejected": -0.1298828125, + "step": 700 + }, + { + "epoch": 0.56, + "grad_norm": 2.6130205345904334, + "learning_rate": 4.5241582329519105e-07, + "log_odds_chosen": 0.150299072265625, + "log_odds_ratio": -0.7164062261581421, + "logits/chosen": -1.984375, + "logits/rejected": -2.109375, + "logps/chosen": -1.125, + "logps/rejected": -1.2421875, + "loss": 1.2128, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11279296875, + "rewards/margins": 0.0115966796875, + "rewards/rejected": -0.12451171875, + "step": 710 + }, + { + "epoch": 0.57, + "grad_norm": 2.710305930916119, + "learning_rate": 4.503785869550984e-07, + "log_odds_chosen": 0.17982177436351776, + "log_odds_ratio": -0.708300769329071, + "logits/chosen": -2.046875, + "logits/rejected": -2.078125, + "logps/chosen": -1.1796875, + "logps/rejected": -1.3203125, + "loss": 1.2557, + "nll_loss": 1.2109375, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.1181640625, + "rewards/margins": 0.01446533203125, + "rewards/rejected": -0.1328125, + "step": 720 + }, + { + "epoch": 0.58, + "grad_norm": 2.784125203819912, + "learning_rate": 4.4830344077000535e-07, + "log_odds_chosen": 0.17173461616039276, + "log_odds_ratio": -0.70556640625, + "logits/chosen": -1.921875, + "logits/rejected": -2.109375, + "logps/chosen": -1.1328125, + "logps/rejected": -1.2578125, + "loss": 1.2264, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.012451171875, + "rewards/rejected": -0.1259765625, + "step": 730 + }, + { + "epoch": 0.58, + "grad_norm": 2.969932216303278, + "learning_rate": 4.461907773390984e-07, + "log_odds_chosen": 0.24876098334789276, + "log_odds_ratio": -0.671191394329071, + "logits/chosen": -2.0625, + "logits/rejected": -2.171875, + "logps/chosen": -1.1171875, + "logps/rejected": -1.2890625, + "loss": 1.2521, + "nll_loss": 1.203125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.0169677734375, + "rewards/rejected": -0.12890625, + "step": 740 + }, + { + "epoch": 0.59, + "grad_norm": 2.3389098001594553, + "learning_rate": 4.4404099635949297e-07, + "log_odds_chosen": 0.20144042372703552, + "log_odds_ratio": -0.679394543170929, + "logits/chosen": -1.984375, + "logits/rejected": -2.0625, + "logps/chosen": -1.09375, + "logps/rejected": -1.2265625, + "loss": 1.1809, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.109375, + "rewards/margins": 0.012939453125, + "rewards/rejected": -0.12255859375, + "step": 750 + }, + { + "epoch": 0.6, + "grad_norm": 3.0086699300119872, + "learning_rate": 4.418545045506144e-07, + "log_odds_chosen": 0.10061035305261612, + "log_odds_ratio": -0.7476562261581421, + "logits/chosen": -2.0625, + "logits/rejected": -2.15625, + "logps/chosen": -1.0703125, + "logps/rejected": -1.140625, + "loss": 1.239, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.007415771484375, + "rewards/rejected": -0.1142578125, + "step": 760 + }, + { + "epoch": 0.61, + "grad_norm": 2.2320233236738143, + "learning_rate": 4.3963171557725004e-07, + "log_odds_chosen": 0.0516357421875, + "log_odds_ratio": -0.776171863079071, + "logits/chosen": -2.015625, + "logits/rejected": -2.109375, + "logps/chosen": -1.125, + "logps/rejected": -1.1796875, + "loss": 1.2253, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.00567626953125, + "rewards/rejected": -0.1181640625, + "step": 770 + }, + { + "epoch": 0.61, + "grad_norm": 2.5006901009181877, + "learning_rate": 4.3737304997128765e-07, + "log_odds_chosen": 0.2787841856479645, + "log_odds_ratio": -0.668652355670929, + "logits/chosen": -1.96875, + "logits/rejected": -2.078125, + "logps/chosen": -1.125, + "logps/rejected": -1.3125, + "loss": 1.2413, + "nll_loss": 1.203125, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11279296875, + "rewards/margins": 0.0184326171875, + "rewards/rejected": -0.130859375, + "step": 780 + }, + { + "epoch": 0.62, + "grad_norm": 2.7387563755483, + "learning_rate": 4.350789350521548e-07, + "log_odds_chosen": 0.19570311903953552, + "log_odds_ratio": -0.71337890625, + "logits/chosen": -1.9921875, + "logits/rejected": -2.078125, + "logps/chosen": -1.0859375, + "logps/rejected": -1.234375, + "loss": 1.2163, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.014892578125, + "rewards/rejected": -0.12353515625, + "step": 790 + }, + { + "epoch": 0.63, + "grad_norm": 2.898794879634056, + "learning_rate": 4.32749804845973e-07, + "log_odds_chosen": 0.16457518935203552, + "log_odds_ratio": -0.7186523675918579, + "logits/chosen": -2.03125, + "logits/rejected": -2.15625, + "logps/chosen": -1.1484375, + "logps/rejected": -1.25, + "loss": 1.2604, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.115234375, + "rewards/margins": 0.0098876953125, + "rewards/rejected": -0.125, + "step": 800 + }, + { + "epoch": 0.64, + "grad_norm": 2.5762145094524973, + "learning_rate": 4.303861000034449e-07, + "log_odds_chosen": 0.17528076469898224, + "log_odds_ratio": -0.6820312738418579, + "logits/chosen": -1.953125, + "logits/rejected": -2.03125, + "logps/chosen": -1.0703125, + "logps/rejected": -1.1875, + "loss": 1.1942, + "nll_loss": 1.0625, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.01123046875, + "rewards/rejected": -0.1181640625, + "step": 810 + }, + { + "epoch": 0.65, + "grad_norm": 2.865127283376686, + "learning_rate": 4.2798826771648635e-07, + "log_odds_chosen": 0.24028930068016052, + "log_odds_ratio": -0.6766601800918579, + "logits/chosen": -1.9375, + "logits/rejected": -2.078125, + "logps/chosen": -1.125, + "logps/rejected": -1.296875, + "loss": 1.2299, + "nll_loss": 1.171875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.1298828125, + "step": 820 + }, + { + "epoch": 0.65, + "grad_norm": 2.446066437808379, + "learning_rate": 4.2555676163362205e-07, + "log_odds_chosen": 0.21907348930835724, + "log_odds_ratio": -0.6822265386581421, + "logits/chosen": -2.015625, + "logits/rejected": -2.140625, + "logps/chosen": -1.1640625, + "logps/rejected": -1.296875, + "loss": 1.2354, + "nll_loss": 1.21875, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.1162109375, + "rewards/margins": 0.0133056640625, + "rewards/rejected": -0.12890625, + "step": 830 + }, + { + "epoch": 0.66, + "grad_norm": 2.372183587847385, + "learning_rate": 4.230920417741589e-07, + "log_odds_chosen": 0.27910155057907104, + "log_odds_ratio": -0.666308581829071, + "logits/chosen": -2.0625, + "logits/rejected": -2.203125, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2734375, + "loss": 1.2168, + "nll_loss": 1.125, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.020263671875, + "rewards/rejected": -0.126953125, + "step": 840 + }, + { + "epoch": 0.67, + "grad_norm": 2.9436571486068623, + "learning_rate": 4.205945744411551e-07, + "log_odds_chosen": 0.12534180283546448, + "log_odds_ratio": -0.73974609375, + "logits/chosen": -2.046875, + "logits/rejected": -2.15625, + "logps/chosen": -1.078125, + "logps/rejected": -1.1640625, + "loss": 1.2815, + "nll_loss": 1.203125, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.00909423828125, + "rewards/rejected": -0.11669921875, + "step": 850 + }, + { + "epoch": 0.68, + "grad_norm": 2.8833427572143133, + "learning_rate": 4.1806483213319877e-07, + "log_odds_chosen": 0.23845215141773224, + "log_odds_ratio": -0.6595703363418579, + "logits/chosen": -1.859375, + "logits/rejected": -2.046875, + "logps/chosen": -1.1015625, + "logps/rejected": -1.265625, + "loss": 1.2544, + "nll_loss": 1.171875, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0167236328125, + "rewards/rejected": -0.126953125, + "step": 860 + }, + { + "epoch": 0.69, + "grad_norm": 2.715581746962796, + "learning_rate": 4.155032934550165e-07, + "log_odds_chosen": 0.16794434189796448, + "log_odds_ratio": -0.6874023675918579, + "logits/chosen": -1.9296875, + "logits/rejected": -2.046875, + "logps/chosen": -1.078125, + "logps/rejected": -1.1640625, + "loss": 1.1984, + "nll_loss": 1.125, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.00909423828125, + "rewards/rejected": -0.11669921875, + "step": 870 + }, + { + "epoch": 0.69, + "grad_norm": 2.784822109898019, + "learning_rate": 4.129104430269248e-07, + "log_odds_chosen": 0.15845946967601776, + "log_odds_ratio": -0.693359375, + "logits/chosen": -1.9453125, + "logits/rejected": -2.09375, + "logps/chosen": -1.109375, + "logps/rejected": -1.21875, + "loss": 1.2345, + "nll_loss": 1.15625, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.0106201171875, + "rewards/rejected": -0.12158203125, + "step": 880 + }, + { + "epoch": 0.7, + "grad_norm": 2.5161836223908263, + "learning_rate": 4.102867713931448e-07, + "log_odds_chosen": 0.16597899794578552, + "log_odds_ratio": -0.6788085699081421, + "logits/chosen": -1.9765625, + "logits/rejected": -2.125, + "logps/chosen": -1.09375, + "logps/rejected": -1.1953125, + "loss": 1.2663, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.0096435546875, + "rewards/rejected": -0.119140625, + "step": 890 + }, + { + "epoch": 0.71, + "grad_norm": 2.644860041118969, + "learning_rate": 4.0763277492899504e-07, + "log_odds_chosen": 0.23768310248851776, + "log_odds_ratio": -0.6807616949081421, + "logits/chosen": -1.984375, + "logits/rejected": -2.109375, + "logps/chosen": -1.1328125, + "logps/rejected": -1.2890625, + "loss": 1.2307, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11328125, + "rewards/margins": 0.0157470703125, + "rewards/rejected": -0.12890625, + "step": 900 + }, + { + "epoch": 0.72, + "grad_norm": 2.7157553266494503, + "learning_rate": 4.049489557469824e-07, + "log_odds_chosen": 0.15152588486671448, + "log_odds_ratio": -0.7015625238418579, + "logits/chosen": -1.7890625, + "logits/rejected": -1.96875, + "logps/chosen": -1.0625, + "logps/rejected": -1.1640625, + "loss": 1.168, + "nll_loss": 1.09375, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.01025390625, + "rewards/rejected": -0.1162109375, + "step": 910 + }, + { + "epoch": 0.72, + "grad_norm": 2.302289432995534, + "learning_rate": 4.0223582160180623e-07, + "log_odds_chosen": 0.13297119736671448, + "log_odds_ratio": -0.725781261920929, + "logits/chosen": -1.9375, + "logits/rejected": -2.0, + "logps/chosen": -1.125, + "logps/rejected": -1.21875, + "loss": 1.1737, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.00994873046875, + "rewards/rejected": -0.1220703125, + "step": 920 + }, + { + "epoch": 0.73, + "grad_norm": 2.6556730641084543, + "learning_rate": 3.9949388579429614e-07, + "log_odds_chosen": 0.00870361365377903, + "log_odds_ratio": -0.7632812261581421, + "logits/chosen": -1.8828125, + "logits/rejected": -2.046875, + "logps/chosen": -1.1015625, + "logps/rejected": -1.1015625, + "loss": 1.2113, + "nll_loss": 1.171875, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.10986328125, + "rewards/margins": -0.0003070831298828125, + "rewards/rejected": -0.10986328125, + "step": 930 + }, + { + "epoch": 0.74, + "grad_norm": 2.3947626659116406, + "learning_rate": 3.967236670742998e-07, + "log_odds_chosen": 0.22456054389476776, + "log_odds_ratio": -0.681347668170929, + "logits/chosen": -2.0, + "logits/rejected": -2.109375, + "logps/chosen": -1.1796875, + "logps/rejected": -1.3515625, + "loss": 1.2084, + "nll_loss": 1.203125, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.11767578125, + "rewards/margins": 0.0174560546875, + "rewards/rejected": -0.134765625, + "step": 940 + }, + { + "epoch": 0.75, + "grad_norm": 2.7830252945871896, + "learning_rate": 3.9392568954254023e-07, + "log_odds_chosen": 0.2349853515625, + "log_odds_ratio": -0.6742187738418579, + "logits/chosen": -1.90625, + "logits/rejected": -2.03125, + "logps/chosen": -1.140625, + "logps/rejected": -1.3125, + "loss": 1.2185, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.01708984375, + "rewards/rejected": -0.130859375, + "step": 950 + }, + { + "epoch": 0.76, + "grad_norm": 2.4217959208998723, + "learning_rate": 3.9110048255146043e-07, + "log_odds_chosen": 0.16409912705421448, + "log_odds_ratio": -0.693164050579071, + "logits/chosen": -2.046875, + "logits/rejected": -2.15625, + "logps/chosen": -1.109375, + "logps/rejected": -1.2109375, + "loss": 1.2102, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0107421875, + "rewards/rejected": -0.12158203125, + "step": 960 + }, + { + "epoch": 0.76, + "grad_norm": 2.7469080448706706, + "learning_rate": 3.882485806050748e-07, + "log_odds_chosen": 0.31447142362594604, + "log_odds_ratio": -0.639355480670929, + "logits/chosen": -1.9609375, + "logits/rejected": -2.078125, + "logps/chosen": -1.0625, + "logps/rejected": -1.2578125, + "loss": 1.2235, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.0194091796875, + "rewards/rejected": -0.125, + "step": 970 + }, + { + "epoch": 0.77, + "grad_norm": 2.596832510754079, + "learning_rate": 3.8537052325784573e-07, + "log_odds_chosen": 0.2929016053676605, + "log_odds_ratio": -0.650585949420929, + "logits/chosen": -1.96875, + "logits/rejected": -2.046875, + "logps/chosen": -1.1015625, + "logps/rejected": -1.3125, + "loss": 1.1857, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.130859375, + "step": 980 + }, + { + "epoch": 0.78, + "grad_norm": 2.7575190212441383, + "learning_rate": 3.824668550126046e-07, + "log_odds_chosen": 0.19545897841453552, + "log_odds_ratio": -0.6885741949081421, + "logits/chosen": -1.96875, + "logits/rejected": -2.0625, + "logps/chosen": -1.0703125, + "logps/rejected": -1.1953125, + "loss": 1.1889, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0123291015625, + "rewards/rejected": -0.119140625, + "step": 990 + }, + { + "epoch": 0.79, + "grad_norm": 3.592994219979355, + "learning_rate": 3.7953812521753643e-07, + "log_odds_chosen": 0.16755370795726776, + "log_odds_ratio": -0.6943359375, + "logits/chosen": -1.921875, + "logits/rejected": -2.0, + "logps/chosen": -1.0703125, + "logps/rejected": -1.171875, + "loss": 1.1494, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.0098876953125, + "rewards/rejected": -0.1171875, + "step": 1000 + }, + { + "epoch": 0.8, + "grad_norm": 2.5202564213089405, + "learning_rate": 3.7658488796224885e-07, + "log_odds_chosen": 0.11643066257238388, + "log_odds_ratio": -0.716796875, + "logits/chosen": -2.046875, + "logits/rejected": -2.125, + "logps/chosen": -1.1484375, + "logps/rejected": -1.234375, + "loss": 1.2212, + "nll_loss": 1.203125, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.11474609375, + "rewards/margins": 0.00860595703125, + "rewards/rejected": -0.12353515625, + "step": 1010 + }, + { + "epoch": 0.8, + "grad_norm": 2.724799909308137, + "learning_rate": 3.736077019729425e-07, + "log_odds_chosen": 0.302978515625, + "log_odds_ratio": -0.6385742425918579, + "logits/chosen": -1.9296875, + "logits/rejected": -2.046875, + "logps/chosen": -1.0390625, + "logps/rejected": -1.2421875, + "loss": 1.1893, + "nll_loss": 1.109375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.10400390625, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.12451171875, + "step": 1020 + }, + { + "epoch": 0.81, + "grad_norm": 2.4835614341515053, + "learning_rate": 3.7060713050670546e-07, + "log_odds_chosen": 0.2666015625, + "log_odds_ratio": -0.6908203363418579, + "logits/chosen": -1.875, + "logits/rejected": -2.015625, + "logps/chosen": -1.1171875, + "logps/rejected": -1.328125, + "loss": 1.2376, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.020751953125, + "rewards/rejected": -0.1328125, + "step": 1030 + }, + { + "epoch": 0.82, + "grad_norm": 2.709722079150454, + "learning_rate": 3.6758374124494973e-07, + "log_odds_chosen": 0.185791015625, + "log_odds_ratio": -0.6966797113418579, + "logits/chosen": -1.8984375, + "logits/rejected": -2.03125, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2109375, + "loss": 1.2082, + "nll_loss": 1.109375, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.01458740234375, + "rewards/rejected": -0.12109375, + "step": 1040 + }, + { + "epoch": 0.83, + "grad_norm": 2.8331342756102167, + "learning_rate": 3.645381061860113e-07, + "log_odds_chosen": 0.3631835877895355, + "log_odds_ratio": -0.6460937261581421, + "logits/chosen": -1.921875, + "logits/rejected": -1.9765625, + "logps/chosen": -1.0078125, + "logps/rejected": -1.2578125, + "loss": 1.1933, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1005859375, + "rewards/margins": 0.025146484375, + "rewards/rejected": -0.1259765625, + "step": 1050 + }, + { + "epoch": 0.83, + "grad_norm": 2.6269186805524143, + "learning_rate": 3.61470801536933e-07, + "log_odds_chosen": 0.12788085639476776, + "log_odds_ratio": -0.7337890863418579, + "logits/chosen": -2.0, + "logits/rejected": -2.078125, + "logps/chosen": -1.0390625, + "logps/rejected": -1.1328125, + "loss": 1.2153, + "nll_loss": 1.109375, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.10400390625, + "rewards/margins": 0.00909423828125, + "rewards/rejected": -0.11279296875, + "step": 1060 + }, + { + "epoch": 0.84, + "grad_norm": 2.60712425422802, + "learning_rate": 3.583824076044508e-07, + "log_odds_chosen": 0.08272705227136612, + "log_odds_ratio": -0.7518554925918579, + "logits/chosen": -1.890625, + "logits/rejected": -1.984375, + "logps/chosen": -1.140625, + "logps/rejected": -1.2265625, + "loss": 1.2114, + "nll_loss": 1.171875, + "rewards/accuracies": 0.46875, + "rewards/chosen": -0.1142578125, + "rewards/margins": 0.0078125, + "rewards/rejected": -0.12255859375, + "step": 1070 + }, + { + "epoch": 0.85, + "grad_norm": 2.742344457324174, + "learning_rate": 3.55273508685206e-07, + "log_odds_chosen": 0.11997070163488388, + "log_odds_ratio": -0.7144531011581421, + "logits/chosen": -1.8828125, + "logits/rejected": -2.046875, + "logps/chosen": -1.140625, + "logps/rejected": -1.21875, + "loss": 1.2194, + "nll_loss": 1.171875, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.0084228515625, + "rewards/rejected": -0.1220703125, + "step": 1080 + }, + { + "epoch": 0.86, + "grad_norm": 2.677923442608537, + "learning_rate": 3.5214469295520033e-07, + "log_odds_chosen": 0.2944091856479645, + "log_odds_ratio": -0.6474609375, + "logits/chosen": -1.953125, + "logits/rejected": -2.0625, + "logps/chosen": -1.09375, + "logps/rejected": -1.296875, + "loss": 1.1926, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0206298828125, + "rewards/rejected": -0.1298828125, + "step": 1090 + }, + { + "epoch": 0.87, + "grad_norm": 2.614103984779814, + "learning_rate": 3.4899655235851903e-07, + "log_odds_chosen": 0.15128174424171448, + "log_odds_ratio": -0.692187488079071, + "logits/chosen": -1.875, + "logits/rejected": -2.03125, + "logps/chosen": -1.140625, + "logps/rejected": -1.2421875, + "loss": 1.2353, + "nll_loss": 1.2265625, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11376953125, + "rewards/margins": 0.01055908203125, + "rewards/rejected": -0.12451171875, + "step": 1100 + }, + { + "epoch": 0.87, + "grad_norm": 2.661524044558228, + "learning_rate": 3.458296824953403e-07, + "log_odds_chosen": 0.19251708686351776, + "log_odds_ratio": -0.681445300579071, + "logits/chosen": -1.8125, + "logits/rejected": -1.9609375, + "logps/chosen": -1.125, + "logps/rejected": -1.2578125, + "loss": 1.2002, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.01312255859375, + "rewards/rejected": -0.1259765625, + "step": 1110 + }, + { + "epoch": 0.88, + "grad_norm": 2.479788982713935, + "learning_rate": 3.426446825092525e-07, + "log_odds_chosen": 0.30213624238967896, + "log_odds_ratio": -0.6465820074081421, + "logits/chosen": -1.875, + "logits/rejected": -1.9765625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.2734375, + "loss": 1.2165, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.021728515625, + "rewards/rejected": -0.126953125, + "step": 1120 + }, + { + "epoch": 0.89, + "grad_norm": 3.266264486839817, + "learning_rate": 3.3944215497390197e-07, + "log_odds_chosen": 0.12014160305261612, + "log_odds_ratio": -0.707812488079071, + "logits/chosen": -1.875, + "logits/rejected": -1.9375, + "logps/chosen": -1.1015625, + "logps/rejected": -1.1875, + "loss": 1.2284, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.00848388671875, + "rewards/rejected": -0.1181640625, + "step": 1130 + }, + { + "epoch": 0.9, + "grad_norm": 2.397641031210895, + "learning_rate": 3.362227057789915e-07, + "log_odds_chosen": 0.3463378846645355, + "log_odds_ratio": -0.619335949420929, + "logits/chosen": -1.890625, + "logits/rejected": -2.046875, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2890625, + "loss": 1.1821, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0220947265625, + "rewards/rejected": -0.12890625, + "step": 1140 + }, + { + "epoch": 0.91, + "grad_norm": 2.4645155740078617, + "learning_rate": 3.329869440156512e-07, + "log_odds_chosen": 0.357086181640625, + "log_odds_ratio": -0.63232421875, + "logits/chosen": -1.890625, + "logits/rejected": -1.9609375, + "logps/chosen": -1.0625, + "logps/rejected": -1.3125, + "loss": 1.176, + "nll_loss": 1.109375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.024658203125, + "rewards/rejected": -0.130859375, + "step": 1150 + }, + { + "epoch": 0.91, + "grad_norm": 2.8912394279639084, + "learning_rate": 3.297354818612037e-07, + "log_odds_chosen": 0.05325927585363388, + "log_odds_ratio": -0.7728515863418579, + "logits/chosen": -1.9453125, + "logits/rejected": -2.015625, + "logps/chosen": -1.09375, + "logps/rejected": -1.125, + "loss": 1.2402, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.0030975341796875, + "rewards/rejected": -0.11279296875, + "step": 1160 + }, + { + "epoch": 0.92, + "grad_norm": 2.563419103608563, + "learning_rate": 3.264689344633461e-07, + "log_odds_chosen": 0.14066162705421448, + "log_odds_ratio": -0.6943359375, + "logits/chosen": -1.8125, + "logits/rejected": -1.859375, + "logps/chosen": -1.140625, + "logps/rejected": -1.2265625, + "loss": 1.1959, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1142578125, + "rewards/margins": 0.0084228515625, + "rewards/rejected": -0.12255859375, + "step": 1170 + }, + { + "epoch": 0.93, + "grad_norm": 2.8288693775232643, + "learning_rate": 3.2318791982376923e-07, + "log_odds_chosen": 0.19826659560203552, + "log_odds_ratio": -0.6885741949081421, + "logits/chosen": -1.9375, + "logits/rejected": -2.09375, + "logps/chosen": -1.0859375, + "logps/rejected": -1.21875, + "loss": 1.2221, + "nll_loss": 1.109375, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.01275634765625, + "rewards/rejected": -0.12158203125, + "step": 1180 + }, + { + "epoch": 0.94, + "grad_norm": 2.9337235954606844, + "learning_rate": 3.198930586812372e-07, + "log_odds_chosen": 0.3016296327114105, + "log_odds_ratio": -0.67626953125, + "logits/chosen": -1.8671875, + "logits/rejected": -2.0, + "logps/chosen": -1.0859375, + "logps/rejected": -1.3046875, + "loss": 1.1805, + "nll_loss": 1.171875, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.02197265625, + "rewards/rejected": -0.1298828125, + "step": 1190 + }, + { + "epoch": 0.95, + "grad_norm": 2.815544385281363, + "learning_rate": 3.1658497439414935e-07, + "log_odds_chosen": 0.18316039443016052, + "log_odds_ratio": -0.6927734613418579, + "logits/chosen": -1.96875, + "logits/rejected": -2.015625, + "logps/chosen": -1.0859375, + "logps/rejected": -1.203125, + "loss": 1.2118, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.01153564453125, + "rewards/rejected": -0.1201171875, + "step": 1200 + }, + { + "epoch": 0.95, + "grad_norm": 2.806645073099231, + "learning_rate": 3.132642928226061e-07, + "log_odds_chosen": 0.33399659395217896, + "log_odds_ratio": -0.6348632574081421, + "logits/chosen": -1.8828125, + "logits/rejected": -1.9921875, + "logps/chosen": -1.0546875, + "logps/rejected": -1.2734375, + "loss": 1.1911, + "nll_loss": 1.140625, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0223388671875, + "rewards/rejected": -0.1279296875, + "step": 1210 + }, + { + "epoch": 0.96, + "grad_norm": 2.4581782453300884, + "learning_rate": 3.0993164221000207e-07, + "log_odds_chosen": 0.215545654296875, + "log_odds_ratio": -0.672070324420929, + "logits/chosen": -1.9140625, + "logits/rejected": -2.0, + "logps/chosen": -1.109375, + "logps/rejected": -1.2578125, + "loss": 1.182, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.0145263671875, + "rewards/rejected": -0.125, + "step": 1220 + }, + { + "epoch": 0.97, + "grad_norm": 5.263613381972474, + "learning_rate": 3.0658765306416794e-07, + "log_odds_chosen": 0.166778564453125, + "log_odds_ratio": -0.6953125, + "logits/chosen": -1.8359375, + "logits/rejected": -1.921875, + "logps/chosen": -1.09375, + "logps/rejected": -1.2109375, + "loss": 1.2193, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0118408203125, + "rewards/rejected": -0.12109375, + "step": 1230 + }, + { + "epoch": 0.98, + "grad_norm": 2.5240280415155723, + "learning_rate": 3.032329580380838e-07, + "log_odds_chosen": 0.28306883573532104, + "log_odds_ratio": -0.6612304449081421, + "logits/chosen": -1.90625, + "logits/rejected": -1.96875, + "logps/chosen": -1.09375, + "logps/rejected": -1.28125, + "loss": 1.1956, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.109375, + "rewards/margins": 0.01904296875, + "rewards/rejected": -0.1279296875, + "step": 1240 + }, + { + "epoch": 0.98, + "grad_norm": 2.743773542575128, + "learning_rate": 2.998681918101871e-07, + "log_odds_chosen": 0.3384033143520355, + "log_odds_ratio": -0.6493164300918579, + "logits/chosen": -1.8828125, + "logits/rejected": -1.9453125, + "logps/chosen": -1.078125, + "logps/rejected": -1.328125, + "loss": 1.206, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.025390625, + "rewards/rejected": -0.1328125, + "step": 1250 + }, + { + "epoch": 0.99, + "grad_norm": 2.8074211611598066, + "learning_rate": 2.9649399096429714e-07, + "log_odds_chosen": 0.23601074516773224, + "log_odds_ratio": -0.6533203125, + "logits/chosen": -1.859375, + "logits/rejected": -1.9140625, + "logps/chosen": -1.078125, + "logps/rejected": -1.234375, + "loss": 1.188, + "nll_loss": 1.109375, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.015625, + "rewards/rejected": -0.123046875, + "step": 1260 + }, + { + "epoch": 1.0, + "grad_norm": 2.525044784627154, + "learning_rate": 2.931109938691786e-07, + "log_odds_chosen": 0.16881103813648224, + "log_odds_ratio": -0.684277355670929, + "logits/chosen": -1.8515625, + "logits/rejected": -2.0, + "logps/chosen": -1.09375, + "logps/rejected": -1.21875, + "loss": 1.188, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0128173828125, + "rewards/rejected": -0.1220703125, + "step": 1270 + }, + { + "epoch": 1.01, + "grad_norm": 2.668512392567912, + "learning_rate": 2.8971984055776853e-07, + "log_odds_chosen": 0.21584472060203552, + "log_odds_ratio": -0.672558605670929, + "logits/chosen": -1.84375, + "logits/rejected": -1.9765625, + "logps/chosen": -1.078125, + "logps/rejected": -1.2265625, + "loss": 1.2336, + "nll_loss": 1.15625, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.01470947265625, + "rewards/rejected": -0.12255859375, + "step": 1280 + }, + { + "epoch": 1.02, + "grad_norm": 2.659729033509314, + "learning_rate": 2.863211726060875e-07, + "log_odds_chosen": 0.2547973692417145, + "log_odds_ratio": -0.6659179925918579, + "logits/chosen": -1.96875, + "logits/rejected": -2.015625, + "logps/chosen": -1.140625, + "logps/rejected": -1.3125, + "loss": 1.2367, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1142578125, + "rewards/margins": 0.0167236328125, + "rewards/rejected": -0.130859375, + "step": 1290 + }, + { + "epoch": 1.02, + "grad_norm": 2.470961884835421, + "learning_rate": 2.829156330118589e-07, + "log_odds_chosen": 0.24007567763328552, + "log_odds_ratio": -0.65283203125, + "logits/chosen": -1.859375, + "logits/rejected": -1.9609375, + "logps/chosen": -1.125, + "logps/rejected": -1.28125, + "loss": 1.2008, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.0159912109375, + "rewards/rejected": -0.1279296875, + "step": 1300 + }, + { + "epoch": 1.03, + "grad_norm": 2.5904466369333026, + "learning_rate": 2.7950386607286e-07, + "log_odds_chosen": 0.28740233182907104, + "log_odds_ratio": -0.6572265625, + "logits/chosen": -1.8671875, + "logits/rejected": -1.9609375, + "logps/chosen": -1.0625, + "logps/rejected": -1.2578125, + "loss": 1.2003, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.019775390625, + "rewards/rejected": -0.1259765625, + "step": 1310 + }, + { + "epoch": 1.04, + "grad_norm": 2.8679276152227726, + "learning_rate": 2.7608651726502607e-07, + "log_odds_chosen": 0.29725342988967896, + "log_odds_ratio": -0.6602538824081421, + "logits/chosen": -1.84375, + "logits/rejected": -2.015625, + "logps/chosen": -1.0625, + "logps/rejected": -1.2734375, + "loss": 1.2296, + "nll_loss": 1.125, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.0213623046875, + "rewards/rejected": -0.1279296875, + "step": 1320 + }, + { + "epoch": 1.05, + "grad_norm": 2.5150772854856243, + "learning_rate": 2.7266423312033226e-07, + "log_odds_chosen": 0.2159423828125, + "log_odds_ratio": -0.7059570550918579, + "logits/chosen": -1.9140625, + "logits/rejected": -2.0, + "logps/chosen": -1.078125, + "logps/rejected": -1.25, + "loss": 1.185, + "nll_loss": 1.140625, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.125, + "step": 1330 + }, + { + "epoch": 1.06, + "grad_norm": 2.737219590030928, + "learning_rate": 2.692376611044757e-07, + "log_odds_chosen": 0.3914794921875, + "log_odds_ratio": -0.640332043170929, + "logits/chosen": -1.796875, + "logits/rejected": -1.8671875, + "logps/chosen": -1.03125, + "logps/rejected": -1.2890625, + "loss": 1.2041, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1025390625, + "rewards/margins": 0.026123046875, + "rewards/rejected": -0.12890625, + "step": 1340 + }, + { + "epoch": 1.06, + "grad_norm": 2.7769961907081293, + "learning_rate": 2.6580744949438045e-07, + "log_odds_chosen": 0.08111572265625, + "log_odds_ratio": -0.731249988079071, + "logits/chosen": -1.8828125, + "logits/rejected": -1.9921875, + "logps/chosen": -1.1484375, + "logps/rejected": -1.1875, + "loss": 1.2605, + "nll_loss": 1.203125, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.11474609375, + "rewards/margins": 0.0037078857421875, + "rewards/rejected": -0.11865234375, + "step": 1350 + }, + { + "epoch": 1.07, + "grad_norm": 2.9775601305183463, + "learning_rate": 2.6237424725554935e-07, + "log_odds_chosen": 0.3329834043979645, + "log_odds_ratio": -0.635937511920929, + "logits/chosen": -1.8359375, + "logits/rejected": -1.921875, + "logps/chosen": -1.0703125, + "logps/rejected": -1.296875, + "loss": 1.2152, + "nll_loss": 1.109375, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.0233154296875, + "rewards/rejected": -0.1298828125, + "step": 1360 + }, + { + "epoch": 1.08, + "grad_norm": 2.279068955006949, + "learning_rate": 2.589387039192858e-07, + "log_odds_chosen": 0.20733642578125, + "log_odds_ratio": -0.667675793170929, + "logits/chosen": -1.859375, + "logits/rejected": -1.9765625, + "logps/chosen": -1.125, + "logps/rejected": -1.2578125, + "loss": 1.2064, + "nll_loss": 1.15625, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.013671875, + "rewards/rejected": -0.1259765625, + "step": 1370 + }, + { + "epoch": 1.09, + "grad_norm": 3.0021514828628746, + "learning_rate": 2.555014694598077e-07, + "log_odds_chosen": 0.23118896782398224, + "log_odds_ratio": -0.6884765625, + "logits/chosen": -1.765625, + "logits/rejected": -1.9453125, + "logps/chosen": -1.0859375, + "logps/rejected": -1.2265625, + "loss": 1.2152, + "nll_loss": 1.09375, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.0145263671875, + "rewards/rejected": -0.123046875, + "step": 1380 + }, + { + "epoch": 1.09, + "grad_norm": 2.735522050073968, + "learning_rate": 2.5206319417127873e-07, + "log_odds_chosen": 0.3378845155239105, + "log_odds_ratio": -0.632128894329071, + "logits/chosen": -1.7421875, + "logits/rejected": -1.8984375, + "logps/chosen": -1.0234375, + "logps/rejected": -1.2578125, + "loss": 1.1638, + "nll_loss": 1.0703125, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.1025390625, + "rewards/margins": 0.0228271484375, + "rewards/rejected": -0.125, + "step": 1390 + }, + { + "epoch": 1.1, + "grad_norm": 2.5736254747923923, + "learning_rate": 2.4862452854477784e-07, + "log_odds_chosen": 0.3209228515625, + "log_odds_ratio": -0.65576171875, + "logits/chosen": -1.734375, + "logits/rejected": -1.8984375, + "logps/chosen": -1.03125, + "logps/rejected": -1.2578125, + "loss": 1.166, + "nll_loss": 1.046875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.0225830078125, + "rewards/rejected": -0.1259765625, + "step": 1400 + }, + { + "epoch": 1.11, + "grad_norm": 2.7098667746876073, + "learning_rate": 2.4518612314523265e-07, + "log_odds_chosen": 0.08408202975988388, + "log_odds_ratio": -0.732421875, + "logits/chosen": -1.84375, + "logits/rejected": -1.953125, + "logps/chosen": -1.09375, + "logps/rejected": -1.140625, + "loss": 1.1805, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.00457763671875, + "rewards/rejected": -0.1142578125, + "step": 1410 + }, + { + "epoch": 1.12, + "grad_norm": 2.7073252776256966, + "learning_rate": 2.4174862848833806e-07, + "log_odds_chosen": 0.20045165717601776, + "log_odds_ratio": -0.67236328125, + "logits/chosen": -1.7578125, + "logits/rejected": -1.8359375, + "logps/chosen": -1.0703125, + "logps/rejected": -1.1875, + "loss": 1.2051, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0118408203125, + "rewards/rejected": -0.119140625, + "step": 1420 + }, + { + "epoch": 1.13, + "grad_norm": 2.7514653552282233, + "learning_rate": 2.3831269491748467e-07, + "log_odds_chosen": 0.22596435248851776, + "log_odds_ratio": -0.708984375, + "logits/chosen": -1.796875, + "logits/rejected": -1.8828125, + "logps/chosen": -1.1171875, + "logps/rejected": -1.2890625, + "loss": 1.217, + "nll_loss": 1.171875, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.12890625, + "step": 1430 + }, + { + "epoch": 1.13, + "grad_norm": 2.8823498677475183, + "learning_rate": 2.3487897248071941e-07, + "log_odds_chosen": 0.2939697206020355, + "log_odds_ratio": -0.664257824420929, + "logits/chosen": -1.7890625, + "logits/rejected": -1.9375, + "logps/chosen": -1.046875, + "logps/rejected": -1.25, + "loss": 1.1892, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.1259765625, + "step": 1440 + }, + { + "epoch": 1.14, + "grad_norm": 2.69332509317782, + "learning_rate": 2.314481108077624e-07, + "log_odds_chosen": 0.1607666015625, + "log_odds_ratio": -0.6968749761581421, + "logits/chosen": -1.8203125, + "logits/rejected": -1.8515625, + "logps/chosen": -1.078125, + "logps/rejected": -1.1796875, + "loss": 1.1978, + "nll_loss": 1.125, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.01068115234375, + "rewards/rejected": -0.1181640625, + "step": 1450 + }, + { + "epoch": 1.15, + "grad_norm": 2.5989208277674356, + "learning_rate": 2.280207589871026e-07, + "log_odds_chosen": 0.3521362245082855, + "log_odds_ratio": -0.642382800579071, + "logits/chosen": -1.8125, + "logits/rejected": -1.9375, + "logps/chosen": -1.078125, + "logps/rejected": -1.3203125, + "loss": 1.1628, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.0242919921875, + "rewards/rejected": -0.1318359375, + "step": 1460 + }, + { + "epoch": 1.16, + "grad_norm": 2.5631030942900805, + "learning_rate": 2.2459756544319627e-07, + "log_odds_chosen": 0.1890869140625, + "log_odds_ratio": -0.696972668170929, + "logits/chosen": -1.796875, + "logits/rejected": -1.890625, + "logps/chosen": -1.015625, + "logps/rejected": -1.1328125, + "loss": 1.1771, + "nll_loss": 1.0546875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.10205078125, + "rewards/margins": 0.01129150390625, + "rewards/rejected": -0.11328125, + "step": 1470 + }, + { + "epoch": 1.17, + "grad_norm": 2.7548023973263613, + "learning_rate": 2.2117917781379067e-07, + "log_odds_chosen": 0.19255371391773224, + "log_odds_ratio": -0.679394543170929, + "logits/chosen": -1.734375, + "logits/rejected": -1.859375, + "logps/chosen": -1.09375, + "logps/rejected": -1.21875, + "loss": 1.2441, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0123291015625, + "rewards/rejected": -0.12158203125, + "step": 1480 + }, + { + "epoch": 1.17, + "grad_norm": 2.6382486056871177, + "learning_rate": 2.177662428273968e-07, + "log_odds_chosen": 0.23670653998851776, + "log_odds_ratio": -0.67626953125, + "logits/chosen": -1.7578125, + "logits/rejected": -1.8984375, + "logps/chosen": -1.046875, + "logps/rejected": -1.203125, + "loss": 1.1895, + "nll_loss": 1.09375, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0150146484375, + "rewards/rejected": -0.1201171875, + "step": 1490 + }, + { + "epoch": 1.18, + "grad_norm": 2.5099170844954317, + "learning_rate": 2.1435940618093414e-07, + "log_odds_chosen": 0.19310303032398224, + "log_odds_ratio": -0.690625011920929, + "logits/chosen": -1.765625, + "logits/rejected": -1.875, + "logps/chosen": -1.0859375, + "logps/rejected": -1.2265625, + "loss": 1.1881, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0140380859375, + "rewards/rejected": -0.123046875, + "step": 1500 + }, + { + "epoch": 1.19, + "grad_norm": 2.7950237991583493, + "learning_rate": 2.1095931241757062e-07, + "log_odds_chosen": 0.2502685487270355, + "log_odds_ratio": -0.680957019329071, + "logits/chosen": -1.7734375, + "logits/rejected": -1.8515625, + "logps/chosen": -1.0390625, + "logps/rejected": -1.2265625, + "loss": 1.1906, + "nll_loss": 1.09375, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.10400390625, + "rewards/margins": 0.0185546875, + "rewards/rejected": -0.1220703125, + "step": 1510 + }, + { + "epoch": 1.2, + "grad_norm": 2.609790265054367, + "learning_rate": 2.075666048047806e-07, + "log_odds_chosen": 0.15053710341453552, + "log_odds_ratio": -0.698437511920929, + "logits/chosen": -1.7578125, + "logits/rejected": -1.828125, + "logps/chosen": -1.1015625, + "logps/rejected": -1.21875, + "loss": 1.221, + "nll_loss": 1.125, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.01171875, + "rewards/rejected": -0.1220703125, + "step": 1520 + }, + { + "epoch": 1.21, + "grad_norm": 2.5467686003601697, + "learning_rate": 2.0418192521264454e-07, + "log_odds_chosen": 0.23857422173023224, + "log_odds_ratio": -0.659863293170929, + "logits/chosen": -1.7734375, + "logits/rejected": -1.8515625, + "logps/chosen": -1.0625, + "logps/rejected": -1.2109375, + "loss": 1.1898, + "nll_loss": 1.109375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.01397705078125, + "rewards/rejected": -0.12060546875, + "step": 1530 + }, + { + "epoch": 1.21, + "grad_norm": 2.459650956326835, + "learning_rate": 2.0080591399241292e-07, + "log_odds_chosen": 0.23247070610523224, + "log_odds_ratio": -0.6712890863418579, + "logits/chosen": -1.7578125, + "logits/rejected": -1.7734375, + "logps/chosen": -1.109375, + "logps/rejected": -1.2578125, + "loss": 1.1708, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.014892578125, + "rewards/rejected": -0.1259765625, + "step": 1540 + }, + { + "epoch": 1.22, + "grad_norm": 2.47537989067237, + "learning_rate": 1.9743920985535729e-07, + "log_odds_chosen": 0.3998779356479645, + "log_odds_ratio": -0.625781238079071, + "logits/chosen": -1.6484375, + "logits/rejected": -1.796875, + "logps/chosen": -0.98828125, + "logps/rejected": -1.2578125, + "loss": 1.1589, + "nll_loss": 1.0390625, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.09912109375, + "rewards/margins": 0.0269775390625, + "rewards/rejected": -0.1259765625, + "step": 1550 + }, + { + "epoch": 1.23, + "grad_norm": 2.7703541098291455, + "learning_rate": 1.94082449751932e-07, + "log_odds_chosen": 0.2127685546875, + "log_odds_ratio": -0.6846679449081421, + "logits/chosen": -1.734375, + "logits/rejected": -1.8125, + "logps/chosen": -1.09375, + "logps/rejected": -1.25, + "loss": 1.1794, + "nll_loss": 1.171875, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0157470703125, + "rewards/rejected": -0.125, + "step": 1560 + }, + { + "epoch": 1.24, + "grad_norm": 2.885795668675382, + "learning_rate": 1.9073626875126874e-07, + "log_odds_chosen": 0.26057130098342896, + "log_odds_ratio": -0.649707019329071, + "logits/chosen": -1.7734375, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0234375, + "logps/rejected": -1.1875, + "loss": 1.1671, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.1025390625, + "rewards/margins": 0.01611328125, + "rewards/rejected": -0.11865234375, + "step": 1570 + }, + { + "epoch": 1.24, + "grad_norm": 2.722691086755302, + "learning_rate": 1.874012999210271e-07, + "log_odds_chosen": 0.19356079399585724, + "log_odds_ratio": -0.694140613079071, + "logits/chosen": -1.859375, + "logits/rejected": -1.8828125, + "logps/chosen": -1.1171875, + "logps/rejected": -1.25, + "loss": 1.1779, + "nll_loss": 1.171875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.01385498046875, + "rewards/rejected": -0.125, + "step": 1580 + }, + { + "epoch": 1.25, + "grad_norm": 2.604921389210434, + "learning_rate": 1.8407817420762383e-07, + "log_odds_chosen": 0.26337891817092896, + "log_odds_ratio": -0.6572265625, + "logits/chosen": -1.734375, + "logits/rejected": -1.875, + "logps/chosen": -1.09375, + "logps/rejected": -1.2734375, + "loss": 1.184, + "nll_loss": 1.140625, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.01806640625, + "rewards/rejected": -0.126953125, + "step": 1590 + }, + { + "epoch": 1.26, + "grad_norm": 2.732739073707677, + "learning_rate": 1.8076752031686343e-07, + "log_odds_chosen": 0.14312133193016052, + "log_odds_ratio": -0.704882800579071, + "logits/chosen": -1.7734375, + "logits/rejected": -1.8671875, + "logps/chosen": -1.0390625, + "logps/rejected": -1.140625, + "loss": 1.188, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.010498046875, + "rewards/rejected": -0.1142578125, + "step": 1600 + }, + { + "epoch": 1.27, + "grad_norm": 2.395290977769547, + "learning_rate": 1.7746996459499254e-07, + "log_odds_chosen": 0.242431640625, + "log_odds_ratio": -0.6644531488418579, + "logits/chosen": -1.65625, + "logits/rejected": -1.84375, + "logps/chosen": -1.0546875, + "logps/rejected": -1.21875, + "loss": 1.1618, + "nll_loss": 1.046875, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.016357421875, + "rewards/rejected": -0.1220703125, + "step": 1610 + }, + { + "epoch": 1.28, + "grad_norm": 2.547454975163186, + "learning_rate": 1.741861309102009e-07, + "log_odds_chosen": 0.26506346464157104, + "log_odds_ratio": -0.6689453125, + "logits/chosen": -1.640625, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.2109375, + "loss": 1.1665, + "nll_loss": 1.109375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0159912109375, + "rewards/rejected": -0.12109375, + "step": 1620 + }, + { + "epoch": 1.28, + "grad_norm": 2.396584161009059, + "learning_rate": 1.7091664053459088e-07, + "log_odds_chosen": 0.18143311142921448, + "log_odds_ratio": -0.6865234375, + "logits/chosen": -1.71875, + "logits/rejected": -1.8359375, + "logps/chosen": -1.0859375, + "logps/rejected": -1.1875, + "loss": 1.2118, + "nll_loss": 1.15625, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.01043701171875, + "rewards/rejected": -0.11865234375, + "step": 1630 + }, + { + "epoch": 1.29, + "grad_norm": 2.279515658884766, + "learning_rate": 1.6766211202663844e-07, + "log_odds_chosen": 0.05356445163488388, + "log_odds_ratio": -0.739453136920929, + "logits/chosen": -1.6875, + "logits/rejected": -1.7890625, + "logps/chosen": -1.1171875, + "logps/rejected": -1.140625, + "loss": 1.2049, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.0027008056640625, + "rewards/rejected": -0.1142578125, + "step": 1640 + }, + { + "epoch": 1.3, + "grad_norm": 2.4004304534033265, + "learning_rate": 1.6442316111416743e-07, + "log_odds_chosen": 0.26105958223342896, + "log_odds_ratio": -0.6631835699081421, + "logits/chosen": -1.6796875, + "logits/rejected": -1.7421875, + "logps/chosen": -1.0859375, + "logps/rejected": -1.2734375, + "loss": 1.1816, + "nll_loss": 1.140625, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0184326171875, + "rewards/rejected": -0.126953125, + "step": 1650 + }, + { + "epoch": 1.31, + "grad_norm": 2.763579524745402, + "learning_rate": 1.6120040057785928e-07, + "log_odds_chosen": 0.29625242948532104, + "log_odds_ratio": -0.6499999761581421, + "logits/chosen": -1.8125, + "logits/rejected": -1.875, + "logps/chosen": -1.0859375, + "logps/rejected": -1.28125, + "loss": 1.1727, + "nll_loss": 1.09375, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.0194091796875, + "rewards/rejected": -0.1279296875, + "step": 1660 + }, + { + "epoch": 1.32, + "grad_norm": 2.8036257747935154, + "learning_rate": 1.5799444013532038e-07, + "log_odds_chosen": 0.23708495497703552, + "log_odds_ratio": -0.67431640625, + "logits/chosen": -1.7109375, + "logits/rejected": -1.8203125, + "logps/chosen": -1.0625, + "logps/rejected": -1.21875, + "loss": 1.1238, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.01556396484375, + "rewards/rejected": -0.1220703125, + "step": 1670 + }, + { + "epoch": 1.32, + "grad_norm": 2.6570430804218566, + "learning_rate": 1.5480588632572885e-07, + "log_odds_chosen": 0.37006837129592896, + "log_odds_ratio": -0.635937511920929, + "logits/chosen": -1.7421875, + "logits/rejected": -1.8046875, + "logps/chosen": -1.0390625, + "logps/rejected": -1.2890625, + "loss": 1.1907, + "nll_loss": 1.078125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.025390625, + "rewards/rejected": -0.12890625, + "step": 1680 + }, + { + "epoch": 1.33, + "grad_norm": 2.590262402608133, + "learning_rate": 1.516353423950829e-07, + "log_odds_chosen": 0.3837524354457855, + "log_odds_ratio": -0.625, + "logits/chosen": -1.796875, + "logits/rejected": -1.9140625, + "logps/chosen": -1.046875, + "logps/rejected": -1.28125, + "loss": 1.1898, + "nll_loss": 1.125, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.0235595703125, + "rewards/rejected": -0.1279296875, + "step": 1690 + }, + { + "epoch": 1.34, + "grad_norm": 2.568483644438331, + "learning_rate": 1.4848340818207184e-07, + "log_odds_chosen": 0.26896971464157104, + "log_odds_ratio": -0.66455078125, + "logits/chosen": -1.75, + "logits/rejected": -1.859375, + "logps/chosen": -1.046875, + "logps/rejected": -1.2421875, + "loss": 1.1799, + "nll_loss": 1.09375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.019287109375, + "rewards/rejected": -0.1240234375, + "step": 1700 + }, + { + "epoch": 1.35, + "grad_norm": 3.318714021827115, + "learning_rate": 1.453506800045921e-07, + "log_odds_chosen": 0.12944336235523224, + "log_odds_ratio": -0.7064453363418579, + "logits/chosen": -1.75, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0859375, + "logps/rejected": -1.1796875, + "loss": 1.2096, + "nll_loss": 1.140625, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.00970458984375, + "rewards/rejected": -0.1181640625, + "step": 1710 + }, + { + "epoch": 1.35, + "grad_norm": 2.7195091239689426, + "learning_rate": 1.422377505469293e-07, + "log_odds_chosen": 0.14760741591453552, + "log_odds_ratio": -0.72802734375, + "logits/chosen": -1.703125, + "logits/rejected": -1.75, + "logps/chosen": -1.1328125, + "logps/rejected": -1.21875, + "loss": 1.2695, + "nll_loss": 1.234375, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.11328125, + "rewards/margins": 0.00823974609375, + "rewards/rejected": -0.12158203125, + "step": 1720 + }, + { + "epoch": 1.36, + "grad_norm": 2.4116745200019696, + "learning_rate": 1.3914520874762726e-07, + "log_odds_chosen": 0.2623352110385895, + "log_odds_ratio": -0.6844726800918579, + "logits/chosen": -1.6875, + "logits/rejected": -1.765625, + "logps/chosen": -1.109375, + "logps/rejected": -1.3046875, + "loss": 1.2307, + "nll_loss": 1.15625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.0191650390625, + "rewards/rejected": -0.130859375, + "step": 1730 + }, + { + "epoch": 1.37, + "grad_norm": 2.6643696734974127, + "learning_rate": 1.3607363968806645e-07, + "log_odds_chosen": 0.3259033262729645, + "log_odds_ratio": -0.623046875, + "logits/chosen": -1.6953125, + "logits/rejected": -1.78125, + "logps/chosen": -1.03125, + "logps/rejected": -1.25, + "loss": 1.2087, + "nll_loss": 1.046875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.022216796875, + "rewards/rejected": -0.125, + "step": 1740 + }, + { + "epoch": 1.38, + "grad_norm": 2.734224521952181, + "learning_rate": 1.3302362448177167e-07, + "log_odds_chosen": 0.30589598417282104, + "log_odds_ratio": -0.6283203363418579, + "logits/chosen": -1.703125, + "logits/rejected": -1.796875, + "logps/chosen": -1.03125, + "logps/rejected": -1.234375, + "loss": 1.1853, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.0198974609375, + "rewards/rejected": -0.12353515625, + "step": 1750 + }, + { + "epoch": 1.39, + "grad_norm": 3.213949669653505, + "learning_rate": 1.2999574016447056e-07, + "log_odds_chosen": 0.3102783262729645, + "log_odds_ratio": -0.6421874761581421, + "logits/chosen": -1.6875, + "logits/rejected": -1.8125, + "logps/chosen": -1.046875, + "logps/rejected": -1.2578125, + "loss": 1.196, + "nll_loss": 1.0703125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.0211181640625, + "rewards/rejected": -0.1259765625, + "step": 1760 + }, + { + "epoch": 1.39, + "grad_norm": 2.3847902873067492, + "learning_rate": 1.2699055958492344e-07, + "log_odds_chosen": 0.19971923530101776, + "log_odds_ratio": -0.6748046875, + "logits/chosen": -1.7421875, + "logits/rejected": -1.8203125, + "logps/chosen": -1.046875, + "logps/rejected": -1.171875, + "loss": 1.2064, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.01287841796875, + "rewards/rejected": -0.11767578125, + "step": 1770 + }, + { + "epoch": 1.4, + "grad_norm": 2.5039350155364573, + "learning_rate": 1.2400865129654567e-07, + "log_odds_chosen": 0.27821046113967896, + "log_odds_ratio": -0.652050793170929, + "logits/chosen": -1.6796875, + "logits/rejected": -1.796875, + "logps/chosen": -1.0390625, + "logps/rejected": -1.2265625, + "loss": 1.1371, + "nll_loss": 1.0703125, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.10400390625, + "rewards/margins": 0.018310546875, + "rewards/rejected": -0.12255859375, + "step": 1780 + }, + { + "epoch": 1.41, + "grad_norm": 2.5879591956281995, + "learning_rate": 1.210505794498422e-07, + "log_odds_chosen": 0.23630371689796448, + "log_odds_ratio": -0.673828125, + "logits/chosen": -1.7109375, + "logits/rejected": -1.7421875, + "logps/chosen": -1.09375, + "logps/rejected": -1.2578125, + "loss": 1.1706, + "nll_loss": 1.109375, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0164794921875, + "rewards/rejected": -0.1259765625, + "step": 1790 + }, + { + "epoch": 1.42, + "grad_norm": 2.752234308496576, + "learning_rate": 1.1811690368567545e-07, + "log_odds_chosen": 0.14584961533546448, + "log_odds_ratio": -0.6947265863418579, + "logits/chosen": -1.7109375, + "logits/rejected": -1.8203125, + "logps/chosen": -1.046875, + "logps/rejected": -1.1328125, + "loss": 1.236, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.00872802734375, + "rewards/rejected": -0.11328125, + "step": 1800 + }, + { + "epoch": 1.43, + "grad_norm": 2.573963727957766, + "learning_rate": 1.1520817902938618e-07, + "log_odds_chosen": 0.07918091118335724, + "log_odds_ratio": -0.7347656488418579, + "logits/chosen": -1.7109375, + "logits/rejected": -1.78125, + "logps/chosen": -1.125, + "logps/rejected": -1.1875, + "loss": 1.1915, + "nll_loss": 1.15625, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.11279296875, + "rewards/margins": 0.00604248046875, + "rewards/rejected": -0.11865234375, + "step": 1810 + }, + { + "epoch": 1.43, + "grad_norm": 2.7806823611081177, + "learning_rate": 1.1232495578578755e-07, + "log_odds_chosen": 0.15264892578125, + "log_odds_ratio": -0.6976562738418579, + "logits/chosen": -1.75, + "logits/rejected": -1.78125, + "logps/chosen": -1.0390625, + "logps/rejected": -1.1171875, + "loss": 1.1626, + "nll_loss": 1.09375, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.00823974609375, + "rewards/rejected": -0.11181640625, + "step": 1820 + }, + { + "epoch": 1.44, + "grad_norm": 2.436201094808605, + "learning_rate": 1.0946777943505254e-07, + "log_odds_chosen": 0.23690184950828552, + "log_odds_ratio": -0.6917968988418579, + "logits/chosen": -1.7109375, + "logits/rejected": -1.78125, + "logps/chosen": -1.0625, + "logps/rejected": -1.2265625, + "loss": 1.1971, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.0167236328125, + "rewards/rejected": -0.123046875, + "step": 1830 + }, + { + "epoch": 1.45, + "grad_norm": 2.417259454035703, + "learning_rate": 1.0663719052951381e-07, + "log_odds_chosen": 0.19016113877296448, + "log_odds_ratio": -0.699414074420929, + "logits/chosen": -1.6796875, + "logits/rejected": -1.765625, + "logps/chosen": -1.0859375, + "logps/rejected": -1.1953125, + "loss": 1.1861, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0107421875, + "rewards/rejected": -0.119140625, + "step": 1840 + }, + { + "epoch": 1.46, + "grad_norm": 2.340681686492189, + "learning_rate": 1.0383372459139608e-07, + "log_odds_chosen": 0.30018919706344604, + "log_odds_ratio": -0.6387695074081421, + "logits/chosen": -1.78125, + "logits/rejected": -1.921875, + "logps/chosen": -1.0546875, + "logps/rejected": -1.2421875, + "loss": 1.1958, + "nll_loss": 1.078125, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0191650390625, + "rewards/rejected": -0.12451171875, + "step": 1850 + }, + { + "epoch": 1.47, + "grad_norm": 2.530421254724575, + "learning_rate": 1.0105791201150002e-07, + "log_odds_chosen": 0.3886962831020355, + "log_odds_ratio": -0.620312511920929, + "logits/chosen": -1.6796875, + "logits/rejected": -1.7734375, + "logps/chosen": -1.0625, + "logps/rejected": -1.3359375, + "loss": 1.16, + "nll_loss": 1.046875, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.027587890625, + "rewards/rejected": -0.1337890625, + "step": 1860 + }, + { + "epoch": 1.47, + "grad_norm": 2.6492828085260225, + "learning_rate": 9.831027794885713e-08, + "log_odds_chosen": 0.34185791015625, + "log_odds_ratio": -0.6444336175918579, + "logits/chosen": -1.6484375, + "logits/rejected": -1.671875, + "logps/chosen": -1.0234375, + "logps/rejected": -1.234375, + "loss": 1.1779, + "nll_loss": 1.140625, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1025390625, + "rewards/margins": 0.0211181640625, + "rewards/rejected": -0.12353515625, + "step": 1870 + }, + { + "epoch": 1.48, + "grad_norm": 2.6971126252475286, + "learning_rate": 9.559134223137424e-08, + "log_odds_chosen": 0.2640136778354645, + "log_odds_ratio": -0.673046886920929, + "logits/chosen": -1.7578125, + "logits/rejected": -1.8046875, + "logps/chosen": -1.09375, + "logps/rejected": -1.2578125, + "loss": 1.2186, + "nll_loss": 1.171875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.109375, + "rewards/margins": 0.016845703125, + "rewards/rejected": -0.1259765625, + "step": 1880 + }, + { + "epoch": 1.49, + "grad_norm": 2.768346463128813, + "learning_rate": 9.290161925748674e-08, + "log_odds_chosen": 0.333740234375, + "log_odds_ratio": -0.6434570550918579, + "logits/chosen": -1.71875, + "logits/rejected": -1.796875, + "logps/chosen": -1.1015625, + "logps/rejected": -1.328125, + "loss": 1.1758, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0224609375, + "rewards/rejected": -0.1328125, + "step": 1890 + }, + { + "epoch": 1.5, + "grad_norm": 2.4131576506486168, + "learning_rate": 9.024161789883897e-08, + "log_odds_chosen": 0.13895873725414276, + "log_odds_ratio": -0.695507824420929, + "logits/chosen": -1.625, + "logits/rejected": -1.71875, + "logps/chosen": -1.03125, + "logps/rejected": -1.1171875, + "loss": 1.1368, + "nll_loss": 1.0625, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.009033203125, + "rewards/rejected": -0.1123046875, + "step": 1900 + }, + { + "epoch": 1.5, + "grad_norm": 2.5654624061461253, + "learning_rate": 8.761184140401023e-08, + "log_odds_chosen": 0.25886231660842896, + "log_odds_ratio": -0.662109375, + "logits/chosen": -1.609375, + "logits/rejected": -1.6875, + "logps/chosen": -1.0390625, + "logps/rejected": -1.203125, + "loss": 1.1906, + "nll_loss": 1.09375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10400390625, + "rewards/margins": 0.0164794921875, + "rewards/rejected": -0.12060546875, + "step": 1910 + }, + { + "epoch": 1.51, + "grad_norm": 2.6681020896294676, + "learning_rate": 8.501278730330463e-08, + "log_odds_chosen": 0.36528319120407104, + "log_odds_ratio": -0.626269519329071, + "logits/chosen": -1.609375, + "logits/rejected": -1.671875, + "logps/chosen": -1.078125, + "logps/rejected": -1.3203125, + "loss": 1.1977, + "nll_loss": 1.140625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.02490234375, + "rewards/rejected": -0.1318359375, + "step": 1920 + }, + { + "epoch": 1.52, + "grad_norm": 2.7519831354278512, + "learning_rate": 8.244494731462279e-08, + "log_odds_chosen": 0.24447020888328552, + "log_odds_ratio": -0.6788085699081421, + "logits/chosen": -1.59375, + "logits/rejected": -1.703125, + "logps/chosen": -1.03125, + "logps/rejected": -1.1953125, + "loss": 1.164, + "nll_loss": 1.0546875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.015869140625, + "rewards/rejected": -0.119140625, + "step": 1930 + }, + { + "epoch": 1.53, + "grad_norm": 2.870452087544285, + "learning_rate": 7.990880725043322e-08, + "log_odds_chosen": 0.2567138671875, + "log_odds_ratio": -0.670703113079071, + "logits/chosen": -1.625, + "logits/rejected": -1.7578125, + "logps/chosen": -1.046875, + "logps/rejected": -1.21875, + "loss": 1.1622, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.0167236328125, + "rewards/rejected": -0.12158203125, + "step": 1940 + }, + { + "epoch": 1.54, + "grad_norm": 2.6968676817822645, + "learning_rate": 7.740484692586074e-08, + "log_odds_chosen": 0.2530761659145355, + "log_odds_ratio": -0.680371105670929, + "logits/chosen": -1.671875, + "logits/rejected": -1.8203125, + "logps/chosen": -1.1796875, + "logps/rejected": -1.3515625, + "loss": 1.2242, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11767578125, + "rewards/margins": 0.0174560546875, + "rewards/rejected": -0.1357421875, + "step": 1950 + }, + { + "epoch": 1.54, + "grad_norm": 2.735947390317322, + "learning_rate": 7.493354006791006e-08, + "log_odds_chosen": 0.24350586533546448, + "log_odds_ratio": -0.682421863079071, + "logits/chosen": -1.6171875, + "logits/rejected": -1.6796875, + "logps/chosen": -1.0546875, + "logps/rejected": -1.2265625, + "loss": 1.1973, + "nll_loss": 1.15625, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.0169677734375, + "rewards/rejected": -0.12255859375, + "step": 1960 + }, + { + "epoch": 1.55, + "grad_norm": 2.4135041559261885, + "learning_rate": 7.249535422584055e-08, + "log_odds_chosen": 0.19566650688648224, + "log_odds_ratio": -0.6849609613418579, + "logits/chosen": -1.8125, + "logits/rejected": -1.84375, + "logps/chosen": -1.078125, + "logps/rejected": -1.1875, + "loss": 1.1835, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.01129150390625, + "rewards/rejected": -0.119140625, + "step": 1970 + }, + { + "epoch": 1.56, + "grad_norm": 2.3578015206878575, + "learning_rate": 7.009075068271031e-08, + "log_odds_chosen": 0.12241820991039276, + "log_odds_ratio": -0.7289062738418579, + "logits/chosen": -1.578125, + "logits/rejected": -1.7578125, + "logps/chosen": -1.09375, + "logps/rejected": -1.1796875, + "loss": 1.1747, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0087890625, + "rewards/rejected": -0.1181640625, + "step": 1980 + }, + { + "epoch": 1.57, + "grad_norm": 2.7517093669068933, + "learning_rate": 6.772018436810525e-08, + "log_odds_chosen": 0.34681397676467896, + "log_odds_ratio": -0.616992175579071, + "logits/chosen": -1.7109375, + "logits/rejected": -1.7734375, + "logps/chosen": -1.1015625, + "logps/rejected": -1.34375, + "loss": 1.1863, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0240478515625, + "rewards/rejected": -0.134765625, + "step": 1990 + }, + { + "epoch": 1.58, + "grad_norm": 2.758447250920097, + "learning_rate": 6.538410377207082e-08, + "log_odds_chosen": 0.03367309644818306, + "log_odds_ratio": -0.773144543170929, + "logits/chosen": -1.625, + "logits/rejected": -1.7421875, + "logps/chosen": -1.125, + "logps/rejected": -1.15625, + "loss": 1.2376, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.0030364990234375, + "rewards/rejected": -0.115234375, + "step": 2000 + }, + { + "epoch": 1.58, + "grad_norm": 2.880552308468262, + "learning_rate": 6.308295086026133e-08, + "log_odds_chosen": 0.17825928330421448, + "log_odds_ratio": -0.696972668170929, + "logits/chosen": -1.65625, + "logits/rejected": -1.671875, + "logps/chosen": -1.125, + "logps/rejected": -1.25, + "loss": 1.2262, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.1123046875, + "rewards/margins": 0.01220703125, + "rewards/rejected": -0.12451171875, + "step": 2010 + }, + { + "epoch": 1.59, + "grad_norm": 2.439317791546686, + "learning_rate": 6.081716099032417e-08, + "log_odds_chosen": 0.3602050840854645, + "log_odds_ratio": -0.6319335699081421, + "logits/chosen": -1.609375, + "logits/rejected": -1.7578125, + "logps/chosen": -0.99609375, + "logps/rejected": -1.2421875, + "loss": 1.1792, + "nll_loss": 1.0625, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.099609375, + "rewards/margins": 0.024169921875, + "rewards/rejected": -0.1240234375, + "step": 2020 + }, + { + "epoch": 1.6, + "grad_norm": 2.810922238332508, + "learning_rate": 5.858716282953407e-08, + "log_odds_chosen": 0.24152831733226776, + "log_odds_ratio": -0.654492199420929, + "logits/chosen": -1.6328125, + "logits/rejected": -1.75, + "logps/chosen": -1.09375, + "logps/rejected": -1.2578125, + "loss": 1.1982, + "nll_loss": 1.140625, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0166015625, + "rewards/rejected": -0.1259765625, + "step": 2030 + }, + { + "epoch": 1.61, + "grad_norm": 2.5961073589782466, + "learning_rate": 5.639337827369289e-08, + "log_odds_chosen": 0.17608642578125, + "log_odds_ratio": -0.702832043170929, + "logits/chosen": -1.671875, + "logits/rejected": -1.7890625, + "logps/chosen": -1.1171875, + "logps/rejected": -1.2265625, + "loss": 1.1879, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.111328125, + "rewards/margins": 0.0111083984375, + "rewards/rejected": -0.12255859375, + "step": 2040 + }, + { + "epoch": 1.61, + "grad_norm": 2.3150779833374266, + "learning_rate": 5.4236222367310816e-08, + "log_odds_chosen": 0.29583740234375, + "log_odds_ratio": -0.653124988079071, + "logits/chosen": -1.578125, + "logits/rejected": -1.71875, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2734375, + "loss": 1.1965, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.020263671875, + "rewards/rejected": -0.126953125, + "step": 2050 + }, + { + "epoch": 1.62, + "grad_norm": 2.1606096674823068, + "learning_rate": 5.211610322508364e-08, + "log_odds_chosen": 0.15689697861671448, + "log_odds_ratio": -0.70263671875, + "logits/chosen": -1.6875, + "logits/rejected": -1.765625, + "logps/chosen": -1.0625, + "logps/rejected": -1.1640625, + "loss": 1.1801, + "nll_loss": 1.09375, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.0098876953125, + "rewards/rejected": -0.11669921875, + "step": 2060 + }, + { + "epoch": 1.63, + "grad_norm": 2.547538507688686, + "learning_rate": 5.003342195468102e-08, + "log_odds_chosen": 0.221527099609375, + "log_odds_ratio": -0.66357421875, + "logits/chosen": -1.59375, + "logits/rejected": -1.640625, + "logps/chosen": -1.0703125, + "logps/rejected": -1.203125, + "loss": 1.2133, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.01263427734375, + "rewards/rejected": -0.11962890625, + "step": 2070 + }, + { + "epoch": 1.64, + "grad_norm": 2.8414624562639546, + "learning_rate": 4.798857258086053e-08, + "log_odds_chosen": 0.25762939453125, + "log_odds_ratio": -0.6714843511581421, + "logits/chosen": -1.609375, + "logits/rejected": -1.7109375, + "logps/chosen": -1.0625, + "logps/rejected": -1.25, + "loss": 1.2074, + "nll_loss": 1.109375, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.0186767578125, + "rewards/rejected": -0.125, + "step": 2080 + }, + { + "epoch": 1.65, + "grad_norm": 3.120660574019042, + "learning_rate": 4.5981941970921646e-08, + "log_odds_chosen": 0.48786622285842896, + "log_odds_ratio": -0.591992199420929, + "logits/chosen": -1.6171875, + "logits/rejected": -1.65625, + "logps/chosen": -1.03125, + "logps/rejected": -1.375, + "loss": 1.1441, + "nll_loss": 1.0390625, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.034423828125, + "rewards/rejected": -0.1376953125, + "step": 2090 + }, + { + "epoch": 1.65, + "grad_norm": 2.635871013914355, + "learning_rate": 4.4013909761513894e-08, + "log_odds_chosen": 0.2707275450229645, + "log_odds_ratio": -0.649218738079071, + "logits/chosen": -1.71875, + "logits/rejected": -1.7109375, + "logps/chosen": -1.09375, + "logps/rejected": -1.265625, + "loss": 1.1887, + "nll_loss": 1.140625, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0169677734375, + "rewards/rejected": -0.1259765625, + "step": 2100 + }, + { + "epoch": 1.66, + "grad_norm": 3.1511666169638346, + "learning_rate": 4.2084848286813105e-08, + "log_odds_chosen": 0.3526855409145355, + "log_odds_ratio": -0.6600586175918579, + "logits/chosen": -1.5859375, + "logits/rejected": -1.65625, + "logps/chosen": -1.0625, + "logps/rejected": -1.34375, + "loss": 1.1851, + "nll_loss": 1.15625, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.0281982421875, + "rewards/rejected": -0.134765625, + "step": 2110 + }, + { + "epoch": 1.67, + "grad_norm": 2.7305670197672747, + "learning_rate": 4.0195122508078886e-08, + "log_odds_chosen": 0.27125245332717896, + "log_odds_ratio": -0.65625, + "logits/chosen": -1.5859375, + "logits/rejected": -1.6875, + "logps/chosen": -1.0625, + "logps/rejected": -1.2421875, + "loss": 1.1709, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.017822265625, + "rewards/rejected": -0.12451171875, + "step": 2120 + }, + { + "epoch": 1.68, + "grad_norm": 3.2312418757529726, + "learning_rate": 3.834508994460736e-08, + "log_odds_chosen": 0.23995360732078552, + "log_odds_ratio": -0.654589831829071, + "logits/chosen": -1.578125, + "logits/rejected": -1.640625, + "logps/chosen": -1.03125, + "logps/rejected": -1.1875, + "loss": 1.1604, + "nll_loss": 1.046875, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.103515625, + "rewards/margins": 0.0150146484375, + "rewards/rejected": -0.1181640625, + "step": 2130 + }, + { + "epoch": 1.69, + "grad_norm": 2.874032565275268, + "learning_rate": 3.653510060609166e-08, + "log_odds_chosen": 0.13707275688648224, + "log_odds_ratio": -0.7138671875, + "logits/chosen": -1.6796875, + "logits/rejected": -1.765625, + "logps/chosen": -1.0625, + "logps/rejected": -1.1484375, + "loss": 1.173, + "nll_loss": 1.078125, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.00848388671875, + "rewards/rejected": -0.1142578125, + "step": 2140 + }, + { + "epoch": 1.69, + "grad_norm": 2.6604754366861822, + "learning_rate": 3.476549692640316e-08, + "log_odds_chosen": 0.34288328886032104, + "log_odds_ratio": -0.620312511920929, + "logits/chosen": -1.6015625, + "logits/rejected": -1.7109375, + "logps/chosen": -1.0, + "logps/rejected": -1.21875, + "loss": 1.1585, + "nll_loss": 1.078125, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.099609375, + "rewards/margins": 0.0218505859375, + "rewards/rejected": -0.12158203125, + "step": 2150 + }, + { + "epoch": 1.7, + "grad_norm": 2.71892900942932, + "learning_rate": 3.3036613698806085e-08, + "log_odds_chosen": 0.21519775688648224, + "log_odds_ratio": -0.691601574420929, + "logits/chosen": -1.640625, + "logits/rejected": -1.734375, + "logps/chosen": -1.078125, + "logps/rejected": -1.21875, + "loss": 1.2288, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.01434326171875, + "rewards/rejected": -0.1220703125, + "step": 2160 + }, + { + "epoch": 1.71, + "grad_norm": 2.878603239597823, + "learning_rate": 3.134877801261765e-08, + "log_odds_chosen": 0.3372802734375, + "log_odds_ratio": -0.642285168170929, + "logits/chosen": -1.703125, + "logits/rejected": -1.734375, + "logps/chosen": -1.109375, + "logps/rejected": -1.328125, + "loss": 1.2136, + "nll_loss": 1.1640625, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.0218505859375, + "rewards/rejected": -0.1328125, + "step": 2170 + }, + { + "epoch": 1.72, + "grad_norm": 2.6992535601969085, + "learning_rate": 2.9702309191325492e-08, + "log_odds_chosen": 0.24042968451976776, + "log_odds_ratio": -0.675585925579071, + "logits/chosen": -1.7109375, + "logits/rejected": -1.796875, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2265625, + "loss": 1.2242, + "nll_loss": 1.109375, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0159912109375, + "rewards/rejected": -0.123046875, + "step": 2180 + }, + { + "epoch": 1.73, + "grad_norm": 2.971420026998493, + "learning_rate": 2.809751873217478e-08, + "log_odds_chosen": 0.32117921113967896, + "log_odds_ratio": -0.6463867425918579, + "logits/chosen": -1.734375, + "logits/rejected": -1.7578125, + "logps/chosen": -1.0859375, + "logps/rejected": -1.3046875, + "loss": 1.1702, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.0218505859375, + "rewards/rejected": -0.130859375, + "step": 2190 + }, + { + "epoch": 1.73, + "grad_norm": 2.311036000971507, + "learning_rate": 2.653471024723547e-08, + "log_odds_chosen": 0.43181151151657104, + "log_odds_ratio": -0.5894531011581421, + "logits/chosen": -1.6171875, + "logits/rejected": -1.6875, + "logps/chosen": -1.015625, + "logps/rejected": -1.3046875, + "loss": 1.1592, + "nll_loss": 1.09375, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1015625, + "rewards/margins": 0.0289306640625, + "rewards/rejected": -0.130859375, + "step": 2200 + }, + { + "epoch": 1.74, + "grad_norm": 2.5416101230783363, + "learning_rate": 2.501417940596168e-08, + "log_odds_chosen": 0.02521972730755806, + "log_odds_ratio": -0.75, + "logits/chosen": -1.6875, + "logits/rejected": -1.7109375, + "logps/chosen": -1.1015625, + "logps/rejected": -1.109375, + "loss": 1.1748, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.000507354736328125, + "rewards/rejected": -0.11083984375, + "step": 2210 + }, + { + "epoch": 1.75, + "grad_norm": 2.471954369214634, + "learning_rate": 2.353621387925375e-08, + "log_odds_chosen": 0.3322509825229645, + "log_odds_ratio": -0.6597656011581421, + "logits/chosen": -1.6015625, + "logits/rejected": -1.671875, + "logps/chosen": -1.03125, + "logps/rejected": -1.2578125, + "loss": 1.1634, + "nll_loss": 1.078125, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.022705078125, + "rewards/rejected": -0.1259765625, + "step": 2220 + }, + { + "epoch": 1.76, + "grad_norm": 2.801308375939805, + "learning_rate": 2.2101093285033373e-08, + "log_odds_chosen": 0.3058715760707855, + "log_odds_ratio": -0.6502929925918579, + "logits/chosen": -1.6484375, + "logits/rejected": -1.7265625, + "logps/chosen": -1.046875, + "logps/rejected": -1.25, + "loss": 1.1307, + "nll_loss": 1.03125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10498046875, + "rewards/margins": 0.020263671875, + "rewards/rejected": -0.125, + "step": 2230 + }, + { + "epoch": 1.76, + "grad_norm": 2.6457419438315233, + "learning_rate": 2.070908913534236e-08, + "log_odds_chosen": 0.24928589165210724, + "log_odds_ratio": -0.6776367425918579, + "logits/chosen": -1.65625, + "logits/rejected": -1.6640625, + "logps/chosen": -1.078125, + "logps/rejected": -1.234375, + "loss": 1.2119, + "nll_loss": 1.1953125, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.015869140625, + "rewards/rejected": -0.12353515625, + "step": 2240 + }, + { + "epoch": 1.77, + "grad_norm": 2.8172476905649764, + "learning_rate": 1.9360464784975024e-08, + "log_odds_chosen": 0.21148681640625, + "log_odds_ratio": -0.6849609613418579, + "logits/chosen": -1.5625, + "logits/rejected": -1.65625, + "logps/chosen": -1.0703125, + "logps/rejected": -1.21875, + "loss": 1.1771, + "nll_loss": 1.109375, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.0146484375, + "rewards/rejected": -0.1220703125, + "step": 2250 + }, + { + "epoch": 1.78, + "grad_norm": 2.963550785069561, + "learning_rate": 1.8055475381653807e-08, + "log_odds_chosen": 0.27608031034469604, + "log_odds_ratio": -0.6513671875, + "logits/chosen": -1.71875, + "logits/rejected": -1.7421875, + "logps/chosen": -1.046875, + "logps/rejected": -1.234375, + "loss": 1.2014, + "nll_loss": 1.109375, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.1044921875, + "rewards/margins": 0.018798828125, + "rewards/rejected": -0.12353515625, + "step": 2260 + }, + { + "epoch": 1.79, + "grad_norm": 3.1613513759896534, + "learning_rate": 1.679436781775759e-08, + "log_odds_chosen": 0.31138914823532104, + "log_odds_ratio": -0.675976574420929, + "logits/chosen": -1.65625, + "logits/rejected": -1.7421875, + "logps/chosen": -1.109375, + "logps/rejected": -1.3203125, + "loss": 1.1978, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.0213623046875, + "rewards/rejected": -0.1318359375, + "step": 2270 + }, + { + "epoch": 1.8, + "grad_norm": 2.92153711849868, + "learning_rate": 1.5577380683611807e-08, + "log_odds_chosen": 0.2562316954135895, + "log_odds_ratio": -0.6595703363418579, + "logits/chosen": -1.671875, + "logits/rejected": -1.7265625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.21875, + "loss": 1.198, + "nll_loss": 1.09375, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.0164794921875, + "rewards/rejected": -0.12158203125, + "step": 2280 + }, + { + "epoch": 1.8, + "grad_norm": 3.486180847986093, + "learning_rate": 1.4404744222349358e-08, + "log_odds_chosen": 0.48161619901657104, + "log_odds_ratio": -0.605664074420929, + "logits/chosen": -1.625, + "logits/rejected": -1.6875, + "logps/chosen": -1.03125, + "logps/rejected": -1.359375, + "loss": 1.172, + "nll_loss": 1.078125, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.032470703125, + "rewards/rejected": -0.1357421875, + "step": 2290 + }, + { + "epoch": 1.81, + "grad_norm": 2.50535325154869, + "learning_rate": 1.3276680286350594e-08, + "log_odds_chosen": 0.31635743379592896, + "log_odds_ratio": -0.641796886920929, + "logits/chosen": -1.6875, + "logits/rejected": -1.8203125, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2734375, + "loss": 1.204, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0203857421875, + "rewards/rejected": -0.126953125, + "step": 2300 + }, + { + "epoch": 1.82, + "grad_norm": 2.6381410134685392, + "learning_rate": 1.2193402295270854e-08, + "log_odds_chosen": 0.2996459901332855, + "log_odds_ratio": -0.6700195074081421, + "logits/chosen": -1.5234375, + "logits/rejected": -1.671875, + "logps/chosen": -1.03125, + "logps/rejected": -1.234375, + "loss": 1.1512, + "nll_loss": 1.125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.10302734375, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.12353515625, + "step": 2310 + }, + { + "epoch": 1.83, + "grad_norm": 2.344444293869932, + "learning_rate": 1.115511519566334e-08, + "log_odds_chosen": 0.3412719666957855, + "log_odds_ratio": -0.657519519329071, + "logits/chosen": -1.6171875, + "logits/rejected": -1.7578125, + "logps/chosen": -0.98828125, + "logps/rejected": -1.234375, + "loss": 1.1698, + "nll_loss": 1.03125, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0986328125, + "rewards/margins": 0.0244140625, + "rewards/rejected": -0.123046875, + "step": 2320 + }, + { + "epoch": 1.84, + "grad_norm": 2.277202951820151, + "learning_rate": 1.01620154222051e-08, + "log_odds_chosen": 0.14781494438648224, + "log_odds_ratio": -0.708984375, + "logits/chosen": -1.6171875, + "logits/rejected": -1.6953125, + "logps/chosen": -1.125, + "logps/rejected": -1.203125, + "loss": 1.1759, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11181640625, + "rewards/margins": 0.0079345703125, + "rewards/rejected": -0.1201171875, + "step": 2330 + }, + { + "epoch": 1.84, + "grad_norm": 2.5811302552961943, + "learning_rate": 9.214290860533242e-09, + "log_odds_chosen": 0.22308655083179474, + "log_odds_ratio": -0.6734374761581421, + "logits/chosen": -1.6640625, + "logits/rejected": -1.7421875, + "logps/chosen": -1.1015625, + "logps/rejected": -1.234375, + "loss": 1.205, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1103515625, + "rewards/margins": 0.0133056640625, + "rewards/rejected": -0.12353515625, + "step": 2340 + }, + { + "epoch": 1.85, + "grad_norm": 2.4190439831152326, + "learning_rate": 8.312120811698798e-09, + "log_odds_chosen": 0.24127808213233948, + "log_odds_ratio": -0.6958984136581421, + "logits/chosen": -1.6875, + "logits/rejected": -1.7734375, + "logps/chosen": -1.0625, + "logps/rejected": -1.234375, + "loss": 1.1753, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.017333984375, + "rewards/rejected": -0.12353515625, + "step": 2350 + }, + { + "epoch": 1.86, + "grad_norm": 2.3733902102708897, + "learning_rate": 7.455675958244422e-09, + "log_odds_chosen": 0.2683349549770355, + "log_odds_ratio": -0.660449206829071, + "logits/chosen": -1.6484375, + "logits/rejected": -1.765625, + "logps/chosen": -1.078125, + "logps/rejected": -1.2578125, + "loss": 1.1939, + "nll_loss": 1.125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.0184326171875, + "rewards/rejected": -0.1259765625, + "step": 2360 + }, + { + "epoch": 1.87, + "grad_norm": 2.581589417454669, + "learning_rate": 6.64511833191278e-09, + "log_odds_chosen": 0.16912230849266052, + "log_odds_ratio": -0.703125, + "logits/chosen": -1.640625, + "logits/rejected": -1.765625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.1796875, + "loss": 1.1997, + "nll_loss": 1.15625, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.01220703125, + "rewards/rejected": -0.1181640625, + "step": 2370 + }, + { + "epoch": 1.87, + "grad_norm": 2.8853418036737297, + "learning_rate": 5.8806012829916985e-09, + "log_odds_chosen": 0.3464111387729645, + "log_odds_ratio": -0.615234375, + "logits/chosen": -1.71875, + "logits/rejected": -1.8125, + "logps/chosen": -1.0078125, + "logps/rejected": -1.2421875, + "loss": 1.1569, + "nll_loss": 1.078125, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.10107421875, + "rewards/margins": 0.02294921875, + "rewards/rejected": -0.1240234375, + "step": 2380 + }, + { + "epoch": 1.88, + "grad_norm": 2.5807272249096913, + "learning_rate": 5.162269451301576e-09, + "log_odds_chosen": 0.16444091498851776, + "log_odds_ratio": -0.6973632574081421, + "logits/chosen": -1.78125, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0859375, + "logps/rejected": -1.2109375, + "loss": 1.2152, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.01202392578125, + "rewards/rejected": -0.12060546875, + "step": 2390 + }, + { + "epoch": 1.89, + "grad_norm": 2.6925182997680515, + "learning_rate": 4.490258738830771e-09, + "log_odds_chosen": 0.2374267578125, + "log_odds_ratio": -0.6766601800918579, + "logits/chosen": -1.640625, + "logits/rejected": -1.6875, + "logps/chosen": -1.15625, + "logps/rejected": -1.3203125, + "loss": 1.2008, + "nll_loss": 1.1796875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.115234375, + "rewards/margins": 0.016357421875, + "rewards/rejected": -0.1318359375, + "step": 2400 + }, + { + "epoch": 1.9, + "grad_norm": 3.01082383722649, + "learning_rate": 3.864696284024249e-09, + "log_odds_chosen": 0.38875120878219604, + "log_odds_ratio": -0.6109374761581421, + "logits/chosen": -1.59375, + "logits/rejected": -1.71875, + "logps/chosen": -1.0625, + "logps/rejected": -1.3359375, + "loss": 1.1823, + "nll_loss": 1.0859375, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.028076171875, + "rewards/rejected": -0.1337890625, + "step": 2410 + }, + { + "epoch": 1.91, + "grad_norm": 2.7770288933270755, + "learning_rate": 3.285700437730077e-09, + "log_odds_chosen": 0.35822755098342896, + "log_odds_ratio": -0.6693359613418579, + "logits/chosen": -1.5859375, + "logits/rejected": -1.6796875, + "logps/chosen": -1.0859375, + "logps/rejected": -1.3515625, + "loss": 1.1699, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1083984375, + "rewards/margins": 0.0264892578125, + "rewards/rejected": -0.134765625, + "step": 2420 + }, + { + "epoch": 1.91, + "grad_norm": 2.5907890754339262, + "learning_rate": 2.7533807408084973e-09, + "log_odds_chosen": 0.17686156928539276, + "log_odds_ratio": -0.698535144329071, + "logits/chosen": -1.7421875, + "logits/rejected": -1.8203125, + "logps/chosen": -1.1015625, + "logps/rejected": -1.2109375, + "loss": 1.1861, + "nll_loss": 1.140625, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10986328125, + "rewards/margins": 0.01153564453125, + "rewards/rejected": -0.12109375, + "step": 2430 + }, + { + "epoch": 1.92, + "grad_norm": 2.4966778392886955, + "learning_rate": 2.2678379034077877e-09, + "log_odds_chosen": 0.2807373106479645, + "log_odds_ratio": -0.6429687738418579, + "logits/chosen": -1.640625, + "logits/rejected": -1.7109375, + "logps/chosen": -1.09375, + "logps/rejected": -1.2734375, + "loss": 1.2285, + "nll_loss": 1.1484375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10888671875, + "rewards/margins": 0.018310546875, + "rewards/rejected": -0.126953125, + "step": 2440 + }, + { + "epoch": 1.93, + "grad_norm": 2.3101513660116466, + "learning_rate": 1.82916378591072e-09, + "log_odds_chosen": 0.35594481229782104, + "log_odds_ratio": -0.6319335699081421, + "logits/chosen": -1.6875, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.28125, + "loss": 1.1772, + "nll_loss": 1.125, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.0225830078125, + "rewards/rejected": -0.1279296875, + "step": 2450 + }, + { + "epoch": 1.94, + "grad_norm": 2.550820385872949, + "learning_rate": 1.4374413815555763e-09, + "log_odds_chosen": 0.21019287407398224, + "log_odds_ratio": -0.6913086175918579, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6796875, + "logps/chosen": -1.09375, + "logps/rejected": -1.25, + "loss": 1.2096, + "nll_loss": 1.15625, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0157470703125, + "rewards/rejected": -0.125, + "step": 2460 + }, + { + "epoch": 1.95, + "grad_norm": 2.3418264361304293, + "learning_rate": 1.0927448007343188e-09, + "log_odds_chosen": 0.2827392518520355, + "log_odds_ratio": -0.6519531011581421, + "logits/chosen": -1.671875, + "logits/rejected": -1.796875, + "logps/chosen": -1.0703125, + "logps/rejected": -1.2734375, + "loss": 1.2057, + "nll_loss": 1.1328125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.10693359375, + "rewards/margins": 0.0203857421875, + "rewards/rejected": -0.126953125, + "step": 2470 + }, + { + "epoch": 1.95, + "grad_norm": 3.0181770379881936, + "learning_rate": 7.951392569717774e-10, + "log_odds_chosen": 0.32861328125, + "log_odds_ratio": -0.641406238079071, + "logits/chosen": -1.71875, + "logits/rejected": -1.7421875, + "logps/chosen": -1.09375, + "logps/rejected": -1.296875, + "loss": 1.2017, + "nll_loss": 1.125, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.109375, + "rewards/margins": 0.0206298828125, + "rewards/rejected": -0.1298828125, + "step": 2480 + }, + { + "epoch": 1.96, + "grad_norm": 2.6598066518808965, + "learning_rate": 5.446810545877423e-10, + "log_odds_chosen": 0.29930418729782104, + "log_odds_ratio": -0.6373046636581421, + "logits/chosen": -1.703125, + "logits/rejected": -1.7890625, + "logps/chosen": -1.0703125, + "logps/rejected": -1.28125, + "loss": 1.1938, + "nll_loss": 1.125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.107421875, + "rewards/margins": 0.021240234375, + "rewards/rejected": -0.12890625, + "step": 2490 + }, + { + "epoch": 1.97, + "grad_norm": 2.612456535550154, + "learning_rate": 3.414175780446227e-10, + "log_odds_chosen": 0.27032470703125, + "log_odds_ratio": -0.64501953125, + "logits/chosen": -1.6015625, + "logits/rejected": -1.671875, + "logps/chosen": -1.0625, + "logps/rejected": -1.2265625, + "loss": 1.2178, + "nll_loss": 1.125, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10595703125, + "rewards/margins": 0.0167236328125, + "rewards/rejected": -0.12255859375, + "step": 2500 + }, + { + "epoch": 1.98, + "grad_norm": 2.5663855569599123, + "learning_rate": 1.8538728298292395e-10, + "log_odds_chosen": 0.25556641817092896, + "log_odds_ratio": -0.6698242425918579, + "logits/chosen": -1.6171875, + "logits/rejected": -1.6640625, + "logps/chosen": -1.0546875, + "logps/rejected": -1.234375, + "loss": 1.198, + "nll_loss": 1.1015625, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10546875, + "rewards/margins": 0.01806640625, + "rewards/rejected": -0.12353515625, + "step": 2510 + }, + { + "epoch": 1.99, + "grad_norm": 2.5581951001300336, + "learning_rate": 7.661968894551174e-11, + "log_odds_chosen": 0.3158630430698395, + "log_odds_ratio": -0.636425793170929, + "logits/chosen": -1.7109375, + "logits/rejected": -1.828125, + "logps/chosen": -1.0234375, + "logps/rejected": -1.2265625, + "loss": 1.1801, + "nll_loss": 1.09375, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.1025390625, + "rewards/margins": 0.0205078125, + "rewards/rejected": -0.123046875, + "step": 2520 + }, + { + "epoch": 1.99, + "grad_norm": 2.6003805241560958, + "learning_rate": 1.513537379305152e-11, + "log_odds_chosen": 0.20820312201976776, + "log_odds_ratio": -0.664843738079071, + "logits/chosen": -1.671875, + "logits/rejected": -1.734375, + "logps/chosen": -1.109375, + "logps/rejected": -1.2421875, + "loss": 1.2092, + "nll_loss": 1.1171875, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.11083984375, + "rewards/margins": 0.0135498046875, + "rewards/rejected": -0.12451171875, + "step": 2530 + }, + { + "epoch": 2.0, + "step": 2538, + "total_flos": 0.0, + "train_loss": 1.2334148878183206, + "train_runtime": 17382.4906, + "train_samples_per_second": 7.009, + "train_steps_per_second": 0.146 + } + ], + "logging_steps": 10, + "max_steps": 2538, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}