{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9992122883024814, "eval_steps": 500, "global_step": 2538, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 6.010698237247525, "learning_rate": 1.968503937007874e-08, "log_odds_chosen": 0.27912598848342896, "log_odds_ratio": -0.7284179925918579, "logits/chosen": -2.015625, "logits/rejected": -2.046875, "logps/chosen": -2.03125, "logps/rejected": -2.28125, "loss": 1.5763, "nll_loss": 1.4375, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2041015625, "rewards/margins": 0.023681640625, "rewards/rejected": -0.2275390625, "step": 10 }, { "epoch": 0.02, "grad_norm": 5.135758173057069, "learning_rate": 3.937007874015748e-08, "log_odds_chosen": 0.36018067598342896, "log_odds_ratio": -0.750683605670929, "logits/chosen": -2.109375, "logits/rejected": -2.140625, "logps/chosen": -1.9765625, "logps/rejected": -2.296875, "loss": 1.5927, "nll_loss": 1.5, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.197265625, "rewards/margins": 0.0322265625, "rewards/rejected": -0.2294921875, "step": 20 }, { "epoch": 0.02, "grad_norm": 6.927967353705024, "learning_rate": 5.9055118110236216e-08, "log_odds_chosen": 0.17539063096046448, "log_odds_ratio": -0.812207043170929, "logits/chosen": -1.9375, "logits/rejected": -2.03125, "logps/chosen": -2.0625, "logps/rejected": -2.1875, "loss": 1.5598, "nll_loss": 1.5, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2060546875, "rewards/margins": 0.013427734375, "rewards/rejected": -0.2197265625, "step": 30 }, { "epoch": 0.03, "grad_norm": 9.514842032339935, "learning_rate": 7.874015748031496e-08, "log_odds_chosen": 0.3271545469760895, "log_odds_ratio": -0.705859363079071, "logits/chosen": -2.03125, "logits/rejected": -2.03125, "logps/chosen": -1.9921875, "logps/rejected": -2.296875, "loss": 1.6148, "nll_loss": 1.546875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.19921875, "rewards/margins": 0.031005859375, "rewards/rejected": -0.23046875, "step": 40 }, { "epoch": 0.04, "grad_norm": 5.878873460342644, "learning_rate": 9.84251968503937e-08, "log_odds_chosen": 0.1710205078125, "log_odds_ratio": -0.77587890625, "logits/chosen": -2.03125, "logits/rejected": -2.0, "logps/chosen": -1.9609375, "logps/rejected": -2.09375, "loss": 1.5496, "nll_loss": 1.484375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1962890625, "rewards/margins": 0.0137939453125, "rewards/rejected": -0.2099609375, "step": 50 }, { "epoch": 0.05, "grad_norm": 6.390918916154321, "learning_rate": 1.1811023622047243e-07, "log_odds_chosen": 0.21818237006664276, "log_odds_ratio": -0.7723633050918579, "logits/chosen": -2.046875, "logits/rejected": -2.03125, "logps/chosen": -2.0625, "logps/rejected": -2.25, "loss": 1.6078, "nll_loss": 1.5625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.2060546875, "rewards/margins": 0.0185546875, "rewards/rejected": -0.224609375, "step": 60 }, { "epoch": 0.06, "grad_norm": 7.289138170054862, "learning_rate": 1.3779527559055117e-07, "log_odds_chosen": 0.34990233182907104, "log_odds_ratio": -0.7339843511581421, "logits/chosen": -2.015625, "logits/rejected": -2.046875, "logps/chosen": -2.0625, "logps/rejected": -2.375, "loss": 1.5634, "nll_loss": 1.5078125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.20703125, "rewards/margins": 0.0311279296875, "rewards/rejected": -0.23828125, "step": 70 }, { "epoch": 0.06, "grad_norm": 6.612859448964533, "learning_rate": 1.5748031496062992e-07, "log_odds_chosen": 0.4515624940395355, "log_odds_ratio": -0.640917956829071, "logits/chosen": -1.9609375, "logits/rejected": -2.015625, "logps/chosen": -1.7734375, "logps/rejected": -2.171875, "loss": 1.532, "nll_loss": 1.421875, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.177734375, "rewards/margins": 0.039794921875, "rewards/rejected": -0.2177734375, "step": 80 }, { "epoch": 0.07, "grad_norm": 8.397857351399276, "learning_rate": 1.7716535433070863e-07, "log_odds_chosen": 0.17760619521141052, "log_odds_ratio": -0.7791992425918579, "logits/chosen": -2.0625, "logits/rejected": -2.03125, "logps/chosen": -2.0625, "logps/rejected": -2.21875, "loss": 1.5648, "nll_loss": 1.546875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.2060546875, "rewards/margins": 0.0166015625, "rewards/rejected": -0.22265625, "step": 90 }, { "epoch": 0.08, "grad_norm": 7.586832559599352, "learning_rate": 1.968503937007874e-07, "log_odds_chosen": 0.2771240174770355, "log_odds_ratio": -0.6792968511581421, "logits/chosen": -2.140625, "logits/rejected": -2.171875, "logps/chosen": -1.890625, "logps/rejected": -2.125, "loss": 1.5286, "nll_loss": 1.484375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.189453125, "rewards/margins": 0.02392578125, "rewards/rejected": -0.212890625, "step": 100 }, { "epoch": 0.09, "grad_norm": 5.07791601254699, "learning_rate": 2.1653543307086615e-07, "log_odds_chosen": 0.214080810546875, "log_odds_ratio": -0.7261718511581421, "logits/chosen": -2.09375, "logits/rejected": -2.15625, "logps/chosen": -1.890625, "logps/rejected": -2.0625, "loss": 1.4891, "nll_loss": 1.4140625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.189453125, "rewards/margins": 0.017333984375, "rewards/rejected": -0.2060546875, "step": 110 }, { "epoch": 0.09, "grad_norm": 5.712206786453907, "learning_rate": 2.3622047244094486e-07, "log_odds_chosen": 0.14680786430835724, "log_odds_ratio": -0.7562500238418579, "logits/chosen": -2.078125, "logits/rejected": -2.15625, "logps/chosen": -1.9375, "logps/rejected": -2.03125, "loss": 1.4901, "nll_loss": 1.34375, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.193359375, "rewards/margins": 0.00994873046875, "rewards/rejected": -0.2041015625, "step": 120 }, { "epoch": 0.1, "grad_norm": 5.959474046553222, "learning_rate": 2.559055118110236e-07, "log_odds_chosen": 0.2553772032260895, "log_odds_ratio": -0.6973632574081421, "logits/chosen": -2.15625, "logits/rejected": -2.21875, "logps/chosen": -1.7890625, "logps/rejected": -1.9921875, "loss": 1.497, "nll_loss": 1.3828125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1787109375, "rewards/margins": 0.0205078125, "rewards/rejected": -0.19921875, "step": 130 }, { "epoch": 0.11, "grad_norm": 5.960730803694658, "learning_rate": 2.7559055118110235e-07, "log_odds_chosen": 0.16423340141773224, "log_odds_ratio": -0.718457043170929, "logits/chosen": -2.15625, "logits/rejected": -2.21875, "logps/chosen": -1.765625, "logps/rejected": -1.8984375, "loss": 1.5228, "nll_loss": 1.453125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1767578125, "rewards/margins": 0.0133056640625, "rewards/rejected": -0.189453125, "step": 140 }, { "epoch": 0.12, "grad_norm": 4.558212884083645, "learning_rate": 2.9527559055118104e-07, "log_odds_chosen": 0.24582520127296448, "log_odds_ratio": -0.671191394329071, "logits/chosen": -2.125, "logits/rejected": -2.1875, "logps/chosen": -1.546875, "logps/rejected": -1.7421875, "loss": 1.4124, "nll_loss": 1.3125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.154296875, "rewards/margins": 0.0191650390625, "rewards/rejected": -0.173828125, "step": 150 }, { "epoch": 0.13, "grad_norm": 3.4831822067708686, "learning_rate": 3.1496062992125984e-07, "log_odds_chosen": 0.17824706435203552, "log_odds_ratio": -0.69384765625, "logits/chosen": -2.25, "logits/rejected": -2.28125, "logps/chosen": -1.671875, "logps/rejected": -1.8203125, "loss": 1.4297, "nll_loss": 1.390625, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1669921875, "rewards/margins": 0.0145263671875, "rewards/rejected": -0.181640625, "step": 160 }, { "epoch": 0.13, "grad_norm": 4.034138231637428, "learning_rate": 3.346456692913386e-07, "log_odds_chosen": 0.12167968600988388, "log_odds_ratio": -0.7186523675918579, "logits/chosen": -2.1875, "logits/rejected": -2.296875, "logps/chosen": -1.5625, "logps/rejected": -1.6640625, "loss": 1.3835, "nll_loss": 1.3203125, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.15625, "rewards/margins": 0.01007080078125, "rewards/rejected": -0.166015625, "step": 170 }, { "epoch": 0.14, "grad_norm": 4.146506823609489, "learning_rate": 3.5433070866141727e-07, "log_odds_chosen": 0.13695068657398224, "log_odds_ratio": -0.719433605670929, "logits/chosen": -2.28125, "logits/rejected": -2.40625, "logps/chosen": -1.5546875, "logps/rejected": -1.671875, "loss": 1.4352, "nll_loss": 1.3828125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1552734375, "rewards/margins": 0.0115966796875, "rewards/rejected": -0.1669921875, "step": 180 }, { "epoch": 0.15, "grad_norm": 3.5266117677319087, "learning_rate": 3.7401574803149606e-07, "log_odds_chosen": 0.17273560166358948, "log_odds_ratio": -0.681640625, "logits/chosen": -2.3125, "logits/rejected": -2.4375, "logps/chosen": -1.4765625, "logps/rejected": -1.625, "loss": 1.3599, "nll_loss": 1.3046875, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1474609375, "rewards/margins": 0.01495361328125, "rewards/rejected": -0.162109375, "step": 190 }, { "epoch": 0.16, "grad_norm": 3.4327683095111072, "learning_rate": 3.937007874015748e-07, "log_odds_chosen": 0.11888428032398224, "log_odds_ratio": -0.711230456829071, "logits/chosen": -2.296875, "logits/rejected": -2.40625, "logps/chosen": -1.3828125, "logps/rejected": -1.484375, "loss": 1.3592, "nll_loss": 1.28125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.138671875, "rewards/margins": 0.0093994140625, "rewards/rejected": -0.1484375, "step": 200 }, { "epoch": 0.17, "grad_norm": 2.811761812417915, "learning_rate": 4.133858267716535e-07, "log_odds_chosen": 0.13620606064796448, "log_odds_ratio": -0.692089855670929, "logits/chosen": -2.3125, "logits/rejected": -2.4375, "logps/chosen": -1.359375, "logps/rejected": -1.453125, "loss": 1.3822, "nll_loss": 1.2421875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1357421875, "rewards/margins": 0.010009765625, "rewards/rejected": -0.1455078125, "step": 210 }, { "epoch": 0.17, "grad_norm": 3.1783123040584775, "learning_rate": 4.330708661417323e-07, "log_odds_chosen": 0.23652343451976776, "log_odds_ratio": -0.656054675579071, "logits/chosen": -2.3125, "logits/rejected": -2.4375, "logps/chosen": -1.296875, "logps/rejected": -1.46875, "loss": 1.3022, "nll_loss": 1.2265625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12890625, "rewards/margins": 0.017578125, "rewards/rejected": -0.146484375, "step": 220 }, { "epoch": 0.18, "grad_norm": 3.398094645144472, "learning_rate": 4.52755905511811e-07, "log_odds_chosen": 0.10042724758386612, "log_odds_ratio": -0.737500011920929, "logits/chosen": -2.421875, "logits/rejected": -2.546875, "logps/chosen": -1.265625, "logps/rejected": -1.3359375, "loss": 1.3118, "nll_loss": 1.21875, "rewards/accuracies": 0.5, "rewards/chosen": -0.126953125, "rewards/margins": 0.007049560546875, "rewards/rejected": -0.1337890625, "step": 230 }, { "epoch": 0.19, "grad_norm": 3.0602456337138735, "learning_rate": 4.7244094488188973e-07, "log_odds_chosen": 0.05767212063074112, "log_odds_ratio": -0.7372070550918579, "logits/chosen": -2.328125, "logits/rejected": -2.4375, "logps/chosen": -1.359375, "logps/rejected": -1.40625, "loss": 1.3639, "nll_loss": 1.3359375, "rewards/accuracies": 0.5, "rewards/chosen": -0.1357421875, "rewards/margins": 0.0050048828125, "rewards/rejected": -0.140625, "step": 240 }, { "epoch": 0.2, "grad_norm": 3.1390537608793543, "learning_rate": 4.921259842519685e-07, "log_odds_chosen": 0.16054077446460724, "log_odds_ratio": -0.6732422113418579, "logits/chosen": -2.34375, "logits/rejected": -2.40625, "logps/chosen": -1.2421875, "logps/rejected": -1.3515625, "loss": 1.3024, "nll_loss": 1.234375, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1240234375, "rewards/margins": 0.01104736328125, "rewards/rejected": -0.134765625, "step": 250 }, { "epoch": 0.2, "grad_norm": 2.518997318792438, "learning_rate": 4.999914863146575e-07, "log_odds_chosen": 0.16718749701976776, "log_odds_ratio": -0.6884765625, "logits/chosen": -2.359375, "logits/rejected": -2.5, "logps/chosen": -1.234375, "logps/rejected": -1.3359375, "loss": 1.3314, "nll_loss": 1.234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12353515625, "rewards/margins": 0.01019287109375, "rewards/rejected": -0.1337890625, "step": 260 }, { "epoch": 0.21, "grad_norm": 2.798537540317038, "learning_rate": 4.999394603374641e-07, "log_odds_chosen": 0.19011840224266052, "log_odds_ratio": -0.676562488079071, "logits/chosen": -2.359375, "logits/rejected": -2.46875, "logps/chosen": -1.203125, "logps/rejected": -1.3515625, "loss": 1.2872, "nll_loss": 1.1875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.12060546875, "rewards/margins": 0.01422119140625, "rewards/rejected": -0.134765625, "step": 270 }, { "epoch": 0.22, "grad_norm": 2.8758259093431437, "learning_rate": 4.99840148039188e-07, "log_odds_chosen": 0.29682618379592896, "log_odds_ratio": -0.637890636920929, "logits/chosen": -2.3125, "logits/rejected": -2.5, "logps/chosen": -1.203125, "logps/rejected": -1.4140625, "loss": 1.2201, "nll_loss": 1.1484375, "rewards/accuracies": 0.625, "rewards/chosen": -0.1201171875, "rewards/margins": 0.021484375, "rewards/rejected": -0.1416015625, "step": 280 }, { "epoch": 0.23, "grad_norm": 2.695897803134525, "learning_rate": 4.996935682088318e-07, "log_odds_chosen": 0.22941894829273224, "log_odds_ratio": -0.6490234136581421, "logits/chosen": -2.359375, "logits/rejected": -2.46875, "logps/chosen": -1.21875, "logps/rejected": -1.375, "loss": 1.2819, "nll_loss": 1.1875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1220703125, "rewards/margins": 0.015380859375, "rewards/rejected": -0.1376953125, "step": 290 }, { "epoch": 0.24, "grad_norm": 2.8982419601141585, "learning_rate": 4.994997485779947e-07, "log_odds_chosen": 0.23259887099266052, "log_odds_ratio": -0.666796863079071, "logits/chosen": -2.25, "logits/rejected": -2.375, "logps/chosen": -1.1796875, "logps/rejected": -1.34375, "loss": 1.2759, "nll_loss": 1.1953125, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.1181640625, "rewards/margins": 0.015869140625, "rewards/rejected": -0.1337890625, "step": 300 }, { "epoch": 0.24, "grad_norm": 2.9056748531961585, "learning_rate": 4.992587258156258e-07, "log_odds_chosen": 0.17786864936351776, "log_odds_ratio": -0.6656249761581421, "logits/chosen": -2.265625, "logits/rejected": -2.359375, "logps/chosen": -1.21875, "logps/rejected": -1.34375, "loss": 1.2812, "nll_loss": 1.2109375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.12158203125, "rewards/margins": 0.01324462890625, "rewards/rejected": -0.134765625, "step": 310 }, { "epoch": 0.25, "grad_norm": 2.596709437423632, "learning_rate": 4.989705455210862e-07, "log_odds_chosen": 0.21816405653953552, "log_odds_ratio": -0.662890613079071, "logits/chosen": -2.3125, "logits/rejected": -2.421875, "logps/chosen": -1.1875, "logps/rejected": -1.3515625, "loss": 1.2184, "nll_loss": 1.109375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.11865234375, "rewards/margins": 0.016357421875, "rewards/rejected": -0.134765625, "step": 320 }, { "epoch": 0.26, "grad_norm": 2.476759669633908, "learning_rate": 4.986352622155222e-07, "log_odds_chosen": 0.17100830376148224, "log_odds_ratio": -0.6903320550918579, "logits/chosen": -2.359375, "logits/rejected": -2.46875, "logps/chosen": -1.1953125, "logps/rejected": -1.3046875, "loss": 1.2865, "nll_loss": 1.21875, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.119140625, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.130859375, "step": 330 }, { "epoch": 0.27, "grad_norm": 2.628189970289334, "learning_rate": 4.98252939331551e-07, "log_odds_chosen": 0.17416992783546448, "log_odds_ratio": -0.697070300579071, "logits/chosen": -2.328125, "logits/rejected": -2.4375, "logps/chosen": -1.21875, "logps/rejected": -1.34375, "loss": 1.2797, "nll_loss": 1.2109375, "rewards/accuracies": 0.53125, "rewards/chosen": -0.12158203125, "rewards/margins": 0.01318359375, "rewards/rejected": -0.134765625, "step": 340 }, { "epoch": 0.28, "grad_norm": 2.6295659881126943, "learning_rate": 4.978236492012589e-07, "log_odds_chosen": 0.02346191368997097, "log_odds_ratio": -0.755078136920929, "logits/chosen": -2.421875, "logits/rejected": -2.421875, "logps/chosen": -1.2578125, "logps/rejected": -1.2734375, "loss": 1.3077, "nll_loss": 1.25, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1259765625, "rewards/margins": 0.00180816650390625, "rewards/rejected": -0.126953125, "step": 350 }, { "epoch": 0.28, "grad_norm": 2.959773704803729, "learning_rate": 4.973474730425173e-07, "log_odds_chosen": 0.19099120795726776, "log_odds_ratio": -0.6749023199081421, "logits/chosen": -2.25, "logits/rejected": -2.359375, "logps/chosen": -1.1875, "logps/rejected": -1.3125, "loss": 1.2568, "nll_loss": 1.171875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11865234375, "rewards/margins": 0.0125732421875, "rewards/rejected": -0.1318359375, "step": 360 }, { "epoch": 0.29, "grad_norm": 2.613182586833654, "learning_rate": 4.968245009436167e-07, "log_odds_chosen": 0.10064697265625, "log_odds_ratio": -0.72216796875, "logits/chosen": -2.375, "logits/rejected": -2.4375, "logps/chosen": -1.28125, "logps/rejected": -1.3671875, "loss": 1.2944, "nll_loss": 1.2890625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.1279296875, "rewards/margins": 0.00860595703125, "rewards/rejected": -0.13671875, "step": 370 }, { "epoch": 0.3, "grad_norm": 2.8683189618015126, "learning_rate": 4.962548318462231e-07, "log_odds_chosen": 0.19755859673023224, "log_odds_ratio": -0.6724609136581421, "logits/chosen": -2.296875, "logits/rejected": -2.359375, "logps/chosen": -1.171875, "logps/rejected": -1.3046875, "loss": 1.2778, "nll_loss": 1.2265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11669921875, "rewards/margins": 0.01373291015625, "rewards/rejected": -0.130859375, "step": 380 }, { "epoch": 0.31, "grad_norm": 2.9807597681868305, "learning_rate": 4.95638573526659e-07, "log_odds_chosen": 0.14707031846046448, "log_odds_ratio": -0.7005859613418579, "logits/chosen": -2.28125, "logits/rejected": -2.453125, "logps/chosen": -1.171875, "logps/rejected": -1.2734375, "loss": 1.2844, "nll_loss": 1.203125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11767578125, "rewards/margins": 0.0098876953125, "rewards/rejected": -0.126953125, "step": 390 }, { "epoch": 0.32, "grad_norm": 2.655124275329291, "learning_rate": 4.949758425755127e-07, "log_odds_chosen": 0.10791015625, "log_odds_ratio": -0.7230468988418579, "logits/chosen": -2.1875, "logits/rejected": -2.28125, "logps/chosen": -1.203125, "logps/rejected": -1.28125, "loss": 1.2902, "nll_loss": 1.1875, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1201171875, "rewards/margins": 0.00762939453125, "rewards/rejected": -0.1279296875, "step": 400 }, { "epoch": 0.32, "grad_norm": 2.4371959032830293, "learning_rate": 4.94266764375581e-07, "log_odds_chosen": 0.20887450873851776, "log_odds_ratio": -0.6651366949081421, "logits/chosen": -2.21875, "logits/rejected": -2.34375, "logps/chosen": -1.1328125, "logps/rejected": -1.2890625, "loss": 1.2674, "nll_loss": 1.1484375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11376953125, "rewards/margins": 0.01544189453125, "rewards/rejected": -0.12890625, "step": 410 }, { "epoch": 0.33, "grad_norm": 2.729848906556158, "learning_rate": 4.935114730781475e-07, "log_odds_chosen": 0.27691650390625, "log_odds_ratio": -0.6527343988418579, "logits/chosen": -2.140625, "logits/rejected": -2.296875, "logps/chosen": -1.1953125, "logps/rejected": -1.3828125, "loss": 1.2544, "nll_loss": 1.1875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11962890625, "rewards/margins": 0.018798828125, "rewards/rejected": -0.138671875, "step": 420 }, { "epoch": 0.34, "grad_norm": 2.50393555238819, "learning_rate": 4.927101115776026e-07, "log_odds_chosen": 0.14921875298023224, "log_odds_ratio": -0.702343761920929, "logits/chosen": -2.28125, "logits/rejected": -2.34375, "logps/chosen": -1.203125, "logps/rejected": -1.3203125, "loss": 1.2471, "nll_loss": 1.2265625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.12060546875, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.1318359375, "step": 430 }, { "epoch": 0.35, "grad_norm": 2.7483117165130744, "learning_rate": 4.918628314844088e-07, "log_odds_chosen": 0.04735717922449112, "log_odds_ratio": -0.749218761920929, "logits/chosen": -2.265625, "logits/rejected": -2.359375, "logps/chosen": -1.2109375, "logps/rejected": -1.25, "loss": 1.2351, "nll_loss": 1.21875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.12060546875, "rewards/margins": 0.0040283203125, "rewards/rejected": -0.12451171875, "step": 440 }, { "epoch": 0.35, "grad_norm": 2.9881422727710887, "learning_rate": 4.909697930964179e-07, "log_odds_chosen": 0.16976317763328552, "log_odds_ratio": -0.6986328363418579, "logits/chosen": -2.25, "logits/rejected": -2.40625, "logps/chosen": -1.1953125, "logps/rejected": -1.3125, "loss": 1.2467, "nll_loss": 1.2421875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11962890625, "rewards/margins": 0.01153564453125, "rewards/rejected": -0.130859375, "step": 450 }, { "epoch": 0.36, "grad_norm": 2.6175879243996363, "learning_rate": 4.900311653685437e-07, "log_odds_chosen": 0.20297852158546448, "log_odds_ratio": -0.669140636920929, "logits/chosen": -2.296875, "logits/rejected": -2.359375, "logps/chosen": -1.1640625, "logps/rejected": -1.3125, "loss": 1.2102, "nll_loss": 1.1953125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1162109375, "rewards/margins": 0.01513671875, "rewards/rejected": -0.1318359375, "step": 460 }, { "epoch": 0.37, "grad_norm": 2.2300783745527317, "learning_rate": 4.890471258807968e-07, "log_odds_chosen": 0.19609375298023224, "log_odds_ratio": -0.6773437261581421, "logits/chosen": -2.1875, "logits/rejected": -2.265625, "logps/chosen": -1.1875, "logps/rejected": -1.296875, "loss": 1.2387, "nll_loss": 1.203125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11865234375, "rewards/margins": 0.01116943359375, "rewards/rejected": -0.1298828125, "step": 470 }, { "epoch": 0.38, "grad_norm": 2.9471805049826094, "learning_rate": 4.880178608046894e-07, "log_odds_chosen": 0.14970703423023224, "log_odds_ratio": -0.7040039300918579, "logits/chosen": -2.171875, "logits/rejected": -2.203125, "logps/chosen": -1.140625, "logps/rejected": -1.25, "loss": 1.2675, "nll_loss": 1.2109375, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0108642578125, "rewards/rejected": -0.125, "step": 480 }, { "epoch": 0.39, "grad_norm": 2.7162956655728623, "learning_rate": 4.869435648680116e-07, "log_odds_chosen": 0.15129394829273224, "log_odds_ratio": -0.6802734136581421, "logits/chosen": -2.1875, "logits/rejected": -2.28125, "logps/chosen": -1.1171875, "logps/rejected": -1.2265625, "loss": 1.1889, "nll_loss": 1.1328125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11181640625, "rewards/margins": 0.01043701171875, "rewards/rejected": -0.12255859375, "step": 490 }, { "epoch": 0.39, "grad_norm": 4.459550453771863, "learning_rate": 4.858244413179923e-07, "log_odds_chosen": 0.2993102967739105, "log_odds_ratio": -0.64111328125, "logits/chosen": -2.171875, "logits/rejected": -2.25, "logps/chosen": -1.1171875, "logps/rejected": -1.3125, "loss": 1.2095, "nll_loss": 1.1640625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.111328125, "rewards/margins": 0.0198974609375, "rewards/rejected": -0.1318359375, "step": 500 }, { "epoch": 0.4, "grad_norm": 3.2619422051015836, "learning_rate": 4.846607018828449e-07, "log_odds_chosen": 0.210205078125, "log_odds_ratio": -0.673632800579071, "logits/chosen": -2.21875, "logits/rejected": -2.421875, "logps/chosen": -1.1875, "logps/rejected": -1.3203125, "loss": 1.2653, "nll_loss": 1.21875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.119140625, "rewards/margins": 0.013427734375, "rewards/rejected": -0.1328125, "step": 510 }, { "epoch": 0.41, "grad_norm": 2.7715133106941576, "learning_rate": 4.834525667317121e-07, "log_odds_chosen": 0.22309570014476776, "log_odds_ratio": -0.66357421875, "logits/chosen": -2.0625, "logits/rejected": -2.234375, "logps/chosen": -1.15625, "logps/rejected": -1.3046875, "loss": 1.2614, "nll_loss": 1.1796875, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.115234375, "rewards/margins": 0.0150146484375, "rewards/rejected": -0.1298828125, "step": 520 }, { "epoch": 0.42, "grad_norm": 2.4609772540284593, "learning_rate": 4.822002644330101e-07, "log_odds_chosen": 0.19017334282398224, "log_odds_ratio": -0.711718738079071, "logits/chosen": -2.171875, "logits/rejected": -2.296875, "logps/chosen": -1.21875, "logps/rejected": -1.3515625, "loss": 1.2548, "nll_loss": 1.2421875, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.12158203125, "rewards/margins": 0.0137939453125, "rewards/rejected": -0.1357421875, "step": 530 }, { "epoch": 0.43, "grad_norm": 2.645873831673924, "learning_rate": 4.809040319111865e-07, "log_odds_chosen": 0.12646484375, "log_odds_ratio": -0.713671863079071, "logits/chosen": -2.09375, "logits/rejected": -2.25, "logps/chosen": -1.1484375, "logps/rejected": -1.2421875, "loss": 1.2402, "nll_loss": 1.15625, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.11474609375, "rewards/margins": 0.0096435546875, "rewards/rejected": -0.12451171875, "step": 540 }, { "epoch": 0.43, "grad_norm": 2.45398616162251, "learning_rate": 4.795641144018965e-07, "log_odds_chosen": 0.09213867038488388, "log_odds_ratio": -0.74267578125, "logits/chosen": -2.203125, "logits/rejected": -2.203125, "logps/chosen": -1.2109375, "logps/rejected": -1.28125, "loss": 1.2755, "nll_loss": 1.25, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12109375, "rewards/margins": 0.006805419921875, "rewards/rejected": -0.1279296875, "step": 550 }, { "epoch": 0.44, "grad_norm": 2.5512768402172683, "learning_rate": 4.781807654056053e-07, "log_odds_chosen": 0.214599609375, "log_odds_ratio": -0.692675769329071, "logits/chosen": -2.109375, "logits/rejected": -2.21875, "logps/chosen": -1.125, "logps/rejected": -1.2890625, "loss": 1.2303, "nll_loss": 1.140625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.12890625, "step": 560 }, { "epoch": 0.45, "grad_norm": 2.411346604585139, "learning_rate": 4.7675424663962933e-07, "log_odds_chosen": 0.165435791015625, "log_odds_ratio": -0.700390636920929, "logits/chosen": -2.140625, "logits/rejected": -2.21875, "logps/chosen": -1.2109375, "logps/rejected": -1.3125, "loss": 1.2571, "nll_loss": 1.234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.12109375, "rewards/margins": 0.010498046875, "rewards/rejected": -0.1318359375, "step": 570 }, { "epoch": 0.46, "grad_norm": 2.48077566767054, "learning_rate": 4.752848279886212e-07, "log_odds_chosen": 0.207489013671875, "log_odds_ratio": -0.66748046875, "logits/chosen": -2.0625, "logits/rejected": -2.234375, "logps/chosen": -1.1015625, "logps/rejected": -1.2421875, "loss": 1.1978, "nll_loss": 1.140625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.10986328125, "rewards/margins": 0.0145263671875, "rewards/rejected": -0.12451171875, "step": 580 }, { "epoch": 0.46, "grad_norm": 2.7897141919738786, "learning_rate": 4.7377278745350984e-07, "log_odds_chosen": 0.2662353515625, "log_odds_ratio": -0.663867175579071, "logits/chosen": -2.03125, "logits/rejected": -2.15625, "logps/chosen": -1.046875, "logps/rejected": -1.1875, "loss": 1.2309, "nll_loss": 1.0859375, "rewards/accuracies": 0.625, "rewards/chosen": -0.1044921875, "rewards/margins": 0.0147705078125, "rewards/rejected": -0.119140625, "step": 590 }, { "epoch": 0.47, "grad_norm": 2.758617809500896, "learning_rate": 4.7221841109890506e-07, "log_odds_chosen": 0.22445067763328552, "log_odds_ratio": -0.695117175579071, "logits/chosen": -2.0625, "logits/rejected": -2.140625, "logps/chosen": -1.1015625, "logps/rejected": -1.2578125, "loss": 1.2281, "nll_loss": 1.125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10986328125, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.1259765625, "step": 600 }, { "epoch": 0.48, "grad_norm": 2.6514784587831204, "learning_rate": 4.706219929989771e-07, "log_odds_chosen": 0.2147216796875, "log_odds_ratio": -0.660937488079071, "logits/chosen": -2.078125, "logits/rejected": -2.203125, "logps/chosen": -1.09375, "logps/rejected": -1.234375, "loss": 1.2094, "nll_loss": 1.0859375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10986328125, "rewards/margins": 0.0140380859375, "rewards/rejected": -0.12353515625, "step": 610 }, { "epoch": 0.49, "grad_norm": 2.630207221232529, "learning_rate": 4.6898383518182007e-07, "log_odds_chosen": 0.19202271103858948, "log_odds_ratio": -0.6786133050918579, "logits/chosen": -2.046875, "logits/rejected": -2.125, "logps/chosen": -1.0859375, "logps/rejected": -1.2265625, "loss": 1.2307, "nll_loss": 1.1328125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1083984375, "rewards/margins": 0.014404296875, "rewards/rejected": -0.12255859375, "step": 620 }, { "epoch": 0.5, "grad_norm": 2.9225816829730427, "learning_rate": 4.67304247572311e-07, "log_odds_chosen": 0.2799316346645355, "log_odds_ratio": -0.659960925579071, "logits/chosen": -2.09375, "logits/rejected": -2.1875, "logps/chosen": -1.1171875, "logps/rejected": -1.3046875, "loss": 1.2257, "nll_loss": 1.1328125, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.11181640625, "rewards/margins": 0.018310546875, "rewards/rejected": -0.130859375, "step": 630 }, { "epoch": 0.5, "grad_norm": 2.331606515139032, "learning_rate": 4.65583547933475e-07, "log_odds_chosen": 0.16041259467601776, "log_odds_ratio": -0.713085949420929, "logits/chosen": -2.0, "logits/rejected": -2.140625, "logps/chosen": -1.1171875, "logps/rejected": -1.21875, "loss": 1.21, "nll_loss": 1.1328125, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11181640625, "rewards/margins": 0.01031494140625, "rewards/rejected": -0.1220703125, "step": 640 }, { "epoch": 0.51, "grad_norm": 2.541498557632385, "learning_rate": 4.6382206180636705e-07, "log_odds_chosen": 0.12631836533546448, "log_odds_ratio": -0.7256835699081421, "logits/chosen": -2.0625, "logits/rejected": -2.1875, "logps/chosen": -1.140625, "logps/rejected": -1.25, "loss": 1.2675, "nll_loss": 1.1484375, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1142578125, "rewards/margins": 0.01019287109375, "rewards/rejected": -0.12451171875, "step": 650 }, { "epoch": 0.52, "grad_norm": 3.2675127960880586, "learning_rate": 4.620201224484827e-07, "log_odds_chosen": 0.2113037109375, "log_odds_ratio": -0.658496081829071, "logits/chosen": -2.0, "logits/rejected": -2.078125, "logps/chosen": -1.078125, "logps/rejected": -1.2265625, "loss": 1.209, "nll_loss": 1.0703125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.107421875, "rewards/margins": 0.01470947265625, "rewards/rejected": -0.12255859375, "step": 660 }, { "epoch": 0.53, "grad_norm": 2.5735852092457248, "learning_rate": 4.601780707707087e-07, "log_odds_chosen": 0.25184327363967896, "log_odds_ratio": -0.679394543170929, "logits/chosen": -2.015625, "logits/rejected": -2.109375, "logps/chosen": -1.109375, "logps/rejected": -1.28125, "loss": 1.1888, "nll_loss": 1.15625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11083984375, "rewards/margins": 0.017333984375, "rewards/rejected": -0.1279296875, "step": 670 }, { "epoch": 0.54, "grad_norm": 2.678233631526468, "learning_rate": 4.5829625527282554e-07, "log_odds_chosen": 0.15609130263328552, "log_odds_ratio": -0.700488269329071, "logits/chosen": -2.03125, "logits/rejected": -2.125, "logps/chosen": -1.125, "logps/rejected": -1.234375, "loss": 1.2431, "nll_loss": 1.1796875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11279296875, "rewards/margins": 0.01080322265625, "rewards/rejected": -0.12353515625, "step": 680 }, { "epoch": 0.54, "grad_norm": 2.423777152319806, "learning_rate": 4.5637503197757474e-07, "log_odds_chosen": 0.089111328125, "log_odds_ratio": -0.746874988079071, "logits/chosen": -1.890625, "logits/rejected": -2.015625, "logps/chosen": -1.140625, "logps/rejected": -1.2109375, "loss": 1.1964, "nll_loss": 1.140625, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.11376953125, "rewards/margins": 0.007568359375, "rewards/rejected": -0.12158203125, "step": 690 }, { "epoch": 0.55, "grad_norm": 3.0765189053391633, "learning_rate": 4.5441476436330204e-07, "log_odds_chosen": 0.27679443359375, "log_odds_ratio": -0.677929699420929, "logits/chosen": -2.078125, "logits/rejected": -2.21875, "logps/chosen": -1.109375, "logps/rejected": -1.296875, "loss": 1.2492, "nll_loss": 1.171875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.111328125, "rewards/margins": 0.018310546875, "rewards/rejected": -0.1298828125, "step": 700 }, { "epoch": 0.56, "grad_norm": 2.6130205345904334, "learning_rate": 4.5241582329519105e-07, "log_odds_chosen": 0.150299072265625, "log_odds_ratio": -0.7164062261581421, "logits/chosen": -1.984375, "logits/rejected": -2.109375, "logps/chosen": -1.125, "logps/rejected": -1.2421875, "loss": 1.2128, "nll_loss": 1.1640625, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0115966796875, "rewards/rejected": -0.12451171875, "step": 710 }, { "epoch": 0.57, "grad_norm": 2.710305930916119, "learning_rate": 4.503785869550984e-07, "log_odds_chosen": 0.17982177436351776, "log_odds_ratio": -0.708300769329071, "logits/chosen": -2.046875, "logits/rejected": -2.078125, "logps/chosen": -1.1796875, "logps/rejected": -1.3203125, "loss": 1.2557, "nll_loss": 1.2109375, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.1181640625, "rewards/margins": 0.01446533203125, "rewards/rejected": -0.1328125, "step": 720 }, { "epoch": 0.58, "grad_norm": 2.784125203819912, "learning_rate": 4.4830344077000535e-07, "log_odds_chosen": 0.17173461616039276, "log_odds_ratio": -0.70556640625, "logits/chosen": -1.921875, "logits/rejected": -2.109375, "logps/chosen": -1.1328125, "logps/rejected": -1.2578125, "loss": 1.2264, "nll_loss": 1.1796875, "rewards/accuracies": 0.5, "rewards/chosen": -0.11376953125, "rewards/margins": 0.012451171875, "rewards/rejected": -0.1259765625, "step": 730 }, { "epoch": 0.58, "grad_norm": 2.969932216303278, "learning_rate": 4.461907773390984e-07, "log_odds_chosen": 0.24876098334789276, "log_odds_ratio": -0.671191394329071, "logits/chosen": -2.0625, "logits/rejected": -2.171875, "logps/chosen": -1.1171875, "logps/rejected": -1.2890625, "loss": 1.2521, "nll_loss": 1.203125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0169677734375, "rewards/rejected": -0.12890625, "step": 740 }, { "epoch": 0.59, "grad_norm": 2.3389098001594553, "learning_rate": 4.4404099635949297e-07, "log_odds_chosen": 0.20144042372703552, "log_odds_ratio": -0.679394543170929, "logits/chosen": -1.984375, "logits/rejected": -2.0625, "logps/chosen": -1.09375, "logps/rejected": -1.2265625, "loss": 1.1809, "nll_loss": 1.140625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.012939453125, "rewards/rejected": -0.12255859375, "step": 750 }, { "epoch": 0.6, "grad_norm": 3.0086699300119872, "learning_rate": 4.418545045506144e-07, "log_odds_chosen": 0.10061035305261612, "log_odds_ratio": -0.7476562261581421, "logits/chosen": -2.0625, "logits/rejected": -2.15625, "logps/chosen": -1.0703125, "logps/rejected": -1.140625, "loss": 1.239, "nll_loss": 1.1015625, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.10693359375, "rewards/margins": 0.007415771484375, "rewards/rejected": -0.1142578125, "step": 760 }, { "epoch": 0.61, "grad_norm": 2.2320233236738143, "learning_rate": 4.3963171557725004e-07, "log_odds_chosen": 0.0516357421875, "log_odds_ratio": -0.776171863079071, "logits/chosen": -2.015625, "logits/rejected": -2.109375, "logps/chosen": -1.125, "logps/rejected": -1.1796875, "loss": 1.2253, "nll_loss": 1.1484375, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.1123046875, "rewards/margins": 0.00567626953125, "rewards/rejected": -0.1181640625, "step": 770 }, { "epoch": 0.61, "grad_norm": 2.5006901009181877, "learning_rate": 4.3737304997128765e-07, "log_odds_chosen": 0.2787841856479645, "log_odds_ratio": -0.668652355670929, "logits/chosen": -1.96875, "logits/rejected": -2.078125, "logps/chosen": -1.125, "logps/rejected": -1.3125, "loss": 1.2413, "nll_loss": 1.203125, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11279296875, "rewards/margins": 0.0184326171875, "rewards/rejected": -0.130859375, "step": 780 }, { "epoch": 0.62, "grad_norm": 2.7387563755483, "learning_rate": 4.350789350521548e-07, "log_odds_chosen": 0.19570311903953552, "log_odds_ratio": -0.71337890625, "logits/chosen": -1.9921875, "logits/rejected": -2.078125, "logps/chosen": -1.0859375, "logps/rejected": -1.234375, "loss": 1.2163, "nll_loss": 1.1484375, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.10888671875, "rewards/margins": 0.014892578125, "rewards/rejected": -0.12353515625, "step": 790 }, { "epoch": 0.63, "grad_norm": 2.898794879634056, "learning_rate": 4.32749804845973e-07, "log_odds_chosen": 0.16457518935203552, "log_odds_ratio": -0.7186523675918579, "logits/chosen": -2.03125, "logits/rejected": -2.15625, "logps/chosen": -1.1484375, "logps/rejected": -1.25, "loss": 1.2604, "nll_loss": 1.1796875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.115234375, "rewards/margins": 0.0098876953125, "rewards/rejected": -0.125, "step": 800 }, { "epoch": 0.64, "grad_norm": 2.5762145094524973, "learning_rate": 4.303861000034449e-07, "log_odds_chosen": 0.17528076469898224, "log_odds_ratio": -0.6820312738418579, "logits/chosen": -1.953125, "logits/rejected": -2.03125, "logps/chosen": -1.0703125, "logps/rejected": -1.1875, "loss": 1.1942, "nll_loss": 1.0625, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.107421875, "rewards/margins": 0.01123046875, "rewards/rejected": -0.1181640625, "step": 810 }, { "epoch": 0.65, "grad_norm": 2.865127283376686, "learning_rate": 4.2798826771648635e-07, "log_odds_chosen": 0.24028930068016052, "log_odds_ratio": -0.6766601800918579, "logits/chosen": -1.9375, "logits/rejected": -2.078125, "logps/chosen": -1.125, "logps/rejected": -1.296875, "loss": 1.2299, "nll_loss": 1.171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1123046875, "rewards/margins": 0.017333984375, "rewards/rejected": -0.1298828125, "step": 820 }, { "epoch": 0.65, "grad_norm": 2.446066437808379, "learning_rate": 4.2555676163362205e-07, "log_odds_chosen": 0.21907348930835724, "log_odds_ratio": -0.6822265386581421, "logits/chosen": -2.015625, "logits/rejected": -2.140625, "logps/chosen": -1.1640625, "logps/rejected": -1.296875, "loss": 1.2354, "nll_loss": 1.21875, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.1162109375, "rewards/margins": 0.0133056640625, "rewards/rejected": -0.12890625, "step": 830 }, { "epoch": 0.66, "grad_norm": 2.372183587847385, "learning_rate": 4.230920417741589e-07, "log_odds_chosen": 0.27910155057907104, "log_odds_ratio": -0.666308581829071, "logits/chosen": -2.0625, "logits/rejected": -2.203125, "logps/chosen": -1.0703125, "logps/rejected": -1.2734375, "loss": 1.2168, "nll_loss": 1.125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.10693359375, "rewards/margins": 0.020263671875, "rewards/rejected": -0.126953125, "step": 840 }, { "epoch": 0.67, "grad_norm": 2.9436571486068623, "learning_rate": 4.205945744411551e-07, "log_odds_chosen": 0.12534180283546448, "log_odds_ratio": -0.73974609375, "logits/chosen": -2.046875, "logits/rejected": -2.15625, "logps/chosen": -1.078125, "logps/rejected": -1.1640625, "loss": 1.2815, "nll_loss": 1.203125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.107421875, "rewards/margins": 0.00909423828125, "rewards/rejected": -0.11669921875, "step": 850 }, { "epoch": 0.68, "grad_norm": 2.8833427572143133, "learning_rate": 4.1806483213319877e-07, "log_odds_chosen": 0.23845215141773224, "log_odds_ratio": -0.6595703363418579, "logits/chosen": -1.859375, "logits/rejected": -2.046875, "logps/chosen": -1.1015625, "logps/rejected": -1.265625, "loss": 1.2544, "nll_loss": 1.171875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.126953125, "step": 860 }, { "epoch": 0.69, "grad_norm": 2.715581746962796, "learning_rate": 4.155032934550165e-07, "log_odds_chosen": 0.16794434189796448, "log_odds_ratio": -0.6874023675918579, "logits/chosen": -1.9296875, "logits/rejected": -2.046875, "logps/chosen": -1.078125, "logps/rejected": -1.1640625, "loss": 1.1984, "nll_loss": 1.125, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.107421875, "rewards/margins": 0.00909423828125, "rewards/rejected": -0.11669921875, "step": 870 }, { "epoch": 0.69, "grad_norm": 2.784822109898019, "learning_rate": 4.129104430269248e-07, "log_odds_chosen": 0.15845946967601776, "log_odds_ratio": -0.693359375, "logits/chosen": -1.9453125, "logits/rejected": -2.09375, "logps/chosen": -1.109375, "logps/rejected": -1.21875, "loss": 1.2345, "nll_loss": 1.15625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.11083984375, "rewards/margins": 0.0106201171875, "rewards/rejected": -0.12158203125, "step": 880 }, { "epoch": 0.7, "grad_norm": 2.5161836223908263, "learning_rate": 4.102867713931448e-07, "log_odds_chosen": 0.16597899794578552, "log_odds_ratio": -0.6788085699081421, "logits/chosen": -1.9765625, "logits/rejected": -2.125, "logps/chosen": -1.09375, "logps/rejected": -1.1953125, "loss": 1.2663, "nll_loss": 1.1796875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.10986328125, "rewards/margins": 0.0096435546875, "rewards/rejected": -0.119140625, "step": 890 }, { "epoch": 0.71, "grad_norm": 2.644860041118969, "learning_rate": 4.0763277492899504e-07, "log_odds_chosen": 0.23768310248851776, "log_odds_ratio": -0.6807616949081421, "logits/chosen": -1.984375, "logits/rejected": -2.109375, "logps/chosen": -1.1328125, "logps/rejected": -1.2890625, "loss": 1.2307, "nll_loss": 1.1953125, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11328125, "rewards/margins": 0.0157470703125, "rewards/rejected": -0.12890625, "step": 900 }, { "epoch": 0.72, "grad_norm": 2.7157553266494503, "learning_rate": 4.049489557469824e-07, "log_odds_chosen": 0.15152588486671448, "log_odds_ratio": -0.7015625238418579, "logits/chosen": -1.7890625, "logits/rejected": -1.96875, "logps/chosen": -1.0625, "logps/rejected": -1.1640625, "loss": 1.168, "nll_loss": 1.09375, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10595703125, "rewards/margins": 0.01025390625, "rewards/rejected": -0.1162109375, "step": 910 }, { "epoch": 0.72, "grad_norm": 2.302289432995534, "learning_rate": 4.0223582160180623e-07, "log_odds_chosen": 0.13297119736671448, "log_odds_ratio": -0.725781261920929, "logits/chosen": -1.9375, "logits/rejected": -2.0, "logps/chosen": -1.125, "logps/rejected": -1.21875, "loss": 1.1737, "nll_loss": 1.1328125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1123046875, "rewards/margins": 0.00994873046875, "rewards/rejected": -0.1220703125, "step": 920 }, { "epoch": 0.73, "grad_norm": 2.6556730641084543, "learning_rate": 3.9949388579429614e-07, "log_odds_chosen": 0.00870361365377903, "log_odds_ratio": -0.7632812261581421, "logits/chosen": -1.8828125, "logits/rejected": -2.046875, "logps/chosen": -1.1015625, "logps/rejected": -1.1015625, "loss": 1.2113, "nll_loss": 1.171875, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.10986328125, "rewards/margins": -0.0003070831298828125, "rewards/rejected": -0.10986328125, "step": 930 }, { "epoch": 0.74, "grad_norm": 2.3947626659116406, "learning_rate": 3.967236670742998e-07, "log_odds_chosen": 0.22456054389476776, "log_odds_ratio": -0.681347668170929, "logits/chosen": -2.0, "logits/rejected": -2.109375, "logps/chosen": -1.1796875, "logps/rejected": -1.3515625, "loss": 1.2084, "nll_loss": 1.203125, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.11767578125, "rewards/margins": 0.0174560546875, "rewards/rejected": -0.134765625, "step": 940 }, { "epoch": 0.75, "grad_norm": 2.7830252945871896, "learning_rate": 3.9392568954254023e-07, "log_odds_chosen": 0.2349853515625, "log_odds_ratio": -0.6742187738418579, "logits/chosen": -1.90625, "logits/rejected": -2.03125, "logps/chosen": -1.140625, "logps/rejected": -1.3125, "loss": 1.2185, "nll_loss": 1.1640625, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11376953125, "rewards/margins": 0.01708984375, "rewards/rejected": -0.130859375, "step": 950 }, { "epoch": 0.76, "grad_norm": 2.4217959208998723, "learning_rate": 3.9110048255146043e-07, "log_odds_chosen": 0.16409912705421448, "log_odds_ratio": -0.693164050579071, "logits/chosen": -2.046875, "logits/rejected": -2.15625, "logps/chosen": -1.109375, "logps/rejected": -1.2109375, "loss": 1.2102, "nll_loss": 1.1953125, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0107421875, "rewards/rejected": -0.12158203125, "step": 960 }, { "epoch": 0.76, "grad_norm": 2.7469080448706706, "learning_rate": 3.882485806050748e-07, "log_odds_chosen": 0.31447142362594604, "log_odds_ratio": -0.639355480670929, "logits/chosen": -1.9609375, "logits/rejected": -2.078125, "logps/chosen": -1.0625, "logps/rejected": -1.2578125, "loss": 1.2235, "nll_loss": 1.1328125, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.10595703125, "rewards/margins": 0.0194091796875, "rewards/rejected": -0.125, "step": 970 }, { "epoch": 0.77, "grad_norm": 2.596832510754079, "learning_rate": 3.8537052325784573e-07, "log_odds_chosen": 0.2929016053676605, "log_odds_ratio": -0.650585949420929, "logits/chosen": -1.96875, "logits/rejected": -2.046875, "logps/chosen": -1.1015625, "logps/rejected": -1.3125, "loss": 1.1857, "nll_loss": 1.1328125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0205078125, "rewards/rejected": -0.130859375, "step": 980 }, { "epoch": 0.78, "grad_norm": 2.7575190212441383, "learning_rate": 3.824668550126046e-07, "log_odds_chosen": 0.19545897841453552, "log_odds_ratio": -0.6885741949081421, "logits/chosen": -1.96875, "logits/rejected": -2.0625, "logps/chosen": -1.0703125, "logps/rejected": -1.1953125, "loss": 1.1889, "nll_loss": 1.140625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0123291015625, "rewards/rejected": -0.119140625, "step": 990 }, { "epoch": 0.79, "grad_norm": 3.592994219979355, "learning_rate": 3.7953812521753643e-07, "log_odds_chosen": 0.16755370795726776, "log_odds_ratio": -0.6943359375, "logits/chosen": -1.921875, "logits/rejected": -2.0, "logps/chosen": -1.0703125, "logps/rejected": -1.171875, "loss": 1.1494, "nll_loss": 1.1171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.107421875, "rewards/margins": 0.0098876953125, "rewards/rejected": -0.1171875, "step": 1000 }, { "epoch": 0.8, "grad_norm": 2.5202564213089405, "learning_rate": 3.7658488796224885e-07, "log_odds_chosen": 0.11643066257238388, "log_odds_ratio": -0.716796875, "logits/chosen": -2.046875, "logits/rejected": -2.125, "logps/chosen": -1.1484375, "logps/rejected": -1.234375, "loss": 1.2212, "nll_loss": 1.203125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.11474609375, "rewards/margins": 0.00860595703125, "rewards/rejected": -0.12353515625, "step": 1010 }, { "epoch": 0.8, "grad_norm": 2.724799909308137, "learning_rate": 3.736077019729425e-07, "log_odds_chosen": 0.302978515625, "log_odds_ratio": -0.6385742425918579, "logits/chosen": -1.9296875, "logits/rejected": -2.046875, "logps/chosen": -1.0390625, "logps/rejected": -1.2421875, "loss": 1.1893, "nll_loss": 1.109375, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10400390625, "rewards/margins": 0.0205078125, "rewards/rejected": -0.12451171875, "step": 1020 }, { "epoch": 0.81, "grad_norm": 2.4835614341515053, "learning_rate": 3.7060713050670546e-07, "log_odds_chosen": 0.2666015625, "log_odds_ratio": -0.6908203363418579, "logits/chosen": -1.875, "logits/rejected": -2.015625, "logps/chosen": -1.1171875, "logps/rejected": -1.328125, "loss": 1.2376, "nll_loss": 1.1796875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1123046875, "rewards/margins": 0.020751953125, "rewards/rejected": -0.1328125, "step": 1030 }, { "epoch": 0.82, "grad_norm": 2.709722079150454, "learning_rate": 3.6758374124494973e-07, "log_odds_chosen": 0.185791015625, "log_odds_ratio": -0.6966797113418579, "logits/chosen": -1.8984375, "logits/rejected": -2.03125, "logps/chosen": -1.0703125, "logps/rejected": -1.2109375, "loss": 1.2082, "nll_loss": 1.109375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.01458740234375, "rewards/rejected": -0.12109375, "step": 1040 }, { "epoch": 0.83, "grad_norm": 2.8331342756102167, "learning_rate": 3.645381061860113e-07, "log_odds_chosen": 0.3631835877895355, "log_odds_ratio": -0.6460937261581421, "logits/chosen": -1.921875, "logits/rejected": -1.9765625, "logps/chosen": -1.0078125, "logps/rejected": -1.2578125, "loss": 1.1933, "nll_loss": 1.1015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.1005859375, "rewards/margins": 0.025146484375, "rewards/rejected": -0.1259765625, "step": 1050 }, { "epoch": 0.83, "grad_norm": 2.6269186805524143, "learning_rate": 3.61470801536933e-07, "log_odds_chosen": 0.12788085639476776, "log_odds_ratio": -0.7337890863418579, "logits/chosen": -2.0, "logits/rejected": -2.078125, "logps/chosen": -1.0390625, "logps/rejected": -1.1328125, "loss": 1.2153, "nll_loss": 1.109375, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.10400390625, "rewards/margins": 0.00909423828125, "rewards/rejected": -0.11279296875, "step": 1060 }, { "epoch": 0.84, "grad_norm": 2.60712425422802, "learning_rate": 3.583824076044508e-07, "log_odds_chosen": 0.08272705227136612, "log_odds_ratio": -0.7518554925918579, "logits/chosen": -1.890625, "logits/rejected": -1.984375, "logps/chosen": -1.140625, "logps/rejected": -1.2265625, "loss": 1.2114, "nll_loss": 1.171875, "rewards/accuracies": 0.46875, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0078125, "rewards/rejected": -0.12255859375, "step": 1070 }, { "epoch": 0.85, "grad_norm": 2.742344457324174, "learning_rate": 3.55273508685206e-07, "log_odds_chosen": 0.11997070163488388, "log_odds_ratio": -0.7144531011581421, "logits/chosen": -1.8828125, "logits/rejected": -2.046875, "logps/chosen": -1.140625, "logps/rejected": -1.21875, "loss": 1.2194, "nll_loss": 1.171875, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.11376953125, "rewards/margins": 0.0084228515625, "rewards/rejected": -0.1220703125, "step": 1080 }, { "epoch": 0.86, "grad_norm": 2.677923442608537, "learning_rate": 3.5214469295520033e-07, "log_odds_chosen": 0.2944091856479645, "log_odds_ratio": -0.6474609375, "logits/chosen": -1.953125, "logits/rejected": -2.0625, "logps/chosen": -1.09375, "logps/rejected": -1.296875, "loss": 1.1926, "nll_loss": 1.140625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.109375, "rewards/margins": 0.0206298828125, "rewards/rejected": -0.1298828125, "step": 1090 }, { "epoch": 0.87, "grad_norm": 2.614103984779814, "learning_rate": 3.4899655235851903e-07, "log_odds_chosen": 0.15128174424171448, "log_odds_ratio": -0.692187488079071, "logits/chosen": -1.875, "logits/rejected": -2.03125, "logps/chosen": -1.140625, "logps/rejected": -1.2421875, "loss": 1.2353, "nll_loss": 1.2265625, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11376953125, "rewards/margins": 0.01055908203125, "rewards/rejected": -0.12451171875, "step": 1100 }, { "epoch": 0.87, "grad_norm": 2.661524044558228, "learning_rate": 3.458296824953403e-07, "log_odds_chosen": 0.19251708686351776, "log_odds_ratio": -0.681445300579071, "logits/chosen": -1.8125, "logits/rejected": -1.9609375, "logps/chosen": -1.125, "logps/rejected": -1.2578125, "loss": 1.2002, "nll_loss": 1.1328125, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.1123046875, "rewards/margins": 0.01312255859375, "rewards/rejected": -0.1259765625, "step": 1110 }, { "epoch": 0.88, "grad_norm": 2.479788982713935, "learning_rate": 3.426446825092525e-07, "log_odds_chosen": 0.30213624238967896, "log_odds_ratio": -0.6465820074081421, "logits/chosen": -1.875, "logits/rejected": -1.9765625, "logps/chosen": -1.0546875, "logps/rejected": -1.2734375, "loss": 1.2165, "nll_loss": 1.1484375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.10546875, "rewards/margins": 0.021728515625, "rewards/rejected": -0.126953125, "step": 1120 }, { "epoch": 0.89, "grad_norm": 3.266264486839817, "learning_rate": 3.3944215497390197e-07, "log_odds_chosen": 0.12014160305261612, "log_odds_ratio": -0.707812488079071, "logits/chosen": -1.875, "logits/rejected": -1.9375, "logps/chosen": -1.1015625, "logps/rejected": -1.1875, "loss": 1.2284, "nll_loss": 1.1640625, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.10986328125, "rewards/margins": 0.00848388671875, "rewards/rejected": -0.1181640625, "step": 1130 }, { "epoch": 0.9, "grad_norm": 2.397641031210895, "learning_rate": 3.362227057789915e-07, "log_odds_chosen": 0.3463378846645355, "log_odds_ratio": -0.619335949420929, "logits/chosen": -1.890625, "logits/rejected": -2.046875, "logps/chosen": -1.0703125, "logps/rejected": -1.2890625, "loss": 1.1821, "nll_loss": 1.1171875, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0220947265625, "rewards/rejected": -0.12890625, "step": 1140 }, { "epoch": 0.91, "grad_norm": 2.4645155740078617, "learning_rate": 3.329869440156512e-07, "log_odds_chosen": 0.357086181640625, "log_odds_ratio": -0.63232421875, "logits/chosen": -1.890625, "logits/rejected": -1.9609375, "logps/chosen": -1.0625, "logps/rejected": -1.3125, "loss": 1.176, "nll_loss": 1.109375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1064453125, "rewards/margins": 0.024658203125, "rewards/rejected": -0.130859375, "step": 1150 }, { "epoch": 0.91, "grad_norm": 2.8912394279639084, "learning_rate": 3.297354818612037e-07, "log_odds_chosen": 0.05325927585363388, "log_odds_ratio": -0.7728515863418579, "logits/chosen": -1.9453125, "logits/rejected": -2.015625, "logps/chosen": -1.09375, "logps/rejected": -1.125, "loss": 1.2402, "nll_loss": 1.1328125, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.10986328125, "rewards/margins": 0.0030975341796875, "rewards/rejected": -0.11279296875, "step": 1160 }, { "epoch": 0.92, "grad_norm": 2.563419103608563, "learning_rate": 3.264689344633461e-07, "log_odds_chosen": 0.14066162705421448, "log_odds_ratio": -0.6943359375, "logits/chosen": -1.8125, "logits/rejected": -1.859375, "logps/chosen": -1.140625, "logps/rejected": -1.2265625, "loss": 1.1959, "nll_loss": 1.1953125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0084228515625, "rewards/rejected": -0.12255859375, "step": 1170 }, { "epoch": 0.93, "grad_norm": 2.8288693775232643, "learning_rate": 3.2318791982376923e-07, "log_odds_chosen": 0.19826659560203552, "log_odds_ratio": -0.6885741949081421, "logits/chosen": -1.9375, "logits/rejected": -2.09375, "logps/chosen": -1.0859375, "logps/rejected": -1.21875, "loss": 1.2221, "nll_loss": 1.109375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10888671875, "rewards/margins": 0.01275634765625, "rewards/rejected": -0.12158203125, "step": 1180 }, { "epoch": 0.94, "grad_norm": 2.9337235954606844, "learning_rate": 3.198930586812372e-07, "log_odds_chosen": 0.3016296327114105, "log_odds_ratio": -0.67626953125, "logits/chosen": -1.8671875, "logits/rejected": -2.0, "logps/chosen": -1.0859375, "logps/rejected": -1.3046875, "loss": 1.1805, "nll_loss": 1.171875, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1083984375, "rewards/margins": 0.02197265625, "rewards/rejected": -0.1298828125, "step": 1190 }, { "epoch": 0.95, "grad_norm": 2.815544385281363, "learning_rate": 3.1658497439414935e-07, "log_odds_chosen": 0.18316039443016052, "log_odds_ratio": -0.6927734613418579, "logits/chosen": -1.96875, "logits/rejected": -2.015625, "logps/chosen": -1.0859375, "logps/rejected": -1.203125, "loss": 1.2118, "nll_loss": 1.140625, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1083984375, "rewards/margins": 0.01153564453125, "rewards/rejected": -0.1201171875, "step": 1200 }, { "epoch": 0.95, "grad_norm": 2.806645073099231, "learning_rate": 3.132642928226061e-07, "log_odds_chosen": 0.33399659395217896, "log_odds_ratio": -0.6348632574081421, "logits/chosen": -1.8828125, "logits/rejected": -1.9921875, "logps/chosen": -1.0546875, "logps/rejected": -1.2734375, "loss": 1.1911, "nll_loss": 1.140625, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0223388671875, "rewards/rejected": -0.1279296875, "step": 1210 }, { "epoch": 0.96, "grad_norm": 2.4581782453300884, "learning_rate": 3.0993164221000207e-07, "log_odds_chosen": 0.215545654296875, "log_odds_ratio": -0.672070324420929, "logits/chosen": -1.9140625, "logits/rejected": -2.0, "logps/chosen": -1.109375, "logps/rejected": -1.2578125, "loss": 1.182, "nll_loss": 1.1640625, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11083984375, "rewards/margins": 0.0145263671875, "rewards/rejected": -0.125, "step": 1220 }, { "epoch": 0.97, "grad_norm": 5.263613381972474, "learning_rate": 3.0658765306416794e-07, "log_odds_chosen": 0.166778564453125, "log_odds_ratio": -0.6953125, "logits/chosen": -1.8359375, "logits/rejected": -1.921875, "logps/chosen": -1.09375, "logps/rejected": -1.2109375, "loss": 1.2193, "nll_loss": 1.1796875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0118408203125, "rewards/rejected": -0.12109375, "step": 1230 }, { "epoch": 0.98, "grad_norm": 2.5240280415155723, "learning_rate": 3.032329580380838e-07, "log_odds_chosen": 0.28306883573532104, "log_odds_ratio": -0.6612304449081421, "logits/chosen": -1.90625, "logits/rejected": -1.96875, "logps/chosen": -1.09375, "logps/rejected": -1.28125, "loss": 1.1956, "nll_loss": 1.1484375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.109375, "rewards/margins": 0.01904296875, "rewards/rejected": -0.1279296875, "step": 1240 }, { "epoch": 0.98, "grad_norm": 2.743773542575128, "learning_rate": 2.998681918101871e-07, "log_odds_chosen": 0.3384033143520355, "log_odds_ratio": -0.6493164300918579, "logits/chosen": -1.8828125, "logits/rejected": -1.9453125, "logps/chosen": -1.078125, "logps/rejected": -1.328125, "loss": 1.206, "nll_loss": 1.1484375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.107421875, "rewards/margins": 0.025390625, "rewards/rejected": -0.1328125, "step": 1250 }, { "epoch": 0.99, "grad_norm": 2.8074211611598066, "learning_rate": 2.9649399096429714e-07, "log_odds_chosen": 0.23601074516773224, "log_odds_ratio": -0.6533203125, "logits/chosen": -1.859375, "logits/rejected": -1.9140625, "logps/chosen": -1.078125, "logps/rejected": -1.234375, "loss": 1.188, "nll_loss": 1.109375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.107421875, "rewards/margins": 0.015625, "rewards/rejected": -0.123046875, "step": 1260 }, { "epoch": 1.0, "grad_norm": 2.525044784627154, "learning_rate": 2.931109938691786e-07, "log_odds_chosen": 0.16881103813648224, "log_odds_ratio": -0.684277355670929, "logits/chosen": -1.8515625, "logits/rejected": -2.0, "logps/chosen": -1.09375, "logps/rejected": -1.21875, "loss": 1.188, "nll_loss": 1.1171875, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.109375, "rewards/margins": 0.0128173828125, "rewards/rejected": -0.1220703125, "step": 1270 }, { "epoch": 1.01, "grad_norm": 2.668512392567912, "learning_rate": 2.8971984055776853e-07, "log_odds_chosen": 0.21584472060203552, "log_odds_ratio": -0.672558605670929, "logits/chosen": -1.84375, "logits/rejected": -1.9765625, "logps/chosen": -1.078125, "logps/rejected": -1.2265625, "loss": 1.2336, "nll_loss": 1.15625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10791015625, "rewards/margins": 0.01470947265625, "rewards/rejected": -0.12255859375, "step": 1280 }, { "epoch": 1.02, "grad_norm": 2.659729033509314, "learning_rate": 2.863211726060875e-07, "log_odds_chosen": 0.2547973692417145, "log_odds_ratio": -0.6659179925918579, "logits/chosen": -1.96875, "logits/rejected": -2.015625, "logps/chosen": -1.140625, "logps/rejected": -1.3125, "loss": 1.2367, "nll_loss": 1.1953125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1142578125, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.130859375, "step": 1290 }, { "epoch": 1.02, "grad_norm": 2.470961884835421, "learning_rate": 2.829156330118589e-07, "log_odds_chosen": 0.24007567763328552, "log_odds_ratio": -0.65283203125, "logits/chosen": -1.859375, "logits/rejected": -1.9609375, "logps/chosen": -1.125, "logps/rejected": -1.28125, "loss": 1.2008, "nll_loss": 1.1953125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.1279296875, "step": 1300 }, { "epoch": 1.03, "grad_norm": 2.5904466369333026, "learning_rate": 2.7950386607286e-07, "log_odds_chosen": 0.28740233182907104, "log_odds_ratio": -0.6572265625, "logits/chosen": -1.8671875, "logits/rejected": -1.9609375, "logps/chosen": -1.0625, "logps/rejected": -1.2578125, "loss": 1.2003, "nll_loss": 1.0859375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1064453125, "rewards/margins": 0.019775390625, "rewards/rejected": -0.1259765625, "step": 1310 }, { "epoch": 1.04, "grad_norm": 2.8679276152227726, "learning_rate": 2.7608651726502607e-07, "log_odds_chosen": 0.29725342988967896, "log_odds_ratio": -0.6602538824081421, "logits/chosen": -1.84375, "logits/rejected": -2.015625, "logps/chosen": -1.0625, "logps/rejected": -1.2734375, "loss": 1.2296, "nll_loss": 1.125, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.0213623046875, "rewards/rejected": -0.1279296875, "step": 1320 }, { "epoch": 1.05, "grad_norm": 2.5150772854856243, "learning_rate": 2.7266423312033226e-07, "log_odds_chosen": 0.2159423828125, "log_odds_ratio": -0.7059570550918579, "logits/chosen": -1.9140625, "logits/rejected": -2.0, "logps/chosen": -1.078125, "logps/rejected": -1.25, "loss": 1.185, "nll_loss": 1.140625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10791015625, "rewards/margins": 0.017333984375, "rewards/rejected": -0.125, "step": 1330 }, { "epoch": 1.06, "grad_norm": 2.737219590030928, "learning_rate": 2.692376611044757e-07, "log_odds_chosen": 0.3914794921875, "log_odds_ratio": -0.640332043170929, "logits/chosen": -1.796875, "logits/rejected": -1.8671875, "logps/chosen": -1.03125, "logps/rejected": -1.2890625, "loss": 1.2041, "nll_loss": 1.1328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.1025390625, "rewards/margins": 0.026123046875, "rewards/rejected": -0.12890625, "step": 1340 }, { "epoch": 1.06, "grad_norm": 2.7769961907081293, "learning_rate": 2.6580744949438045e-07, "log_odds_chosen": 0.08111572265625, "log_odds_ratio": -0.731249988079071, "logits/chosen": -1.8828125, "logits/rejected": -1.9921875, "logps/chosen": -1.1484375, "logps/rejected": -1.1875, "loss": 1.2605, "nll_loss": 1.203125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11474609375, "rewards/margins": 0.0037078857421875, "rewards/rejected": -0.11865234375, "step": 1350 }, { "epoch": 1.07, "grad_norm": 2.9775601305183463, "learning_rate": 2.6237424725554935e-07, "log_odds_chosen": 0.3329834043979645, "log_odds_ratio": -0.635937511920929, "logits/chosen": -1.8359375, "logits/rejected": -1.921875, "logps/chosen": -1.0703125, "logps/rejected": -1.296875, "loss": 1.2152, "nll_loss": 1.109375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.0233154296875, "rewards/rejected": -0.1298828125, "step": 1360 }, { "epoch": 1.08, "grad_norm": 2.279068955006949, "learning_rate": 2.589387039192858e-07, "log_odds_chosen": 0.20733642578125, "log_odds_ratio": -0.667675793170929, "logits/chosen": -1.859375, "logits/rejected": -1.9765625, "logps/chosen": -1.125, "logps/rejected": -1.2578125, "loss": 1.2064, "nll_loss": 1.15625, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1123046875, "rewards/margins": 0.013671875, "rewards/rejected": -0.1259765625, "step": 1370 }, { "epoch": 1.09, "grad_norm": 3.0021514828628746, "learning_rate": 2.555014694598077e-07, "log_odds_chosen": 0.23118896782398224, "log_odds_ratio": -0.6884765625, "logits/chosen": -1.765625, "logits/rejected": -1.9453125, "logps/chosen": -1.0859375, "logps/rejected": -1.2265625, "loss": 1.2152, "nll_loss": 1.09375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1083984375, "rewards/margins": 0.0145263671875, "rewards/rejected": -0.123046875, "step": 1380 }, { "epoch": 1.09, "grad_norm": 2.735522050073968, "learning_rate": 2.5206319417127873e-07, "log_odds_chosen": 0.3378845155239105, "log_odds_ratio": -0.632128894329071, "logits/chosen": -1.7421875, "logits/rejected": -1.8984375, "logps/chosen": -1.0234375, "logps/rejected": -1.2578125, "loss": 1.1638, "nll_loss": 1.0703125, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.1025390625, "rewards/margins": 0.0228271484375, "rewards/rejected": -0.125, "step": 1390 }, { "epoch": 1.1, "grad_norm": 2.5736254747923923, "learning_rate": 2.4862452854477784e-07, "log_odds_chosen": 0.3209228515625, "log_odds_ratio": -0.65576171875, "logits/chosen": -1.734375, "logits/rejected": -1.8984375, "logps/chosen": -1.03125, "logps/rejected": -1.2578125, "loss": 1.166, "nll_loss": 1.046875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.103515625, "rewards/margins": 0.0225830078125, "rewards/rejected": -0.1259765625, "step": 1400 }, { "epoch": 1.11, "grad_norm": 2.7098667746876073, "learning_rate": 2.4518612314523265e-07, "log_odds_chosen": 0.08408202975988388, "log_odds_ratio": -0.732421875, "logits/chosen": -1.84375, "logits/rejected": -1.953125, "logps/chosen": -1.09375, "logps/rejected": -1.140625, "loss": 1.1805, "nll_loss": 1.1484375, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.10986328125, "rewards/margins": 0.00457763671875, "rewards/rejected": -0.1142578125, "step": 1410 }, { "epoch": 1.12, "grad_norm": 2.7073252776256966, "learning_rate": 2.4174862848833806e-07, "log_odds_chosen": 0.20045165717601776, "log_odds_ratio": -0.67236328125, "logits/chosen": -1.7578125, "logits/rejected": -1.8359375, "logps/chosen": -1.0703125, "logps/rejected": -1.1875, "loss": 1.2051, "nll_loss": 1.1328125, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0118408203125, "rewards/rejected": -0.119140625, "step": 1420 }, { "epoch": 1.13, "grad_norm": 2.7514653552282233, "learning_rate": 2.3831269491748467e-07, "log_odds_chosen": 0.22596435248851776, "log_odds_ratio": -0.708984375, "logits/chosen": -1.796875, "logits/rejected": -1.8828125, "logps/chosen": -1.1171875, "logps/rejected": -1.2890625, "loss": 1.217, "nll_loss": 1.171875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.11181640625, "rewards/margins": 0.017333984375, "rewards/rejected": -0.12890625, "step": 1430 }, { "epoch": 1.13, "grad_norm": 2.8823498677475183, "learning_rate": 2.3487897248071941e-07, "log_odds_chosen": 0.2939697206020355, "log_odds_ratio": -0.664257824420929, "logits/chosen": -1.7890625, "logits/rejected": -1.9375, "logps/chosen": -1.046875, "logps/rejected": -1.25, "loss": 1.1892, "nll_loss": 1.1328125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0205078125, "rewards/rejected": -0.1259765625, "step": 1440 }, { "epoch": 1.14, "grad_norm": 2.69332509317782, "learning_rate": 2.314481108077624e-07, "log_odds_chosen": 0.1607666015625, "log_odds_ratio": -0.6968749761581421, "logits/chosen": -1.8203125, "logits/rejected": -1.8515625, "logps/chosen": -1.078125, "logps/rejected": -1.1796875, "loss": 1.1978, "nll_loss": 1.125, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.107421875, "rewards/margins": 0.01068115234375, "rewards/rejected": -0.1181640625, "step": 1450 }, { "epoch": 1.15, "grad_norm": 2.5989208277674356, "learning_rate": 2.280207589871026e-07, "log_odds_chosen": 0.3521362245082855, "log_odds_ratio": -0.642382800579071, "logits/chosen": -1.8125, "logits/rejected": -1.9375, "logps/chosen": -1.078125, "logps/rejected": -1.3203125, "loss": 1.1628, "nll_loss": 1.1328125, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.10791015625, "rewards/margins": 0.0242919921875, "rewards/rejected": -0.1318359375, "step": 1460 }, { "epoch": 1.16, "grad_norm": 2.5631030942900805, "learning_rate": 2.2459756544319627e-07, "log_odds_chosen": 0.1890869140625, "log_odds_ratio": -0.696972668170929, "logits/chosen": -1.796875, "logits/rejected": -1.890625, "logps/chosen": -1.015625, "logps/rejected": -1.1328125, "loss": 1.1771, "nll_loss": 1.0546875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10205078125, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.11328125, "step": 1470 }, { "epoch": 1.17, "grad_norm": 2.7548023973263613, "learning_rate": 2.2117917781379067e-07, "log_odds_chosen": 0.19255371391773224, "log_odds_ratio": -0.679394543170929, "logits/chosen": -1.734375, "logits/rejected": -1.859375, "logps/chosen": -1.09375, "logps/rejected": -1.21875, "loss": 1.2441, "nll_loss": 1.1796875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.109375, "rewards/margins": 0.0123291015625, "rewards/rejected": -0.12158203125, "step": 1480 }, { "epoch": 1.17, "grad_norm": 2.6382486056871177, "learning_rate": 2.177662428273968e-07, "log_odds_chosen": 0.23670653998851776, "log_odds_ratio": -0.67626953125, "logits/chosen": -1.7578125, "logits/rejected": -1.8984375, "logps/chosen": -1.046875, "logps/rejected": -1.203125, "loss": 1.1895, "nll_loss": 1.09375, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0150146484375, "rewards/rejected": -0.1201171875, "step": 1490 }, { "epoch": 1.18, "grad_norm": 2.5099170844954317, "learning_rate": 2.1435940618093414e-07, "log_odds_chosen": 0.19310303032398224, "log_odds_ratio": -0.690625011920929, "logits/chosen": -1.765625, "logits/rejected": -1.875, "logps/chosen": -1.0859375, "logps/rejected": -1.2265625, "loss": 1.1881, "nll_loss": 1.1015625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0140380859375, "rewards/rejected": -0.123046875, "step": 1500 }, { "epoch": 1.19, "grad_norm": 2.7950237991583493, "learning_rate": 2.1095931241757062e-07, "log_odds_chosen": 0.2502685487270355, "log_odds_ratio": -0.680957019329071, "logits/chosen": -1.7734375, "logits/rejected": -1.8515625, "logps/chosen": -1.0390625, "logps/rejected": -1.2265625, "loss": 1.1906, "nll_loss": 1.09375, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10400390625, "rewards/margins": 0.0185546875, "rewards/rejected": -0.1220703125, "step": 1510 }, { "epoch": 1.2, "grad_norm": 2.609790265054367, "learning_rate": 2.075666048047806e-07, "log_odds_chosen": 0.15053710341453552, "log_odds_ratio": -0.698437511920929, "logits/chosen": -1.7578125, "logits/rejected": -1.828125, "logps/chosen": -1.1015625, "logps/rejected": -1.21875, "loss": 1.221, "nll_loss": 1.125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1103515625, "rewards/margins": 0.01171875, "rewards/rejected": -0.1220703125, "step": 1520 }, { "epoch": 1.21, "grad_norm": 2.5467686003601697, "learning_rate": 2.0418192521264454e-07, "log_odds_chosen": 0.23857422173023224, "log_odds_ratio": -0.659863293170929, "logits/chosen": -1.7734375, "logits/rejected": -1.8515625, "logps/chosen": -1.0625, "logps/rejected": -1.2109375, "loss": 1.1898, "nll_loss": 1.109375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1064453125, "rewards/margins": 0.01397705078125, "rewards/rejected": -0.12060546875, "step": 1530 }, { "epoch": 1.21, "grad_norm": 2.459650956326835, "learning_rate": 2.0080591399241292e-07, "log_odds_chosen": 0.23247070610523224, "log_odds_ratio": -0.6712890863418579, "logits/chosen": -1.7578125, "logits/rejected": -1.7734375, "logps/chosen": -1.109375, "logps/rejected": -1.2578125, "loss": 1.1708, "nll_loss": 1.1328125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1103515625, "rewards/margins": 0.014892578125, "rewards/rejected": -0.1259765625, "step": 1540 }, { "epoch": 1.22, "grad_norm": 2.47537989067237, "learning_rate": 1.9743920985535729e-07, "log_odds_chosen": 0.3998779356479645, "log_odds_ratio": -0.625781238079071, "logits/chosen": -1.6484375, "logits/rejected": -1.796875, "logps/chosen": -0.98828125, "logps/rejected": -1.2578125, "loss": 1.1589, "nll_loss": 1.0390625, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09912109375, "rewards/margins": 0.0269775390625, "rewards/rejected": -0.1259765625, "step": 1550 }, { "epoch": 1.23, "grad_norm": 2.7703541098291455, "learning_rate": 1.94082449751932e-07, "log_odds_chosen": 0.2127685546875, "log_odds_ratio": -0.6846679449081421, "logits/chosen": -1.734375, "logits/rejected": -1.8125, "logps/chosen": -1.09375, "logps/rejected": -1.25, "loss": 1.1794, "nll_loss": 1.171875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0157470703125, "rewards/rejected": -0.125, "step": 1560 }, { "epoch": 1.24, "grad_norm": 2.885795668675382, "learning_rate": 1.9073626875126874e-07, "log_odds_chosen": 0.26057130098342896, "log_odds_ratio": -0.649707019329071, "logits/chosen": -1.7734375, "logits/rejected": -1.7890625, "logps/chosen": -1.0234375, "logps/rejected": -1.1875, "loss": 1.1671, "nll_loss": 1.1015625, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.1025390625, "rewards/margins": 0.01611328125, "rewards/rejected": -0.11865234375, "step": 1570 }, { "epoch": 1.24, "grad_norm": 2.722691086755302, "learning_rate": 1.874012999210271e-07, "log_odds_chosen": 0.19356079399585724, "log_odds_ratio": -0.694140613079071, "logits/chosen": -1.859375, "logits/rejected": -1.8828125, "logps/chosen": -1.1171875, "logps/rejected": -1.25, "loss": 1.1779, "nll_loss": 1.171875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.111328125, "rewards/margins": 0.01385498046875, "rewards/rejected": -0.125, "step": 1580 }, { "epoch": 1.25, "grad_norm": 2.604921389210434, "learning_rate": 1.8407817420762383e-07, "log_odds_chosen": 0.26337891817092896, "log_odds_ratio": -0.6572265625, "logits/chosen": -1.734375, "logits/rejected": -1.875, "logps/chosen": -1.09375, "logps/rejected": -1.2734375, "loss": 1.184, "nll_loss": 1.140625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10888671875, "rewards/margins": 0.01806640625, "rewards/rejected": -0.126953125, "step": 1590 }, { "epoch": 1.26, "grad_norm": 2.732739073707677, "learning_rate": 1.8076752031686343e-07, "log_odds_chosen": 0.14312133193016052, "log_odds_ratio": -0.704882800579071, "logits/chosen": -1.7734375, "logits/rejected": -1.8671875, "logps/chosen": -1.0390625, "logps/rejected": -1.140625, "loss": 1.188, "nll_loss": 1.1171875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.103515625, "rewards/margins": 0.010498046875, "rewards/rejected": -0.1142578125, "step": 1600 }, { "epoch": 1.27, "grad_norm": 2.395290977769547, "learning_rate": 1.7746996459499254e-07, "log_odds_chosen": 0.242431640625, "log_odds_ratio": -0.6644531488418579, "logits/chosen": -1.65625, "logits/rejected": -1.84375, "logps/chosen": -1.0546875, "logps/rejected": -1.21875, "loss": 1.1618, "nll_loss": 1.046875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10546875, "rewards/margins": 0.016357421875, "rewards/rejected": -0.1220703125, "step": 1610 }, { "epoch": 1.28, "grad_norm": 2.547454975163186, "learning_rate": 1.741861309102009e-07, "log_odds_chosen": 0.26506346464157104, "log_odds_ratio": -0.6689453125, "logits/chosen": -1.640625, "logits/rejected": -1.7890625, "logps/chosen": -1.0546875, "logps/rejected": -1.2109375, "loss": 1.1665, "nll_loss": 1.109375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.12109375, "step": 1620 }, { "epoch": 1.28, "grad_norm": 2.396584161009059, "learning_rate": 1.7091664053459088e-07, "log_odds_chosen": 0.18143311142921448, "log_odds_ratio": -0.6865234375, "logits/chosen": -1.71875, "logits/rejected": -1.8359375, "logps/chosen": -1.0859375, "logps/rejected": -1.1875, "loss": 1.2118, "nll_loss": 1.15625, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10791015625, "rewards/margins": 0.01043701171875, "rewards/rejected": -0.11865234375, "step": 1630 }, { "epoch": 1.29, "grad_norm": 2.279515658884766, "learning_rate": 1.6766211202663844e-07, "log_odds_chosen": 0.05356445163488388, "log_odds_ratio": -0.739453136920929, "logits/chosen": -1.6875, "logits/rejected": -1.7890625, "logps/chosen": -1.1171875, "logps/rejected": -1.140625, "loss": 1.2049, "nll_loss": 1.1640625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.111328125, "rewards/margins": 0.0027008056640625, "rewards/rejected": -0.1142578125, "step": 1640 }, { "epoch": 1.3, "grad_norm": 2.4004304534033265, "learning_rate": 1.6442316111416743e-07, "log_odds_chosen": 0.26105958223342896, "log_odds_ratio": -0.6631835699081421, "logits/chosen": -1.6796875, "logits/rejected": -1.7421875, "logps/chosen": -1.0859375, "logps/rejected": -1.2734375, "loss": 1.1816, "nll_loss": 1.140625, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0184326171875, "rewards/rejected": -0.126953125, "step": 1650 }, { "epoch": 1.31, "grad_norm": 2.763579524745402, "learning_rate": 1.6120040057785928e-07, "log_odds_chosen": 0.29625242948532104, "log_odds_ratio": -0.6499999761581421, "logits/chosen": -1.8125, "logits/rejected": -1.875, "logps/chosen": -1.0859375, "logps/rejected": -1.28125, "loss": 1.1727, "nll_loss": 1.09375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1083984375, "rewards/margins": 0.0194091796875, "rewards/rejected": -0.1279296875, "step": 1660 }, { "epoch": 1.32, "grad_norm": 2.8036257747935154, "learning_rate": 1.5799444013532038e-07, "log_odds_chosen": 0.23708495497703552, "log_odds_ratio": -0.67431640625, "logits/chosen": -1.7109375, "logits/rejected": -1.8203125, "logps/chosen": -1.0625, "logps/rejected": -1.21875, "loss": 1.1238, "nll_loss": 1.0859375, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.01556396484375, "rewards/rejected": -0.1220703125, "step": 1670 }, { "epoch": 1.32, "grad_norm": 2.6570430804218566, "learning_rate": 1.5480588632572885e-07, "log_odds_chosen": 0.37006837129592896, "log_odds_ratio": -0.635937511920929, "logits/chosen": -1.7421875, "logits/rejected": -1.8046875, "logps/chosen": -1.0390625, "logps/rejected": -1.2890625, "loss": 1.1907, "nll_loss": 1.078125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.103515625, "rewards/margins": 0.025390625, "rewards/rejected": -0.12890625, "step": 1680 }, { "epoch": 1.33, "grad_norm": 2.590262402608133, "learning_rate": 1.516353423950829e-07, "log_odds_chosen": 0.3837524354457855, "log_odds_ratio": -0.625, "logits/chosen": -1.796875, "logits/rejected": -1.9140625, "logps/chosen": -1.046875, "logps/rejected": -1.28125, "loss": 1.1898, "nll_loss": 1.125, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.1044921875, "rewards/margins": 0.0235595703125, "rewards/rejected": -0.1279296875, "step": 1690 }, { "epoch": 1.34, "grad_norm": 2.568483644438331, "learning_rate": 1.4848340818207184e-07, "log_odds_chosen": 0.26896971464157104, "log_odds_ratio": -0.66455078125, "logits/chosen": -1.75, "logits/rejected": -1.859375, "logps/chosen": -1.046875, "logps/rejected": -1.2421875, "loss": 1.1799, "nll_loss": 1.09375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10498046875, "rewards/margins": 0.019287109375, "rewards/rejected": -0.1240234375, "step": 1700 }, { "epoch": 1.35, "grad_norm": 3.318714021827115, "learning_rate": 1.453506800045921e-07, "log_odds_chosen": 0.12944336235523224, "log_odds_ratio": -0.7064453363418579, "logits/chosen": -1.75, "logits/rejected": -1.7890625, "logps/chosen": -1.0859375, "logps/rejected": -1.1796875, "loss": 1.2096, "nll_loss": 1.140625, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.1083984375, "rewards/margins": 0.00970458984375, "rewards/rejected": -0.1181640625, "step": 1710 }, { "epoch": 1.35, "grad_norm": 2.7195091239689426, "learning_rate": 1.422377505469293e-07, "log_odds_chosen": 0.14760741591453552, "log_odds_ratio": -0.72802734375, "logits/chosen": -1.703125, "logits/rejected": -1.75, "logps/chosen": -1.1328125, "logps/rejected": -1.21875, "loss": 1.2695, "nll_loss": 1.234375, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.11328125, "rewards/margins": 0.00823974609375, "rewards/rejected": -0.12158203125, "step": 1720 }, { "epoch": 1.36, "grad_norm": 2.4116745200019696, "learning_rate": 1.3914520874762726e-07, "log_odds_chosen": 0.2623352110385895, "log_odds_ratio": -0.6844726800918579, "logits/chosen": -1.6875, "logits/rejected": -1.765625, "logps/chosen": -1.109375, "logps/rejected": -1.3046875, "loss": 1.2307, "nll_loss": 1.15625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.111328125, "rewards/margins": 0.0191650390625, "rewards/rejected": -0.130859375, "step": 1730 }, { "epoch": 1.37, "grad_norm": 2.6643696734974127, "learning_rate": 1.3607363968806645e-07, "log_odds_chosen": 0.3259033262729645, "log_odds_ratio": -0.623046875, "logits/chosen": -1.6953125, "logits/rejected": -1.78125, "logps/chosen": -1.03125, "logps/rejected": -1.25, "loss": 1.2087, "nll_loss": 1.046875, "rewards/accuracies": 0.625, "rewards/chosen": -0.10302734375, "rewards/margins": 0.022216796875, "rewards/rejected": -0.125, "step": 1740 }, { "epoch": 1.38, "grad_norm": 2.734224521952181, "learning_rate": 1.3302362448177167e-07, "log_odds_chosen": 0.30589598417282104, "log_odds_ratio": -0.6283203363418579, "logits/chosen": -1.703125, "logits/rejected": -1.796875, "logps/chosen": -1.03125, "logps/rejected": -1.234375, "loss": 1.1853, "nll_loss": 1.1015625, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.103515625, "rewards/margins": 0.0198974609375, "rewards/rejected": -0.12353515625, "step": 1750 }, { "epoch": 1.39, "grad_norm": 3.213949669653505, "learning_rate": 1.2999574016447056e-07, "log_odds_chosen": 0.3102783262729645, "log_odds_ratio": -0.6421874761581421, "logits/chosen": -1.6875, "logits/rejected": -1.8125, "logps/chosen": -1.046875, "logps/rejected": -1.2578125, "loss": 1.196, "nll_loss": 1.0703125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1044921875, "rewards/margins": 0.0211181640625, "rewards/rejected": -0.1259765625, "step": 1760 }, { "epoch": 1.39, "grad_norm": 2.3847902873067492, "learning_rate": 1.2699055958492344e-07, "log_odds_chosen": 0.19971923530101776, "log_odds_ratio": -0.6748046875, "logits/chosen": -1.7421875, "logits/rejected": -1.8203125, "logps/chosen": -1.046875, "logps/rejected": -1.171875, "loss": 1.2064, "nll_loss": 1.1171875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1044921875, "rewards/margins": 0.01287841796875, "rewards/rejected": -0.11767578125, "step": 1770 }, { "epoch": 1.4, "grad_norm": 2.5039350155364573, "learning_rate": 1.2400865129654567e-07, "log_odds_chosen": 0.27821046113967896, "log_odds_ratio": -0.652050793170929, "logits/chosen": -1.6796875, "logits/rejected": -1.796875, "logps/chosen": -1.0390625, "logps/rejected": -1.2265625, "loss": 1.1371, "nll_loss": 1.0703125, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.10400390625, "rewards/margins": 0.018310546875, "rewards/rejected": -0.12255859375, "step": 1780 }, { "epoch": 1.41, "grad_norm": 2.5879591956281995, "learning_rate": 1.210505794498422e-07, "log_odds_chosen": 0.23630371689796448, "log_odds_ratio": -0.673828125, "logits/chosen": -1.7109375, "logits/rejected": -1.7421875, "logps/chosen": -1.09375, "logps/rejected": -1.2578125, "loss": 1.1706, "nll_loss": 1.109375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.0164794921875, "rewards/rejected": -0.1259765625, "step": 1790 }, { "epoch": 1.42, "grad_norm": 2.752234308496576, "learning_rate": 1.1811690368567545e-07, "log_odds_chosen": 0.14584961533546448, "log_odds_ratio": -0.6947265863418579, "logits/chosen": -1.7109375, "logits/rejected": -1.8203125, "logps/chosen": -1.046875, "logps/rejected": -1.1328125, "loss": 1.236, "nll_loss": 1.1171875, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.1044921875, "rewards/margins": 0.00872802734375, "rewards/rejected": -0.11328125, "step": 1800 }, { "epoch": 1.43, "grad_norm": 2.573963727957766, "learning_rate": 1.1520817902938618e-07, "log_odds_chosen": 0.07918091118335724, "log_odds_ratio": -0.7347656488418579, "logits/chosen": -1.7109375, "logits/rejected": -1.78125, "logps/chosen": -1.125, "logps/rejected": -1.1875, "loss": 1.1915, "nll_loss": 1.15625, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.11279296875, "rewards/margins": 0.00604248046875, "rewards/rejected": -0.11865234375, "step": 1810 }, { "epoch": 1.43, "grad_norm": 2.7806823611081177, "learning_rate": 1.1232495578578755e-07, "log_odds_chosen": 0.15264892578125, "log_odds_ratio": -0.6976562738418579, "logits/chosen": -1.75, "logits/rejected": -1.78125, "logps/chosen": -1.0390625, "logps/rejected": -1.1171875, "loss": 1.1626, "nll_loss": 1.09375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.103515625, "rewards/margins": 0.00823974609375, "rewards/rejected": -0.11181640625, "step": 1820 }, { "epoch": 1.44, "grad_norm": 2.436201094808605, "learning_rate": 1.0946777943505254e-07, "log_odds_chosen": 0.23690184950828552, "log_odds_ratio": -0.6917968988418579, "logits/chosen": -1.7109375, "logits/rejected": -1.78125, "logps/chosen": -1.0625, "logps/rejected": -1.2265625, "loss": 1.1971, "nll_loss": 1.1328125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.10595703125, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.123046875, "step": 1830 }, { "epoch": 1.45, "grad_norm": 2.417259454035703, "learning_rate": 1.0663719052951381e-07, "log_odds_chosen": 0.19016113877296448, "log_odds_ratio": -0.699414074420929, "logits/chosen": -1.6796875, "logits/rejected": -1.765625, "logps/chosen": -1.0859375, "logps/rejected": -1.1953125, "loss": 1.1861, "nll_loss": 1.1328125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0107421875, "rewards/rejected": -0.119140625, "step": 1840 }, { "epoch": 1.46, "grad_norm": 2.340681686492189, "learning_rate": 1.0383372459139608e-07, "log_odds_chosen": 0.30018919706344604, "log_odds_ratio": -0.6387695074081421, "logits/chosen": -1.78125, "logits/rejected": -1.921875, "logps/chosen": -1.0546875, "logps/rejected": -1.2421875, "loss": 1.1958, "nll_loss": 1.078125, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0191650390625, "rewards/rejected": -0.12451171875, "step": 1850 }, { "epoch": 1.47, "grad_norm": 2.530421254724575, "learning_rate": 1.0105791201150002e-07, "log_odds_chosen": 0.3886962831020355, "log_odds_ratio": -0.620312511920929, "logits/chosen": -1.6796875, "logits/rejected": -1.7734375, "logps/chosen": -1.0625, "logps/rejected": -1.3359375, "loss": 1.16, "nll_loss": 1.046875, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.10595703125, "rewards/margins": 0.027587890625, "rewards/rejected": -0.1337890625, "step": 1860 }, { "epoch": 1.47, "grad_norm": 2.6492828085260225, "learning_rate": 9.831027794885713e-08, "log_odds_chosen": 0.34185791015625, "log_odds_ratio": -0.6444336175918579, "logits/chosen": -1.6484375, "logits/rejected": -1.671875, "logps/chosen": -1.0234375, "logps/rejected": -1.234375, "loss": 1.1779, "nll_loss": 1.140625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1025390625, "rewards/margins": 0.0211181640625, "rewards/rejected": -0.12353515625, "step": 1870 }, { "epoch": 1.48, "grad_norm": 2.6971126252475286, "learning_rate": 9.559134223137424e-08, "log_odds_chosen": 0.2640136778354645, "log_odds_ratio": -0.673046886920929, "logits/chosen": -1.7578125, "logits/rejected": -1.8046875, "logps/chosen": -1.09375, "logps/rejected": -1.2578125, "loss": 1.2186, "nll_loss": 1.171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.109375, "rewards/margins": 0.016845703125, "rewards/rejected": -0.1259765625, "step": 1880 }, { "epoch": 1.49, "grad_norm": 2.768346463128813, "learning_rate": 9.290161925748674e-08, "log_odds_chosen": 0.333740234375, "log_odds_ratio": -0.6434570550918579, "logits/chosen": -1.71875, "logits/rejected": -1.796875, "logps/chosen": -1.1015625, "logps/rejected": -1.328125, "loss": 1.1758, "nll_loss": 1.1640625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0224609375, "rewards/rejected": -0.1328125, "step": 1890 }, { "epoch": 1.5, "grad_norm": 2.4131576506486168, "learning_rate": 9.024161789883897e-08, "log_odds_chosen": 0.13895873725414276, "log_odds_ratio": -0.695507824420929, "logits/chosen": -1.625, "logits/rejected": -1.71875, "logps/chosen": -1.03125, "logps/rejected": -1.1171875, "loss": 1.1368, "nll_loss": 1.0625, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10302734375, "rewards/margins": 0.009033203125, "rewards/rejected": -0.1123046875, "step": 1900 }, { "epoch": 1.5, "grad_norm": 2.5654624061461253, "learning_rate": 8.761184140401023e-08, "log_odds_chosen": 0.25886231660842896, "log_odds_ratio": -0.662109375, "logits/chosen": -1.609375, "logits/rejected": -1.6875, "logps/chosen": -1.0390625, "logps/rejected": -1.203125, "loss": 1.1906, "nll_loss": 1.09375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10400390625, "rewards/margins": 0.0164794921875, "rewards/rejected": -0.12060546875, "step": 1910 }, { "epoch": 1.51, "grad_norm": 2.6681020896294676, "learning_rate": 8.501278730330463e-08, "log_odds_chosen": 0.36528319120407104, "log_odds_ratio": -0.626269519329071, "logits/chosen": -1.609375, "logits/rejected": -1.671875, "logps/chosen": -1.078125, "logps/rejected": -1.3203125, "loss": 1.1977, "nll_loss": 1.140625, "rewards/accuracies": 0.625, "rewards/chosen": -0.107421875, "rewards/margins": 0.02490234375, "rewards/rejected": -0.1318359375, "step": 1920 }, { "epoch": 1.52, "grad_norm": 2.7519831354278512, "learning_rate": 8.244494731462279e-08, "log_odds_chosen": 0.24447020888328552, "log_odds_ratio": -0.6788085699081421, "logits/chosen": -1.59375, "logits/rejected": -1.703125, "logps/chosen": -1.03125, "logps/rejected": -1.1953125, "loss": 1.164, "nll_loss": 1.0546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.103515625, "rewards/margins": 0.015869140625, "rewards/rejected": -0.119140625, "step": 1930 }, { "epoch": 1.53, "grad_norm": 2.870452087544285, "learning_rate": 7.990880725043322e-08, "log_odds_chosen": 0.2567138671875, "log_odds_ratio": -0.670703113079071, "logits/chosen": -1.625, "logits/rejected": -1.7578125, "logps/chosen": -1.046875, "logps/rejected": -1.21875, "loss": 1.1622, "nll_loss": 1.1171875, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10498046875, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.12158203125, "step": 1940 }, { "epoch": 1.54, "grad_norm": 2.6968676817822645, "learning_rate": 7.740484692586074e-08, "log_odds_chosen": 0.2530761659145355, "log_odds_ratio": -0.680371105670929, "logits/chosen": -1.671875, "logits/rejected": -1.8203125, "logps/chosen": -1.1796875, "logps/rejected": -1.3515625, "loss": 1.2242, "nll_loss": 1.1953125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11767578125, "rewards/margins": 0.0174560546875, "rewards/rejected": -0.1357421875, "step": 1950 }, { "epoch": 1.54, "grad_norm": 2.735947390317322, "learning_rate": 7.493354006791006e-08, "log_odds_chosen": 0.24350586533546448, "log_odds_ratio": -0.682421863079071, "logits/chosen": -1.6171875, "logits/rejected": -1.6796875, "logps/chosen": -1.0546875, "logps/rejected": -1.2265625, "loss": 1.1973, "nll_loss": 1.15625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.10546875, "rewards/margins": 0.0169677734375, "rewards/rejected": -0.12255859375, "step": 1960 }, { "epoch": 1.55, "grad_norm": 2.4135041559261885, "learning_rate": 7.249535422584055e-08, "log_odds_chosen": 0.19566650688648224, "log_odds_ratio": -0.6849609613418579, "logits/chosen": -1.8125, "logits/rejected": -1.84375, "logps/chosen": -1.078125, "logps/rejected": -1.1875, "loss": 1.1835, "nll_loss": 1.1171875, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10791015625, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.119140625, "step": 1970 }, { "epoch": 1.56, "grad_norm": 2.3578015206878575, "learning_rate": 7.009075068271031e-08, "log_odds_chosen": 0.12241820991039276, "log_odds_ratio": -0.7289062738418579, "logits/chosen": -1.578125, "logits/rejected": -1.7578125, "logps/chosen": -1.09375, "logps/rejected": -1.1796875, "loss": 1.1747, "nll_loss": 1.1171875, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.0087890625, "rewards/rejected": -0.1181640625, "step": 1980 }, { "epoch": 1.57, "grad_norm": 2.7517093669068933, "learning_rate": 6.772018436810525e-08, "log_odds_chosen": 0.34681397676467896, "log_odds_ratio": -0.616992175579071, "logits/chosen": -1.7109375, "logits/rejected": -1.7734375, "logps/chosen": -1.1015625, "logps/rejected": -1.34375, "loss": 1.1863, "nll_loss": 1.1328125, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0240478515625, "rewards/rejected": -0.134765625, "step": 1990 }, { "epoch": 1.58, "grad_norm": 2.758447250920097, "learning_rate": 6.538410377207082e-08, "log_odds_chosen": 0.03367309644818306, "log_odds_ratio": -0.773144543170929, "logits/chosen": -1.625, "logits/rejected": -1.7421875, "logps/chosen": -1.125, "logps/rejected": -1.15625, "loss": 1.2376, "nll_loss": 1.1640625, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.1123046875, "rewards/margins": 0.0030364990234375, "rewards/rejected": -0.115234375, "step": 2000 }, { "epoch": 1.58, "grad_norm": 2.880552308468262, "learning_rate": 6.308295086026133e-08, "log_odds_chosen": 0.17825928330421448, "log_odds_ratio": -0.696972668170929, "logits/chosen": -1.65625, "logits/rejected": -1.671875, "logps/chosen": -1.125, "logps/rejected": -1.25, "loss": 1.2262, "nll_loss": 1.1796875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1123046875, "rewards/margins": 0.01220703125, "rewards/rejected": -0.12451171875, "step": 2010 }, { "epoch": 1.59, "grad_norm": 2.439317791546686, "learning_rate": 6.081716099032417e-08, "log_odds_chosen": 0.3602050840854645, "log_odds_ratio": -0.6319335699081421, "logits/chosen": -1.609375, "logits/rejected": -1.7578125, "logps/chosen": -0.99609375, "logps/rejected": -1.2421875, "loss": 1.1792, "nll_loss": 1.0625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.099609375, "rewards/margins": 0.024169921875, "rewards/rejected": -0.1240234375, "step": 2020 }, { "epoch": 1.6, "grad_norm": 2.810922238332508, "learning_rate": 5.858716282953407e-08, "log_odds_chosen": 0.24152831733226776, "log_odds_ratio": -0.654492199420929, "logits/chosen": -1.6328125, "logits/rejected": -1.75, "logps/chosen": -1.09375, "logps/rejected": -1.2578125, "loss": 1.1982, "nll_loss": 1.140625, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.109375, "rewards/margins": 0.0166015625, "rewards/rejected": -0.1259765625, "step": 2030 }, { "epoch": 1.61, "grad_norm": 2.5961073589782466, "learning_rate": 5.639337827369289e-08, "log_odds_chosen": 0.17608642578125, "log_odds_ratio": -0.702832043170929, "logits/chosen": -1.671875, "logits/rejected": -1.7890625, "logps/chosen": -1.1171875, "logps/rejected": -1.2265625, "loss": 1.1879, "nll_loss": 1.1640625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.111328125, "rewards/margins": 0.0111083984375, "rewards/rejected": -0.12255859375, "step": 2040 }, { "epoch": 1.61, "grad_norm": 2.3150779833374266, "learning_rate": 5.4236222367310816e-08, "log_odds_chosen": 0.29583740234375, "log_odds_ratio": -0.653124988079071, "logits/chosen": -1.578125, "logits/rejected": -1.71875, "logps/chosen": -1.0703125, "logps/rejected": -1.2734375, "loss": 1.1965, "nll_loss": 1.0859375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10693359375, "rewards/margins": 0.020263671875, "rewards/rejected": -0.126953125, "step": 2050 }, { "epoch": 1.62, "grad_norm": 2.1606096674823068, "learning_rate": 5.211610322508364e-08, "log_odds_chosen": 0.15689697861671448, "log_odds_ratio": -0.70263671875, "logits/chosen": -1.6875, "logits/rejected": -1.765625, "logps/chosen": -1.0625, "logps/rejected": -1.1640625, "loss": 1.1801, "nll_loss": 1.09375, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1064453125, "rewards/margins": 0.0098876953125, "rewards/rejected": -0.11669921875, "step": 2060 }, { "epoch": 1.63, "grad_norm": 2.547538507688686, "learning_rate": 5.003342195468102e-08, "log_odds_chosen": 0.221527099609375, "log_odds_ratio": -0.66357421875, "logits/chosen": -1.59375, "logits/rejected": -1.640625, "logps/chosen": -1.0703125, "logps/rejected": -1.203125, "loss": 1.2133, "nll_loss": 1.1328125, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.107421875, "rewards/margins": 0.01263427734375, "rewards/rejected": -0.11962890625, "step": 2070 }, { "epoch": 1.64, "grad_norm": 2.8414624562639546, "learning_rate": 4.798857258086053e-08, "log_odds_chosen": 0.25762939453125, "log_odds_ratio": -0.6714843511581421, "logits/chosen": -1.609375, "logits/rejected": -1.7109375, "logps/chosen": -1.0625, "logps/rejected": -1.25, "loss": 1.2074, "nll_loss": 1.109375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10595703125, "rewards/margins": 0.0186767578125, "rewards/rejected": -0.125, "step": 2080 }, { "epoch": 1.65, "grad_norm": 3.120660574019042, "learning_rate": 4.5981941970921646e-08, "log_odds_chosen": 0.48786622285842896, "log_odds_ratio": -0.591992199420929, "logits/chosen": -1.6171875, "logits/rejected": -1.65625, "logps/chosen": -1.03125, "logps/rejected": -1.375, "loss": 1.1441, "nll_loss": 1.0390625, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10302734375, "rewards/margins": 0.034423828125, "rewards/rejected": -0.1376953125, "step": 2090 }, { "epoch": 1.65, "grad_norm": 2.635871013914355, "learning_rate": 4.4013909761513894e-08, "log_odds_chosen": 0.2707275450229645, "log_odds_ratio": -0.649218738079071, "logits/chosen": -1.71875, "logits/rejected": -1.7109375, "logps/chosen": -1.09375, "logps/rejected": -1.265625, "loss": 1.1887, "nll_loss": 1.140625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.109375, "rewards/margins": 0.0169677734375, "rewards/rejected": -0.1259765625, "step": 2100 }, { "epoch": 1.66, "grad_norm": 3.1511666169638346, "learning_rate": 4.2084848286813105e-08, "log_odds_chosen": 0.3526855409145355, "log_odds_ratio": -0.6600586175918579, "logits/chosen": -1.5859375, "logits/rejected": -1.65625, "logps/chosen": -1.0625, "logps/rejected": -1.34375, "loss": 1.1851, "nll_loss": 1.15625, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1064453125, "rewards/margins": 0.0281982421875, "rewards/rejected": -0.134765625, "step": 2110 }, { "epoch": 1.67, "grad_norm": 2.7305670197672747, "learning_rate": 4.0195122508078886e-08, "log_odds_chosen": 0.27125245332717896, "log_odds_ratio": -0.65625, "logits/chosen": -1.5859375, "logits/rejected": -1.6875, "logps/chosen": -1.0625, "logps/rejected": -1.2421875, "loss": 1.1709, "nll_loss": 1.1328125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1064453125, "rewards/margins": 0.017822265625, "rewards/rejected": -0.12451171875, "step": 2120 }, { "epoch": 1.68, "grad_norm": 3.2312418757529726, "learning_rate": 3.834508994460736e-08, "log_odds_chosen": 0.23995360732078552, "log_odds_ratio": -0.654589831829071, "logits/chosen": -1.578125, "logits/rejected": -1.640625, "logps/chosen": -1.03125, "logps/rejected": -1.1875, "loss": 1.1604, "nll_loss": 1.046875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.103515625, "rewards/margins": 0.0150146484375, "rewards/rejected": -0.1181640625, "step": 2130 }, { "epoch": 1.69, "grad_norm": 2.874032565275268, "learning_rate": 3.653510060609166e-08, "log_odds_chosen": 0.13707275688648224, "log_odds_ratio": -0.7138671875, "logits/chosen": -1.6796875, "logits/rejected": -1.765625, "logps/chosen": -1.0625, "logps/rejected": -1.1484375, "loss": 1.173, "nll_loss": 1.078125, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.10595703125, "rewards/margins": 0.00848388671875, "rewards/rejected": -0.1142578125, "step": 2140 }, { "epoch": 1.69, "grad_norm": 2.6604754366861822, "learning_rate": 3.476549692640316e-08, "log_odds_chosen": 0.34288328886032104, "log_odds_ratio": -0.620312511920929, "logits/chosen": -1.6015625, "logits/rejected": -1.7109375, "logps/chosen": -1.0, "logps/rejected": -1.21875, "loss": 1.1585, "nll_loss": 1.078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.099609375, "rewards/margins": 0.0218505859375, "rewards/rejected": -0.12158203125, "step": 2150 }, { "epoch": 1.7, "grad_norm": 2.71892900942932, "learning_rate": 3.3036613698806085e-08, "log_odds_chosen": 0.21519775688648224, "log_odds_ratio": -0.691601574420929, "logits/chosen": -1.640625, "logits/rejected": -1.734375, "logps/chosen": -1.078125, "logps/rejected": -1.21875, "loss": 1.2288, "nll_loss": 1.1796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10791015625, "rewards/margins": 0.01434326171875, "rewards/rejected": -0.1220703125, "step": 2160 }, { "epoch": 1.71, "grad_norm": 2.878603239597823, "learning_rate": 3.134877801261765e-08, "log_odds_chosen": 0.3372802734375, "log_odds_ratio": -0.642285168170929, "logits/chosen": -1.703125, "logits/rejected": -1.734375, "logps/chosen": -1.109375, "logps/rejected": -1.328125, "loss": 1.2136, "nll_loss": 1.1640625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11083984375, "rewards/margins": 0.0218505859375, "rewards/rejected": -0.1328125, "step": 2170 }, { "epoch": 1.72, "grad_norm": 2.6992535601969085, "learning_rate": 2.9702309191325492e-08, "log_odds_chosen": 0.24042968451976776, "log_odds_ratio": -0.675585925579071, "logits/chosen": -1.7109375, "logits/rejected": -1.796875, "logps/chosen": -1.0703125, "logps/rejected": -1.2265625, "loss": 1.2242, "nll_loss": 1.109375, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.123046875, "step": 2180 }, { "epoch": 1.73, "grad_norm": 2.971420026998493, "learning_rate": 2.809751873217478e-08, "log_odds_chosen": 0.32117921113967896, "log_odds_ratio": -0.6463867425918579, "logits/chosen": -1.734375, "logits/rejected": -1.7578125, "logps/chosen": -1.0859375, "logps/rejected": -1.3046875, "loss": 1.1702, "nll_loss": 1.1328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.10888671875, "rewards/margins": 0.0218505859375, "rewards/rejected": -0.130859375, "step": 2190 }, { "epoch": 1.73, "grad_norm": 2.311036000971507, "learning_rate": 2.653471024723547e-08, "log_odds_chosen": 0.43181151151657104, "log_odds_ratio": -0.5894531011581421, "logits/chosen": -1.6171875, "logits/rejected": -1.6875, "logps/chosen": -1.015625, "logps/rejected": -1.3046875, "loss": 1.1592, "nll_loss": 1.09375, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1015625, "rewards/margins": 0.0289306640625, "rewards/rejected": -0.130859375, "step": 2200 }, { "epoch": 1.74, "grad_norm": 2.5416101230783363, "learning_rate": 2.501417940596168e-08, "log_odds_chosen": 0.02521972730755806, "log_odds_ratio": -0.75, "logits/chosen": -1.6875, "logits/rejected": -1.7109375, "logps/chosen": -1.1015625, "logps/rejected": -1.109375, "loss": 1.1748, "nll_loss": 1.1484375, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1103515625, "rewards/margins": 0.000507354736328125, "rewards/rejected": -0.11083984375, "step": 2210 }, { "epoch": 1.75, "grad_norm": 2.471954369214634, "learning_rate": 2.353621387925375e-08, "log_odds_chosen": 0.3322509825229645, "log_odds_ratio": -0.6597656011581421, "logits/chosen": -1.6015625, "logits/rejected": -1.671875, "logps/chosen": -1.03125, "logps/rejected": -1.2578125, "loss": 1.1634, "nll_loss": 1.078125, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.10302734375, "rewards/margins": 0.022705078125, "rewards/rejected": -0.1259765625, "step": 2220 }, { "epoch": 1.76, "grad_norm": 2.801308375939805, "learning_rate": 2.2101093285033373e-08, "log_odds_chosen": 0.3058715760707855, "log_odds_ratio": -0.6502929925918579, "logits/chosen": -1.6484375, "logits/rejected": -1.7265625, "logps/chosen": -1.046875, "logps/rejected": -1.25, "loss": 1.1307, "nll_loss": 1.03125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10498046875, "rewards/margins": 0.020263671875, "rewards/rejected": -0.125, "step": 2230 }, { "epoch": 1.76, "grad_norm": 2.6457419438315233, "learning_rate": 2.070908913534236e-08, "log_odds_chosen": 0.24928589165210724, "log_odds_ratio": -0.6776367425918579, "logits/chosen": -1.65625, "logits/rejected": -1.6640625, "logps/chosen": -1.078125, "logps/rejected": -1.234375, "loss": 1.2119, "nll_loss": 1.1953125, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.10791015625, "rewards/margins": 0.015869140625, "rewards/rejected": -0.12353515625, "step": 2240 }, { "epoch": 1.77, "grad_norm": 2.8172476905649764, "learning_rate": 1.9360464784975024e-08, "log_odds_chosen": 0.21148681640625, "log_odds_ratio": -0.6849609613418579, "logits/chosen": -1.5625, "logits/rejected": -1.65625, "logps/chosen": -1.0703125, "logps/rejected": -1.21875, "loss": 1.1771, "nll_loss": 1.109375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.107421875, "rewards/margins": 0.0146484375, "rewards/rejected": -0.1220703125, "step": 2250 }, { "epoch": 1.78, "grad_norm": 2.963550785069561, "learning_rate": 1.8055475381653807e-08, "log_odds_chosen": 0.27608031034469604, "log_odds_ratio": -0.6513671875, "logits/chosen": -1.71875, "logits/rejected": -1.7421875, "logps/chosen": -1.046875, "logps/rejected": -1.234375, "loss": 1.2014, "nll_loss": 1.109375, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.1044921875, "rewards/margins": 0.018798828125, "rewards/rejected": -0.12353515625, "step": 2260 }, { "epoch": 1.79, "grad_norm": 3.1613513759896534, "learning_rate": 1.679436781775759e-08, "log_odds_chosen": 0.31138914823532104, "log_odds_ratio": -0.675976574420929, "logits/chosen": -1.65625, "logits/rejected": -1.7421875, "logps/chosen": -1.109375, "logps/rejected": -1.3203125, "loss": 1.1978, "nll_loss": 1.1796875, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11083984375, "rewards/margins": 0.0213623046875, "rewards/rejected": -0.1318359375, "step": 2270 }, { "epoch": 1.8, "grad_norm": 2.92153711849868, "learning_rate": 1.5577380683611807e-08, "log_odds_chosen": 0.2562316954135895, "log_odds_ratio": -0.6595703363418579, "logits/chosen": -1.671875, "logits/rejected": -1.7265625, "logps/chosen": -1.0546875, "logps/rejected": -1.21875, "loss": 1.198, "nll_loss": 1.09375, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.10546875, "rewards/margins": 0.0164794921875, "rewards/rejected": -0.12158203125, "step": 2280 }, { "epoch": 1.8, "grad_norm": 3.486180847986093, "learning_rate": 1.4404744222349358e-08, "log_odds_chosen": 0.48161619901657104, "log_odds_ratio": -0.605664074420929, "logits/chosen": -1.625, "logits/rejected": -1.6875, "logps/chosen": -1.03125, "logps/rejected": -1.359375, "loss": 1.172, "nll_loss": 1.078125, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10302734375, "rewards/margins": 0.032470703125, "rewards/rejected": -0.1357421875, "step": 2290 }, { "epoch": 1.81, "grad_norm": 2.50535325154869, "learning_rate": 1.3276680286350594e-08, "log_odds_chosen": 0.31635743379592896, "log_odds_ratio": -0.641796886920929, "logits/chosen": -1.6875, "logits/rejected": -1.8203125, "logps/chosen": -1.0703125, "logps/rejected": -1.2734375, "loss": 1.204, "nll_loss": 1.1015625, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0203857421875, "rewards/rejected": -0.126953125, "step": 2300 }, { "epoch": 1.82, "grad_norm": 2.6381410134685392, "learning_rate": 1.2193402295270854e-08, "log_odds_chosen": 0.2996459901332855, "log_odds_ratio": -0.6700195074081421, "logits/chosen": -1.5234375, "logits/rejected": -1.671875, "logps/chosen": -1.03125, "logps/rejected": -1.234375, "loss": 1.1512, "nll_loss": 1.125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10302734375, "rewards/margins": 0.0205078125, "rewards/rejected": -0.12353515625, "step": 2310 }, { "epoch": 1.83, "grad_norm": 2.344444293869932, "learning_rate": 1.115511519566334e-08, "log_odds_chosen": 0.3412719666957855, "log_odds_ratio": -0.657519519329071, "logits/chosen": -1.6171875, "logits/rejected": -1.7578125, "logps/chosen": -0.98828125, "logps/rejected": -1.234375, "loss": 1.1698, "nll_loss": 1.03125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0986328125, "rewards/margins": 0.0244140625, "rewards/rejected": -0.123046875, "step": 2320 }, { "epoch": 1.84, "grad_norm": 2.277202951820151, "learning_rate": 1.01620154222051e-08, "log_odds_chosen": 0.14781494438648224, "log_odds_ratio": -0.708984375, "logits/chosen": -1.6171875, "logits/rejected": -1.6953125, "logps/chosen": -1.125, "logps/rejected": -1.203125, "loss": 1.1759, "nll_loss": 1.1328125, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11181640625, "rewards/margins": 0.0079345703125, "rewards/rejected": -0.1201171875, "step": 2330 }, { "epoch": 1.84, "grad_norm": 2.5811302552961943, "learning_rate": 9.214290860533242e-09, "log_odds_chosen": 0.22308655083179474, "log_odds_ratio": -0.6734374761581421, "logits/chosen": -1.6640625, "logits/rejected": -1.7421875, "logps/chosen": -1.1015625, "logps/rejected": -1.234375, "loss": 1.205, "nll_loss": 1.1796875, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1103515625, "rewards/margins": 0.0133056640625, "rewards/rejected": -0.12353515625, "step": 2340 }, { "epoch": 1.85, "grad_norm": 2.4190439831152326, "learning_rate": 8.312120811698798e-09, "log_odds_chosen": 0.24127808213233948, "log_odds_ratio": -0.6958984136581421, "logits/chosen": -1.6875, "logits/rejected": -1.7734375, "logps/chosen": -1.0625, "logps/rejected": -1.234375, "loss": 1.1753, "nll_loss": 1.1328125, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1064453125, "rewards/margins": 0.017333984375, "rewards/rejected": -0.12353515625, "step": 2350 }, { "epoch": 1.86, "grad_norm": 2.3733902102708897, "learning_rate": 7.455675958244422e-09, "log_odds_chosen": 0.2683349549770355, "log_odds_ratio": -0.660449206829071, "logits/chosen": -1.6484375, "logits/rejected": -1.765625, "logps/chosen": -1.078125, "logps/rejected": -1.2578125, "loss": 1.1939, "nll_loss": 1.125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.107421875, "rewards/margins": 0.0184326171875, "rewards/rejected": -0.1259765625, "step": 2360 }, { "epoch": 1.87, "grad_norm": 2.581589417454669, "learning_rate": 6.64511833191278e-09, "log_odds_chosen": 0.16912230849266052, "log_odds_ratio": -0.703125, "logits/chosen": -1.640625, "logits/rejected": -1.765625, "logps/chosen": -1.0546875, "logps/rejected": -1.1796875, "loss": 1.1997, "nll_loss": 1.15625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10595703125, "rewards/margins": 0.01220703125, "rewards/rejected": -0.1181640625, "step": 2370 }, { "epoch": 1.87, "grad_norm": 2.8853418036737297, "learning_rate": 5.8806012829916985e-09, "log_odds_chosen": 0.3464111387729645, "log_odds_ratio": -0.615234375, "logits/chosen": -1.71875, "logits/rejected": -1.8125, "logps/chosen": -1.0078125, "logps/rejected": -1.2421875, "loss": 1.1569, "nll_loss": 1.078125, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.10107421875, "rewards/margins": 0.02294921875, "rewards/rejected": -0.1240234375, "step": 2380 }, { "epoch": 1.88, "grad_norm": 2.5807272249096913, "learning_rate": 5.162269451301576e-09, "log_odds_chosen": 0.16444091498851776, "log_odds_ratio": -0.6973632574081421, "logits/chosen": -1.78125, "logits/rejected": -1.7890625, "logps/chosen": -1.0859375, "logps/rejected": -1.2109375, "loss": 1.2152, "nll_loss": 1.1484375, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10888671875, "rewards/margins": 0.01202392578125, "rewards/rejected": -0.12060546875, "step": 2390 }, { "epoch": 1.89, "grad_norm": 2.6925182997680515, "learning_rate": 4.490258738830771e-09, "log_odds_chosen": 0.2374267578125, "log_odds_ratio": -0.6766601800918579, "logits/chosen": -1.640625, "logits/rejected": -1.6875, "logps/chosen": -1.15625, "logps/rejected": -1.3203125, "loss": 1.2008, "nll_loss": 1.1796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.115234375, "rewards/margins": 0.016357421875, "rewards/rejected": -0.1318359375, "step": 2400 }, { "epoch": 1.9, "grad_norm": 3.01082383722649, "learning_rate": 3.864696284024249e-09, "log_odds_chosen": 0.38875120878219604, "log_odds_ratio": -0.6109374761581421, "logits/chosen": -1.59375, "logits/rejected": -1.71875, "logps/chosen": -1.0625, "logps/rejected": -1.3359375, "loss": 1.1823, "nll_loss": 1.0859375, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.10595703125, "rewards/margins": 0.028076171875, "rewards/rejected": -0.1337890625, "step": 2410 }, { "epoch": 1.91, "grad_norm": 2.7770288933270755, "learning_rate": 3.285700437730077e-09, "log_odds_chosen": 0.35822755098342896, "log_odds_ratio": -0.6693359613418579, "logits/chosen": -1.5859375, "logits/rejected": -1.6796875, "logps/chosen": -1.0859375, "logps/rejected": -1.3515625, "loss": 1.1699, "nll_loss": 1.1171875, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1083984375, "rewards/margins": 0.0264892578125, "rewards/rejected": -0.134765625, "step": 2420 }, { "epoch": 1.91, "grad_norm": 2.5907890754339262, "learning_rate": 2.7533807408084973e-09, "log_odds_chosen": 0.17686156928539276, "log_odds_ratio": -0.698535144329071, "logits/chosen": -1.7421875, "logits/rejected": -1.8203125, "logps/chosen": -1.1015625, "logps/rejected": -1.2109375, "loss": 1.1861, "nll_loss": 1.140625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10986328125, "rewards/margins": 0.01153564453125, "rewards/rejected": -0.12109375, "step": 2430 }, { "epoch": 1.92, "grad_norm": 2.4966778392886955, "learning_rate": 2.2678379034077877e-09, "log_odds_chosen": 0.2807373106479645, "log_odds_ratio": -0.6429687738418579, "logits/chosen": -1.640625, "logits/rejected": -1.7109375, "logps/chosen": -1.09375, "logps/rejected": -1.2734375, "loss": 1.2285, "nll_loss": 1.1484375, "rewards/accuracies": 0.625, "rewards/chosen": -0.10888671875, "rewards/margins": 0.018310546875, "rewards/rejected": -0.126953125, "step": 2440 }, { "epoch": 1.93, "grad_norm": 2.3101513660116466, "learning_rate": 1.82916378591072e-09, "log_odds_chosen": 0.35594481229782104, "log_odds_ratio": -0.6319335699081421, "logits/chosen": -1.6875, "logits/rejected": -1.7890625, "logps/chosen": -1.0546875, "logps/rejected": -1.28125, "loss": 1.1772, "nll_loss": 1.125, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10546875, "rewards/margins": 0.0225830078125, "rewards/rejected": -0.1279296875, "step": 2450 }, { "epoch": 1.94, "grad_norm": 2.550820385872949, "learning_rate": 1.4374413815555763e-09, "log_odds_chosen": 0.21019287407398224, "log_odds_ratio": -0.6913086175918579, "logits/chosen": -1.6484375, "logits/rejected": -1.6796875, "logps/chosen": -1.09375, "logps/rejected": -1.25, "loss": 1.2096, "nll_loss": 1.15625, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.109375, "rewards/margins": 0.0157470703125, "rewards/rejected": -0.125, "step": 2460 }, { "epoch": 1.95, "grad_norm": 2.3418264361304293, "learning_rate": 1.0927448007343188e-09, "log_odds_chosen": 0.2827392518520355, "log_odds_ratio": -0.6519531011581421, "logits/chosen": -1.671875, "logits/rejected": -1.796875, "logps/chosen": -1.0703125, "logps/rejected": -1.2734375, "loss": 1.2057, "nll_loss": 1.1328125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10693359375, "rewards/margins": 0.0203857421875, "rewards/rejected": -0.126953125, "step": 2470 }, { "epoch": 1.95, "grad_norm": 3.0181770379881936, "learning_rate": 7.951392569717774e-10, "log_odds_chosen": 0.32861328125, "log_odds_ratio": -0.641406238079071, "logits/chosen": -1.71875, "logits/rejected": -1.7421875, "logps/chosen": -1.09375, "logps/rejected": -1.296875, "loss": 1.2017, "nll_loss": 1.125, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.109375, "rewards/margins": 0.0206298828125, "rewards/rejected": -0.1298828125, "step": 2480 }, { "epoch": 1.96, "grad_norm": 2.6598066518808965, "learning_rate": 5.446810545877423e-10, "log_odds_chosen": 0.29930418729782104, "log_odds_ratio": -0.6373046636581421, "logits/chosen": -1.703125, "logits/rejected": -1.7890625, "logps/chosen": -1.0703125, "logps/rejected": -1.28125, "loss": 1.1938, "nll_loss": 1.125, "rewards/accuracies": 0.59375, "rewards/chosen": -0.107421875, "rewards/margins": 0.021240234375, "rewards/rejected": -0.12890625, "step": 2490 }, { "epoch": 1.97, "grad_norm": 2.612456535550154, "learning_rate": 3.414175780446227e-10, "log_odds_chosen": 0.27032470703125, "log_odds_ratio": -0.64501953125, "logits/chosen": -1.6015625, "logits/rejected": -1.671875, "logps/chosen": -1.0625, "logps/rejected": -1.2265625, "loss": 1.2178, "nll_loss": 1.125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10595703125, "rewards/margins": 0.0167236328125, "rewards/rejected": -0.12255859375, "step": 2500 }, { "epoch": 1.98, "grad_norm": 2.5663855569599123, "learning_rate": 1.8538728298292395e-10, "log_odds_chosen": 0.25556641817092896, "log_odds_ratio": -0.6698242425918579, "logits/chosen": -1.6171875, "logits/rejected": -1.6640625, "logps/chosen": -1.0546875, "logps/rejected": -1.234375, "loss": 1.198, "nll_loss": 1.1015625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10546875, "rewards/margins": 0.01806640625, "rewards/rejected": -0.12353515625, "step": 2510 }, { "epoch": 1.99, "grad_norm": 2.5581951001300336, "learning_rate": 7.661968894551174e-11, "log_odds_chosen": 0.3158630430698395, "log_odds_ratio": -0.636425793170929, "logits/chosen": -1.7109375, "logits/rejected": -1.828125, "logps/chosen": -1.0234375, "logps/rejected": -1.2265625, "loss": 1.1801, "nll_loss": 1.09375, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.1025390625, "rewards/margins": 0.0205078125, "rewards/rejected": -0.123046875, "step": 2520 }, { "epoch": 1.99, "grad_norm": 2.6003805241560958, "learning_rate": 1.513537379305152e-11, "log_odds_chosen": 0.20820312201976776, "log_odds_ratio": -0.664843738079071, "logits/chosen": -1.671875, "logits/rejected": -1.734375, "logps/chosen": -1.109375, "logps/rejected": -1.2421875, "loss": 1.2092, "nll_loss": 1.1171875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.11083984375, "rewards/margins": 0.0135498046875, "rewards/rejected": -0.12451171875, "step": 2530 }, { "epoch": 2.0, "step": 2538, "total_flos": 0.0, "train_loss": 1.2334148878183206, "train_runtime": 17382.4906, "train_samples_per_second": 7.009, "train_steps_per_second": 0.146 } ], "logging_steps": 10, "max_steps": 2538, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }