stablelm-2-1_6b-orpo-full-v2 / trainer_state.json
vain05's picture
Model save
2df3cc3 verified
raw
history blame contribute delete
No virus
147 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9992122883024814,
"eval_steps": 500,
"global_step": 2538,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 6.010698237247525,
"learning_rate": 1.968503937007874e-08,
"log_odds_chosen": 0.27912598848342896,
"log_odds_ratio": -0.7284179925918579,
"logits/chosen": -2.015625,
"logits/rejected": -2.046875,
"logps/chosen": -2.03125,
"logps/rejected": -2.28125,
"loss": 1.5763,
"nll_loss": 1.4375,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.2041015625,
"rewards/margins": 0.023681640625,
"rewards/rejected": -0.2275390625,
"step": 10
},
{
"epoch": 0.02,
"grad_norm": 5.135758173057069,
"learning_rate": 3.937007874015748e-08,
"log_odds_chosen": 0.36018067598342896,
"log_odds_ratio": -0.750683605670929,
"logits/chosen": -2.109375,
"logits/rejected": -2.140625,
"logps/chosen": -1.9765625,
"logps/rejected": -2.296875,
"loss": 1.5927,
"nll_loss": 1.5,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.197265625,
"rewards/margins": 0.0322265625,
"rewards/rejected": -0.2294921875,
"step": 20
},
{
"epoch": 0.02,
"grad_norm": 6.927967353705024,
"learning_rate": 5.9055118110236216e-08,
"log_odds_chosen": 0.17539063096046448,
"log_odds_ratio": -0.812207043170929,
"logits/chosen": -1.9375,
"logits/rejected": -2.03125,
"logps/chosen": -2.0625,
"logps/rejected": -2.1875,
"loss": 1.5598,
"nll_loss": 1.5,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.2060546875,
"rewards/margins": 0.013427734375,
"rewards/rejected": -0.2197265625,
"step": 30
},
{
"epoch": 0.03,
"grad_norm": 9.514842032339935,
"learning_rate": 7.874015748031496e-08,
"log_odds_chosen": 0.3271545469760895,
"log_odds_ratio": -0.705859363079071,
"logits/chosen": -2.03125,
"logits/rejected": -2.03125,
"logps/chosen": -1.9921875,
"logps/rejected": -2.296875,
"loss": 1.6148,
"nll_loss": 1.546875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.19921875,
"rewards/margins": 0.031005859375,
"rewards/rejected": -0.23046875,
"step": 40
},
{
"epoch": 0.04,
"grad_norm": 5.878873460342644,
"learning_rate": 9.84251968503937e-08,
"log_odds_chosen": 0.1710205078125,
"log_odds_ratio": -0.77587890625,
"logits/chosen": -2.03125,
"logits/rejected": -2.0,
"logps/chosen": -1.9609375,
"logps/rejected": -2.09375,
"loss": 1.5496,
"nll_loss": 1.484375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.1962890625,
"rewards/margins": 0.0137939453125,
"rewards/rejected": -0.2099609375,
"step": 50
},
{
"epoch": 0.05,
"grad_norm": 6.390918916154321,
"learning_rate": 1.1811023622047243e-07,
"log_odds_chosen": 0.21818237006664276,
"log_odds_ratio": -0.7723633050918579,
"logits/chosen": -2.046875,
"logits/rejected": -2.03125,
"logps/chosen": -2.0625,
"logps/rejected": -2.25,
"loss": 1.6078,
"nll_loss": 1.5625,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.2060546875,
"rewards/margins": 0.0185546875,
"rewards/rejected": -0.224609375,
"step": 60
},
{
"epoch": 0.06,
"grad_norm": 7.289138170054862,
"learning_rate": 1.3779527559055117e-07,
"log_odds_chosen": 0.34990233182907104,
"log_odds_ratio": -0.7339843511581421,
"logits/chosen": -2.015625,
"logits/rejected": -2.046875,
"logps/chosen": -2.0625,
"logps/rejected": -2.375,
"loss": 1.5634,
"nll_loss": 1.5078125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.20703125,
"rewards/margins": 0.0311279296875,
"rewards/rejected": -0.23828125,
"step": 70
},
{
"epoch": 0.06,
"grad_norm": 6.612859448964533,
"learning_rate": 1.5748031496062992e-07,
"log_odds_chosen": 0.4515624940395355,
"log_odds_ratio": -0.640917956829071,
"logits/chosen": -1.9609375,
"logits/rejected": -2.015625,
"logps/chosen": -1.7734375,
"logps/rejected": -2.171875,
"loss": 1.532,
"nll_loss": 1.421875,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.177734375,
"rewards/margins": 0.039794921875,
"rewards/rejected": -0.2177734375,
"step": 80
},
{
"epoch": 0.07,
"grad_norm": 8.397857351399276,
"learning_rate": 1.7716535433070863e-07,
"log_odds_chosen": 0.17760619521141052,
"log_odds_ratio": -0.7791992425918579,
"logits/chosen": -2.0625,
"logits/rejected": -2.03125,
"logps/chosen": -2.0625,
"logps/rejected": -2.21875,
"loss": 1.5648,
"nll_loss": 1.546875,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.2060546875,
"rewards/margins": 0.0166015625,
"rewards/rejected": -0.22265625,
"step": 90
},
{
"epoch": 0.08,
"grad_norm": 7.586832559599352,
"learning_rate": 1.968503937007874e-07,
"log_odds_chosen": 0.2771240174770355,
"log_odds_ratio": -0.6792968511581421,
"logits/chosen": -2.140625,
"logits/rejected": -2.171875,
"logps/chosen": -1.890625,
"logps/rejected": -2.125,
"loss": 1.5286,
"nll_loss": 1.484375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.189453125,
"rewards/margins": 0.02392578125,
"rewards/rejected": -0.212890625,
"step": 100
},
{
"epoch": 0.09,
"grad_norm": 5.07791601254699,
"learning_rate": 2.1653543307086615e-07,
"log_odds_chosen": 0.214080810546875,
"log_odds_ratio": -0.7261718511581421,
"logits/chosen": -2.09375,
"logits/rejected": -2.15625,
"logps/chosen": -1.890625,
"logps/rejected": -2.0625,
"loss": 1.4891,
"nll_loss": 1.4140625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.189453125,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.2060546875,
"step": 110
},
{
"epoch": 0.09,
"grad_norm": 5.712206786453907,
"learning_rate": 2.3622047244094486e-07,
"log_odds_chosen": 0.14680786430835724,
"log_odds_ratio": -0.7562500238418579,
"logits/chosen": -2.078125,
"logits/rejected": -2.15625,
"logps/chosen": -1.9375,
"logps/rejected": -2.03125,
"loss": 1.4901,
"nll_loss": 1.34375,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.193359375,
"rewards/margins": 0.00994873046875,
"rewards/rejected": -0.2041015625,
"step": 120
},
{
"epoch": 0.1,
"grad_norm": 5.959474046553222,
"learning_rate": 2.559055118110236e-07,
"log_odds_chosen": 0.2553772032260895,
"log_odds_ratio": -0.6973632574081421,
"logits/chosen": -2.15625,
"logits/rejected": -2.21875,
"logps/chosen": -1.7890625,
"logps/rejected": -1.9921875,
"loss": 1.497,
"nll_loss": 1.3828125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1787109375,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.19921875,
"step": 130
},
{
"epoch": 0.11,
"grad_norm": 5.960730803694658,
"learning_rate": 2.7559055118110235e-07,
"log_odds_chosen": 0.16423340141773224,
"log_odds_ratio": -0.718457043170929,
"logits/chosen": -2.15625,
"logits/rejected": -2.21875,
"logps/chosen": -1.765625,
"logps/rejected": -1.8984375,
"loss": 1.5228,
"nll_loss": 1.453125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.1767578125,
"rewards/margins": 0.0133056640625,
"rewards/rejected": -0.189453125,
"step": 140
},
{
"epoch": 0.12,
"grad_norm": 4.558212884083645,
"learning_rate": 2.9527559055118104e-07,
"log_odds_chosen": 0.24582520127296448,
"log_odds_ratio": -0.671191394329071,
"logits/chosen": -2.125,
"logits/rejected": -2.1875,
"logps/chosen": -1.546875,
"logps/rejected": -1.7421875,
"loss": 1.4124,
"nll_loss": 1.3125,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.154296875,
"rewards/margins": 0.0191650390625,
"rewards/rejected": -0.173828125,
"step": 150
},
{
"epoch": 0.13,
"grad_norm": 3.4831822067708686,
"learning_rate": 3.1496062992125984e-07,
"log_odds_chosen": 0.17824706435203552,
"log_odds_ratio": -0.69384765625,
"logits/chosen": -2.25,
"logits/rejected": -2.28125,
"logps/chosen": -1.671875,
"logps/rejected": -1.8203125,
"loss": 1.4297,
"nll_loss": 1.390625,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.1669921875,
"rewards/margins": 0.0145263671875,
"rewards/rejected": -0.181640625,
"step": 160
},
{
"epoch": 0.13,
"grad_norm": 4.034138231637428,
"learning_rate": 3.346456692913386e-07,
"log_odds_chosen": 0.12167968600988388,
"log_odds_ratio": -0.7186523675918579,
"logits/chosen": -2.1875,
"logits/rejected": -2.296875,
"logps/chosen": -1.5625,
"logps/rejected": -1.6640625,
"loss": 1.3835,
"nll_loss": 1.3203125,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.15625,
"rewards/margins": 0.01007080078125,
"rewards/rejected": -0.166015625,
"step": 170
},
{
"epoch": 0.14,
"grad_norm": 4.146506823609489,
"learning_rate": 3.5433070866141727e-07,
"log_odds_chosen": 0.13695068657398224,
"log_odds_ratio": -0.719433605670929,
"logits/chosen": -2.28125,
"logits/rejected": -2.40625,
"logps/chosen": -1.5546875,
"logps/rejected": -1.671875,
"loss": 1.4352,
"nll_loss": 1.3828125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.1552734375,
"rewards/margins": 0.0115966796875,
"rewards/rejected": -0.1669921875,
"step": 180
},
{
"epoch": 0.15,
"grad_norm": 3.5266117677319087,
"learning_rate": 3.7401574803149606e-07,
"log_odds_chosen": 0.17273560166358948,
"log_odds_ratio": -0.681640625,
"logits/chosen": -2.3125,
"logits/rejected": -2.4375,
"logps/chosen": -1.4765625,
"logps/rejected": -1.625,
"loss": 1.3599,
"nll_loss": 1.3046875,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.1474609375,
"rewards/margins": 0.01495361328125,
"rewards/rejected": -0.162109375,
"step": 190
},
{
"epoch": 0.16,
"grad_norm": 3.4327683095111072,
"learning_rate": 3.937007874015748e-07,
"log_odds_chosen": 0.11888428032398224,
"log_odds_ratio": -0.711230456829071,
"logits/chosen": -2.296875,
"logits/rejected": -2.40625,
"logps/chosen": -1.3828125,
"logps/rejected": -1.484375,
"loss": 1.3592,
"nll_loss": 1.28125,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.138671875,
"rewards/margins": 0.0093994140625,
"rewards/rejected": -0.1484375,
"step": 200
},
{
"epoch": 0.17,
"grad_norm": 2.811761812417915,
"learning_rate": 4.133858267716535e-07,
"log_odds_chosen": 0.13620606064796448,
"log_odds_ratio": -0.692089855670929,
"logits/chosen": -2.3125,
"logits/rejected": -2.4375,
"logps/chosen": -1.359375,
"logps/rejected": -1.453125,
"loss": 1.3822,
"nll_loss": 1.2421875,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.1357421875,
"rewards/margins": 0.010009765625,
"rewards/rejected": -0.1455078125,
"step": 210
},
{
"epoch": 0.17,
"grad_norm": 3.1783123040584775,
"learning_rate": 4.330708661417323e-07,
"log_odds_chosen": 0.23652343451976776,
"log_odds_ratio": -0.656054675579071,
"logits/chosen": -2.3125,
"logits/rejected": -2.4375,
"logps/chosen": -1.296875,
"logps/rejected": -1.46875,
"loss": 1.3022,
"nll_loss": 1.2265625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.12890625,
"rewards/margins": 0.017578125,
"rewards/rejected": -0.146484375,
"step": 220
},
{
"epoch": 0.18,
"grad_norm": 3.398094645144472,
"learning_rate": 4.52755905511811e-07,
"log_odds_chosen": 0.10042724758386612,
"log_odds_ratio": -0.737500011920929,
"logits/chosen": -2.421875,
"logits/rejected": -2.546875,
"logps/chosen": -1.265625,
"logps/rejected": -1.3359375,
"loss": 1.3118,
"nll_loss": 1.21875,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.126953125,
"rewards/margins": 0.007049560546875,
"rewards/rejected": -0.1337890625,
"step": 230
},
{
"epoch": 0.19,
"grad_norm": 3.0602456337138735,
"learning_rate": 4.7244094488188973e-07,
"log_odds_chosen": 0.05767212063074112,
"log_odds_ratio": -0.7372070550918579,
"logits/chosen": -2.328125,
"logits/rejected": -2.4375,
"logps/chosen": -1.359375,
"logps/rejected": -1.40625,
"loss": 1.3639,
"nll_loss": 1.3359375,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.1357421875,
"rewards/margins": 0.0050048828125,
"rewards/rejected": -0.140625,
"step": 240
},
{
"epoch": 0.2,
"grad_norm": 3.1390537608793543,
"learning_rate": 4.921259842519685e-07,
"log_odds_chosen": 0.16054077446460724,
"log_odds_ratio": -0.6732422113418579,
"logits/chosen": -2.34375,
"logits/rejected": -2.40625,
"logps/chosen": -1.2421875,
"logps/rejected": -1.3515625,
"loss": 1.3024,
"nll_loss": 1.234375,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1240234375,
"rewards/margins": 0.01104736328125,
"rewards/rejected": -0.134765625,
"step": 250
},
{
"epoch": 0.2,
"grad_norm": 2.518997318792438,
"learning_rate": 4.999914863146575e-07,
"log_odds_chosen": 0.16718749701976776,
"log_odds_ratio": -0.6884765625,
"logits/chosen": -2.359375,
"logits/rejected": -2.5,
"logps/chosen": -1.234375,
"logps/rejected": -1.3359375,
"loss": 1.3314,
"nll_loss": 1.234375,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.12353515625,
"rewards/margins": 0.01019287109375,
"rewards/rejected": -0.1337890625,
"step": 260
},
{
"epoch": 0.21,
"grad_norm": 2.798537540317038,
"learning_rate": 4.999394603374641e-07,
"log_odds_chosen": 0.19011840224266052,
"log_odds_ratio": -0.676562488079071,
"logits/chosen": -2.359375,
"logits/rejected": -2.46875,
"logps/chosen": -1.203125,
"logps/rejected": -1.3515625,
"loss": 1.2872,
"nll_loss": 1.1875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.12060546875,
"rewards/margins": 0.01422119140625,
"rewards/rejected": -0.134765625,
"step": 270
},
{
"epoch": 0.22,
"grad_norm": 2.8758259093431437,
"learning_rate": 4.99840148039188e-07,
"log_odds_chosen": 0.29682618379592896,
"log_odds_ratio": -0.637890636920929,
"logits/chosen": -2.3125,
"logits/rejected": -2.5,
"logps/chosen": -1.203125,
"logps/rejected": -1.4140625,
"loss": 1.2201,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.1201171875,
"rewards/margins": 0.021484375,
"rewards/rejected": -0.1416015625,
"step": 280
},
{
"epoch": 0.23,
"grad_norm": 2.695897803134525,
"learning_rate": 4.996935682088318e-07,
"log_odds_chosen": 0.22941894829273224,
"log_odds_ratio": -0.6490234136581421,
"logits/chosen": -2.359375,
"logits/rejected": -2.46875,
"logps/chosen": -1.21875,
"logps/rejected": -1.375,
"loss": 1.2819,
"nll_loss": 1.1875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1220703125,
"rewards/margins": 0.015380859375,
"rewards/rejected": -0.1376953125,
"step": 290
},
{
"epoch": 0.24,
"grad_norm": 2.8982419601141585,
"learning_rate": 4.994997485779947e-07,
"log_odds_chosen": 0.23259887099266052,
"log_odds_ratio": -0.666796863079071,
"logits/chosen": -2.25,
"logits/rejected": -2.375,
"logps/chosen": -1.1796875,
"logps/rejected": -1.34375,
"loss": 1.2759,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.1181640625,
"rewards/margins": 0.015869140625,
"rewards/rejected": -0.1337890625,
"step": 300
},
{
"epoch": 0.24,
"grad_norm": 2.9056748531961585,
"learning_rate": 4.992587258156258e-07,
"log_odds_chosen": 0.17786864936351776,
"log_odds_ratio": -0.6656249761581421,
"logits/chosen": -2.265625,
"logits/rejected": -2.359375,
"logps/chosen": -1.21875,
"logps/rejected": -1.34375,
"loss": 1.2812,
"nll_loss": 1.2109375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.12158203125,
"rewards/margins": 0.01324462890625,
"rewards/rejected": -0.134765625,
"step": 310
},
{
"epoch": 0.25,
"grad_norm": 2.596709437423632,
"learning_rate": 4.989705455210862e-07,
"log_odds_chosen": 0.21816405653953552,
"log_odds_ratio": -0.662890613079071,
"logits/chosen": -2.3125,
"logits/rejected": -2.421875,
"logps/chosen": -1.1875,
"logps/rejected": -1.3515625,
"loss": 1.2184,
"nll_loss": 1.109375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.11865234375,
"rewards/margins": 0.016357421875,
"rewards/rejected": -0.134765625,
"step": 320
},
{
"epoch": 0.26,
"grad_norm": 2.476759669633908,
"learning_rate": 4.986352622155222e-07,
"log_odds_chosen": 0.17100830376148224,
"log_odds_ratio": -0.6903320550918579,
"logits/chosen": -2.359375,
"logits/rejected": -2.46875,
"logps/chosen": -1.1953125,
"logps/rejected": -1.3046875,
"loss": 1.2865,
"nll_loss": 1.21875,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.119140625,
"rewards/margins": 0.01129150390625,
"rewards/rejected": -0.130859375,
"step": 330
},
{
"epoch": 0.27,
"grad_norm": 2.628189970289334,
"learning_rate": 4.98252939331551e-07,
"log_odds_chosen": 0.17416992783546448,
"log_odds_ratio": -0.697070300579071,
"logits/chosen": -2.328125,
"logits/rejected": -2.4375,
"logps/chosen": -1.21875,
"logps/rejected": -1.34375,
"loss": 1.2797,
"nll_loss": 1.2109375,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.12158203125,
"rewards/margins": 0.01318359375,
"rewards/rejected": -0.134765625,
"step": 340
},
{
"epoch": 0.28,
"grad_norm": 2.6295659881126943,
"learning_rate": 4.978236492012589e-07,
"log_odds_chosen": 0.02346191368997097,
"log_odds_ratio": -0.755078136920929,
"logits/chosen": -2.421875,
"logits/rejected": -2.421875,
"logps/chosen": -1.2578125,
"logps/rejected": -1.2734375,
"loss": 1.3077,
"nll_loss": 1.25,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1259765625,
"rewards/margins": 0.00180816650390625,
"rewards/rejected": -0.126953125,
"step": 350
},
{
"epoch": 0.28,
"grad_norm": 2.959773704803729,
"learning_rate": 4.973474730425173e-07,
"log_odds_chosen": 0.19099120795726776,
"log_odds_ratio": -0.6749023199081421,
"logits/chosen": -2.25,
"logits/rejected": -2.359375,
"logps/chosen": -1.1875,
"logps/rejected": -1.3125,
"loss": 1.2568,
"nll_loss": 1.171875,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.11865234375,
"rewards/margins": 0.0125732421875,
"rewards/rejected": -0.1318359375,
"step": 360
},
{
"epoch": 0.29,
"grad_norm": 2.613182586833654,
"learning_rate": 4.968245009436167e-07,
"log_odds_chosen": 0.10064697265625,
"log_odds_ratio": -0.72216796875,
"logits/chosen": -2.375,
"logits/rejected": -2.4375,
"logps/chosen": -1.28125,
"logps/rejected": -1.3671875,
"loss": 1.2944,
"nll_loss": 1.2890625,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.1279296875,
"rewards/margins": 0.00860595703125,
"rewards/rejected": -0.13671875,
"step": 370
},
{
"epoch": 0.3,
"grad_norm": 2.8683189618015126,
"learning_rate": 4.962548318462231e-07,
"log_odds_chosen": 0.19755859673023224,
"log_odds_ratio": -0.6724609136581421,
"logits/chosen": -2.296875,
"logits/rejected": -2.359375,
"logps/chosen": -1.171875,
"logps/rejected": -1.3046875,
"loss": 1.2778,
"nll_loss": 1.2265625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.11669921875,
"rewards/margins": 0.01373291015625,
"rewards/rejected": -0.130859375,
"step": 380
},
{
"epoch": 0.31,
"grad_norm": 2.9807597681868305,
"learning_rate": 4.95638573526659e-07,
"log_odds_chosen": 0.14707031846046448,
"log_odds_ratio": -0.7005859613418579,
"logits/chosen": -2.28125,
"logits/rejected": -2.453125,
"logps/chosen": -1.171875,
"logps/rejected": -1.2734375,
"loss": 1.2844,
"nll_loss": 1.203125,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.11767578125,
"rewards/margins": 0.0098876953125,
"rewards/rejected": -0.126953125,
"step": 390
},
{
"epoch": 0.32,
"grad_norm": 2.655124275329291,
"learning_rate": 4.949758425755127e-07,
"log_odds_chosen": 0.10791015625,
"log_odds_ratio": -0.7230468988418579,
"logits/chosen": -2.1875,
"logits/rejected": -2.28125,
"logps/chosen": -1.203125,
"logps/rejected": -1.28125,
"loss": 1.2902,
"nll_loss": 1.1875,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1201171875,
"rewards/margins": 0.00762939453125,
"rewards/rejected": -0.1279296875,
"step": 400
},
{
"epoch": 0.32,
"grad_norm": 2.4371959032830293,
"learning_rate": 4.94266764375581e-07,
"log_odds_chosen": 0.20887450873851776,
"log_odds_ratio": -0.6651366949081421,
"logits/chosen": -2.21875,
"logits/rejected": -2.34375,
"logps/chosen": -1.1328125,
"logps/rejected": -1.2890625,
"loss": 1.2674,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.01544189453125,
"rewards/rejected": -0.12890625,
"step": 410
},
{
"epoch": 0.33,
"grad_norm": 2.729848906556158,
"learning_rate": 4.935114730781475e-07,
"log_odds_chosen": 0.27691650390625,
"log_odds_ratio": -0.6527343988418579,
"logits/chosen": -2.140625,
"logits/rejected": -2.296875,
"logps/chosen": -1.1953125,
"logps/rejected": -1.3828125,
"loss": 1.2544,
"nll_loss": 1.1875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11962890625,
"rewards/margins": 0.018798828125,
"rewards/rejected": -0.138671875,
"step": 420
},
{
"epoch": 0.34,
"grad_norm": 2.50393555238819,
"learning_rate": 4.927101115776026e-07,
"log_odds_chosen": 0.14921875298023224,
"log_odds_ratio": -0.702343761920929,
"logits/chosen": -2.28125,
"logits/rejected": -2.34375,
"logps/chosen": -1.203125,
"logps/rejected": -1.3203125,
"loss": 1.2471,
"nll_loss": 1.2265625,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.12060546875,
"rewards/margins": 0.01129150390625,
"rewards/rejected": -0.1318359375,
"step": 430
},
{
"epoch": 0.35,
"grad_norm": 2.7483117165130744,
"learning_rate": 4.918628314844088e-07,
"log_odds_chosen": 0.04735717922449112,
"log_odds_ratio": -0.749218761920929,
"logits/chosen": -2.265625,
"logits/rejected": -2.359375,
"logps/chosen": -1.2109375,
"logps/rejected": -1.25,
"loss": 1.2351,
"nll_loss": 1.21875,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.12060546875,
"rewards/margins": 0.0040283203125,
"rewards/rejected": -0.12451171875,
"step": 440
},
{
"epoch": 0.35,
"grad_norm": 2.9881422727710887,
"learning_rate": 4.909697930964179e-07,
"log_odds_chosen": 0.16976317763328552,
"log_odds_ratio": -0.6986328363418579,
"logits/chosen": -2.25,
"logits/rejected": -2.40625,
"logps/chosen": -1.1953125,
"logps/rejected": -1.3125,
"loss": 1.2467,
"nll_loss": 1.2421875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11962890625,
"rewards/margins": 0.01153564453125,
"rewards/rejected": -0.130859375,
"step": 450
},
{
"epoch": 0.36,
"grad_norm": 2.6175879243996363,
"learning_rate": 4.900311653685437e-07,
"log_odds_chosen": 0.20297852158546448,
"log_odds_ratio": -0.669140636920929,
"logits/chosen": -2.296875,
"logits/rejected": -2.359375,
"logps/chosen": -1.1640625,
"logps/rejected": -1.3125,
"loss": 1.2102,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.1162109375,
"rewards/margins": 0.01513671875,
"rewards/rejected": -0.1318359375,
"step": 460
},
{
"epoch": 0.37,
"grad_norm": 2.2300783745527317,
"learning_rate": 4.890471258807968e-07,
"log_odds_chosen": 0.19609375298023224,
"log_odds_ratio": -0.6773437261581421,
"logits/chosen": -2.1875,
"logits/rejected": -2.265625,
"logps/chosen": -1.1875,
"logps/rejected": -1.296875,
"loss": 1.2387,
"nll_loss": 1.203125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.11865234375,
"rewards/margins": 0.01116943359375,
"rewards/rejected": -0.1298828125,
"step": 470
},
{
"epoch": 0.38,
"grad_norm": 2.9471805049826094,
"learning_rate": 4.880178608046894e-07,
"log_odds_chosen": 0.14970703423023224,
"log_odds_ratio": -0.7040039300918579,
"logits/chosen": -2.171875,
"logits/rejected": -2.203125,
"logps/chosen": -1.140625,
"logps/rejected": -1.25,
"loss": 1.2675,
"nll_loss": 1.2109375,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.1142578125,
"rewards/margins": 0.0108642578125,
"rewards/rejected": -0.125,
"step": 480
},
{
"epoch": 0.39,
"grad_norm": 2.7162956655728623,
"learning_rate": 4.869435648680116e-07,
"log_odds_chosen": 0.15129394829273224,
"log_odds_ratio": -0.6802734136581421,
"logits/chosen": -2.1875,
"logits/rejected": -2.28125,
"logps/chosen": -1.1171875,
"logps/rejected": -1.2265625,
"loss": 1.1889,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.01043701171875,
"rewards/rejected": -0.12255859375,
"step": 490
},
{
"epoch": 0.39,
"grad_norm": 4.459550453771863,
"learning_rate": 4.858244413179923e-07,
"log_odds_chosen": 0.2993102967739105,
"log_odds_ratio": -0.64111328125,
"logits/chosen": -2.171875,
"logits/rejected": -2.25,
"logps/chosen": -1.1171875,
"logps/rejected": -1.3125,
"loss": 1.2095,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.0198974609375,
"rewards/rejected": -0.1318359375,
"step": 500
},
{
"epoch": 0.4,
"grad_norm": 3.2619422051015836,
"learning_rate": 4.846607018828449e-07,
"log_odds_chosen": 0.210205078125,
"log_odds_ratio": -0.673632800579071,
"logits/chosen": -2.21875,
"logits/rejected": -2.421875,
"logps/chosen": -1.1875,
"logps/rejected": -1.3203125,
"loss": 1.2653,
"nll_loss": 1.21875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.119140625,
"rewards/margins": 0.013427734375,
"rewards/rejected": -0.1328125,
"step": 510
},
{
"epoch": 0.41,
"grad_norm": 2.7715133106941576,
"learning_rate": 4.834525667317121e-07,
"log_odds_chosen": 0.22309570014476776,
"log_odds_ratio": -0.66357421875,
"logits/chosen": -2.0625,
"logits/rejected": -2.234375,
"logps/chosen": -1.15625,
"logps/rejected": -1.3046875,
"loss": 1.2614,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.115234375,
"rewards/margins": 0.0150146484375,
"rewards/rejected": -0.1298828125,
"step": 520
},
{
"epoch": 0.42,
"grad_norm": 2.4609772540284593,
"learning_rate": 4.822002644330101e-07,
"log_odds_chosen": 0.19017334282398224,
"log_odds_ratio": -0.711718738079071,
"logits/chosen": -2.171875,
"logits/rejected": -2.296875,
"logps/chosen": -1.21875,
"logps/rejected": -1.3515625,
"loss": 1.2548,
"nll_loss": 1.2421875,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.12158203125,
"rewards/margins": 0.0137939453125,
"rewards/rejected": -0.1357421875,
"step": 530
},
{
"epoch": 0.43,
"grad_norm": 2.645873831673924,
"learning_rate": 4.809040319111865e-07,
"log_odds_chosen": 0.12646484375,
"log_odds_ratio": -0.713671863079071,
"logits/chosen": -2.09375,
"logits/rejected": -2.25,
"logps/chosen": -1.1484375,
"logps/rejected": -1.2421875,
"loss": 1.2402,
"nll_loss": 1.15625,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.11474609375,
"rewards/margins": 0.0096435546875,
"rewards/rejected": -0.12451171875,
"step": 540
},
{
"epoch": 0.43,
"grad_norm": 2.45398616162251,
"learning_rate": 4.795641144018965e-07,
"log_odds_chosen": 0.09213867038488388,
"log_odds_ratio": -0.74267578125,
"logits/chosen": -2.203125,
"logits/rejected": -2.203125,
"logps/chosen": -1.2109375,
"logps/rejected": -1.28125,
"loss": 1.2755,
"nll_loss": 1.25,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.12109375,
"rewards/margins": 0.006805419921875,
"rewards/rejected": -0.1279296875,
"step": 550
},
{
"epoch": 0.44,
"grad_norm": 2.5512768402172683,
"learning_rate": 4.781807654056053e-07,
"log_odds_chosen": 0.214599609375,
"log_odds_ratio": -0.692675769329071,
"logits/chosen": -2.109375,
"logits/rejected": -2.21875,
"logps/chosen": -1.125,
"logps/rejected": -1.2890625,
"loss": 1.2303,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.11279296875,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.12890625,
"step": 560
},
{
"epoch": 0.45,
"grad_norm": 2.411346604585139,
"learning_rate": 4.7675424663962933e-07,
"log_odds_chosen": 0.165435791015625,
"log_odds_ratio": -0.700390636920929,
"logits/chosen": -2.140625,
"logits/rejected": -2.21875,
"logps/chosen": -1.2109375,
"logps/rejected": -1.3125,
"loss": 1.2571,
"nll_loss": 1.234375,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.12109375,
"rewards/margins": 0.010498046875,
"rewards/rejected": -0.1318359375,
"step": 570
},
{
"epoch": 0.46,
"grad_norm": 2.48077566767054,
"learning_rate": 4.752848279886212e-07,
"log_odds_chosen": 0.207489013671875,
"log_odds_ratio": -0.66748046875,
"logits/chosen": -2.0625,
"logits/rejected": -2.234375,
"logps/chosen": -1.1015625,
"logps/rejected": -1.2421875,
"loss": 1.1978,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.0145263671875,
"rewards/rejected": -0.12451171875,
"step": 580
},
{
"epoch": 0.46,
"grad_norm": 2.7897141919738786,
"learning_rate": 4.7377278745350984e-07,
"log_odds_chosen": 0.2662353515625,
"log_odds_ratio": -0.663867175579071,
"logits/chosen": -2.03125,
"logits/rejected": -2.15625,
"logps/chosen": -1.046875,
"logps/rejected": -1.1875,
"loss": 1.2309,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.0147705078125,
"rewards/rejected": -0.119140625,
"step": 590
},
{
"epoch": 0.47,
"grad_norm": 2.758617809500896,
"learning_rate": 4.7221841109890506e-07,
"log_odds_chosen": 0.22445067763328552,
"log_odds_ratio": -0.695117175579071,
"logits/chosen": -2.0625,
"logits/rejected": -2.140625,
"logps/chosen": -1.1015625,
"logps/rejected": -1.2578125,
"loss": 1.2281,
"nll_loss": 1.125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.1259765625,
"step": 600
},
{
"epoch": 0.48,
"grad_norm": 2.6514784587831204,
"learning_rate": 4.706219929989771e-07,
"log_odds_chosen": 0.2147216796875,
"log_odds_ratio": -0.660937488079071,
"logits/chosen": -2.078125,
"logits/rejected": -2.203125,
"logps/chosen": -1.09375,
"logps/rejected": -1.234375,
"loss": 1.2094,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.0140380859375,
"rewards/rejected": -0.12353515625,
"step": 610
},
{
"epoch": 0.49,
"grad_norm": 2.630207221232529,
"learning_rate": 4.6898383518182007e-07,
"log_odds_chosen": 0.19202271103858948,
"log_odds_ratio": -0.6786133050918579,
"logits/chosen": -2.046875,
"logits/rejected": -2.125,
"logps/chosen": -1.0859375,
"logps/rejected": -1.2265625,
"loss": 1.2307,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.014404296875,
"rewards/rejected": -0.12255859375,
"step": 620
},
{
"epoch": 0.5,
"grad_norm": 2.9225816829730427,
"learning_rate": 4.67304247572311e-07,
"log_odds_chosen": 0.2799316346645355,
"log_odds_ratio": -0.659960925579071,
"logits/chosen": -2.09375,
"logits/rejected": -2.1875,
"logps/chosen": -1.1171875,
"logps/rejected": -1.3046875,
"loss": 1.2257,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.018310546875,
"rewards/rejected": -0.130859375,
"step": 630
},
{
"epoch": 0.5,
"grad_norm": 2.331606515139032,
"learning_rate": 4.65583547933475e-07,
"log_odds_chosen": 0.16041259467601776,
"log_odds_ratio": -0.713085949420929,
"logits/chosen": -2.0,
"logits/rejected": -2.140625,
"logps/chosen": -1.1171875,
"logps/rejected": -1.21875,
"loss": 1.21,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.01031494140625,
"rewards/rejected": -0.1220703125,
"step": 640
},
{
"epoch": 0.51,
"grad_norm": 2.541498557632385,
"learning_rate": 4.6382206180636705e-07,
"log_odds_chosen": 0.12631836533546448,
"log_odds_ratio": -0.7256835699081421,
"logits/chosen": -2.0625,
"logits/rejected": -2.1875,
"logps/chosen": -1.140625,
"logps/rejected": -1.25,
"loss": 1.2675,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1142578125,
"rewards/margins": 0.01019287109375,
"rewards/rejected": -0.12451171875,
"step": 650
},
{
"epoch": 0.52,
"grad_norm": 3.2675127960880586,
"learning_rate": 4.620201224484827e-07,
"log_odds_chosen": 0.2113037109375,
"log_odds_ratio": -0.658496081829071,
"logits/chosen": -2.0,
"logits/rejected": -2.078125,
"logps/chosen": -1.078125,
"logps/rejected": -1.2265625,
"loss": 1.209,
"nll_loss": 1.0703125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.01470947265625,
"rewards/rejected": -0.12255859375,
"step": 660
},
{
"epoch": 0.53,
"grad_norm": 2.5735852092457248,
"learning_rate": 4.601780707707087e-07,
"log_odds_chosen": 0.25184327363967896,
"log_odds_ratio": -0.679394543170929,
"logits/chosen": -2.015625,
"logits/rejected": -2.109375,
"logps/chosen": -1.109375,
"logps/rejected": -1.28125,
"loss": 1.1888,
"nll_loss": 1.15625,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.1279296875,
"step": 670
},
{
"epoch": 0.54,
"grad_norm": 2.678233631526468,
"learning_rate": 4.5829625527282554e-07,
"log_odds_chosen": 0.15609130263328552,
"log_odds_ratio": -0.700488269329071,
"logits/chosen": -2.03125,
"logits/rejected": -2.125,
"logps/chosen": -1.125,
"logps/rejected": -1.234375,
"loss": 1.2431,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.11279296875,
"rewards/margins": 0.01080322265625,
"rewards/rejected": -0.12353515625,
"step": 680
},
{
"epoch": 0.54,
"grad_norm": 2.423777152319806,
"learning_rate": 4.5637503197757474e-07,
"log_odds_chosen": 0.089111328125,
"log_odds_ratio": -0.746874988079071,
"logits/chosen": -1.890625,
"logits/rejected": -2.015625,
"logps/chosen": -1.140625,
"logps/rejected": -1.2109375,
"loss": 1.1964,
"nll_loss": 1.140625,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.007568359375,
"rewards/rejected": -0.12158203125,
"step": 690
},
{
"epoch": 0.55,
"grad_norm": 3.0765189053391633,
"learning_rate": 4.5441476436330204e-07,
"log_odds_chosen": 0.27679443359375,
"log_odds_ratio": -0.677929699420929,
"logits/chosen": -2.078125,
"logits/rejected": -2.21875,
"logps/chosen": -1.109375,
"logps/rejected": -1.296875,
"loss": 1.2492,
"nll_loss": 1.171875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.018310546875,
"rewards/rejected": -0.1298828125,
"step": 700
},
{
"epoch": 0.56,
"grad_norm": 2.6130205345904334,
"learning_rate": 4.5241582329519105e-07,
"log_odds_chosen": 0.150299072265625,
"log_odds_ratio": -0.7164062261581421,
"logits/chosen": -1.984375,
"logits/rejected": -2.109375,
"logps/chosen": -1.125,
"logps/rejected": -1.2421875,
"loss": 1.2128,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11279296875,
"rewards/margins": 0.0115966796875,
"rewards/rejected": -0.12451171875,
"step": 710
},
{
"epoch": 0.57,
"grad_norm": 2.710305930916119,
"learning_rate": 4.503785869550984e-07,
"log_odds_chosen": 0.17982177436351776,
"log_odds_ratio": -0.708300769329071,
"logits/chosen": -2.046875,
"logits/rejected": -2.078125,
"logps/chosen": -1.1796875,
"logps/rejected": -1.3203125,
"loss": 1.2557,
"nll_loss": 1.2109375,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.1181640625,
"rewards/margins": 0.01446533203125,
"rewards/rejected": -0.1328125,
"step": 720
},
{
"epoch": 0.58,
"grad_norm": 2.784125203819912,
"learning_rate": 4.4830344077000535e-07,
"log_odds_chosen": 0.17173461616039276,
"log_odds_ratio": -0.70556640625,
"logits/chosen": -1.921875,
"logits/rejected": -2.109375,
"logps/chosen": -1.1328125,
"logps/rejected": -1.2578125,
"loss": 1.2264,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.012451171875,
"rewards/rejected": -0.1259765625,
"step": 730
},
{
"epoch": 0.58,
"grad_norm": 2.969932216303278,
"learning_rate": 4.461907773390984e-07,
"log_odds_chosen": 0.24876098334789276,
"log_odds_ratio": -0.671191394329071,
"logits/chosen": -2.0625,
"logits/rejected": -2.171875,
"logps/chosen": -1.1171875,
"logps/rejected": -1.2890625,
"loss": 1.2521,
"nll_loss": 1.203125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.0169677734375,
"rewards/rejected": -0.12890625,
"step": 740
},
{
"epoch": 0.59,
"grad_norm": 2.3389098001594553,
"learning_rate": 4.4404099635949297e-07,
"log_odds_chosen": 0.20144042372703552,
"log_odds_ratio": -0.679394543170929,
"logits/chosen": -1.984375,
"logits/rejected": -2.0625,
"logps/chosen": -1.09375,
"logps/rejected": -1.2265625,
"loss": 1.1809,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.109375,
"rewards/margins": 0.012939453125,
"rewards/rejected": -0.12255859375,
"step": 750
},
{
"epoch": 0.6,
"grad_norm": 3.0086699300119872,
"learning_rate": 4.418545045506144e-07,
"log_odds_chosen": 0.10061035305261612,
"log_odds_ratio": -0.7476562261581421,
"logits/chosen": -2.0625,
"logits/rejected": -2.15625,
"logps/chosen": -1.0703125,
"logps/rejected": -1.140625,
"loss": 1.239,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.007415771484375,
"rewards/rejected": -0.1142578125,
"step": 760
},
{
"epoch": 0.61,
"grad_norm": 2.2320233236738143,
"learning_rate": 4.3963171557725004e-07,
"log_odds_chosen": 0.0516357421875,
"log_odds_ratio": -0.776171863079071,
"logits/chosen": -2.015625,
"logits/rejected": -2.109375,
"logps/chosen": -1.125,
"logps/rejected": -1.1796875,
"loss": 1.2253,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.00567626953125,
"rewards/rejected": -0.1181640625,
"step": 770
},
{
"epoch": 0.61,
"grad_norm": 2.5006901009181877,
"learning_rate": 4.3737304997128765e-07,
"log_odds_chosen": 0.2787841856479645,
"log_odds_ratio": -0.668652355670929,
"logits/chosen": -1.96875,
"logits/rejected": -2.078125,
"logps/chosen": -1.125,
"logps/rejected": -1.3125,
"loss": 1.2413,
"nll_loss": 1.203125,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11279296875,
"rewards/margins": 0.0184326171875,
"rewards/rejected": -0.130859375,
"step": 780
},
{
"epoch": 0.62,
"grad_norm": 2.7387563755483,
"learning_rate": 4.350789350521548e-07,
"log_odds_chosen": 0.19570311903953552,
"log_odds_ratio": -0.71337890625,
"logits/chosen": -1.9921875,
"logits/rejected": -2.078125,
"logps/chosen": -1.0859375,
"logps/rejected": -1.234375,
"loss": 1.2163,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.014892578125,
"rewards/rejected": -0.12353515625,
"step": 790
},
{
"epoch": 0.63,
"grad_norm": 2.898794879634056,
"learning_rate": 4.32749804845973e-07,
"log_odds_chosen": 0.16457518935203552,
"log_odds_ratio": -0.7186523675918579,
"logits/chosen": -2.03125,
"logits/rejected": -2.15625,
"logps/chosen": -1.1484375,
"logps/rejected": -1.25,
"loss": 1.2604,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.115234375,
"rewards/margins": 0.0098876953125,
"rewards/rejected": -0.125,
"step": 800
},
{
"epoch": 0.64,
"grad_norm": 2.5762145094524973,
"learning_rate": 4.303861000034449e-07,
"log_odds_chosen": 0.17528076469898224,
"log_odds_ratio": -0.6820312738418579,
"logits/chosen": -1.953125,
"logits/rejected": -2.03125,
"logps/chosen": -1.0703125,
"logps/rejected": -1.1875,
"loss": 1.1942,
"nll_loss": 1.0625,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.01123046875,
"rewards/rejected": -0.1181640625,
"step": 810
},
{
"epoch": 0.65,
"grad_norm": 2.865127283376686,
"learning_rate": 4.2798826771648635e-07,
"log_odds_chosen": 0.24028930068016052,
"log_odds_ratio": -0.6766601800918579,
"logits/chosen": -1.9375,
"logits/rejected": -2.078125,
"logps/chosen": -1.125,
"logps/rejected": -1.296875,
"loss": 1.2299,
"nll_loss": 1.171875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.1298828125,
"step": 820
},
{
"epoch": 0.65,
"grad_norm": 2.446066437808379,
"learning_rate": 4.2555676163362205e-07,
"log_odds_chosen": 0.21907348930835724,
"log_odds_ratio": -0.6822265386581421,
"logits/chosen": -2.015625,
"logits/rejected": -2.140625,
"logps/chosen": -1.1640625,
"logps/rejected": -1.296875,
"loss": 1.2354,
"nll_loss": 1.21875,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.1162109375,
"rewards/margins": 0.0133056640625,
"rewards/rejected": -0.12890625,
"step": 830
},
{
"epoch": 0.66,
"grad_norm": 2.372183587847385,
"learning_rate": 4.230920417741589e-07,
"log_odds_chosen": 0.27910155057907104,
"log_odds_ratio": -0.666308581829071,
"logits/chosen": -2.0625,
"logits/rejected": -2.203125,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2734375,
"loss": 1.2168,
"nll_loss": 1.125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.020263671875,
"rewards/rejected": -0.126953125,
"step": 840
},
{
"epoch": 0.67,
"grad_norm": 2.9436571486068623,
"learning_rate": 4.205945744411551e-07,
"log_odds_chosen": 0.12534180283546448,
"log_odds_ratio": -0.73974609375,
"logits/chosen": -2.046875,
"logits/rejected": -2.15625,
"logps/chosen": -1.078125,
"logps/rejected": -1.1640625,
"loss": 1.2815,
"nll_loss": 1.203125,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.00909423828125,
"rewards/rejected": -0.11669921875,
"step": 850
},
{
"epoch": 0.68,
"grad_norm": 2.8833427572143133,
"learning_rate": 4.1806483213319877e-07,
"log_odds_chosen": 0.23845215141773224,
"log_odds_ratio": -0.6595703363418579,
"logits/chosen": -1.859375,
"logits/rejected": -2.046875,
"logps/chosen": -1.1015625,
"logps/rejected": -1.265625,
"loss": 1.2544,
"nll_loss": 1.171875,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0167236328125,
"rewards/rejected": -0.126953125,
"step": 860
},
{
"epoch": 0.69,
"grad_norm": 2.715581746962796,
"learning_rate": 4.155032934550165e-07,
"log_odds_chosen": 0.16794434189796448,
"log_odds_ratio": -0.6874023675918579,
"logits/chosen": -1.9296875,
"logits/rejected": -2.046875,
"logps/chosen": -1.078125,
"logps/rejected": -1.1640625,
"loss": 1.1984,
"nll_loss": 1.125,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.00909423828125,
"rewards/rejected": -0.11669921875,
"step": 870
},
{
"epoch": 0.69,
"grad_norm": 2.784822109898019,
"learning_rate": 4.129104430269248e-07,
"log_odds_chosen": 0.15845946967601776,
"log_odds_ratio": -0.693359375,
"logits/chosen": -1.9453125,
"logits/rejected": -2.09375,
"logps/chosen": -1.109375,
"logps/rejected": -1.21875,
"loss": 1.2345,
"nll_loss": 1.15625,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.0106201171875,
"rewards/rejected": -0.12158203125,
"step": 880
},
{
"epoch": 0.7,
"grad_norm": 2.5161836223908263,
"learning_rate": 4.102867713931448e-07,
"log_odds_chosen": 0.16597899794578552,
"log_odds_ratio": -0.6788085699081421,
"logits/chosen": -1.9765625,
"logits/rejected": -2.125,
"logps/chosen": -1.09375,
"logps/rejected": -1.1953125,
"loss": 1.2663,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.0096435546875,
"rewards/rejected": -0.119140625,
"step": 890
},
{
"epoch": 0.71,
"grad_norm": 2.644860041118969,
"learning_rate": 4.0763277492899504e-07,
"log_odds_chosen": 0.23768310248851776,
"log_odds_ratio": -0.6807616949081421,
"logits/chosen": -1.984375,
"logits/rejected": -2.109375,
"logps/chosen": -1.1328125,
"logps/rejected": -1.2890625,
"loss": 1.2307,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.11328125,
"rewards/margins": 0.0157470703125,
"rewards/rejected": -0.12890625,
"step": 900
},
{
"epoch": 0.72,
"grad_norm": 2.7157553266494503,
"learning_rate": 4.049489557469824e-07,
"log_odds_chosen": 0.15152588486671448,
"log_odds_ratio": -0.7015625238418579,
"logits/chosen": -1.7890625,
"logits/rejected": -1.96875,
"logps/chosen": -1.0625,
"logps/rejected": -1.1640625,
"loss": 1.168,
"nll_loss": 1.09375,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.01025390625,
"rewards/rejected": -0.1162109375,
"step": 910
},
{
"epoch": 0.72,
"grad_norm": 2.302289432995534,
"learning_rate": 4.0223582160180623e-07,
"log_odds_chosen": 0.13297119736671448,
"log_odds_ratio": -0.725781261920929,
"logits/chosen": -1.9375,
"logits/rejected": -2.0,
"logps/chosen": -1.125,
"logps/rejected": -1.21875,
"loss": 1.1737,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.00994873046875,
"rewards/rejected": -0.1220703125,
"step": 920
},
{
"epoch": 0.73,
"grad_norm": 2.6556730641084543,
"learning_rate": 3.9949388579429614e-07,
"log_odds_chosen": 0.00870361365377903,
"log_odds_ratio": -0.7632812261581421,
"logits/chosen": -1.8828125,
"logits/rejected": -2.046875,
"logps/chosen": -1.1015625,
"logps/rejected": -1.1015625,
"loss": 1.2113,
"nll_loss": 1.171875,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.10986328125,
"rewards/margins": -0.0003070831298828125,
"rewards/rejected": -0.10986328125,
"step": 930
},
{
"epoch": 0.74,
"grad_norm": 2.3947626659116406,
"learning_rate": 3.967236670742998e-07,
"log_odds_chosen": 0.22456054389476776,
"log_odds_ratio": -0.681347668170929,
"logits/chosen": -2.0,
"logits/rejected": -2.109375,
"logps/chosen": -1.1796875,
"logps/rejected": -1.3515625,
"loss": 1.2084,
"nll_loss": 1.203125,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.11767578125,
"rewards/margins": 0.0174560546875,
"rewards/rejected": -0.134765625,
"step": 940
},
{
"epoch": 0.75,
"grad_norm": 2.7830252945871896,
"learning_rate": 3.9392568954254023e-07,
"log_odds_chosen": 0.2349853515625,
"log_odds_ratio": -0.6742187738418579,
"logits/chosen": -1.90625,
"logits/rejected": -2.03125,
"logps/chosen": -1.140625,
"logps/rejected": -1.3125,
"loss": 1.2185,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.01708984375,
"rewards/rejected": -0.130859375,
"step": 950
},
{
"epoch": 0.76,
"grad_norm": 2.4217959208998723,
"learning_rate": 3.9110048255146043e-07,
"log_odds_chosen": 0.16409912705421448,
"log_odds_ratio": -0.693164050579071,
"logits/chosen": -2.046875,
"logits/rejected": -2.15625,
"logps/chosen": -1.109375,
"logps/rejected": -1.2109375,
"loss": 1.2102,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0107421875,
"rewards/rejected": -0.12158203125,
"step": 960
},
{
"epoch": 0.76,
"grad_norm": 2.7469080448706706,
"learning_rate": 3.882485806050748e-07,
"log_odds_chosen": 0.31447142362594604,
"log_odds_ratio": -0.639355480670929,
"logits/chosen": -1.9609375,
"logits/rejected": -2.078125,
"logps/chosen": -1.0625,
"logps/rejected": -1.2578125,
"loss": 1.2235,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.0194091796875,
"rewards/rejected": -0.125,
"step": 970
},
{
"epoch": 0.77,
"grad_norm": 2.596832510754079,
"learning_rate": 3.8537052325784573e-07,
"log_odds_chosen": 0.2929016053676605,
"log_odds_ratio": -0.650585949420929,
"logits/chosen": -1.96875,
"logits/rejected": -2.046875,
"logps/chosen": -1.1015625,
"logps/rejected": -1.3125,
"loss": 1.1857,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.130859375,
"step": 980
},
{
"epoch": 0.78,
"grad_norm": 2.7575190212441383,
"learning_rate": 3.824668550126046e-07,
"log_odds_chosen": 0.19545897841453552,
"log_odds_ratio": -0.6885741949081421,
"logits/chosen": -1.96875,
"logits/rejected": -2.0625,
"logps/chosen": -1.0703125,
"logps/rejected": -1.1953125,
"loss": 1.1889,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0123291015625,
"rewards/rejected": -0.119140625,
"step": 990
},
{
"epoch": 0.79,
"grad_norm": 3.592994219979355,
"learning_rate": 3.7953812521753643e-07,
"log_odds_chosen": 0.16755370795726776,
"log_odds_ratio": -0.6943359375,
"logits/chosen": -1.921875,
"logits/rejected": -2.0,
"logps/chosen": -1.0703125,
"logps/rejected": -1.171875,
"loss": 1.1494,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.0098876953125,
"rewards/rejected": -0.1171875,
"step": 1000
},
{
"epoch": 0.8,
"grad_norm": 2.5202564213089405,
"learning_rate": 3.7658488796224885e-07,
"log_odds_chosen": 0.11643066257238388,
"log_odds_ratio": -0.716796875,
"logits/chosen": -2.046875,
"logits/rejected": -2.125,
"logps/chosen": -1.1484375,
"logps/rejected": -1.234375,
"loss": 1.2212,
"nll_loss": 1.203125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.11474609375,
"rewards/margins": 0.00860595703125,
"rewards/rejected": -0.12353515625,
"step": 1010
},
{
"epoch": 0.8,
"grad_norm": 2.724799909308137,
"learning_rate": 3.736077019729425e-07,
"log_odds_chosen": 0.302978515625,
"log_odds_ratio": -0.6385742425918579,
"logits/chosen": -1.9296875,
"logits/rejected": -2.046875,
"logps/chosen": -1.0390625,
"logps/rejected": -1.2421875,
"loss": 1.1893,
"nll_loss": 1.109375,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.10400390625,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.12451171875,
"step": 1020
},
{
"epoch": 0.81,
"grad_norm": 2.4835614341515053,
"learning_rate": 3.7060713050670546e-07,
"log_odds_chosen": 0.2666015625,
"log_odds_ratio": -0.6908203363418579,
"logits/chosen": -1.875,
"logits/rejected": -2.015625,
"logps/chosen": -1.1171875,
"logps/rejected": -1.328125,
"loss": 1.2376,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.020751953125,
"rewards/rejected": -0.1328125,
"step": 1030
},
{
"epoch": 0.82,
"grad_norm": 2.709722079150454,
"learning_rate": 3.6758374124494973e-07,
"log_odds_chosen": 0.185791015625,
"log_odds_ratio": -0.6966797113418579,
"logits/chosen": -1.8984375,
"logits/rejected": -2.03125,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2109375,
"loss": 1.2082,
"nll_loss": 1.109375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.01458740234375,
"rewards/rejected": -0.12109375,
"step": 1040
},
{
"epoch": 0.83,
"grad_norm": 2.8331342756102167,
"learning_rate": 3.645381061860113e-07,
"log_odds_chosen": 0.3631835877895355,
"log_odds_ratio": -0.6460937261581421,
"logits/chosen": -1.921875,
"logits/rejected": -1.9765625,
"logps/chosen": -1.0078125,
"logps/rejected": -1.2578125,
"loss": 1.1933,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.1005859375,
"rewards/margins": 0.025146484375,
"rewards/rejected": -0.1259765625,
"step": 1050
},
{
"epoch": 0.83,
"grad_norm": 2.6269186805524143,
"learning_rate": 3.61470801536933e-07,
"log_odds_chosen": 0.12788085639476776,
"log_odds_ratio": -0.7337890863418579,
"logits/chosen": -2.0,
"logits/rejected": -2.078125,
"logps/chosen": -1.0390625,
"logps/rejected": -1.1328125,
"loss": 1.2153,
"nll_loss": 1.109375,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.10400390625,
"rewards/margins": 0.00909423828125,
"rewards/rejected": -0.11279296875,
"step": 1060
},
{
"epoch": 0.84,
"grad_norm": 2.60712425422802,
"learning_rate": 3.583824076044508e-07,
"log_odds_chosen": 0.08272705227136612,
"log_odds_ratio": -0.7518554925918579,
"logits/chosen": -1.890625,
"logits/rejected": -1.984375,
"logps/chosen": -1.140625,
"logps/rejected": -1.2265625,
"loss": 1.2114,
"nll_loss": 1.171875,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.1142578125,
"rewards/margins": 0.0078125,
"rewards/rejected": -0.12255859375,
"step": 1070
},
{
"epoch": 0.85,
"grad_norm": 2.742344457324174,
"learning_rate": 3.55273508685206e-07,
"log_odds_chosen": 0.11997070163488388,
"log_odds_ratio": -0.7144531011581421,
"logits/chosen": -1.8828125,
"logits/rejected": -2.046875,
"logps/chosen": -1.140625,
"logps/rejected": -1.21875,
"loss": 1.2194,
"nll_loss": 1.171875,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.0084228515625,
"rewards/rejected": -0.1220703125,
"step": 1080
},
{
"epoch": 0.86,
"grad_norm": 2.677923442608537,
"learning_rate": 3.5214469295520033e-07,
"log_odds_chosen": 0.2944091856479645,
"log_odds_ratio": -0.6474609375,
"logits/chosen": -1.953125,
"logits/rejected": -2.0625,
"logps/chosen": -1.09375,
"logps/rejected": -1.296875,
"loss": 1.1926,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0206298828125,
"rewards/rejected": -0.1298828125,
"step": 1090
},
{
"epoch": 0.87,
"grad_norm": 2.614103984779814,
"learning_rate": 3.4899655235851903e-07,
"log_odds_chosen": 0.15128174424171448,
"log_odds_ratio": -0.692187488079071,
"logits/chosen": -1.875,
"logits/rejected": -2.03125,
"logps/chosen": -1.140625,
"logps/rejected": -1.2421875,
"loss": 1.2353,
"nll_loss": 1.2265625,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11376953125,
"rewards/margins": 0.01055908203125,
"rewards/rejected": -0.12451171875,
"step": 1100
},
{
"epoch": 0.87,
"grad_norm": 2.661524044558228,
"learning_rate": 3.458296824953403e-07,
"log_odds_chosen": 0.19251708686351776,
"log_odds_ratio": -0.681445300579071,
"logits/chosen": -1.8125,
"logits/rejected": -1.9609375,
"logps/chosen": -1.125,
"logps/rejected": -1.2578125,
"loss": 1.2002,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.01312255859375,
"rewards/rejected": -0.1259765625,
"step": 1110
},
{
"epoch": 0.88,
"grad_norm": 2.479788982713935,
"learning_rate": 3.426446825092525e-07,
"log_odds_chosen": 0.30213624238967896,
"log_odds_ratio": -0.6465820074081421,
"logits/chosen": -1.875,
"logits/rejected": -1.9765625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.2734375,
"loss": 1.2165,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.021728515625,
"rewards/rejected": -0.126953125,
"step": 1120
},
{
"epoch": 0.89,
"grad_norm": 3.266264486839817,
"learning_rate": 3.3944215497390197e-07,
"log_odds_chosen": 0.12014160305261612,
"log_odds_ratio": -0.707812488079071,
"logits/chosen": -1.875,
"logits/rejected": -1.9375,
"logps/chosen": -1.1015625,
"logps/rejected": -1.1875,
"loss": 1.2284,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.00848388671875,
"rewards/rejected": -0.1181640625,
"step": 1130
},
{
"epoch": 0.9,
"grad_norm": 2.397641031210895,
"learning_rate": 3.362227057789915e-07,
"log_odds_chosen": 0.3463378846645355,
"log_odds_ratio": -0.619335949420929,
"logits/chosen": -1.890625,
"logits/rejected": -2.046875,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2890625,
"loss": 1.1821,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0220947265625,
"rewards/rejected": -0.12890625,
"step": 1140
},
{
"epoch": 0.91,
"grad_norm": 2.4645155740078617,
"learning_rate": 3.329869440156512e-07,
"log_odds_chosen": 0.357086181640625,
"log_odds_ratio": -0.63232421875,
"logits/chosen": -1.890625,
"logits/rejected": -1.9609375,
"logps/chosen": -1.0625,
"logps/rejected": -1.3125,
"loss": 1.176,
"nll_loss": 1.109375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.024658203125,
"rewards/rejected": -0.130859375,
"step": 1150
},
{
"epoch": 0.91,
"grad_norm": 2.8912394279639084,
"learning_rate": 3.297354818612037e-07,
"log_odds_chosen": 0.05325927585363388,
"log_odds_ratio": -0.7728515863418579,
"logits/chosen": -1.9453125,
"logits/rejected": -2.015625,
"logps/chosen": -1.09375,
"logps/rejected": -1.125,
"loss": 1.2402,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.0030975341796875,
"rewards/rejected": -0.11279296875,
"step": 1160
},
{
"epoch": 0.92,
"grad_norm": 2.563419103608563,
"learning_rate": 3.264689344633461e-07,
"log_odds_chosen": 0.14066162705421448,
"log_odds_ratio": -0.6943359375,
"logits/chosen": -1.8125,
"logits/rejected": -1.859375,
"logps/chosen": -1.140625,
"logps/rejected": -1.2265625,
"loss": 1.1959,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1142578125,
"rewards/margins": 0.0084228515625,
"rewards/rejected": -0.12255859375,
"step": 1170
},
{
"epoch": 0.93,
"grad_norm": 2.8288693775232643,
"learning_rate": 3.2318791982376923e-07,
"log_odds_chosen": 0.19826659560203552,
"log_odds_ratio": -0.6885741949081421,
"logits/chosen": -1.9375,
"logits/rejected": -2.09375,
"logps/chosen": -1.0859375,
"logps/rejected": -1.21875,
"loss": 1.2221,
"nll_loss": 1.109375,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.01275634765625,
"rewards/rejected": -0.12158203125,
"step": 1180
},
{
"epoch": 0.94,
"grad_norm": 2.9337235954606844,
"learning_rate": 3.198930586812372e-07,
"log_odds_chosen": 0.3016296327114105,
"log_odds_ratio": -0.67626953125,
"logits/chosen": -1.8671875,
"logits/rejected": -2.0,
"logps/chosen": -1.0859375,
"logps/rejected": -1.3046875,
"loss": 1.1805,
"nll_loss": 1.171875,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.02197265625,
"rewards/rejected": -0.1298828125,
"step": 1190
},
{
"epoch": 0.95,
"grad_norm": 2.815544385281363,
"learning_rate": 3.1658497439414935e-07,
"log_odds_chosen": 0.18316039443016052,
"log_odds_ratio": -0.6927734613418579,
"logits/chosen": -1.96875,
"logits/rejected": -2.015625,
"logps/chosen": -1.0859375,
"logps/rejected": -1.203125,
"loss": 1.2118,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.01153564453125,
"rewards/rejected": -0.1201171875,
"step": 1200
},
{
"epoch": 0.95,
"grad_norm": 2.806645073099231,
"learning_rate": 3.132642928226061e-07,
"log_odds_chosen": 0.33399659395217896,
"log_odds_ratio": -0.6348632574081421,
"logits/chosen": -1.8828125,
"logits/rejected": -1.9921875,
"logps/chosen": -1.0546875,
"logps/rejected": -1.2734375,
"loss": 1.1911,
"nll_loss": 1.140625,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0223388671875,
"rewards/rejected": -0.1279296875,
"step": 1210
},
{
"epoch": 0.96,
"grad_norm": 2.4581782453300884,
"learning_rate": 3.0993164221000207e-07,
"log_odds_chosen": 0.215545654296875,
"log_odds_ratio": -0.672070324420929,
"logits/chosen": -1.9140625,
"logits/rejected": -2.0,
"logps/chosen": -1.109375,
"logps/rejected": -1.2578125,
"loss": 1.182,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.0145263671875,
"rewards/rejected": -0.125,
"step": 1220
},
{
"epoch": 0.97,
"grad_norm": 5.263613381972474,
"learning_rate": 3.0658765306416794e-07,
"log_odds_chosen": 0.166778564453125,
"log_odds_ratio": -0.6953125,
"logits/chosen": -1.8359375,
"logits/rejected": -1.921875,
"logps/chosen": -1.09375,
"logps/rejected": -1.2109375,
"loss": 1.2193,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0118408203125,
"rewards/rejected": -0.12109375,
"step": 1230
},
{
"epoch": 0.98,
"grad_norm": 2.5240280415155723,
"learning_rate": 3.032329580380838e-07,
"log_odds_chosen": 0.28306883573532104,
"log_odds_ratio": -0.6612304449081421,
"logits/chosen": -1.90625,
"logits/rejected": -1.96875,
"logps/chosen": -1.09375,
"logps/rejected": -1.28125,
"loss": 1.1956,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.109375,
"rewards/margins": 0.01904296875,
"rewards/rejected": -0.1279296875,
"step": 1240
},
{
"epoch": 0.98,
"grad_norm": 2.743773542575128,
"learning_rate": 2.998681918101871e-07,
"log_odds_chosen": 0.3384033143520355,
"log_odds_ratio": -0.6493164300918579,
"logits/chosen": -1.8828125,
"logits/rejected": -1.9453125,
"logps/chosen": -1.078125,
"logps/rejected": -1.328125,
"loss": 1.206,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.025390625,
"rewards/rejected": -0.1328125,
"step": 1250
},
{
"epoch": 0.99,
"grad_norm": 2.8074211611598066,
"learning_rate": 2.9649399096429714e-07,
"log_odds_chosen": 0.23601074516773224,
"log_odds_ratio": -0.6533203125,
"logits/chosen": -1.859375,
"logits/rejected": -1.9140625,
"logps/chosen": -1.078125,
"logps/rejected": -1.234375,
"loss": 1.188,
"nll_loss": 1.109375,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.015625,
"rewards/rejected": -0.123046875,
"step": 1260
},
{
"epoch": 1.0,
"grad_norm": 2.525044784627154,
"learning_rate": 2.931109938691786e-07,
"log_odds_chosen": 0.16881103813648224,
"log_odds_ratio": -0.684277355670929,
"logits/chosen": -1.8515625,
"logits/rejected": -2.0,
"logps/chosen": -1.09375,
"logps/rejected": -1.21875,
"loss": 1.188,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0128173828125,
"rewards/rejected": -0.1220703125,
"step": 1270
},
{
"epoch": 1.01,
"grad_norm": 2.668512392567912,
"learning_rate": 2.8971984055776853e-07,
"log_odds_chosen": 0.21584472060203552,
"log_odds_ratio": -0.672558605670929,
"logits/chosen": -1.84375,
"logits/rejected": -1.9765625,
"logps/chosen": -1.078125,
"logps/rejected": -1.2265625,
"loss": 1.2336,
"nll_loss": 1.15625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.01470947265625,
"rewards/rejected": -0.12255859375,
"step": 1280
},
{
"epoch": 1.02,
"grad_norm": 2.659729033509314,
"learning_rate": 2.863211726060875e-07,
"log_odds_chosen": 0.2547973692417145,
"log_odds_ratio": -0.6659179925918579,
"logits/chosen": -1.96875,
"logits/rejected": -2.015625,
"logps/chosen": -1.140625,
"logps/rejected": -1.3125,
"loss": 1.2367,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.1142578125,
"rewards/margins": 0.0167236328125,
"rewards/rejected": -0.130859375,
"step": 1290
},
{
"epoch": 1.02,
"grad_norm": 2.470961884835421,
"learning_rate": 2.829156330118589e-07,
"log_odds_chosen": 0.24007567763328552,
"log_odds_ratio": -0.65283203125,
"logits/chosen": -1.859375,
"logits/rejected": -1.9609375,
"logps/chosen": -1.125,
"logps/rejected": -1.28125,
"loss": 1.2008,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.1279296875,
"step": 1300
},
{
"epoch": 1.03,
"grad_norm": 2.5904466369333026,
"learning_rate": 2.7950386607286e-07,
"log_odds_chosen": 0.28740233182907104,
"log_odds_ratio": -0.6572265625,
"logits/chosen": -1.8671875,
"logits/rejected": -1.9609375,
"logps/chosen": -1.0625,
"logps/rejected": -1.2578125,
"loss": 1.2003,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.019775390625,
"rewards/rejected": -0.1259765625,
"step": 1310
},
{
"epoch": 1.04,
"grad_norm": 2.8679276152227726,
"learning_rate": 2.7608651726502607e-07,
"log_odds_chosen": 0.29725342988967896,
"log_odds_ratio": -0.6602538824081421,
"logits/chosen": -1.84375,
"logits/rejected": -2.015625,
"logps/chosen": -1.0625,
"logps/rejected": -1.2734375,
"loss": 1.2296,
"nll_loss": 1.125,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.0213623046875,
"rewards/rejected": -0.1279296875,
"step": 1320
},
{
"epoch": 1.05,
"grad_norm": 2.5150772854856243,
"learning_rate": 2.7266423312033226e-07,
"log_odds_chosen": 0.2159423828125,
"log_odds_ratio": -0.7059570550918579,
"logits/chosen": -1.9140625,
"logits/rejected": -2.0,
"logps/chosen": -1.078125,
"logps/rejected": -1.25,
"loss": 1.185,
"nll_loss": 1.140625,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.125,
"step": 1330
},
{
"epoch": 1.06,
"grad_norm": 2.737219590030928,
"learning_rate": 2.692376611044757e-07,
"log_odds_chosen": 0.3914794921875,
"log_odds_ratio": -0.640332043170929,
"logits/chosen": -1.796875,
"logits/rejected": -1.8671875,
"logps/chosen": -1.03125,
"logps/rejected": -1.2890625,
"loss": 1.2041,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.1025390625,
"rewards/margins": 0.026123046875,
"rewards/rejected": -0.12890625,
"step": 1340
},
{
"epoch": 1.06,
"grad_norm": 2.7769961907081293,
"learning_rate": 2.6580744949438045e-07,
"log_odds_chosen": 0.08111572265625,
"log_odds_ratio": -0.731249988079071,
"logits/chosen": -1.8828125,
"logits/rejected": -1.9921875,
"logps/chosen": -1.1484375,
"logps/rejected": -1.1875,
"loss": 1.2605,
"nll_loss": 1.203125,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.11474609375,
"rewards/margins": 0.0037078857421875,
"rewards/rejected": -0.11865234375,
"step": 1350
},
{
"epoch": 1.07,
"grad_norm": 2.9775601305183463,
"learning_rate": 2.6237424725554935e-07,
"log_odds_chosen": 0.3329834043979645,
"log_odds_ratio": -0.635937511920929,
"logits/chosen": -1.8359375,
"logits/rejected": -1.921875,
"logps/chosen": -1.0703125,
"logps/rejected": -1.296875,
"loss": 1.2152,
"nll_loss": 1.109375,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.0233154296875,
"rewards/rejected": -0.1298828125,
"step": 1360
},
{
"epoch": 1.08,
"grad_norm": 2.279068955006949,
"learning_rate": 2.589387039192858e-07,
"log_odds_chosen": 0.20733642578125,
"log_odds_ratio": -0.667675793170929,
"logits/chosen": -1.859375,
"logits/rejected": -1.9765625,
"logps/chosen": -1.125,
"logps/rejected": -1.2578125,
"loss": 1.2064,
"nll_loss": 1.15625,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.013671875,
"rewards/rejected": -0.1259765625,
"step": 1370
},
{
"epoch": 1.09,
"grad_norm": 3.0021514828628746,
"learning_rate": 2.555014694598077e-07,
"log_odds_chosen": 0.23118896782398224,
"log_odds_ratio": -0.6884765625,
"logits/chosen": -1.765625,
"logits/rejected": -1.9453125,
"logps/chosen": -1.0859375,
"logps/rejected": -1.2265625,
"loss": 1.2152,
"nll_loss": 1.09375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.0145263671875,
"rewards/rejected": -0.123046875,
"step": 1380
},
{
"epoch": 1.09,
"grad_norm": 2.735522050073968,
"learning_rate": 2.5206319417127873e-07,
"log_odds_chosen": 0.3378845155239105,
"log_odds_ratio": -0.632128894329071,
"logits/chosen": -1.7421875,
"logits/rejected": -1.8984375,
"logps/chosen": -1.0234375,
"logps/rejected": -1.2578125,
"loss": 1.1638,
"nll_loss": 1.0703125,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.1025390625,
"rewards/margins": 0.0228271484375,
"rewards/rejected": -0.125,
"step": 1390
},
{
"epoch": 1.1,
"grad_norm": 2.5736254747923923,
"learning_rate": 2.4862452854477784e-07,
"log_odds_chosen": 0.3209228515625,
"log_odds_ratio": -0.65576171875,
"logits/chosen": -1.734375,
"logits/rejected": -1.8984375,
"logps/chosen": -1.03125,
"logps/rejected": -1.2578125,
"loss": 1.166,
"nll_loss": 1.046875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.0225830078125,
"rewards/rejected": -0.1259765625,
"step": 1400
},
{
"epoch": 1.11,
"grad_norm": 2.7098667746876073,
"learning_rate": 2.4518612314523265e-07,
"log_odds_chosen": 0.08408202975988388,
"log_odds_ratio": -0.732421875,
"logits/chosen": -1.84375,
"logits/rejected": -1.953125,
"logps/chosen": -1.09375,
"logps/rejected": -1.140625,
"loss": 1.1805,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.00457763671875,
"rewards/rejected": -0.1142578125,
"step": 1410
},
{
"epoch": 1.12,
"grad_norm": 2.7073252776256966,
"learning_rate": 2.4174862848833806e-07,
"log_odds_chosen": 0.20045165717601776,
"log_odds_ratio": -0.67236328125,
"logits/chosen": -1.7578125,
"logits/rejected": -1.8359375,
"logps/chosen": -1.0703125,
"logps/rejected": -1.1875,
"loss": 1.2051,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0118408203125,
"rewards/rejected": -0.119140625,
"step": 1420
},
{
"epoch": 1.13,
"grad_norm": 2.7514653552282233,
"learning_rate": 2.3831269491748467e-07,
"log_odds_chosen": 0.22596435248851776,
"log_odds_ratio": -0.708984375,
"logits/chosen": -1.796875,
"logits/rejected": -1.8828125,
"logps/chosen": -1.1171875,
"logps/rejected": -1.2890625,
"loss": 1.217,
"nll_loss": 1.171875,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.12890625,
"step": 1430
},
{
"epoch": 1.13,
"grad_norm": 2.8823498677475183,
"learning_rate": 2.3487897248071941e-07,
"log_odds_chosen": 0.2939697206020355,
"log_odds_ratio": -0.664257824420929,
"logits/chosen": -1.7890625,
"logits/rejected": -1.9375,
"logps/chosen": -1.046875,
"logps/rejected": -1.25,
"loss": 1.1892,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.1259765625,
"step": 1440
},
{
"epoch": 1.14,
"grad_norm": 2.69332509317782,
"learning_rate": 2.314481108077624e-07,
"log_odds_chosen": 0.1607666015625,
"log_odds_ratio": -0.6968749761581421,
"logits/chosen": -1.8203125,
"logits/rejected": -1.8515625,
"logps/chosen": -1.078125,
"logps/rejected": -1.1796875,
"loss": 1.1978,
"nll_loss": 1.125,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.01068115234375,
"rewards/rejected": -0.1181640625,
"step": 1450
},
{
"epoch": 1.15,
"grad_norm": 2.5989208277674356,
"learning_rate": 2.280207589871026e-07,
"log_odds_chosen": 0.3521362245082855,
"log_odds_ratio": -0.642382800579071,
"logits/chosen": -1.8125,
"logits/rejected": -1.9375,
"logps/chosen": -1.078125,
"logps/rejected": -1.3203125,
"loss": 1.1628,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.0242919921875,
"rewards/rejected": -0.1318359375,
"step": 1460
},
{
"epoch": 1.16,
"grad_norm": 2.5631030942900805,
"learning_rate": 2.2459756544319627e-07,
"log_odds_chosen": 0.1890869140625,
"log_odds_ratio": -0.696972668170929,
"logits/chosen": -1.796875,
"logits/rejected": -1.890625,
"logps/chosen": -1.015625,
"logps/rejected": -1.1328125,
"loss": 1.1771,
"nll_loss": 1.0546875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.10205078125,
"rewards/margins": 0.01129150390625,
"rewards/rejected": -0.11328125,
"step": 1470
},
{
"epoch": 1.17,
"grad_norm": 2.7548023973263613,
"learning_rate": 2.2117917781379067e-07,
"log_odds_chosen": 0.19255371391773224,
"log_odds_ratio": -0.679394543170929,
"logits/chosen": -1.734375,
"logits/rejected": -1.859375,
"logps/chosen": -1.09375,
"logps/rejected": -1.21875,
"loss": 1.2441,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0123291015625,
"rewards/rejected": -0.12158203125,
"step": 1480
},
{
"epoch": 1.17,
"grad_norm": 2.6382486056871177,
"learning_rate": 2.177662428273968e-07,
"log_odds_chosen": 0.23670653998851776,
"log_odds_ratio": -0.67626953125,
"logits/chosen": -1.7578125,
"logits/rejected": -1.8984375,
"logps/chosen": -1.046875,
"logps/rejected": -1.203125,
"loss": 1.1895,
"nll_loss": 1.09375,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0150146484375,
"rewards/rejected": -0.1201171875,
"step": 1490
},
{
"epoch": 1.18,
"grad_norm": 2.5099170844954317,
"learning_rate": 2.1435940618093414e-07,
"log_odds_chosen": 0.19310303032398224,
"log_odds_ratio": -0.690625011920929,
"logits/chosen": -1.765625,
"logits/rejected": -1.875,
"logps/chosen": -1.0859375,
"logps/rejected": -1.2265625,
"loss": 1.1881,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0140380859375,
"rewards/rejected": -0.123046875,
"step": 1500
},
{
"epoch": 1.19,
"grad_norm": 2.7950237991583493,
"learning_rate": 2.1095931241757062e-07,
"log_odds_chosen": 0.2502685487270355,
"log_odds_ratio": -0.680957019329071,
"logits/chosen": -1.7734375,
"logits/rejected": -1.8515625,
"logps/chosen": -1.0390625,
"logps/rejected": -1.2265625,
"loss": 1.1906,
"nll_loss": 1.09375,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.10400390625,
"rewards/margins": 0.0185546875,
"rewards/rejected": -0.1220703125,
"step": 1510
},
{
"epoch": 1.2,
"grad_norm": 2.609790265054367,
"learning_rate": 2.075666048047806e-07,
"log_odds_chosen": 0.15053710341453552,
"log_odds_ratio": -0.698437511920929,
"logits/chosen": -1.7578125,
"logits/rejected": -1.828125,
"logps/chosen": -1.1015625,
"logps/rejected": -1.21875,
"loss": 1.221,
"nll_loss": 1.125,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.01171875,
"rewards/rejected": -0.1220703125,
"step": 1520
},
{
"epoch": 1.21,
"grad_norm": 2.5467686003601697,
"learning_rate": 2.0418192521264454e-07,
"log_odds_chosen": 0.23857422173023224,
"log_odds_ratio": -0.659863293170929,
"logits/chosen": -1.7734375,
"logits/rejected": -1.8515625,
"logps/chosen": -1.0625,
"logps/rejected": -1.2109375,
"loss": 1.1898,
"nll_loss": 1.109375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.01397705078125,
"rewards/rejected": -0.12060546875,
"step": 1530
},
{
"epoch": 1.21,
"grad_norm": 2.459650956326835,
"learning_rate": 2.0080591399241292e-07,
"log_odds_chosen": 0.23247070610523224,
"log_odds_ratio": -0.6712890863418579,
"logits/chosen": -1.7578125,
"logits/rejected": -1.7734375,
"logps/chosen": -1.109375,
"logps/rejected": -1.2578125,
"loss": 1.1708,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.014892578125,
"rewards/rejected": -0.1259765625,
"step": 1540
},
{
"epoch": 1.22,
"grad_norm": 2.47537989067237,
"learning_rate": 1.9743920985535729e-07,
"log_odds_chosen": 0.3998779356479645,
"log_odds_ratio": -0.625781238079071,
"logits/chosen": -1.6484375,
"logits/rejected": -1.796875,
"logps/chosen": -0.98828125,
"logps/rejected": -1.2578125,
"loss": 1.1589,
"nll_loss": 1.0390625,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.09912109375,
"rewards/margins": 0.0269775390625,
"rewards/rejected": -0.1259765625,
"step": 1550
},
{
"epoch": 1.23,
"grad_norm": 2.7703541098291455,
"learning_rate": 1.94082449751932e-07,
"log_odds_chosen": 0.2127685546875,
"log_odds_ratio": -0.6846679449081421,
"logits/chosen": -1.734375,
"logits/rejected": -1.8125,
"logps/chosen": -1.09375,
"logps/rejected": -1.25,
"loss": 1.1794,
"nll_loss": 1.171875,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0157470703125,
"rewards/rejected": -0.125,
"step": 1560
},
{
"epoch": 1.24,
"grad_norm": 2.885795668675382,
"learning_rate": 1.9073626875126874e-07,
"log_odds_chosen": 0.26057130098342896,
"log_odds_ratio": -0.649707019329071,
"logits/chosen": -1.7734375,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0234375,
"logps/rejected": -1.1875,
"loss": 1.1671,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.1025390625,
"rewards/margins": 0.01611328125,
"rewards/rejected": -0.11865234375,
"step": 1570
},
{
"epoch": 1.24,
"grad_norm": 2.722691086755302,
"learning_rate": 1.874012999210271e-07,
"log_odds_chosen": 0.19356079399585724,
"log_odds_ratio": -0.694140613079071,
"logits/chosen": -1.859375,
"logits/rejected": -1.8828125,
"logps/chosen": -1.1171875,
"logps/rejected": -1.25,
"loss": 1.1779,
"nll_loss": 1.171875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.01385498046875,
"rewards/rejected": -0.125,
"step": 1580
},
{
"epoch": 1.25,
"grad_norm": 2.604921389210434,
"learning_rate": 1.8407817420762383e-07,
"log_odds_chosen": 0.26337891817092896,
"log_odds_ratio": -0.6572265625,
"logits/chosen": -1.734375,
"logits/rejected": -1.875,
"logps/chosen": -1.09375,
"logps/rejected": -1.2734375,
"loss": 1.184,
"nll_loss": 1.140625,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.01806640625,
"rewards/rejected": -0.126953125,
"step": 1590
},
{
"epoch": 1.26,
"grad_norm": 2.732739073707677,
"learning_rate": 1.8076752031686343e-07,
"log_odds_chosen": 0.14312133193016052,
"log_odds_ratio": -0.704882800579071,
"logits/chosen": -1.7734375,
"logits/rejected": -1.8671875,
"logps/chosen": -1.0390625,
"logps/rejected": -1.140625,
"loss": 1.188,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.010498046875,
"rewards/rejected": -0.1142578125,
"step": 1600
},
{
"epoch": 1.27,
"grad_norm": 2.395290977769547,
"learning_rate": 1.7746996459499254e-07,
"log_odds_chosen": 0.242431640625,
"log_odds_ratio": -0.6644531488418579,
"logits/chosen": -1.65625,
"logits/rejected": -1.84375,
"logps/chosen": -1.0546875,
"logps/rejected": -1.21875,
"loss": 1.1618,
"nll_loss": 1.046875,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.016357421875,
"rewards/rejected": -0.1220703125,
"step": 1610
},
{
"epoch": 1.28,
"grad_norm": 2.547454975163186,
"learning_rate": 1.741861309102009e-07,
"log_odds_chosen": 0.26506346464157104,
"log_odds_ratio": -0.6689453125,
"logits/chosen": -1.640625,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.2109375,
"loss": 1.1665,
"nll_loss": 1.109375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.12109375,
"step": 1620
},
{
"epoch": 1.28,
"grad_norm": 2.396584161009059,
"learning_rate": 1.7091664053459088e-07,
"log_odds_chosen": 0.18143311142921448,
"log_odds_ratio": -0.6865234375,
"logits/chosen": -1.71875,
"logits/rejected": -1.8359375,
"logps/chosen": -1.0859375,
"logps/rejected": -1.1875,
"loss": 1.2118,
"nll_loss": 1.15625,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.01043701171875,
"rewards/rejected": -0.11865234375,
"step": 1630
},
{
"epoch": 1.29,
"grad_norm": 2.279515658884766,
"learning_rate": 1.6766211202663844e-07,
"log_odds_chosen": 0.05356445163488388,
"log_odds_ratio": -0.739453136920929,
"logits/chosen": -1.6875,
"logits/rejected": -1.7890625,
"logps/chosen": -1.1171875,
"logps/rejected": -1.140625,
"loss": 1.2049,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.0027008056640625,
"rewards/rejected": -0.1142578125,
"step": 1640
},
{
"epoch": 1.3,
"grad_norm": 2.4004304534033265,
"learning_rate": 1.6442316111416743e-07,
"log_odds_chosen": 0.26105958223342896,
"log_odds_ratio": -0.6631835699081421,
"logits/chosen": -1.6796875,
"logits/rejected": -1.7421875,
"logps/chosen": -1.0859375,
"logps/rejected": -1.2734375,
"loss": 1.1816,
"nll_loss": 1.140625,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0184326171875,
"rewards/rejected": -0.126953125,
"step": 1650
},
{
"epoch": 1.31,
"grad_norm": 2.763579524745402,
"learning_rate": 1.6120040057785928e-07,
"log_odds_chosen": 0.29625242948532104,
"log_odds_ratio": -0.6499999761581421,
"logits/chosen": -1.8125,
"logits/rejected": -1.875,
"logps/chosen": -1.0859375,
"logps/rejected": -1.28125,
"loss": 1.1727,
"nll_loss": 1.09375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.0194091796875,
"rewards/rejected": -0.1279296875,
"step": 1660
},
{
"epoch": 1.32,
"grad_norm": 2.8036257747935154,
"learning_rate": 1.5799444013532038e-07,
"log_odds_chosen": 0.23708495497703552,
"log_odds_ratio": -0.67431640625,
"logits/chosen": -1.7109375,
"logits/rejected": -1.8203125,
"logps/chosen": -1.0625,
"logps/rejected": -1.21875,
"loss": 1.1238,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.01556396484375,
"rewards/rejected": -0.1220703125,
"step": 1670
},
{
"epoch": 1.32,
"grad_norm": 2.6570430804218566,
"learning_rate": 1.5480588632572885e-07,
"log_odds_chosen": 0.37006837129592896,
"log_odds_ratio": -0.635937511920929,
"logits/chosen": -1.7421875,
"logits/rejected": -1.8046875,
"logps/chosen": -1.0390625,
"logps/rejected": -1.2890625,
"loss": 1.1907,
"nll_loss": 1.078125,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.025390625,
"rewards/rejected": -0.12890625,
"step": 1680
},
{
"epoch": 1.33,
"grad_norm": 2.590262402608133,
"learning_rate": 1.516353423950829e-07,
"log_odds_chosen": 0.3837524354457855,
"log_odds_ratio": -0.625,
"logits/chosen": -1.796875,
"logits/rejected": -1.9140625,
"logps/chosen": -1.046875,
"logps/rejected": -1.28125,
"loss": 1.1898,
"nll_loss": 1.125,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.0235595703125,
"rewards/rejected": -0.1279296875,
"step": 1690
},
{
"epoch": 1.34,
"grad_norm": 2.568483644438331,
"learning_rate": 1.4848340818207184e-07,
"log_odds_chosen": 0.26896971464157104,
"log_odds_ratio": -0.66455078125,
"logits/chosen": -1.75,
"logits/rejected": -1.859375,
"logps/chosen": -1.046875,
"logps/rejected": -1.2421875,
"loss": 1.1799,
"nll_loss": 1.09375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.019287109375,
"rewards/rejected": -0.1240234375,
"step": 1700
},
{
"epoch": 1.35,
"grad_norm": 3.318714021827115,
"learning_rate": 1.453506800045921e-07,
"log_odds_chosen": 0.12944336235523224,
"log_odds_ratio": -0.7064453363418579,
"logits/chosen": -1.75,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0859375,
"logps/rejected": -1.1796875,
"loss": 1.2096,
"nll_loss": 1.140625,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.00970458984375,
"rewards/rejected": -0.1181640625,
"step": 1710
},
{
"epoch": 1.35,
"grad_norm": 2.7195091239689426,
"learning_rate": 1.422377505469293e-07,
"log_odds_chosen": 0.14760741591453552,
"log_odds_ratio": -0.72802734375,
"logits/chosen": -1.703125,
"logits/rejected": -1.75,
"logps/chosen": -1.1328125,
"logps/rejected": -1.21875,
"loss": 1.2695,
"nll_loss": 1.234375,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.11328125,
"rewards/margins": 0.00823974609375,
"rewards/rejected": -0.12158203125,
"step": 1720
},
{
"epoch": 1.36,
"grad_norm": 2.4116745200019696,
"learning_rate": 1.3914520874762726e-07,
"log_odds_chosen": 0.2623352110385895,
"log_odds_ratio": -0.6844726800918579,
"logits/chosen": -1.6875,
"logits/rejected": -1.765625,
"logps/chosen": -1.109375,
"logps/rejected": -1.3046875,
"loss": 1.2307,
"nll_loss": 1.15625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.0191650390625,
"rewards/rejected": -0.130859375,
"step": 1730
},
{
"epoch": 1.37,
"grad_norm": 2.6643696734974127,
"learning_rate": 1.3607363968806645e-07,
"log_odds_chosen": 0.3259033262729645,
"log_odds_ratio": -0.623046875,
"logits/chosen": -1.6953125,
"logits/rejected": -1.78125,
"logps/chosen": -1.03125,
"logps/rejected": -1.25,
"loss": 1.2087,
"nll_loss": 1.046875,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.022216796875,
"rewards/rejected": -0.125,
"step": 1740
},
{
"epoch": 1.38,
"grad_norm": 2.734224521952181,
"learning_rate": 1.3302362448177167e-07,
"log_odds_chosen": 0.30589598417282104,
"log_odds_ratio": -0.6283203363418579,
"logits/chosen": -1.703125,
"logits/rejected": -1.796875,
"logps/chosen": -1.03125,
"logps/rejected": -1.234375,
"loss": 1.1853,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.0198974609375,
"rewards/rejected": -0.12353515625,
"step": 1750
},
{
"epoch": 1.39,
"grad_norm": 3.213949669653505,
"learning_rate": 1.2999574016447056e-07,
"log_odds_chosen": 0.3102783262729645,
"log_odds_ratio": -0.6421874761581421,
"logits/chosen": -1.6875,
"logits/rejected": -1.8125,
"logps/chosen": -1.046875,
"logps/rejected": -1.2578125,
"loss": 1.196,
"nll_loss": 1.0703125,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.0211181640625,
"rewards/rejected": -0.1259765625,
"step": 1760
},
{
"epoch": 1.39,
"grad_norm": 2.3847902873067492,
"learning_rate": 1.2699055958492344e-07,
"log_odds_chosen": 0.19971923530101776,
"log_odds_ratio": -0.6748046875,
"logits/chosen": -1.7421875,
"logits/rejected": -1.8203125,
"logps/chosen": -1.046875,
"logps/rejected": -1.171875,
"loss": 1.2064,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.01287841796875,
"rewards/rejected": -0.11767578125,
"step": 1770
},
{
"epoch": 1.4,
"grad_norm": 2.5039350155364573,
"learning_rate": 1.2400865129654567e-07,
"log_odds_chosen": 0.27821046113967896,
"log_odds_ratio": -0.652050793170929,
"logits/chosen": -1.6796875,
"logits/rejected": -1.796875,
"logps/chosen": -1.0390625,
"logps/rejected": -1.2265625,
"loss": 1.1371,
"nll_loss": 1.0703125,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.10400390625,
"rewards/margins": 0.018310546875,
"rewards/rejected": -0.12255859375,
"step": 1780
},
{
"epoch": 1.41,
"grad_norm": 2.5879591956281995,
"learning_rate": 1.210505794498422e-07,
"log_odds_chosen": 0.23630371689796448,
"log_odds_ratio": -0.673828125,
"logits/chosen": -1.7109375,
"logits/rejected": -1.7421875,
"logps/chosen": -1.09375,
"logps/rejected": -1.2578125,
"loss": 1.1706,
"nll_loss": 1.109375,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0164794921875,
"rewards/rejected": -0.1259765625,
"step": 1790
},
{
"epoch": 1.42,
"grad_norm": 2.752234308496576,
"learning_rate": 1.1811690368567545e-07,
"log_odds_chosen": 0.14584961533546448,
"log_odds_ratio": -0.6947265863418579,
"logits/chosen": -1.7109375,
"logits/rejected": -1.8203125,
"logps/chosen": -1.046875,
"logps/rejected": -1.1328125,
"loss": 1.236,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.00872802734375,
"rewards/rejected": -0.11328125,
"step": 1800
},
{
"epoch": 1.43,
"grad_norm": 2.573963727957766,
"learning_rate": 1.1520817902938618e-07,
"log_odds_chosen": 0.07918091118335724,
"log_odds_ratio": -0.7347656488418579,
"logits/chosen": -1.7109375,
"logits/rejected": -1.78125,
"logps/chosen": -1.125,
"logps/rejected": -1.1875,
"loss": 1.1915,
"nll_loss": 1.15625,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.11279296875,
"rewards/margins": 0.00604248046875,
"rewards/rejected": -0.11865234375,
"step": 1810
},
{
"epoch": 1.43,
"grad_norm": 2.7806823611081177,
"learning_rate": 1.1232495578578755e-07,
"log_odds_chosen": 0.15264892578125,
"log_odds_ratio": -0.6976562738418579,
"logits/chosen": -1.75,
"logits/rejected": -1.78125,
"logps/chosen": -1.0390625,
"logps/rejected": -1.1171875,
"loss": 1.1626,
"nll_loss": 1.09375,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.00823974609375,
"rewards/rejected": -0.11181640625,
"step": 1820
},
{
"epoch": 1.44,
"grad_norm": 2.436201094808605,
"learning_rate": 1.0946777943505254e-07,
"log_odds_chosen": 0.23690184950828552,
"log_odds_ratio": -0.6917968988418579,
"logits/chosen": -1.7109375,
"logits/rejected": -1.78125,
"logps/chosen": -1.0625,
"logps/rejected": -1.2265625,
"loss": 1.1971,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.0167236328125,
"rewards/rejected": -0.123046875,
"step": 1830
},
{
"epoch": 1.45,
"grad_norm": 2.417259454035703,
"learning_rate": 1.0663719052951381e-07,
"log_odds_chosen": 0.19016113877296448,
"log_odds_ratio": -0.699414074420929,
"logits/chosen": -1.6796875,
"logits/rejected": -1.765625,
"logps/chosen": -1.0859375,
"logps/rejected": -1.1953125,
"loss": 1.1861,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0107421875,
"rewards/rejected": -0.119140625,
"step": 1840
},
{
"epoch": 1.46,
"grad_norm": 2.340681686492189,
"learning_rate": 1.0383372459139608e-07,
"log_odds_chosen": 0.30018919706344604,
"log_odds_ratio": -0.6387695074081421,
"logits/chosen": -1.78125,
"logits/rejected": -1.921875,
"logps/chosen": -1.0546875,
"logps/rejected": -1.2421875,
"loss": 1.1958,
"nll_loss": 1.078125,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0191650390625,
"rewards/rejected": -0.12451171875,
"step": 1850
},
{
"epoch": 1.47,
"grad_norm": 2.530421254724575,
"learning_rate": 1.0105791201150002e-07,
"log_odds_chosen": 0.3886962831020355,
"log_odds_ratio": -0.620312511920929,
"logits/chosen": -1.6796875,
"logits/rejected": -1.7734375,
"logps/chosen": -1.0625,
"logps/rejected": -1.3359375,
"loss": 1.16,
"nll_loss": 1.046875,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.027587890625,
"rewards/rejected": -0.1337890625,
"step": 1860
},
{
"epoch": 1.47,
"grad_norm": 2.6492828085260225,
"learning_rate": 9.831027794885713e-08,
"log_odds_chosen": 0.34185791015625,
"log_odds_ratio": -0.6444336175918579,
"logits/chosen": -1.6484375,
"logits/rejected": -1.671875,
"logps/chosen": -1.0234375,
"logps/rejected": -1.234375,
"loss": 1.1779,
"nll_loss": 1.140625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1025390625,
"rewards/margins": 0.0211181640625,
"rewards/rejected": -0.12353515625,
"step": 1870
},
{
"epoch": 1.48,
"grad_norm": 2.6971126252475286,
"learning_rate": 9.559134223137424e-08,
"log_odds_chosen": 0.2640136778354645,
"log_odds_ratio": -0.673046886920929,
"logits/chosen": -1.7578125,
"logits/rejected": -1.8046875,
"logps/chosen": -1.09375,
"logps/rejected": -1.2578125,
"loss": 1.2186,
"nll_loss": 1.171875,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.109375,
"rewards/margins": 0.016845703125,
"rewards/rejected": -0.1259765625,
"step": 1880
},
{
"epoch": 1.49,
"grad_norm": 2.768346463128813,
"learning_rate": 9.290161925748674e-08,
"log_odds_chosen": 0.333740234375,
"log_odds_ratio": -0.6434570550918579,
"logits/chosen": -1.71875,
"logits/rejected": -1.796875,
"logps/chosen": -1.1015625,
"logps/rejected": -1.328125,
"loss": 1.1758,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0224609375,
"rewards/rejected": -0.1328125,
"step": 1890
},
{
"epoch": 1.5,
"grad_norm": 2.4131576506486168,
"learning_rate": 9.024161789883897e-08,
"log_odds_chosen": 0.13895873725414276,
"log_odds_ratio": -0.695507824420929,
"logits/chosen": -1.625,
"logits/rejected": -1.71875,
"logps/chosen": -1.03125,
"logps/rejected": -1.1171875,
"loss": 1.1368,
"nll_loss": 1.0625,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.009033203125,
"rewards/rejected": -0.1123046875,
"step": 1900
},
{
"epoch": 1.5,
"grad_norm": 2.5654624061461253,
"learning_rate": 8.761184140401023e-08,
"log_odds_chosen": 0.25886231660842896,
"log_odds_ratio": -0.662109375,
"logits/chosen": -1.609375,
"logits/rejected": -1.6875,
"logps/chosen": -1.0390625,
"logps/rejected": -1.203125,
"loss": 1.1906,
"nll_loss": 1.09375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.10400390625,
"rewards/margins": 0.0164794921875,
"rewards/rejected": -0.12060546875,
"step": 1910
},
{
"epoch": 1.51,
"grad_norm": 2.6681020896294676,
"learning_rate": 8.501278730330463e-08,
"log_odds_chosen": 0.36528319120407104,
"log_odds_ratio": -0.626269519329071,
"logits/chosen": -1.609375,
"logits/rejected": -1.671875,
"logps/chosen": -1.078125,
"logps/rejected": -1.3203125,
"loss": 1.1977,
"nll_loss": 1.140625,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.02490234375,
"rewards/rejected": -0.1318359375,
"step": 1920
},
{
"epoch": 1.52,
"grad_norm": 2.7519831354278512,
"learning_rate": 8.244494731462279e-08,
"log_odds_chosen": 0.24447020888328552,
"log_odds_ratio": -0.6788085699081421,
"logits/chosen": -1.59375,
"logits/rejected": -1.703125,
"logps/chosen": -1.03125,
"logps/rejected": -1.1953125,
"loss": 1.164,
"nll_loss": 1.0546875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.015869140625,
"rewards/rejected": -0.119140625,
"step": 1930
},
{
"epoch": 1.53,
"grad_norm": 2.870452087544285,
"learning_rate": 7.990880725043322e-08,
"log_odds_chosen": 0.2567138671875,
"log_odds_ratio": -0.670703113079071,
"logits/chosen": -1.625,
"logits/rejected": -1.7578125,
"logps/chosen": -1.046875,
"logps/rejected": -1.21875,
"loss": 1.1622,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.0167236328125,
"rewards/rejected": -0.12158203125,
"step": 1940
},
{
"epoch": 1.54,
"grad_norm": 2.6968676817822645,
"learning_rate": 7.740484692586074e-08,
"log_odds_chosen": 0.2530761659145355,
"log_odds_ratio": -0.680371105670929,
"logits/chosen": -1.671875,
"logits/rejected": -1.8203125,
"logps/chosen": -1.1796875,
"logps/rejected": -1.3515625,
"loss": 1.2242,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.11767578125,
"rewards/margins": 0.0174560546875,
"rewards/rejected": -0.1357421875,
"step": 1950
},
{
"epoch": 1.54,
"grad_norm": 2.735947390317322,
"learning_rate": 7.493354006791006e-08,
"log_odds_chosen": 0.24350586533546448,
"log_odds_ratio": -0.682421863079071,
"logits/chosen": -1.6171875,
"logits/rejected": -1.6796875,
"logps/chosen": -1.0546875,
"logps/rejected": -1.2265625,
"loss": 1.1973,
"nll_loss": 1.15625,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.0169677734375,
"rewards/rejected": -0.12255859375,
"step": 1960
},
{
"epoch": 1.55,
"grad_norm": 2.4135041559261885,
"learning_rate": 7.249535422584055e-08,
"log_odds_chosen": 0.19566650688648224,
"log_odds_ratio": -0.6849609613418579,
"logits/chosen": -1.8125,
"logits/rejected": -1.84375,
"logps/chosen": -1.078125,
"logps/rejected": -1.1875,
"loss": 1.1835,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.01129150390625,
"rewards/rejected": -0.119140625,
"step": 1970
},
{
"epoch": 1.56,
"grad_norm": 2.3578015206878575,
"learning_rate": 7.009075068271031e-08,
"log_odds_chosen": 0.12241820991039276,
"log_odds_ratio": -0.7289062738418579,
"logits/chosen": -1.578125,
"logits/rejected": -1.7578125,
"logps/chosen": -1.09375,
"logps/rejected": -1.1796875,
"loss": 1.1747,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0087890625,
"rewards/rejected": -0.1181640625,
"step": 1980
},
{
"epoch": 1.57,
"grad_norm": 2.7517093669068933,
"learning_rate": 6.772018436810525e-08,
"log_odds_chosen": 0.34681397676467896,
"log_odds_ratio": -0.616992175579071,
"logits/chosen": -1.7109375,
"logits/rejected": -1.7734375,
"logps/chosen": -1.1015625,
"logps/rejected": -1.34375,
"loss": 1.1863,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0240478515625,
"rewards/rejected": -0.134765625,
"step": 1990
},
{
"epoch": 1.58,
"grad_norm": 2.758447250920097,
"learning_rate": 6.538410377207082e-08,
"log_odds_chosen": 0.03367309644818306,
"log_odds_ratio": -0.773144543170929,
"logits/chosen": -1.625,
"logits/rejected": -1.7421875,
"logps/chosen": -1.125,
"logps/rejected": -1.15625,
"loss": 1.2376,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.0030364990234375,
"rewards/rejected": -0.115234375,
"step": 2000
},
{
"epoch": 1.58,
"grad_norm": 2.880552308468262,
"learning_rate": 6.308295086026133e-08,
"log_odds_chosen": 0.17825928330421448,
"log_odds_ratio": -0.696972668170929,
"logits/chosen": -1.65625,
"logits/rejected": -1.671875,
"logps/chosen": -1.125,
"logps/rejected": -1.25,
"loss": 1.2262,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.1123046875,
"rewards/margins": 0.01220703125,
"rewards/rejected": -0.12451171875,
"step": 2010
},
{
"epoch": 1.59,
"grad_norm": 2.439317791546686,
"learning_rate": 6.081716099032417e-08,
"log_odds_chosen": 0.3602050840854645,
"log_odds_ratio": -0.6319335699081421,
"logits/chosen": -1.609375,
"logits/rejected": -1.7578125,
"logps/chosen": -0.99609375,
"logps/rejected": -1.2421875,
"loss": 1.1792,
"nll_loss": 1.0625,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.099609375,
"rewards/margins": 0.024169921875,
"rewards/rejected": -0.1240234375,
"step": 2020
},
{
"epoch": 1.6,
"grad_norm": 2.810922238332508,
"learning_rate": 5.858716282953407e-08,
"log_odds_chosen": 0.24152831733226776,
"log_odds_ratio": -0.654492199420929,
"logits/chosen": -1.6328125,
"logits/rejected": -1.75,
"logps/chosen": -1.09375,
"logps/rejected": -1.2578125,
"loss": 1.1982,
"nll_loss": 1.140625,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0166015625,
"rewards/rejected": -0.1259765625,
"step": 2030
},
{
"epoch": 1.61,
"grad_norm": 2.5961073589782466,
"learning_rate": 5.639337827369289e-08,
"log_odds_chosen": 0.17608642578125,
"log_odds_ratio": -0.702832043170929,
"logits/chosen": -1.671875,
"logits/rejected": -1.7890625,
"logps/chosen": -1.1171875,
"logps/rejected": -1.2265625,
"loss": 1.1879,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.111328125,
"rewards/margins": 0.0111083984375,
"rewards/rejected": -0.12255859375,
"step": 2040
},
{
"epoch": 1.61,
"grad_norm": 2.3150779833374266,
"learning_rate": 5.4236222367310816e-08,
"log_odds_chosen": 0.29583740234375,
"log_odds_ratio": -0.653124988079071,
"logits/chosen": -1.578125,
"logits/rejected": -1.71875,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2734375,
"loss": 1.1965,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.020263671875,
"rewards/rejected": -0.126953125,
"step": 2050
},
{
"epoch": 1.62,
"grad_norm": 2.1606096674823068,
"learning_rate": 5.211610322508364e-08,
"log_odds_chosen": 0.15689697861671448,
"log_odds_ratio": -0.70263671875,
"logits/chosen": -1.6875,
"logits/rejected": -1.765625,
"logps/chosen": -1.0625,
"logps/rejected": -1.1640625,
"loss": 1.1801,
"nll_loss": 1.09375,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.0098876953125,
"rewards/rejected": -0.11669921875,
"step": 2060
},
{
"epoch": 1.63,
"grad_norm": 2.547538507688686,
"learning_rate": 5.003342195468102e-08,
"log_odds_chosen": 0.221527099609375,
"log_odds_ratio": -0.66357421875,
"logits/chosen": -1.59375,
"logits/rejected": -1.640625,
"logps/chosen": -1.0703125,
"logps/rejected": -1.203125,
"loss": 1.2133,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.01263427734375,
"rewards/rejected": -0.11962890625,
"step": 2070
},
{
"epoch": 1.64,
"grad_norm": 2.8414624562639546,
"learning_rate": 4.798857258086053e-08,
"log_odds_chosen": 0.25762939453125,
"log_odds_ratio": -0.6714843511581421,
"logits/chosen": -1.609375,
"logits/rejected": -1.7109375,
"logps/chosen": -1.0625,
"logps/rejected": -1.25,
"loss": 1.2074,
"nll_loss": 1.109375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.0186767578125,
"rewards/rejected": -0.125,
"step": 2080
},
{
"epoch": 1.65,
"grad_norm": 3.120660574019042,
"learning_rate": 4.5981941970921646e-08,
"log_odds_chosen": 0.48786622285842896,
"log_odds_ratio": -0.591992199420929,
"logits/chosen": -1.6171875,
"logits/rejected": -1.65625,
"logps/chosen": -1.03125,
"logps/rejected": -1.375,
"loss": 1.1441,
"nll_loss": 1.0390625,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.034423828125,
"rewards/rejected": -0.1376953125,
"step": 2090
},
{
"epoch": 1.65,
"grad_norm": 2.635871013914355,
"learning_rate": 4.4013909761513894e-08,
"log_odds_chosen": 0.2707275450229645,
"log_odds_ratio": -0.649218738079071,
"logits/chosen": -1.71875,
"logits/rejected": -1.7109375,
"logps/chosen": -1.09375,
"logps/rejected": -1.265625,
"loss": 1.1887,
"nll_loss": 1.140625,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0169677734375,
"rewards/rejected": -0.1259765625,
"step": 2100
},
{
"epoch": 1.66,
"grad_norm": 3.1511666169638346,
"learning_rate": 4.2084848286813105e-08,
"log_odds_chosen": 0.3526855409145355,
"log_odds_ratio": -0.6600586175918579,
"logits/chosen": -1.5859375,
"logits/rejected": -1.65625,
"logps/chosen": -1.0625,
"logps/rejected": -1.34375,
"loss": 1.1851,
"nll_loss": 1.15625,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.0281982421875,
"rewards/rejected": -0.134765625,
"step": 2110
},
{
"epoch": 1.67,
"grad_norm": 2.7305670197672747,
"learning_rate": 4.0195122508078886e-08,
"log_odds_chosen": 0.27125245332717896,
"log_odds_ratio": -0.65625,
"logits/chosen": -1.5859375,
"logits/rejected": -1.6875,
"logps/chosen": -1.0625,
"logps/rejected": -1.2421875,
"loss": 1.1709,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.017822265625,
"rewards/rejected": -0.12451171875,
"step": 2120
},
{
"epoch": 1.68,
"grad_norm": 3.2312418757529726,
"learning_rate": 3.834508994460736e-08,
"log_odds_chosen": 0.23995360732078552,
"log_odds_ratio": -0.654589831829071,
"logits/chosen": -1.578125,
"logits/rejected": -1.640625,
"logps/chosen": -1.03125,
"logps/rejected": -1.1875,
"loss": 1.1604,
"nll_loss": 1.046875,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.103515625,
"rewards/margins": 0.0150146484375,
"rewards/rejected": -0.1181640625,
"step": 2130
},
{
"epoch": 1.69,
"grad_norm": 2.874032565275268,
"learning_rate": 3.653510060609166e-08,
"log_odds_chosen": 0.13707275688648224,
"log_odds_ratio": -0.7138671875,
"logits/chosen": -1.6796875,
"logits/rejected": -1.765625,
"logps/chosen": -1.0625,
"logps/rejected": -1.1484375,
"loss": 1.173,
"nll_loss": 1.078125,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.00848388671875,
"rewards/rejected": -0.1142578125,
"step": 2140
},
{
"epoch": 1.69,
"grad_norm": 2.6604754366861822,
"learning_rate": 3.476549692640316e-08,
"log_odds_chosen": 0.34288328886032104,
"log_odds_ratio": -0.620312511920929,
"logits/chosen": -1.6015625,
"logits/rejected": -1.7109375,
"logps/chosen": -1.0,
"logps/rejected": -1.21875,
"loss": 1.1585,
"nll_loss": 1.078125,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.099609375,
"rewards/margins": 0.0218505859375,
"rewards/rejected": -0.12158203125,
"step": 2150
},
{
"epoch": 1.7,
"grad_norm": 2.71892900942932,
"learning_rate": 3.3036613698806085e-08,
"log_odds_chosen": 0.21519775688648224,
"log_odds_ratio": -0.691601574420929,
"logits/chosen": -1.640625,
"logits/rejected": -1.734375,
"logps/chosen": -1.078125,
"logps/rejected": -1.21875,
"loss": 1.2288,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.01434326171875,
"rewards/rejected": -0.1220703125,
"step": 2160
},
{
"epoch": 1.71,
"grad_norm": 2.878603239597823,
"learning_rate": 3.134877801261765e-08,
"log_odds_chosen": 0.3372802734375,
"log_odds_ratio": -0.642285168170929,
"logits/chosen": -1.703125,
"logits/rejected": -1.734375,
"logps/chosen": -1.109375,
"logps/rejected": -1.328125,
"loss": 1.2136,
"nll_loss": 1.1640625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.0218505859375,
"rewards/rejected": -0.1328125,
"step": 2170
},
{
"epoch": 1.72,
"grad_norm": 2.6992535601969085,
"learning_rate": 2.9702309191325492e-08,
"log_odds_chosen": 0.24042968451976776,
"log_odds_ratio": -0.675585925579071,
"logits/chosen": -1.7109375,
"logits/rejected": -1.796875,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2265625,
"loss": 1.2242,
"nll_loss": 1.109375,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0159912109375,
"rewards/rejected": -0.123046875,
"step": 2180
},
{
"epoch": 1.73,
"grad_norm": 2.971420026998493,
"learning_rate": 2.809751873217478e-08,
"log_odds_chosen": 0.32117921113967896,
"log_odds_ratio": -0.6463867425918579,
"logits/chosen": -1.734375,
"logits/rejected": -1.7578125,
"logps/chosen": -1.0859375,
"logps/rejected": -1.3046875,
"loss": 1.1702,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.0218505859375,
"rewards/rejected": -0.130859375,
"step": 2190
},
{
"epoch": 1.73,
"grad_norm": 2.311036000971507,
"learning_rate": 2.653471024723547e-08,
"log_odds_chosen": 0.43181151151657104,
"log_odds_ratio": -0.5894531011581421,
"logits/chosen": -1.6171875,
"logits/rejected": -1.6875,
"logps/chosen": -1.015625,
"logps/rejected": -1.3046875,
"loss": 1.1592,
"nll_loss": 1.09375,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.1015625,
"rewards/margins": 0.0289306640625,
"rewards/rejected": -0.130859375,
"step": 2200
},
{
"epoch": 1.74,
"grad_norm": 2.5416101230783363,
"learning_rate": 2.501417940596168e-08,
"log_odds_chosen": 0.02521972730755806,
"log_odds_ratio": -0.75,
"logits/chosen": -1.6875,
"logits/rejected": -1.7109375,
"logps/chosen": -1.1015625,
"logps/rejected": -1.109375,
"loss": 1.1748,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.000507354736328125,
"rewards/rejected": -0.11083984375,
"step": 2210
},
{
"epoch": 1.75,
"grad_norm": 2.471954369214634,
"learning_rate": 2.353621387925375e-08,
"log_odds_chosen": 0.3322509825229645,
"log_odds_ratio": -0.6597656011581421,
"logits/chosen": -1.6015625,
"logits/rejected": -1.671875,
"logps/chosen": -1.03125,
"logps/rejected": -1.2578125,
"loss": 1.1634,
"nll_loss": 1.078125,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.022705078125,
"rewards/rejected": -0.1259765625,
"step": 2220
},
{
"epoch": 1.76,
"grad_norm": 2.801308375939805,
"learning_rate": 2.2101093285033373e-08,
"log_odds_chosen": 0.3058715760707855,
"log_odds_ratio": -0.6502929925918579,
"logits/chosen": -1.6484375,
"logits/rejected": -1.7265625,
"logps/chosen": -1.046875,
"logps/rejected": -1.25,
"loss": 1.1307,
"nll_loss": 1.03125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.020263671875,
"rewards/rejected": -0.125,
"step": 2230
},
{
"epoch": 1.76,
"grad_norm": 2.6457419438315233,
"learning_rate": 2.070908913534236e-08,
"log_odds_chosen": 0.24928589165210724,
"log_odds_ratio": -0.6776367425918579,
"logits/chosen": -1.65625,
"logits/rejected": -1.6640625,
"logps/chosen": -1.078125,
"logps/rejected": -1.234375,
"loss": 1.2119,
"nll_loss": 1.1953125,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10791015625,
"rewards/margins": 0.015869140625,
"rewards/rejected": -0.12353515625,
"step": 2240
},
{
"epoch": 1.77,
"grad_norm": 2.8172476905649764,
"learning_rate": 1.9360464784975024e-08,
"log_odds_chosen": 0.21148681640625,
"log_odds_ratio": -0.6849609613418579,
"logits/chosen": -1.5625,
"logits/rejected": -1.65625,
"logps/chosen": -1.0703125,
"logps/rejected": -1.21875,
"loss": 1.1771,
"nll_loss": 1.109375,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.0146484375,
"rewards/rejected": -0.1220703125,
"step": 2250
},
{
"epoch": 1.78,
"grad_norm": 2.963550785069561,
"learning_rate": 1.8055475381653807e-08,
"log_odds_chosen": 0.27608031034469604,
"log_odds_ratio": -0.6513671875,
"logits/chosen": -1.71875,
"logits/rejected": -1.7421875,
"logps/chosen": -1.046875,
"logps/rejected": -1.234375,
"loss": 1.2014,
"nll_loss": 1.109375,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.1044921875,
"rewards/margins": 0.018798828125,
"rewards/rejected": -0.12353515625,
"step": 2260
},
{
"epoch": 1.79,
"grad_norm": 3.1613513759896534,
"learning_rate": 1.679436781775759e-08,
"log_odds_chosen": 0.31138914823532104,
"log_odds_ratio": -0.675976574420929,
"logits/chosen": -1.65625,
"logits/rejected": -1.7421875,
"logps/chosen": -1.109375,
"logps/rejected": -1.3203125,
"loss": 1.1978,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.0213623046875,
"rewards/rejected": -0.1318359375,
"step": 2270
},
{
"epoch": 1.8,
"grad_norm": 2.92153711849868,
"learning_rate": 1.5577380683611807e-08,
"log_odds_chosen": 0.2562316954135895,
"log_odds_ratio": -0.6595703363418579,
"logits/chosen": -1.671875,
"logits/rejected": -1.7265625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.21875,
"loss": 1.198,
"nll_loss": 1.09375,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.0164794921875,
"rewards/rejected": -0.12158203125,
"step": 2280
},
{
"epoch": 1.8,
"grad_norm": 3.486180847986093,
"learning_rate": 1.4404744222349358e-08,
"log_odds_chosen": 0.48161619901657104,
"log_odds_ratio": -0.605664074420929,
"logits/chosen": -1.625,
"logits/rejected": -1.6875,
"logps/chosen": -1.03125,
"logps/rejected": -1.359375,
"loss": 1.172,
"nll_loss": 1.078125,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.032470703125,
"rewards/rejected": -0.1357421875,
"step": 2290
},
{
"epoch": 1.81,
"grad_norm": 2.50535325154869,
"learning_rate": 1.3276680286350594e-08,
"log_odds_chosen": 0.31635743379592896,
"log_odds_ratio": -0.641796886920929,
"logits/chosen": -1.6875,
"logits/rejected": -1.8203125,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2734375,
"loss": 1.204,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0203857421875,
"rewards/rejected": -0.126953125,
"step": 2300
},
{
"epoch": 1.82,
"grad_norm": 2.6381410134685392,
"learning_rate": 1.2193402295270854e-08,
"log_odds_chosen": 0.2996459901332855,
"log_odds_ratio": -0.6700195074081421,
"logits/chosen": -1.5234375,
"logits/rejected": -1.671875,
"logps/chosen": -1.03125,
"logps/rejected": -1.234375,
"loss": 1.1512,
"nll_loss": 1.125,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10302734375,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.12353515625,
"step": 2310
},
{
"epoch": 1.83,
"grad_norm": 2.344444293869932,
"learning_rate": 1.115511519566334e-08,
"log_odds_chosen": 0.3412719666957855,
"log_odds_ratio": -0.657519519329071,
"logits/chosen": -1.6171875,
"logits/rejected": -1.7578125,
"logps/chosen": -0.98828125,
"logps/rejected": -1.234375,
"loss": 1.1698,
"nll_loss": 1.03125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.0986328125,
"rewards/margins": 0.0244140625,
"rewards/rejected": -0.123046875,
"step": 2320
},
{
"epoch": 1.84,
"grad_norm": 2.277202951820151,
"learning_rate": 1.01620154222051e-08,
"log_odds_chosen": 0.14781494438648224,
"log_odds_ratio": -0.708984375,
"logits/chosen": -1.6171875,
"logits/rejected": -1.6953125,
"logps/chosen": -1.125,
"logps/rejected": -1.203125,
"loss": 1.1759,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.11181640625,
"rewards/margins": 0.0079345703125,
"rewards/rejected": -0.1201171875,
"step": 2330
},
{
"epoch": 1.84,
"grad_norm": 2.5811302552961943,
"learning_rate": 9.214290860533242e-09,
"log_odds_chosen": 0.22308655083179474,
"log_odds_ratio": -0.6734374761581421,
"logits/chosen": -1.6640625,
"logits/rejected": -1.7421875,
"logps/chosen": -1.1015625,
"logps/rejected": -1.234375,
"loss": 1.205,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.1103515625,
"rewards/margins": 0.0133056640625,
"rewards/rejected": -0.12353515625,
"step": 2340
},
{
"epoch": 1.85,
"grad_norm": 2.4190439831152326,
"learning_rate": 8.312120811698798e-09,
"log_odds_chosen": 0.24127808213233948,
"log_odds_ratio": -0.6958984136581421,
"logits/chosen": -1.6875,
"logits/rejected": -1.7734375,
"logps/chosen": -1.0625,
"logps/rejected": -1.234375,
"loss": 1.1753,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.017333984375,
"rewards/rejected": -0.12353515625,
"step": 2350
},
{
"epoch": 1.86,
"grad_norm": 2.3733902102708897,
"learning_rate": 7.455675958244422e-09,
"log_odds_chosen": 0.2683349549770355,
"log_odds_ratio": -0.660449206829071,
"logits/chosen": -1.6484375,
"logits/rejected": -1.765625,
"logps/chosen": -1.078125,
"logps/rejected": -1.2578125,
"loss": 1.1939,
"nll_loss": 1.125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.0184326171875,
"rewards/rejected": -0.1259765625,
"step": 2360
},
{
"epoch": 1.87,
"grad_norm": 2.581589417454669,
"learning_rate": 6.64511833191278e-09,
"log_odds_chosen": 0.16912230849266052,
"log_odds_ratio": -0.703125,
"logits/chosen": -1.640625,
"logits/rejected": -1.765625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.1796875,
"loss": 1.1997,
"nll_loss": 1.15625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.01220703125,
"rewards/rejected": -0.1181640625,
"step": 2370
},
{
"epoch": 1.87,
"grad_norm": 2.8853418036737297,
"learning_rate": 5.8806012829916985e-09,
"log_odds_chosen": 0.3464111387729645,
"log_odds_ratio": -0.615234375,
"logits/chosen": -1.71875,
"logits/rejected": -1.8125,
"logps/chosen": -1.0078125,
"logps/rejected": -1.2421875,
"loss": 1.1569,
"nll_loss": 1.078125,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.10107421875,
"rewards/margins": 0.02294921875,
"rewards/rejected": -0.1240234375,
"step": 2380
},
{
"epoch": 1.88,
"grad_norm": 2.5807272249096913,
"learning_rate": 5.162269451301576e-09,
"log_odds_chosen": 0.16444091498851776,
"log_odds_ratio": -0.6973632574081421,
"logits/chosen": -1.78125,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0859375,
"logps/rejected": -1.2109375,
"loss": 1.2152,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.01202392578125,
"rewards/rejected": -0.12060546875,
"step": 2390
},
{
"epoch": 1.89,
"grad_norm": 2.6925182997680515,
"learning_rate": 4.490258738830771e-09,
"log_odds_chosen": 0.2374267578125,
"log_odds_ratio": -0.6766601800918579,
"logits/chosen": -1.640625,
"logits/rejected": -1.6875,
"logps/chosen": -1.15625,
"logps/rejected": -1.3203125,
"loss": 1.2008,
"nll_loss": 1.1796875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.115234375,
"rewards/margins": 0.016357421875,
"rewards/rejected": -0.1318359375,
"step": 2400
},
{
"epoch": 1.9,
"grad_norm": 3.01082383722649,
"learning_rate": 3.864696284024249e-09,
"log_odds_chosen": 0.38875120878219604,
"log_odds_ratio": -0.6109374761581421,
"logits/chosen": -1.59375,
"logits/rejected": -1.71875,
"logps/chosen": -1.0625,
"logps/rejected": -1.3359375,
"loss": 1.1823,
"nll_loss": 1.0859375,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.028076171875,
"rewards/rejected": -0.1337890625,
"step": 2410
},
{
"epoch": 1.91,
"grad_norm": 2.7770288933270755,
"learning_rate": 3.285700437730077e-09,
"log_odds_chosen": 0.35822755098342896,
"log_odds_ratio": -0.6693359613418579,
"logits/chosen": -1.5859375,
"logits/rejected": -1.6796875,
"logps/chosen": -1.0859375,
"logps/rejected": -1.3515625,
"loss": 1.1699,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1083984375,
"rewards/margins": 0.0264892578125,
"rewards/rejected": -0.134765625,
"step": 2420
},
{
"epoch": 1.91,
"grad_norm": 2.5907890754339262,
"learning_rate": 2.7533807408084973e-09,
"log_odds_chosen": 0.17686156928539276,
"log_odds_ratio": -0.698535144329071,
"logits/chosen": -1.7421875,
"logits/rejected": -1.8203125,
"logps/chosen": -1.1015625,
"logps/rejected": -1.2109375,
"loss": 1.1861,
"nll_loss": 1.140625,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.10986328125,
"rewards/margins": 0.01153564453125,
"rewards/rejected": -0.12109375,
"step": 2430
},
{
"epoch": 1.92,
"grad_norm": 2.4966778392886955,
"learning_rate": 2.2678379034077877e-09,
"log_odds_chosen": 0.2807373106479645,
"log_odds_ratio": -0.6429687738418579,
"logits/chosen": -1.640625,
"logits/rejected": -1.7109375,
"logps/chosen": -1.09375,
"logps/rejected": -1.2734375,
"loss": 1.2285,
"nll_loss": 1.1484375,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.10888671875,
"rewards/margins": 0.018310546875,
"rewards/rejected": -0.126953125,
"step": 2440
},
{
"epoch": 1.93,
"grad_norm": 2.3101513660116466,
"learning_rate": 1.82916378591072e-09,
"log_odds_chosen": 0.35594481229782104,
"log_odds_ratio": -0.6319335699081421,
"logits/chosen": -1.6875,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.28125,
"loss": 1.1772,
"nll_loss": 1.125,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.0225830078125,
"rewards/rejected": -0.1279296875,
"step": 2450
},
{
"epoch": 1.94,
"grad_norm": 2.550820385872949,
"learning_rate": 1.4374413815555763e-09,
"log_odds_chosen": 0.21019287407398224,
"log_odds_ratio": -0.6913086175918579,
"logits/chosen": -1.6484375,
"logits/rejected": -1.6796875,
"logps/chosen": -1.09375,
"logps/rejected": -1.25,
"loss": 1.2096,
"nll_loss": 1.15625,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0157470703125,
"rewards/rejected": -0.125,
"step": 2460
},
{
"epoch": 1.95,
"grad_norm": 2.3418264361304293,
"learning_rate": 1.0927448007343188e-09,
"log_odds_chosen": 0.2827392518520355,
"log_odds_ratio": -0.6519531011581421,
"logits/chosen": -1.671875,
"logits/rejected": -1.796875,
"logps/chosen": -1.0703125,
"logps/rejected": -1.2734375,
"loss": 1.2057,
"nll_loss": 1.1328125,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10693359375,
"rewards/margins": 0.0203857421875,
"rewards/rejected": -0.126953125,
"step": 2470
},
{
"epoch": 1.95,
"grad_norm": 3.0181770379881936,
"learning_rate": 7.951392569717774e-10,
"log_odds_chosen": 0.32861328125,
"log_odds_ratio": -0.641406238079071,
"logits/chosen": -1.71875,
"logits/rejected": -1.7421875,
"logps/chosen": -1.09375,
"logps/rejected": -1.296875,
"loss": 1.2017,
"nll_loss": 1.125,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.109375,
"rewards/margins": 0.0206298828125,
"rewards/rejected": -0.1298828125,
"step": 2480
},
{
"epoch": 1.96,
"grad_norm": 2.6598066518808965,
"learning_rate": 5.446810545877423e-10,
"log_odds_chosen": 0.29930418729782104,
"log_odds_ratio": -0.6373046636581421,
"logits/chosen": -1.703125,
"logits/rejected": -1.7890625,
"logps/chosen": -1.0703125,
"logps/rejected": -1.28125,
"loss": 1.1938,
"nll_loss": 1.125,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.107421875,
"rewards/margins": 0.021240234375,
"rewards/rejected": -0.12890625,
"step": 2490
},
{
"epoch": 1.97,
"grad_norm": 2.612456535550154,
"learning_rate": 3.414175780446227e-10,
"log_odds_chosen": 0.27032470703125,
"log_odds_ratio": -0.64501953125,
"logits/chosen": -1.6015625,
"logits/rejected": -1.671875,
"logps/chosen": -1.0625,
"logps/rejected": -1.2265625,
"loss": 1.2178,
"nll_loss": 1.125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10595703125,
"rewards/margins": 0.0167236328125,
"rewards/rejected": -0.12255859375,
"step": 2500
},
{
"epoch": 1.98,
"grad_norm": 2.5663855569599123,
"learning_rate": 1.8538728298292395e-10,
"log_odds_chosen": 0.25556641817092896,
"log_odds_ratio": -0.6698242425918579,
"logits/chosen": -1.6171875,
"logits/rejected": -1.6640625,
"logps/chosen": -1.0546875,
"logps/rejected": -1.234375,
"loss": 1.198,
"nll_loss": 1.1015625,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.10546875,
"rewards/margins": 0.01806640625,
"rewards/rejected": -0.12353515625,
"step": 2510
},
{
"epoch": 1.99,
"grad_norm": 2.5581951001300336,
"learning_rate": 7.661968894551174e-11,
"log_odds_chosen": 0.3158630430698395,
"log_odds_ratio": -0.636425793170929,
"logits/chosen": -1.7109375,
"logits/rejected": -1.828125,
"logps/chosen": -1.0234375,
"logps/rejected": -1.2265625,
"loss": 1.1801,
"nll_loss": 1.09375,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.1025390625,
"rewards/margins": 0.0205078125,
"rewards/rejected": -0.123046875,
"step": 2520
},
{
"epoch": 1.99,
"grad_norm": 2.6003805241560958,
"learning_rate": 1.513537379305152e-11,
"log_odds_chosen": 0.20820312201976776,
"log_odds_ratio": -0.664843738079071,
"logits/chosen": -1.671875,
"logits/rejected": -1.734375,
"logps/chosen": -1.109375,
"logps/rejected": -1.2421875,
"loss": 1.2092,
"nll_loss": 1.1171875,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11083984375,
"rewards/margins": 0.0135498046875,
"rewards/rejected": -0.12451171875,
"step": 2530
},
{
"epoch": 2.0,
"step": 2538,
"total_flos": 0.0,
"train_loss": 1.2334148878183206,
"train_runtime": 17382.4906,
"train_samples_per_second": 7.009,
"train_steps_per_second": 0.146
}
],
"logging_steps": 10,
"max_steps": 2538,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}