zephyr-7b / trainer_state.json
jikaixuan's picture
Model save
59b129b verified
raw
history blame
No virus
34.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.59375,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -2.1666858196258545,
"logits/rejected": -2.182244300842285,
"logps/chosen": -12.368609428405762,
"logps/rejected": -24.687644958496094,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.02,
"grad_norm": 0.6796875,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.2281553745269775,
"logits/rejected": -2.276446580886841,
"logps/chosen": -57.036190032958984,
"logps/rejected": -66.88007354736328,
"loss": 0.6927,
"pred_label": 0.0,
"rewards/accuracies": 0.24013157188892365,
"rewards/chosen": 0.003924594726413488,
"rewards/margins": 0.0009102027979679406,
"rewards/rejected": 0.0030143915209919214,
"step": 20,
"use_label": 170.0
},
{
"epoch": 0.04,
"grad_norm": 0.6328125,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.2738099098205566,
"logits/rejected": -2.2623789310455322,
"logps/chosen": -54.78137969970703,
"logps/rejected": -67.2437515258789,
"loss": 0.6914,
"pred_label": 0.0,
"rewards/accuracies": 0.24687500298023224,
"rewards/chosen": 0.01747792772948742,
"rewards/margins": 0.001674558618105948,
"rewards/rejected": 0.015803368762135506,
"step": 40,
"use_label": 482.0
},
{
"epoch": 0.06,
"grad_norm": 0.71875,
"learning_rate": 3.125e-06,
"logits/chosen": -2.3237431049346924,
"logits/rejected": -2.321906089782715,
"logps/chosen": -75.5770034790039,
"logps/rejected": -87.68544006347656,
"loss": 0.6885,
"pred_label": 0.0,
"rewards/accuracies": 0.3125,
"rewards/chosen": 0.031676117330789566,
"rewards/margins": 0.009719676338136196,
"rewards/rejected": 0.021956440061330795,
"step": 60,
"use_label": 802.0
},
{
"epoch": 0.08,
"grad_norm": 0.73828125,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.2948005199432373,
"logits/rejected": -2.2623462677001953,
"logps/chosen": -79.29240417480469,
"logps/rejected": -83.04844665527344,
"loss": 0.6876,
"pred_label": 5.800000190734863,
"rewards/accuracies": 0.3343749940395355,
"rewards/chosen": 0.016009245067834854,
"rewards/margins": 0.018887853249907494,
"rewards/rejected": -0.0028786074835807085,
"step": 80,
"use_label": 1116.199951171875
},
{
"epoch": 0.1,
"grad_norm": 0.6953125,
"learning_rate": 4.9997324926814375e-06,
"logits/chosen": -2.2056884765625,
"logits/rejected": -2.210036039352417,
"logps/chosen": -68.87937927246094,
"logps/rejected": -77.87590026855469,
"loss": 0.6876,
"pred_label": 27.537500381469727,
"rewards/accuracies": 0.34062498807907104,
"rewards/chosen": -0.010471501387655735,
"rewards/margins": 0.03584115579724312,
"rewards/rejected": -0.04631265625357628,
"step": 100,
"use_label": 1414.4625244140625
},
{
"epoch": 0.1,
"eval_logits/chosen": -2.1076083183288574,
"eval_logits/rejected": -2.0761499404907227,
"eval_logps/chosen": -74.44951629638672,
"eval_logps/rejected": -85.2883071899414,
"eval_loss": 0.6895647048950195,
"eval_pred_label": 89.14286041259766,
"eval_rewards/accuracies": 0.335317462682724,
"eval_rewards/chosen": -0.05548960343003273,
"eval_rewards/margins": 0.04341282695531845,
"eval_rewards/rejected": -0.09890241920948029,
"eval_runtime": 247.5952,
"eval_samples_per_second": 8.078,
"eval_steps_per_second": 0.254,
"eval_use_label": 1766.857177734375,
"step": 100
},
{
"epoch": 0.13,
"grad_norm": 0.7578125,
"learning_rate": 4.9903757462135984e-06,
"logits/chosen": -2.2542896270751953,
"logits/rejected": -2.1902401447296143,
"logps/chosen": -70.2941665649414,
"logps/rejected": -84.7874755859375,
"loss": 0.6884,
"pred_label": 155.6374969482422,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.023759985342621803,
"rewards/margins": 0.051492441445589066,
"rewards/rejected": -0.07525241374969482,
"step": 120,
"use_label": 2110.362548828125
},
{
"epoch": 0.15,
"grad_norm": 0.55859375,
"learning_rate": 4.967700826904229e-06,
"logits/chosen": -2.1823272705078125,
"logits/rejected": -2.210157632827759,
"logps/chosen": -61.80498504638672,
"logps/rejected": -76.43424224853516,
"loss": 0.6907,
"pred_label": 204.22500610351562,
"rewards/accuracies": 0.26875001192092896,
"rewards/chosen": -0.029314354062080383,
"rewards/margins": 0.036702848970890045,
"rewards/rejected": -0.06601719558238983,
"step": 140,
"use_label": 2381.77490234375
},
{
"epoch": 0.17,
"grad_norm": 0.70703125,
"learning_rate": 4.931828996974498e-06,
"logits/chosen": -2.251568555831909,
"logits/rejected": -2.220432996749878,
"logps/chosen": -66.60148620605469,
"logps/rejected": -71.53702545166016,
"loss": 0.69,
"pred_label": 257.2124938964844,
"rewards/accuracies": 0.3343749940395355,
"rewards/chosen": -0.020524730905890465,
"rewards/margins": 0.05932433158159256,
"rewards/rejected": -0.07984906435012817,
"step": 160,
"use_label": 2648.78759765625
},
{
"epoch": 0.19,
"grad_norm": 0.6796875,
"learning_rate": 4.882952093833628e-06,
"logits/chosen": -2.114015817642212,
"logits/rejected": -2.126950740814209,
"logps/chosen": -66.40071868896484,
"logps/rejected": -78.54503631591797,
"loss": 0.6901,
"pred_label": 319.9624938964844,
"rewards/accuracies": 0.328125,
"rewards/chosen": -0.03171534463763237,
"rewards/margins": 0.0544399619102478,
"rewards/rejected": -0.08615531027317047,
"step": 180,
"use_label": 2906.03759765625
},
{
"epoch": 0.21,
"grad_norm": 0.9140625,
"learning_rate": 4.821331504159906e-06,
"logits/chosen": -2.138213872909546,
"logits/rejected": -2.108750343322754,
"logps/chosen": -77.92289733886719,
"logps/rejected": -78.32075500488281,
"loss": 0.6892,
"pred_label": 383.5249938964844,
"rewards/accuracies": 0.37812501192092896,
"rewards/chosen": -0.009543296881020069,
"rewards/margins": 0.06037301942706108,
"rewards/rejected": -0.06991632282733917,
"step": 200,
"use_label": 3162.47509765625
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.051973581314087,
"eval_logits/rejected": -2.028658390045166,
"eval_logps/chosen": -69.3875503540039,
"eval_logps/rejected": -80.99542999267578,
"eval_loss": 0.6893584132194519,
"eval_pred_label": 459.1111145019531,
"eval_rewards/accuracies": 0.3492063581943512,
"eval_rewards/chosen": -0.0048699695616960526,
"eval_rewards/margins": 0.05110359564423561,
"eval_rewards/rejected": -0.05597356706857681,
"eval_runtime": 247.8689,
"eval_samples_per_second": 8.069,
"eval_steps_per_second": 0.254,
"eval_use_label": 3500.888916015625,
"step": 200
},
{
"epoch": 0.23,
"grad_norm": 0.765625,
"learning_rate": 4.747296766042161e-06,
"logits/chosen": -2.172316074371338,
"logits/rejected": -2.1599390506744385,
"logps/chosen": -73.75865173339844,
"logps/rejected": -76.45826721191406,
"loss": 0.6906,
"pred_label": 537.4000244140625,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.017265746369957924,
"rewards/margins": 0.061459798365831375,
"rewards/rejected": -0.07872554659843445,
"step": 220,
"use_label": 3832.60009765625
},
{
"epoch": 0.25,
"grad_norm": 0.671875,
"learning_rate": 4.661243806657256e-06,
"logits/chosen": -2.1377243995666504,
"logits/rejected": -2.114131450653076,
"logps/chosen": -78.08522033691406,
"logps/rejected": -88.16291809082031,
"loss": 0.6906,
"pred_label": 610.8624877929688,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.06858871877193451,
"rewards/margins": 0.07855252921581268,
"rewards/rejected": -0.1471412628889084,
"step": 240,
"use_label": 4079.137451171875
},
{
"epoch": 0.27,
"grad_norm": 0.70703125,
"learning_rate": 4.563632824908252e-06,
"logits/chosen": -2.1762757301330566,
"logits/rejected": -2.173243999481201,
"logps/chosen": -69.33678436279297,
"logps/rejected": -82.98787689208984,
"loss": 0.6907,
"pred_label": 682.2750244140625,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.06302420794963837,
"rewards/margins": 0.0732887014746666,
"rewards/rejected": -0.13631291687488556,
"step": 260,
"use_label": 4327.72509765625
},
{
"epoch": 0.29,
"grad_norm": 0.625,
"learning_rate": 4.454985830346574e-06,
"logits/chosen": -2.16465425491333,
"logits/rejected": -2.1788923740386963,
"logps/chosen": -74.41441345214844,
"logps/rejected": -78.55416870117188,
"loss": 0.6892,
"pred_label": 749.125,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.06083650514483452,
"rewards/margins": 0.04520425945520401,
"rewards/rejected": -0.10604077577590942,
"step": 280,
"use_label": 4580.875
},
{
"epoch": 0.31,
"grad_norm": 0.65234375,
"learning_rate": 4.335883851539693e-06,
"logits/chosen": -2.0553781986236572,
"logits/rejected": -2.0573229789733887,
"logps/chosen": -69.96788024902344,
"logps/rejected": -80.52223205566406,
"loss": 0.6904,
"pred_label": 824.5499877929688,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.04866168648004532,
"rewards/margins": 0.09801270812749863,
"rewards/rejected": -0.14667439460754395,
"step": 300,
"use_label": 4825.4501953125
},
{
"epoch": 0.31,
"eval_logits/chosen": -2.0163989067077637,
"eval_logits/rejected": -1.9942671060562134,
"eval_logps/chosen": -75.15243530273438,
"eval_logps/rejected": -89.50163269042969,
"eval_loss": 0.6908969879150391,
"eval_pred_label": 923.3174438476562,
"eval_rewards/accuracies": 0.3531745970249176,
"eval_rewards/chosen": -0.06251893937587738,
"eval_rewards/margins": 0.07851671427488327,
"eval_rewards/rejected": -0.14103564620018005,
"eval_runtime": 247.8241,
"eval_samples_per_second": 8.07,
"eval_steps_per_second": 0.254,
"eval_use_label": 5140.6826171875,
"step": 300
},
{
"epoch": 0.33,
"grad_norm": 0.9140625,
"learning_rate": 4.206963828813555e-06,
"logits/chosen": -2.065279483795166,
"logits/rejected": -2.0684821605682373,
"logps/chosen": -72.58639526367188,
"logps/rejected": -89.45655822753906,
"loss": 0.6899,
"pred_label": 1033.7874755859375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.11120834201574326,
"rewards/margins": 0.0645986869931221,
"rewards/rejected": -0.17580702900886536,
"step": 320,
"use_label": 5440.21240234375
},
{
"epoch": 0.36,
"grad_norm": 0.56640625,
"learning_rate": 4.068915207986931e-06,
"logits/chosen": -2.033398151397705,
"logits/rejected": -1.991502046585083,
"logps/chosen": -71.1894760131836,
"logps/rejected": -84.0774154663086,
"loss": 0.6917,
"pred_label": 1122.112548828125,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.07950185984373093,
"rewards/margins": 0.08617939054965973,
"rewards/rejected": -0.16568127274513245,
"step": 340,
"use_label": 5671.8876953125
},
{
"epoch": 0.38,
"grad_norm": 0.84765625,
"learning_rate": 3.922476253313921e-06,
"logits/chosen": -2.0358688831329346,
"logits/rejected": -2.0224781036376953,
"logps/chosen": -76.57051849365234,
"logps/rejected": -84.2589340209961,
"loss": 0.6914,
"pred_label": 1204.4124755859375,
"rewards/accuracies": 0.31562501192092896,
"rewards/chosen": -0.11715561151504517,
"rewards/margins": 0.07723374664783478,
"rewards/rejected": -0.19438934326171875,
"step": 360,
"use_label": 5909.58740234375
},
{
"epoch": 0.4,
"grad_norm": 0.55078125,
"learning_rate": 3.768430099352445e-06,
"logits/chosen": -2.12782621383667,
"logits/rejected": -2.086026430130005,
"logps/chosen": -74.41622161865234,
"logps/rejected": -85.17180633544922,
"loss": 0.6918,
"pred_label": 1289.9375,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.07592298835515976,
"rewards/margins": 0.08457346260547638,
"rewards/rejected": -0.16049645841121674,
"step": 380,
"use_label": 6144.0625
},
{
"epoch": 0.42,
"grad_norm": 0.73046875,
"learning_rate": 3.607600562872785e-06,
"logits/chosen": -2.126784086227417,
"logits/rejected": -2.1261298656463623,
"logps/chosen": -83.82131958007812,
"logps/rejected": -86.00455474853516,
"loss": 0.6906,
"pred_label": 1373.137451171875,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.05874443054199219,
"rewards/margins": 0.06775099784135818,
"rewards/rejected": -0.12649545073509216,
"step": 400,
"use_label": 6380.8623046875
},
{
"epoch": 0.42,
"eval_logits/chosen": -2.0480618476867676,
"eval_logits/rejected": -2.0248324871063232,
"eval_logps/chosen": -75.26866149902344,
"eval_logps/rejected": -90.80635070800781,
"eval_loss": 0.6920759081840515,
"eval_pred_label": 1472.5714111328125,
"eval_rewards/accuracies": 0.3511904776096344,
"eval_rewards/chosen": -0.06368114054203033,
"eval_rewards/margins": 0.09040173143148422,
"eval_rewards/rejected": -0.15408287942409515,
"eval_runtime": 248.0088,
"eval_samples_per_second": 8.064,
"eval_steps_per_second": 0.254,
"eval_use_label": 6695.4287109375,
"step": 400
},
{
"epoch": 0.44,
"grad_norm": 0.78515625,
"learning_rate": 3.4408477372034743e-06,
"logits/chosen": -2.055358409881592,
"logits/rejected": -2.068175792694092,
"logps/chosen": -70.47552490234375,
"logps/rejected": -79.02010345458984,
"loss": 0.6903,
"pred_label": 1589.0374755859375,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.06399895995855331,
"rewards/margins": 0.0963120311498642,
"rewards/rejected": -0.16031098365783691,
"step": 420,
"use_label": 6988.96240234375
},
{
"epoch": 0.46,
"grad_norm": 0.95703125,
"learning_rate": 3.269063392575352e-06,
"logits/chosen": -2.0893940925598145,
"logits/rejected": -2.09212589263916,
"logps/chosen": -85.68560028076172,
"logps/rejected": -87.41291809082031,
"loss": 0.6912,
"pred_label": 1667.6875,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.13728377223014832,
"rewards/margins": 0.07875251770019531,
"rewards/rejected": -0.21603628993034363,
"step": 440,
"use_label": 7230.3125
},
{
"epoch": 0.48,
"grad_norm": 0.53515625,
"learning_rate": 3.09316620706208e-06,
"logits/chosen": -2.079465389251709,
"logits/rejected": -2.091001033782959,
"logps/chosen": -73.67254638671875,
"logps/rejected": -81.05415344238281,
"loss": 0.6916,
"pred_label": 1751.75,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.0876312330365181,
"rewards/margins": 0.08376732468605042,
"rewards/rejected": -0.17139855027198792,
"step": 460,
"use_label": 7466.25
},
{
"epoch": 0.5,
"grad_norm": 0.69921875,
"learning_rate": 2.91409685362137e-06,
"logits/chosen": -2.0379364490509033,
"logits/rejected": -2.0492634773254395,
"logps/chosen": -77.06828308105469,
"logps/rejected": -89.38865661621094,
"loss": 0.6912,
"pred_label": 1832.6500244140625,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.06041146069765091,
"rewards/margins": 0.10216375440359116,
"rewards/rejected": -0.16257521510124207,
"step": 480,
"use_label": 7705.35009765625
},
{
"epoch": 0.52,
"grad_norm": 0.86328125,
"learning_rate": 2.7328129695107205e-06,
"logits/chosen": -2.031346082687378,
"logits/rejected": -2.0272762775421143,
"logps/chosen": -79.55888366699219,
"logps/rejected": -84.47586822509766,
"loss": 0.6903,
"pred_label": 1919.5374755859375,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.08177755773067474,
"rewards/margins": 0.08017835766077042,
"rewards/rejected": -0.16195592284202576,
"step": 500,
"use_label": 7938.46240234375
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.0070507526397705,
"eval_logits/rejected": -1.9800992012023926,
"eval_logps/chosen": -76.36968231201172,
"eval_logps/rejected": -92.65614318847656,
"eval_loss": 0.6914148926734924,
"eval_pred_label": 2025.793701171875,
"eval_rewards/accuracies": 0.3492063581943512,
"eval_rewards/chosen": -0.07469133287668228,
"eval_rewards/margins": 0.09788943827152252,
"eval_rewards/rejected": -0.1725807636976242,
"eval_runtime": 247.8554,
"eval_samples_per_second": 8.069,
"eval_steps_per_second": 0.254,
"eval_use_label": 8246.2060546875,
"step": 500
},
{
"epoch": 0.54,
"grad_norm": 0.78125,
"learning_rate": 2.5502840349805074e-06,
"logits/chosen": -2.026449203491211,
"logits/rejected": -2.0701510906219482,
"logps/chosen": -75.1209487915039,
"logps/rejected": -88.01356506347656,
"loss": 0.6913,
"pred_label": 2148.887451171875,
"rewards/accuracies": 0.3531250059604645,
"rewards/chosen": -0.06801941990852356,
"rewards/margins": 0.09691040217876434,
"rewards/rejected": -0.1649298369884491,
"step": 520,
"use_label": 8533.1123046875
},
{
"epoch": 0.57,
"grad_norm": 1.09375,
"learning_rate": 2.367486188632446e-06,
"logits/chosen": -2.0245327949523926,
"logits/rejected": -2.0479135513305664,
"logps/chosen": -84.60169219970703,
"logps/rejected": -90.6330795288086,
"loss": 0.692,
"pred_label": 2235.550048828125,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.09091995656490326,
"rewards/margins": 0.11123095452785492,
"rewards/rejected": -0.20215091109275818,
"step": 540,
"use_label": 8766.4501953125
},
{
"epoch": 0.59,
"grad_norm": 0.75390625,
"learning_rate": 2.1853970071701415e-06,
"logits/chosen": -2.0177600383758545,
"logits/rejected": -2.016798257827759,
"logps/chosen": -78.94650268554688,
"logps/rejected": -80.36412811279297,
"loss": 0.6917,
"pred_label": 2319.53759765625,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": -0.10138510167598724,
"rewards/margins": 0.06911652535200119,
"rewards/rejected": -0.17050163447856903,
"step": 560,
"use_label": 9002.462890625
},
{
"epoch": 0.61,
"grad_norm": 0.71875,
"learning_rate": 2.00499027745888e-06,
"logits/chosen": -2.054065704345703,
"logits/rejected": -2.0555384159088135,
"logps/chosen": -80.3529281616211,
"logps/rejected": -95.12947082519531,
"loss": 0.6919,
"pred_label": 2401.675048828125,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.09597108513116837,
"rewards/margins": 0.09131233394145966,
"rewards/rejected": -0.18728342652320862,
"step": 580,
"use_label": 9240.3251953125
},
{
"epoch": 0.63,
"grad_norm": 0.76171875,
"learning_rate": 1.8272307888529276e-06,
"logits/chosen": -2.059126377105713,
"logits/rejected": -2.099806547164917,
"logps/chosen": -89.58797454833984,
"logps/rejected": -108.6166000366211,
"loss": 0.6903,
"pred_label": 2492.9375,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.12580521404743195,
"rewards/margins": 0.10241512209177017,
"rewards/rejected": -0.22822031378746033,
"step": 600,
"use_label": 9469.0625
},
{
"epoch": 0.63,
"eval_logits/chosen": -1.9870026111602783,
"eval_logits/rejected": -1.960112452507019,
"eval_logps/chosen": -78.95431518554688,
"eval_logps/rejected": -95.86695861816406,
"eval_loss": 0.6917396187782288,
"eval_pred_label": 2603.9365234375,
"eval_rewards/accuracies": 0.3551587164402008,
"eval_rewards/chosen": -0.1005377396941185,
"eval_rewards/margins": 0.104151152074337,
"eval_rewards/rejected": -0.2046888917684555,
"eval_runtime": 247.9642,
"eval_samples_per_second": 8.066,
"eval_steps_per_second": 0.254,
"eval_use_label": 9772.0634765625,
"step": 600
},
{
"epoch": 0.65,
"grad_norm": 0.5859375,
"learning_rate": 1.6530691736402317e-06,
"logits/chosen": -1.9752880334854126,
"logits/rejected": -2.011981964111328,
"logps/chosen": -69.71615600585938,
"logps/rejected": -95.88337707519531,
"loss": 0.6918,
"pred_label": 2726.324951171875,
"rewards/accuracies": 0.34687501192092896,
"rewards/chosen": -0.09408678859472275,
"rewards/margins": 0.09362435340881348,
"rewards/rejected": -0.18771114945411682,
"step": 620,
"use_label": 10059.6748046875
},
{
"epoch": 0.67,
"grad_norm": 0.73046875,
"learning_rate": 1.4834368231970922e-06,
"logits/chosen": -2.0288071632385254,
"logits/rejected": -2.0409998893737793,
"logps/chosen": -82.56907653808594,
"logps/rejected": -90.75765228271484,
"loss": 0.6894,
"pred_label": 2805.512451171875,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.10210500657558441,
"rewards/margins": 0.10695278644561768,
"rewards/rejected": -0.2090577781200409,
"step": 640,
"use_label": 10300.4873046875
},
{
"epoch": 0.69,
"grad_norm": 0.5625,
"learning_rate": 1.3192409070404582e-06,
"logits/chosen": -2.055405855178833,
"logits/rejected": -2.0071816444396973,
"logps/chosen": -77.25361633300781,
"logps/rejected": -88.34065246582031,
"loss": 0.6915,
"pred_label": 2899.9375,
"rewards/accuracies": 0.34687501192092896,
"rewards/chosen": -0.11595650017261505,
"rewards/margins": 0.0952102541923523,
"rewards/rejected": -0.21116676926612854,
"step": 660,
"use_label": 10526.0625
},
{
"epoch": 0.71,
"grad_norm": 0.67578125,
"learning_rate": 1.1613595214152713e-06,
"logits/chosen": -2.056795597076416,
"logits/rejected": -2.071035861968994,
"logps/chosen": -88.15283203125,
"logps/rejected": -96.39839172363281,
"loss": 0.6918,
"pred_label": 2978.0625,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.12273094803094864,
"rewards/margins": 0.09404005855321884,
"rewards/rejected": -0.2167709767818451,
"step": 680,
"use_label": 10767.9375
},
{
"epoch": 0.73,
"grad_norm": 0.74609375,
"learning_rate": 1.0106369933615043e-06,
"logits/chosen": -2.0782313346862793,
"logits/rejected": -2.0467371940612793,
"logps/chosen": -97.93621826171875,
"logps/rejected": -106.91497802734375,
"loss": 0.6917,
"pred_label": 3075.71240234375,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.1391007900238037,
"rewards/margins": 0.10766571760177612,
"rewards/rejected": -0.24676652252674103,
"step": 700,
"use_label": 10990.287109375
},
{
"epoch": 0.73,
"eval_logits/chosen": -1.9658821821212769,
"eval_logits/rejected": -1.9401167631149292,
"eval_logps/chosen": -80.06806182861328,
"eval_logps/rejected": -97.64107513427734,
"eval_loss": 0.6917343735694885,
"eval_pred_label": 3195.22216796875,
"eval_rewards/accuracies": 0.3511904776096344,
"eval_rewards/chosen": -0.11167524009943008,
"eval_rewards/margins": 0.1107548326253891,
"eval_rewards/rejected": -0.2224300652742386,
"eval_runtime": 247.943,
"eval_samples_per_second": 8.066,
"eval_steps_per_second": 0.254,
"eval_use_label": 11284.77734375,
"step": 700
},
{
"epoch": 0.75,
"grad_norm": 0.72265625,
"learning_rate": 8.678793653740633e-07,
"logits/chosen": -2.015249729156494,
"logits/rejected": -2.0358498096466064,
"logps/chosen": -70.9017562866211,
"logps/rejected": -86.4397201538086,
"loss": 0.6908,
"pred_label": 3306.39990234375,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.10931293666362762,
"rewards/margins": 0.0925455391407013,
"rewards/rejected": -0.20185847580432892,
"step": 720,
"use_label": 11583.599609375
},
{
"epoch": 0.77,
"grad_norm": 0.83203125,
"learning_rate": 7.338500848029603e-07,
"logits/chosen": -2.01334810256958,
"logits/rejected": -2.0296788215637207,
"logps/chosen": -74.19635772705078,
"logps/rejected": -83.99024200439453,
"loss": 0.6911,
"pred_label": 3386.16259765625,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.08706559240818024,
"rewards/margins": 0.11473299562931061,
"rewards/rejected": -0.20179858803749084,
"step": 740,
"use_label": 11823.837890625
},
{
"epoch": 0.8,
"grad_norm": 0.66015625,
"learning_rate": 6.092659210462232e-07,
"logits/chosen": -2.052433967590332,
"logits/rejected": -2.060997724533081,
"logps/chosen": -76.93110656738281,
"logps/rejected": -97.30107879638672,
"loss": 0.6904,
"pred_label": 3466.5,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.11182014644145966,
"rewards/margins": 0.07981495559215546,
"rewards/rejected": -0.1916351020336151,
"step": 760,
"use_label": 12063.5
},
{
"epoch": 0.82,
"grad_norm": 0.859375,
"learning_rate": 4.947931323697983e-07,
"logits/chosen": -2.032320737838745,
"logits/rejected": -2.047227144241333,
"logps/chosen": -89.46810913085938,
"logps/rejected": -95.58660125732422,
"loss": 0.6913,
"pred_label": 3558.875,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.11294672638177872,
"rewards/margins": 0.11753211170434952,
"rewards/rejected": -0.23047883808612823,
"step": 780,
"use_label": 12291.125
},
{
"epoch": 0.84,
"grad_norm": 0.74609375,
"learning_rate": 3.910439028537638e-07,
"logits/chosen": -2.010045289993286,
"logits/rejected": -1.989505410194397,
"logps/chosen": -70.47514343261719,
"logps/rejected": -75.11082458496094,
"loss": 0.6912,
"pred_label": 3649.22509765625,
"rewards/accuracies": 0.3656249940395355,
"rewards/chosen": -0.08034199476242065,
"rewards/margins": 0.0995674580335617,
"rewards/rejected": -0.17990948259830475,
"step": 800,
"use_label": 12520.775390625
},
{
"epoch": 0.84,
"eval_logits/chosen": -1.9421576261520386,
"eval_logits/rejected": -1.9144233465194702,
"eval_logps/chosen": -77.5874252319336,
"eval_logps/rejected": -95.20885467529297,
"eval_loss": 0.6917100548744202,
"eval_pred_label": 3757.174560546875,
"eval_rewards/accuracies": 0.363095223903656,
"eval_rewards/chosen": -0.08686873316764832,
"eval_rewards/margins": 0.11123905330896378,
"eval_rewards/rejected": -0.19810780882835388,
"eval_runtime": 247.8932,
"eval_samples_per_second": 8.068,
"eval_steps_per_second": 0.254,
"eval_use_label": 12826.8251953125,
"step": 800
},
{
"epoch": 0.86,
"grad_norm": 0.828125,
"learning_rate": 2.98573068519539e-07,
"logits/chosen": -2.035728931427002,
"logits/rejected": -2.029679775238037,
"logps/chosen": -74.97032165527344,
"logps/rejected": -84.2763900756836,
"loss": 0.6908,
"pred_label": 3872.199951171875,
"rewards/accuracies": 0.3343749940395355,
"rewards/chosen": -0.1004786491394043,
"rewards/margins": 0.08142165094614029,
"rewards/rejected": -0.181900292634964,
"step": 820,
"use_label": 13121.7998046875
},
{
"epoch": 0.88,
"grad_norm": 0.6953125,
"learning_rate": 2.178751501463036e-07,
"logits/chosen": -2.0276803970336914,
"logits/rejected": -2.0149848461151123,
"logps/chosen": -66.70552062988281,
"logps/rejected": -70.63726806640625,
"loss": 0.6915,
"pred_label": 3954.60009765625,
"rewards/accuracies": 0.28437501192092896,
"rewards/chosen": -0.08035041391849518,
"rewards/margins": 0.07462439686059952,
"rewards/rejected": -0.1549748182296753,
"step": 840,
"use_label": 13359.400390625
},
{
"epoch": 0.9,
"grad_norm": 0.7578125,
"learning_rate": 1.4938170864468636e-07,
"logits/chosen": -2.048083543777466,
"logits/rejected": -2.0321922302246094,
"logps/chosen": -90.8042221069336,
"logps/rejected": -100.8233413696289,
"loss": 0.69,
"pred_label": 4041.72509765625,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.0809466689825058,
"rewards/margins": 0.1332779824733734,
"rewards/rejected": -0.2142246663570404,
"step": 860,
"use_label": 13592.275390625
},
{
"epoch": 0.92,
"grad_norm": 0.5546875,
"learning_rate": 9.345903713082305e-08,
"logits/chosen": -2.047487735748291,
"logits/rejected": -2.034466505050659,
"logps/chosen": -81.69231414794922,
"logps/rejected": -101.5263442993164,
"loss": 0.6915,
"pred_label": 4142.625,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.09660721570253372,
"rewards/margins": 0.13364934921264648,
"rewards/rejected": -0.23025652766227722,
"step": 880,
"use_label": 13811.375
},
{
"epoch": 0.94,
"grad_norm": 0.7578125,
"learning_rate": 5.0406202043228604e-08,
"logits/chosen": -1.9304163455963135,
"logits/rejected": -1.9657026529312134,
"logps/chosen": -75.30284118652344,
"logps/rejected": -99.71704864501953,
"loss": 0.6914,
"pred_label": 4235.9248046875,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.08683101832866669,
"rewards/margins": 0.10066400468349457,
"rewards/rejected": -0.18749502301216125,
"step": 900,
"use_label": 14038.0751953125
},
{
"epoch": 0.94,
"eval_logits/chosen": -1.939072847366333,
"eval_logits/rejected": -1.9112603664398193,
"eval_logps/chosen": -77.5274658203125,
"eval_logps/rejected": -95.22908020019531,
"eval_loss": 0.6917905211448669,
"eval_pred_label": 4352.28564453125,
"eval_rewards/accuracies": 0.3571428656578064,
"eval_rewards/chosen": -0.08626923710107803,
"eval_rewards/margins": 0.1120409369468689,
"eval_rewards/rejected": -0.19831016659736633,
"eval_runtime": 247.7794,
"eval_samples_per_second": 8.072,
"eval_steps_per_second": 0.254,
"eval_use_label": 14335.7138671875,
"step": 900
},
{
"epoch": 0.96,
"grad_norm": 0.80078125,
"learning_rate": 2.0453443778310766e-08,
"logits/chosen": -1.9801095724105835,
"logits/rejected": -1.9714418649673462,
"logps/chosen": -63.8930778503418,
"logps/rejected": -85.15528869628906,
"loss": 0.6906,
"pred_label": 4473.8125,
"rewards/accuracies": 0.31562501192092896,
"rewards/chosen": -0.06585933268070221,
"rewards/margins": 0.11039040982723236,
"rewards/rejected": -0.17624975740909576,
"step": 920,
"use_label": 14624.1875
},
{
"epoch": 0.98,
"grad_norm": 0.8359375,
"learning_rate": 3.760945397705828e-09,
"logits/chosen": -1.9589160680770874,
"logits/rejected": -1.9971154928207397,
"logps/chosen": -74.0462646484375,
"logps/rejected": -91.64708709716797,
"loss": 0.6913,
"pred_label": 4558.71240234375,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.0799408107995987,
"rewards/margins": 0.10116855055093765,
"rewards/rejected": -0.18110935389995575,
"step": 940,
"use_label": 14859.287109375
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6906769273168754,
"train_runtime": 20027.4031,
"train_samples_per_second": 3.053,
"train_steps_per_second": 0.048
}
],
"logging_steps": 20,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}