ironrock's picture
Training in progress, step 320, checkpoint
4521fb9 verified
raw
history blame
No virus
27.7 kB
{
"best_metric": 0.9069767594337463,
"best_model_checkpoint": "./llama3/27-06-24-Weni-ZeroShot-Agents-Llama3-4.0.37-DPO_Experiment with DPO and Llama3 8B, zeroshot 4.0.37-2_max_steps-570_batch_16_2024-06-27_ppid_9/checkpoint-80",
"epoch": 3.34640522875817,
"eval_steps": 20,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10457516339869281,
"grad_norm": 3.655749559402466,
"learning_rate": 3.3333333333333337e-06,
"logits/chosen": -0.22948360443115234,
"logits/rejected": -0.22978875041007996,
"logps/chosen": -39.710899353027344,
"logps/rejected": -39.52346420288086,
"loss": 0.662,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.11001075804233551,
"rewards/margins": 0.06790003925561905,
"rewards/rejected": 0.04211071878671646,
"step": 10
},
{
"epoch": 0.20915032679738563,
"grad_norm": 2.4934024810791016,
"learning_rate": 5.978260869565218e-06,
"logits/chosen": -0.22906163334846497,
"logits/rejected": -0.22876068949699402,
"logps/chosen": -30.569751739501953,
"logps/rejected": -33.86491012573242,
"loss": 0.5549,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 0.9981435537338257,
"rewards/margins": 0.38557058572769165,
"rewards/rejected": 0.6125729084014893,
"step": 20
},
{
"epoch": 0.20915032679738563,
"eval_logits/chosen": -0.19793638586997986,
"eval_logits/rejected": -0.197507843375206,
"eval_logps/chosen": -26.848573684692383,
"eval_logps/rejected": -35.13609313964844,
"eval_loss": 0.43225446343421936,
"eval_rewards/accuracies": 0.8081395626068115,
"eval_rewards/chosen": 1.355695366859436,
"eval_rewards/margins": 0.861485481262207,
"eval_rewards/rejected": 0.4942099153995514,
"eval_runtime": 76.6712,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.122,
"step": 20
},
{
"epoch": 0.3137254901960784,
"grad_norm": 1.9067373275756836,
"learning_rate": 5.869565217391305e-06,
"logits/chosen": -0.23883526027202606,
"logits/rejected": -0.2374078780412674,
"logps/chosen": -30.109268188476562,
"logps/rejected": -43.55142593383789,
"loss": 0.3732,
"rewards/accuracies": 0.84375,
"rewards/chosen": 1.0676757097244263,
"rewards/margins": 1.4297826290130615,
"rewards/rejected": -0.3621070086956024,
"step": 30
},
{
"epoch": 0.41830065359477125,
"grad_norm": 3.629997730255127,
"learning_rate": 5.760869565217392e-06,
"logits/chosen": -0.2035980522632599,
"logits/rejected": -0.20044592022895813,
"logps/chosen": -28.863727569580078,
"logps/rejected": -50.26195526123047,
"loss": 0.3276,
"rewards/accuracies": 0.84375,
"rewards/chosen": 1.200588345527649,
"rewards/margins": 2.2083239555358887,
"rewards/rejected": -1.0077354907989502,
"step": 40
},
{
"epoch": 0.41830065359477125,
"eval_logits/chosen": -0.16349415481090546,
"eval_logits/rejected": -0.15954892337322235,
"eval_logps/chosen": -24.61951446533203,
"eval_logps/rejected": -48.91069412231445,
"eval_loss": 0.34378084540367126,
"eval_rewards/accuracies": 0.8895348906517029,
"eval_rewards/chosen": 1.5786010026931763,
"eval_rewards/margins": 2.4618515968322754,
"eval_rewards/rejected": -0.8832504749298096,
"eval_runtime": 76.7395,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.121,
"step": 40
},
{
"epoch": 0.5228758169934641,
"grad_norm": 2.5461719036102295,
"learning_rate": 5.652173913043479e-06,
"logits/chosen": -0.16493651270866394,
"logits/rejected": -0.16144290566444397,
"logps/chosen": -24.76712989807129,
"logps/rejected": -46.993003845214844,
"loss": 0.3648,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.5913469791412354,
"rewards/margins": 2.25089955329895,
"rewards/rejected": -0.6595526933670044,
"step": 50
},
{
"epoch": 0.6274509803921569,
"grad_norm": 2.0358729362487793,
"learning_rate": 5.543478260869566e-06,
"logits/chosen": -0.2202371060848236,
"logits/rejected": -0.21734721958637238,
"logps/chosen": -26.78680419921875,
"logps/rejected": -47.03938674926758,
"loss": 0.3448,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 1.3999578952789307,
"rewards/margins": 2.0680160522460938,
"rewards/rejected": -0.6680583357810974,
"step": 60
},
{
"epoch": 0.6274509803921569,
"eval_logits/chosen": -0.17944073677062988,
"eval_logits/rejected": -0.1763658970594406,
"eval_logps/chosen": -27.3469295501709,
"eval_logps/rejected": -48.937984466552734,
"eval_loss": 0.2700035870075226,
"eval_rewards/accuracies": 0.9069767594337463,
"eval_rewards/chosen": 1.3058594465255737,
"eval_rewards/margins": 2.191838026046753,
"eval_rewards/rejected": -0.8859787583351135,
"eval_runtime": 76.7029,
"eval_samples_per_second": 2.229,
"eval_steps_per_second": 1.121,
"step": 60
},
{
"epoch": 0.7320261437908496,
"grad_norm": 1.7749762535095215,
"learning_rate": 5.4347826086956525e-06,
"logits/chosen": -0.1932402402162552,
"logits/rejected": -0.19045117497444153,
"logps/chosen": -26.827350616455078,
"logps/rejected": -46.88459014892578,
"loss": 0.2909,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 1.3978151082992554,
"rewards/margins": 2.0628795623779297,
"rewards/rejected": -0.6650643348693848,
"step": 70
},
{
"epoch": 0.8366013071895425,
"grad_norm": 2.9730594158172607,
"learning_rate": 5.326086956521739e-06,
"logits/chosen": -0.15139932930469513,
"logits/rejected": -0.14830470085144043,
"logps/chosen": -26.229084014892578,
"logps/rejected": -47.40288162231445,
"loss": 0.2844,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.493841290473938,
"rewards/margins": 2.2263193130493164,
"rewards/rejected": -0.7324780225753784,
"step": 80
},
{
"epoch": 0.8366013071895425,
"eval_logits/chosen": -0.15539461374282837,
"eval_logits/rejected": -0.15173271298408508,
"eval_logps/chosen": -30.73025894165039,
"eval_logps/rejected": -54.970027923583984,
"eval_loss": 0.262494832277298,
"eval_rewards/accuracies": 0.9069767594337463,
"eval_rewards/chosen": 0.967526912689209,
"eval_rewards/margins": 2.4567105770111084,
"eval_rewards/rejected": -1.4891836643218994,
"eval_runtime": 76.69,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.121,
"step": 80
},
{
"epoch": 0.9411764705882353,
"grad_norm": 2.7646617889404297,
"learning_rate": 5.2173913043478265e-06,
"logits/chosen": -0.17744764685630798,
"logits/rejected": -0.1731792390346527,
"logps/chosen": -25.179697036743164,
"logps/rejected": -51.22761154174805,
"loss": 0.2203,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.5833141803741455,
"rewards/margins": 2.6879732608795166,
"rewards/rejected": -1.104659080505371,
"step": 90
},
{
"epoch": 1.0457516339869282,
"grad_norm": 1.8633959293365479,
"learning_rate": 5.1086956521739134e-06,
"logits/chosen": -0.1642988920211792,
"logits/rejected": -0.1597273051738739,
"logps/chosen": -23.413448333740234,
"logps/rejected": -53.029579162597656,
"loss": 0.3134,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.7295477390289307,
"rewards/margins": 3.0015645027160645,
"rewards/rejected": -1.2720168828964233,
"step": 100
},
{
"epoch": 1.0457516339869282,
"eval_logits/chosen": -0.14374618232250214,
"eval_logits/rejected": -0.13886626064777374,
"eval_logps/chosen": -25.8454532623291,
"eval_logps/rejected": -56.691768646240234,
"eval_loss": 0.23542174696922302,
"eval_rewards/accuracies": 0.9244186282157898,
"eval_rewards/chosen": 1.456007480621338,
"eval_rewards/margins": 3.1173653602600098,
"eval_rewards/rejected": -1.6613577604293823,
"eval_runtime": 76.7594,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.12,
"step": 100
},
{
"epoch": 1.1503267973856208,
"grad_norm": 3.7231855392456055,
"learning_rate": 5e-06,
"logits/chosen": -0.17482638359069824,
"logits/rejected": -0.1706036627292633,
"logps/chosen": -27.381885528564453,
"logps/rejected": -53.334877014160156,
"loss": 0.3177,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 1.3857864141464233,
"rewards/margins": 2.708588123321533,
"rewards/rejected": -1.3228017091751099,
"step": 110
},
{
"epoch": 1.2549019607843137,
"grad_norm": 5.588420391082764,
"learning_rate": 4.8913043478260865e-06,
"logits/chosen": -0.19949769973754883,
"logits/rejected": -0.19602252542972565,
"logps/chosen": -21.503002166748047,
"logps/rejected": -45.17253494262695,
"loss": 0.2497,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.9555631875991821,
"rewards/margins": 2.432082414627075,
"rewards/rejected": -0.47651925683021545,
"step": 120
},
{
"epoch": 1.2549019607843137,
"eval_logits/chosen": -0.14820654690265656,
"eval_logits/rejected": -0.14320875704288483,
"eval_logps/chosen": -24.672155380249023,
"eval_logps/rejected": -54.095088958740234,
"eval_loss": 0.21676376461982727,
"eval_rewards/accuracies": 0.9186046719551086,
"eval_rewards/chosen": 1.5733370780944824,
"eval_rewards/margins": 2.9750266075134277,
"eval_rewards/rejected": -1.4016892910003662,
"eval_runtime": 76.7524,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.12,
"step": 120
},
{
"epoch": 1.3594771241830066,
"grad_norm": 4.653593063354492,
"learning_rate": 4.782608695652174e-06,
"logits/chosen": -0.16941645741462708,
"logits/rejected": -0.16412410140037537,
"logps/chosen": -28.397680282592773,
"logps/rejected": -59.14925003051758,
"loss": 0.2098,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.2265132665634155,
"rewards/margins": 3.106191873550415,
"rewards/rejected": -1.879678726196289,
"step": 130
},
{
"epoch": 1.4640522875816995,
"grad_norm": 4.618969440460205,
"learning_rate": 4.673913043478261e-06,
"logits/chosen": -0.1314123570919037,
"logits/rejected": -0.12528486549854279,
"logps/chosen": -18.617746353149414,
"logps/rejected": -51.451507568359375,
"loss": 0.2442,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 2.209608554840088,
"rewards/margins": 3.3631699085235596,
"rewards/rejected": -1.1535612344741821,
"step": 140
},
{
"epoch": 1.4640522875816995,
"eval_logits/chosen": -0.13051746785640717,
"eval_logits/rejected": -0.1261546015739441,
"eval_logps/chosen": -14.140735626220703,
"eval_logps/rejected": -38.07098388671875,
"eval_loss": 0.2653515338897705,
"eval_rewards/accuracies": 0.9069767594337463,
"eval_rewards/chosen": 2.626479148864746,
"eval_rewards/margins": 2.425758123397827,
"eval_rewards/rejected": 0.20072098076343536,
"eval_runtime": 76.7224,
"eval_samples_per_second": 2.229,
"eval_steps_per_second": 1.121,
"step": 140
},
{
"epoch": 1.5686274509803921,
"grad_norm": 2.419158697128296,
"learning_rate": 4.565217391304348e-06,
"logits/chosen": -0.14353547990322113,
"logits/rejected": -0.13822032511234283,
"logps/chosen": -17.764816284179688,
"logps/rejected": -48.77958679199219,
"loss": 0.1411,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.28568172454834,
"rewards/margins": 3.1483988761901855,
"rewards/rejected": -0.8627172708511353,
"step": 150
},
{
"epoch": 1.673202614379085,
"grad_norm": 2.6137094497680664,
"learning_rate": 4.456521739130434e-06,
"logits/chosen": -0.0922718346118927,
"logits/rejected": -0.08614876121282578,
"logps/chosen": -30.0429630279541,
"logps/rejected": -62.348907470703125,
"loss": 0.2677,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.0795209407806396,
"rewards/margins": 3.2884597778320312,
"rewards/rejected": -2.2089390754699707,
"step": 160
},
{
"epoch": 1.673202614379085,
"eval_logits/chosen": -0.10249081254005432,
"eval_logits/rejected": -0.09574974328279495,
"eval_logps/chosen": -22.81549835205078,
"eval_logps/rejected": -60.084381103515625,
"eval_loss": 0.19992607831954956,
"eval_rewards/accuracies": 0.9244186282157898,
"eval_rewards/chosen": 1.7590028047561646,
"eval_rewards/margins": 3.75962233543396,
"eval_rewards/rejected": -2.000619411468506,
"eval_runtime": 76.7585,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.12,
"step": 160
},
{
"epoch": 1.7777777777777777,
"grad_norm": 3.627882480621338,
"learning_rate": 4.347826086956522e-06,
"logits/chosen": -0.1151178628206253,
"logits/rejected": -0.10893462598323822,
"logps/chosen": -21.336753845214844,
"logps/rejected": -56.53479766845703,
"loss": 0.1838,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.9686607122421265,
"rewards/margins": 3.594453811645508,
"rewards/rejected": -1.6257928609848022,
"step": 170
},
{
"epoch": 1.8823529411764706,
"grad_norm": 2.876845121383667,
"learning_rate": 4.239130434782609e-06,
"logits/chosen": -0.08697254955768585,
"logits/rejected": -0.08123140037059784,
"logps/chosen": -20.807594299316406,
"logps/rejected": -49.710174560546875,
"loss": 0.2598,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.9989045858383179,
"rewards/margins": 2.9567558765411377,
"rewards/rejected": -0.957851231098175,
"step": 180
},
{
"epoch": 1.8823529411764706,
"eval_logits/chosen": -0.11206170916557312,
"eval_logits/rejected": -0.1060233786702156,
"eval_logps/chosen": -18.79454231262207,
"eval_logps/rejected": -51.605831146240234,
"eval_loss": 0.20344915986061096,
"eval_rewards/accuracies": 0.930232584476471,
"eval_rewards/chosen": 2.1610984802246094,
"eval_rewards/margins": 3.3138630390167236,
"eval_rewards/rejected": -1.152764081954956,
"eval_runtime": 76.6954,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.121,
"step": 180
},
{
"epoch": 1.9869281045751634,
"grad_norm": 4.388978004455566,
"learning_rate": 4.130434782608695e-06,
"logits/chosen": -0.12727566063404083,
"logits/rejected": -0.12193255126476288,
"logps/chosen": -20.929378509521484,
"logps/rejected": -50.021419525146484,
"loss": 0.3231,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.967181921005249,
"rewards/margins": 2.968064785003662,
"rewards/rejected": -1.000882863998413,
"step": 190
},
{
"epoch": 2.0915032679738563,
"grad_norm": 3.108682870864868,
"learning_rate": 4.021739130434782e-06,
"logits/chosen": -0.15400271117687225,
"logits/rejected": -0.14889715611934662,
"logps/chosen": -20.020977020263672,
"logps/rejected": -49.0496940612793,
"loss": 0.165,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 2.0844578742980957,
"rewards/margins": 2.970615863800049,
"rewards/rejected": -0.8861583471298218,
"step": 200
},
{
"epoch": 2.0915032679738563,
"eval_logits/chosen": -0.10228858143091202,
"eval_logits/rejected": -0.09614047408103943,
"eval_logps/chosen": -25.265357971191406,
"eval_logps/rejected": -58.75029373168945,
"eval_loss": 0.17103791236877441,
"eval_rewards/accuracies": 0.9593023061752319,
"eval_rewards/chosen": 1.5140167474746704,
"eval_rewards/margins": 3.3812272548675537,
"eval_rewards/rejected": -1.8672102689743042,
"eval_runtime": 76.7398,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.121,
"step": 200
},
{
"epoch": 2.196078431372549,
"grad_norm": 1.1581649780273438,
"learning_rate": 3.91304347826087e-06,
"logits/chosen": -0.10600709915161133,
"logits/rejected": -0.09994350373744965,
"logps/chosen": -28.068603515625,
"logps/rejected": -62.65039825439453,
"loss": 0.1286,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.2790197134017944,
"rewards/margins": 3.5068142414093018,
"rewards/rejected": -2.2277944087982178,
"step": 210
},
{
"epoch": 2.3006535947712417,
"grad_norm": 7.228103160858154,
"learning_rate": 3.804347826086957e-06,
"logits/chosen": -0.1612926423549652,
"logits/rejected": -0.1542719006538391,
"logps/chosen": -17.81163215637207,
"logps/rejected": -55.7554817199707,
"loss": 0.266,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.290220260620117,
"rewards/margins": 3.835573673248291,
"rewards/rejected": -1.5453532934188843,
"step": 220
},
{
"epoch": 2.3006535947712417,
"eval_logits/chosen": -0.08572749048471451,
"eval_logits/rejected": -0.07834314554929733,
"eval_logps/chosen": -16.123777389526367,
"eval_logps/rejected": -52.568172454833984,
"eval_loss": 0.2138950228691101,
"eval_rewards/accuracies": 0.930232584476471,
"eval_rewards/chosen": 2.4281749725341797,
"eval_rewards/margins": 3.677172899246216,
"eval_rewards/rejected": -1.2489980459213257,
"eval_runtime": 76.6691,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.122,
"step": 220
},
{
"epoch": 2.4052287581699345,
"grad_norm": 2.1655898094177246,
"learning_rate": 3.695652173913043e-06,
"logits/chosen": -0.1236579641699791,
"logits/rejected": -0.11640377342700958,
"logps/chosen": -18.28099822998047,
"logps/rejected": -55.26251983642578,
"loss": 0.1805,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.2774600982666016,
"rewards/margins": 3.7921690940856934,
"rewards/rejected": -1.5147093534469604,
"step": 230
},
{
"epoch": 2.5098039215686274,
"grad_norm": 3.522686243057251,
"learning_rate": 3.5869565217391305e-06,
"logits/chosen": -0.12028801441192627,
"logits/rejected": -0.11475691944360733,
"logps/chosen": -20.220806121826172,
"logps/rejected": -50.0861701965332,
"loss": 0.2234,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 2.0429701805114746,
"rewards/margins": 3.0199804306030273,
"rewards/rejected": -0.9770105481147766,
"step": 240
},
{
"epoch": 2.5098039215686274,
"eval_logits/chosen": -0.08792821317911148,
"eval_logits/rejected": -0.08158135414123535,
"eval_logps/chosen": -18.402149200439453,
"eval_logps/rejected": -51.57028579711914,
"eval_loss": 0.18542896211147308,
"eval_rewards/accuracies": 0.9418604373931885,
"eval_rewards/chosen": 2.2003378868103027,
"eval_rewards/margins": 3.3495473861694336,
"eval_rewards/rejected": -1.1492092609405518,
"eval_runtime": 76.7548,
"eval_samples_per_second": 2.228,
"eval_steps_per_second": 1.12,
"step": 240
},
{
"epoch": 2.6143790849673203,
"grad_norm": 2.4707725048065186,
"learning_rate": 3.4782608695652175e-06,
"logits/chosen": -0.09261623024940491,
"logits/rejected": -0.08608702570199966,
"logps/chosen": -20.839956283569336,
"logps/rejected": -55.0688362121582,
"loss": 0.152,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.9972784519195557,
"rewards/margins": 3.4800357818603516,
"rewards/rejected": -1.482757568359375,
"step": 250
},
{
"epoch": 2.718954248366013,
"grad_norm": 3.7463388442993164,
"learning_rate": 3.369565217391305e-06,
"logits/chosen": -0.10273708403110504,
"logits/rejected": -0.09585189074277878,
"logps/chosen": -27.3509521484375,
"logps/rejected": -63.99445724487305,
"loss": 0.1878,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.3538360595703125,
"rewards/margins": 3.7591099739074707,
"rewards/rejected": -2.405273914337158,
"step": 260
},
{
"epoch": 2.718954248366013,
"eval_logits/chosen": -0.06562437862157822,
"eval_logits/rejected": -0.058443356305360794,
"eval_logps/chosen": -29.113916397094727,
"eval_logps/rejected": -68.89017486572266,
"eval_loss": 0.15422259271144867,
"eval_rewards/accuracies": 0.9534883499145508,
"eval_rewards/chosen": 1.12916100025177,
"eval_rewards/margins": 4.010359287261963,
"eval_rewards/rejected": -2.881197929382324,
"eval_runtime": 76.7159,
"eval_samples_per_second": 2.229,
"eval_steps_per_second": 1.121,
"step": 260
},
{
"epoch": 2.8235294117647056,
"grad_norm": 2.4779317378997803,
"learning_rate": 3.260869565217391e-06,
"logits/chosen": -0.08126804977655411,
"logits/rejected": -0.07377848774194717,
"logps/chosen": -29.53684425354004,
"logps/rejected": -69.46771240234375,
"loss": 0.1214,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.1481727361679077,
"rewards/margins": 4.097565650939941,
"rewards/rejected": -2.9493932723999023,
"step": 270
},
{
"epoch": 2.928104575163399,
"grad_norm": 5.711748123168945,
"learning_rate": 3.1521739130434784e-06,
"logits/chosen": -0.055128227919340134,
"logits/rejected": -0.047185707837343216,
"logps/chosen": -16.9158935546875,
"logps/rejected": -55.01350784301758,
"loss": 0.1515,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.360804796218872,
"rewards/margins": 3.8613457679748535,
"rewards/rejected": -1.5005409717559814,
"step": 280
},
{
"epoch": 2.928104575163399,
"eval_logits/chosen": -0.06331682205200195,
"eval_logits/rejected": -0.05519242212176323,
"eval_logps/chosen": -14.443292617797852,
"eval_logps/rejected": -52.58645248413086,
"eval_loss": 0.20675985515117645,
"eval_rewards/accuracies": 0.9418604373931885,
"eval_rewards/chosen": 2.5962235927581787,
"eval_rewards/margins": 3.8470497131347656,
"eval_rewards/rejected": -1.2508265972137451,
"eval_runtime": 76.6773,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.122,
"step": 280
},
{
"epoch": 3.0326797385620914,
"grad_norm": NaN,
"learning_rate": 3.054347826086957e-06,
"logits/chosen": -0.07114674150943756,
"logits/rejected": -0.0620611310005188,
"logps/chosen": -16.190364837646484,
"logps/rejected": -58.654823303222656,
"loss": 0.1621,
"rewards/accuracies": 0.9375,
"rewards/chosen": 2.476926565170288,
"rewards/margins": 4.345733165740967,
"rewards/rejected": -1.86880624294281,
"step": 290
},
{
"epoch": 3.1372549019607843,
"grad_norm": 3.2887370586395264,
"learning_rate": 2.9456521739130436e-06,
"logits/chosen": -0.06162800267338753,
"logits/rejected": -0.05298132449388504,
"logps/chosen": -17.659170150756836,
"logps/rejected": -59.8099250793457,
"loss": 0.1259,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 2.3060736656188965,
"rewards/margins": 4.271978855133057,
"rewards/rejected": -1.965904951095581,
"step": 300
},
{
"epoch": 3.1372549019607843,
"eval_logits/chosen": -0.05268235132098198,
"eval_logits/rejected": -0.04376488924026489,
"eval_logps/chosen": -14.083008766174316,
"eval_logps/rejected": -54.79096984863281,
"eval_loss": 0.15972751379013062,
"eval_rewards/accuracies": 0.9476743936538696,
"eval_rewards/chosen": 2.632251739501953,
"eval_rewards/margins": 4.10352897644043,
"eval_rewards/rejected": -1.4712772369384766,
"eval_runtime": 76.6816,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.122,
"step": 300
},
{
"epoch": 3.241830065359477,
"grad_norm": 1.3481501340866089,
"learning_rate": 2.8369565217391305e-06,
"logits/chosen": -0.09273257106542587,
"logits/rejected": -0.08304957300424576,
"logps/chosen": -14.50407886505127,
"logps/rejected": -60.208778381347656,
"loss": 0.0975,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.651323080062866,
"rewards/margins": 4.6572089195251465,
"rewards/rejected": -2.005885601043701,
"step": 310
},
{
"epoch": 3.34640522875817,
"grad_norm": 1.853318691253662,
"learning_rate": 2.7282608695652175e-06,
"logits/chosen": -0.10112150758504868,
"logits/rejected": -0.09216316789388657,
"logps/chosen": -17.982646942138672,
"logps/rejected": -64.50444793701172,
"loss": 0.1342,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 2.268524408340454,
"rewards/margins": 4.688179969787598,
"rewards/rejected": -2.4196553230285645,
"step": 320
},
{
"epoch": 3.34640522875817,
"eval_logits/chosen": -0.04029928892850876,
"eval_logits/rejected": -0.030180998146533966,
"eval_logps/chosen": -16.03810691833496,
"eval_logps/rejected": -63.949459075927734,
"eval_loss": 0.15685829520225525,
"eval_rewards/accuracies": 0.9476743936538696,
"eval_rewards/chosen": 2.436742067337036,
"eval_rewards/margins": 4.8238677978515625,
"eval_rewards/rejected": -2.3871262073516846,
"eval_runtime": 76.6972,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 1.121,
"step": 320
}
],
"logging_steps": 10,
"max_steps": 570,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 80,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}