|
{ |
|
"best_metric": 0.9069767594337463, |
|
"best_model_checkpoint": "./llama3/27-06-24-Weni-ZeroShot-Agents-Llama3-4.0.37-DPO_Experiment with DPO and Llama3 8B, zeroshot 4.0.37-2_max_steps-570_batch_16_2024-06-27_ppid_9/checkpoint-80", |
|
"epoch": 2.5098039215686274, |
|
"eval_steps": 20, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10457516339869281, |
|
"grad_norm": 3.655749559402466, |
|
"learning_rate": 3.3333333333333337e-06, |
|
"logits/chosen": -0.22948360443115234, |
|
"logits/rejected": -0.22978875041007996, |
|
"logps/chosen": -39.710899353027344, |
|
"logps/rejected": -39.52346420288086, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11001075804233551, |
|
"rewards/margins": 0.06790003925561905, |
|
"rewards/rejected": 0.04211071878671646, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20915032679738563, |
|
"grad_norm": 2.4934024810791016, |
|
"learning_rate": 5.978260869565218e-06, |
|
"logits/chosen": -0.22906163334846497, |
|
"logits/rejected": -0.22876068949699402, |
|
"logps/chosen": -30.569751739501953, |
|
"logps/rejected": -33.86491012573242, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.9981435537338257, |
|
"rewards/margins": 0.38557058572769165, |
|
"rewards/rejected": 0.6125729084014893, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20915032679738563, |
|
"eval_logits/chosen": -0.19793638586997986, |
|
"eval_logits/rejected": -0.197507843375206, |
|
"eval_logps/chosen": -26.848573684692383, |
|
"eval_logps/rejected": -35.13609313964844, |
|
"eval_loss": 0.43225446343421936, |
|
"eval_rewards/accuracies": 0.8081395626068115, |
|
"eval_rewards/chosen": 1.355695366859436, |
|
"eval_rewards/margins": 0.861485481262207, |
|
"eval_rewards/rejected": 0.4942099153995514, |
|
"eval_runtime": 76.6712, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.122, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 1.9067373275756836, |
|
"learning_rate": 5.869565217391305e-06, |
|
"logits/chosen": -0.23883526027202606, |
|
"logits/rejected": -0.2374078780412674, |
|
"logps/chosen": -30.109268188476562, |
|
"logps/rejected": -43.55142593383789, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.0676757097244263, |
|
"rewards/margins": 1.4297826290130615, |
|
"rewards/rejected": -0.3621070086956024, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.41830065359477125, |
|
"grad_norm": 3.629997730255127, |
|
"learning_rate": 5.760869565217392e-06, |
|
"logits/chosen": -0.2035980522632599, |
|
"logits/rejected": -0.20044592022895813, |
|
"logps/chosen": -28.863727569580078, |
|
"logps/rejected": -50.26195526123047, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.200588345527649, |
|
"rewards/margins": 2.2083239555358887, |
|
"rewards/rejected": -1.0077354907989502, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41830065359477125, |
|
"eval_logits/chosen": -0.16349415481090546, |
|
"eval_logits/rejected": -0.15954892337322235, |
|
"eval_logps/chosen": -24.61951446533203, |
|
"eval_logps/rejected": -48.91069412231445, |
|
"eval_loss": 0.34378084540367126, |
|
"eval_rewards/accuracies": 0.8895348906517029, |
|
"eval_rewards/chosen": 1.5786010026931763, |
|
"eval_rewards/margins": 2.4618515968322754, |
|
"eval_rewards/rejected": -0.8832504749298096, |
|
"eval_runtime": 76.7395, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.121, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5228758169934641, |
|
"grad_norm": 2.5461719036102295, |
|
"learning_rate": 5.652173913043479e-06, |
|
"logits/chosen": -0.16493651270866394, |
|
"logits/rejected": -0.16144290566444397, |
|
"logps/chosen": -24.76712989807129, |
|
"logps/rejected": -46.993003845214844, |
|
"loss": 0.3648, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.5913469791412354, |
|
"rewards/margins": 2.25089955329895, |
|
"rewards/rejected": -0.6595526933670044, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 2.0358729362487793, |
|
"learning_rate": 5.543478260869566e-06, |
|
"logits/chosen": -0.2202371060848236, |
|
"logits/rejected": -0.21734721958637238, |
|
"logps/chosen": -26.78680419921875, |
|
"logps/rejected": -47.03938674926758, |
|
"loss": 0.3448, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.3999578952789307, |
|
"rewards/margins": 2.0680160522460938, |
|
"rewards/rejected": -0.6680583357810974, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"eval_logits/chosen": -0.17944073677062988, |
|
"eval_logits/rejected": -0.1763658970594406, |
|
"eval_logps/chosen": -27.3469295501709, |
|
"eval_logps/rejected": -48.937984466552734, |
|
"eval_loss": 0.2700035870075226, |
|
"eval_rewards/accuracies": 0.9069767594337463, |
|
"eval_rewards/chosen": 1.3058594465255737, |
|
"eval_rewards/margins": 2.191838026046753, |
|
"eval_rewards/rejected": -0.8859787583351135, |
|
"eval_runtime": 76.7029, |
|
"eval_samples_per_second": 2.229, |
|
"eval_steps_per_second": 1.121, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7320261437908496, |
|
"grad_norm": 1.7749762535095215, |
|
"learning_rate": 5.4347826086956525e-06, |
|
"logits/chosen": -0.1932402402162552, |
|
"logits/rejected": -0.19045117497444153, |
|
"logps/chosen": -26.827350616455078, |
|
"logps/rejected": -46.88459014892578, |
|
"loss": 0.2909, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.3978151082992554, |
|
"rewards/margins": 2.0628795623779297, |
|
"rewards/rejected": -0.6650643348693848, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8366013071895425, |
|
"grad_norm": 2.9730594158172607, |
|
"learning_rate": 5.326086956521739e-06, |
|
"logits/chosen": -0.15139932930469513, |
|
"logits/rejected": -0.14830470085144043, |
|
"logps/chosen": -26.229084014892578, |
|
"logps/rejected": -47.40288162231445, |
|
"loss": 0.2844, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.493841290473938, |
|
"rewards/margins": 2.2263193130493164, |
|
"rewards/rejected": -0.7324780225753784, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8366013071895425, |
|
"eval_logits/chosen": -0.15539461374282837, |
|
"eval_logits/rejected": -0.15173271298408508, |
|
"eval_logps/chosen": -30.73025894165039, |
|
"eval_logps/rejected": -54.970027923583984, |
|
"eval_loss": 0.262494832277298, |
|
"eval_rewards/accuracies": 0.9069767594337463, |
|
"eval_rewards/chosen": 0.967526912689209, |
|
"eval_rewards/margins": 2.4567105770111084, |
|
"eval_rewards/rejected": -1.4891836643218994, |
|
"eval_runtime": 76.69, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.121, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 2.7646617889404297, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"logits/chosen": -0.17744764685630798, |
|
"logits/rejected": -0.1731792390346527, |
|
"logps/chosen": -25.179697036743164, |
|
"logps/rejected": -51.22761154174805, |
|
"loss": 0.2203, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.5833141803741455, |
|
"rewards/margins": 2.6879732608795166, |
|
"rewards/rejected": -1.104659080505371, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0457516339869282, |
|
"grad_norm": 1.8633959293365479, |
|
"learning_rate": 5.1086956521739134e-06, |
|
"logits/chosen": -0.1642988920211792, |
|
"logits/rejected": -0.1597273051738739, |
|
"logps/chosen": -23.413448333740234, |
|
"logps/rejected": -53.029579162597656, |
|
"loss": 0.3134, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.7295477390289307, |
|
"rewards/margins": 3.0015645027160645, |
|
"rewards/rejected": -1.2720168828964233, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0457516339869282, |
|
"eval_logits/chosen": -0.14374618232250214, |
|
"eval_logits/rejected": -0.13886626064777374, |
|
"eval_logps/chosen": -25.8454532623291, |
|
"eval_logps/rejected": -56.691768646240234, |
|
"eval_loss": 0.23542174696922302, |
|
"eval_rewards/accuracies": 0.9244186282157898, |
|
"eval_rewards/chosen": 1.456007480621338, |
|
"eval_rewards/margins": 3.1173653602600098, |
|
"eval_rewards/rejected": -1.6613577604293823, |
|
"eval_runtime": 76.7594, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.12, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1503267973856208, |
|
"grad_norm": 3.7231855392456055, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -0.17482638359069824, |
|
"logits/rejected": -0.1706036627292633, |
|
"logps/chosen": -27.381885528564453, |
|
"logps/rejected": -53.334877014160156, |
|
"loss": 0.3177, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 1.3857864141464233, |
|
"rewards/margins": 2.708588123321533, |
|
"rewards/rejected": -1.3228017091751099, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2549019607843137, |
|
"grad_norm": 5.588420391082764, |
|
"learning_rate": 4.8913043478260865e-06, |
|
"logits/chosen": -0.19949769973754883, |
|
"logits/rejected": -0.19602252542972565, |
|
"logps/chosen": -21.503002166748047, |
|
"logps/rejected": -45.17253494262695, |
|
"loss": 0.2497, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.9555631875991821, |
|
"rewards/margins": 2.432082414627075, |
|
"rewards/rejected": -0.47651925683021545, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.2549019607843137, |
|
"eval_logits/chosen": -0.14820654690265656, |
|
"eval_logits/rejected": -0.14320875704288483, |
|
"eval_logps/chosen": -24.672155380249023, |
|
"eval_logps/rejected": -54.095088958740234, |
|
"eval_loss": 0.21676376461982727, |
|
"eval_rewards/accuracies": 0.9186046719551086, |
|
"eval_rewards/chosen": 1.5733370780944824, |
|
"eval_rewards/margins": 2.9750266075134277, |
|
"eval_rewards/rejected": -1.4016892910003662, |
|
"eval_runtime": 76.7524, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.12, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3594771241830066, |
|
"grad_norm": 4.653593063354492, |
|
"learning_rate": 4.782608695652174e-06, |
|
"logits/chosen": -0.16941645741462708, |
|
"logits/rejected": -0.16412410140037537, |
|
"logps/chosen": -28.397680282592773, |
|
"logps/rejected": -59.14925003051758, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2265132665634155, |
|
"rewards/margins": 3.106191873550415, |
|
"rewards/rejected": -1.879678726196289, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4640522875816995, |
|
"grad_norm": 4.618969440460205, |
|
"learning_rate": 4.673913043478261e-06, |
|
"logits/chosen": -0.1314123570919037, |
|
"logits/rejected": -0.12528486549854279, |
|
"logps/chosen": -18.617746353149414, |
|
"logps/rejected": -51.451507568359375, |
|
"loss": 0.2442, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.209608554840088, |
|
"rewards/margins": 3.3631699085235596, |
|
"rewards/rejected": -1.1535612344741821, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.4640522875816995, |
|
"eval_logits/chosen": -0.13051746785640717, |
|
"eval_logits/rejected": -0.1261546015739441, |
|
"eval_logps/chosen": -14.140735626220703, |
|
"eval_logps/rejected": -38.07098388671875, |
|
"eval_loss": 0.2653515338897705, |
|
"eval_rewards/accuracies": 0.9069767594337463, |
|
"eval_rewards/chosen": 2.626479148864746, |
|
"eval_rewards/margins": 2.425758123397827, |
|
"eval_rewards/rejected": 0.20072098076343536, |
|
"eval_runtime": 76.7224, |
|
"eval_samples_per_second": 2.229, |
|
"eval_steps_per_second": 1.121, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 2.419158697128296, |
|
"learning_rate": 4.565217391304348e-06, |
|
"logits/chosen": -0.14353547990322113, |
|
"logits/rejected": -0.13822032511234283, |
|
"logps/chosen": -17.764816284179688, |
|
"logps/rejected": -48.77958679199219, |
|
"loss": 0.1411, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.28568172454834, |
|
"rewards/margins": 3.1483988761901855, |
|
"rewards/rejected": -0.8627172708511353, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.673202614379085, |
|
"grad_norm": 2.6137094497680664, |
|
"learning_rate": 4.456521739130434e-06, |
|
"logits/chosen": -0.0922718346118927, |
|
"logits/rejected": -0.08614876121282578, |
|
"logps/chosen": -30.0429630279541, |
|
"logps/rejected": -62.348907470703125, |
|
"loss": 0.2677, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.0795209407806396, |
|
"rewards/margins": 3.2884597778320312, |
|
"rewards/rejected": -2.2089390754699707, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.673202614379085, |
|
"eval_logits/chosen": -0.10249081254005432, |
|
"eval_logits/rejected": -0.09574974328279495, |
|
"eval_logps/chosen": -22.81549835205078, |
|
"eval_logps/rejected": -60.084381103515625, |
|
"eval_loss": 0.19992607831954956, |
|
"eval_rewards/accuracies": 0.9244186282157898, |
|
"eval_rewards/chosen": 1.7590028047561646, |
|
"eval_rewards/margins": 3.75962233543396, |
|
"eval_rewards/rejected": -2.000619411468506, |
|
"eval_runtime": 76.7585, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.12, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 3.627882480621338, |
|
"learning_rate": 4.347826086956522e-06, |
|
"logits/chosen": -0.1151178628206253, |
|
"logits/rejected": -0.10893462598323822, |
|
"logps/chosen": -21.336753845214844, |
|
"logps/rejected": -56.53479766845703, |
|
"loss": 0.1838, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.9686607122421265, |
|
"rewards/margins": 3.594453811645508, |
|
"rewards/rejected": -1.6257928609848022, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"grad_norm": 2.876845121383667, |
|
"learning_rate": 4.239130434782609e-06, |
|
"logits/chosen": -0.08697254955768585, |
|
"logits/rejected": -0.08123140037059784, |
|
"logps/chosen": -20.807594299316406, |
|
"logps/rejected": -49.710174560546875, |
|
"loss": 0.2598, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.9989045858383179, |
|
"rewards/margins": 2.9567558765411377, |
|
"rewards/rejected": -0.957851231098175, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"eval_logits/chosen": -0.11206170916557312, |
|
"eval_logits/rejected": -0.1060233786702156, |
|
"eval_logps/chosen": -18.79454231262207, |
|
"eval_logps/rejected": -51.605831146240234, |
|
"eval_loss": 0.20344915986061096, |
|
"eval_rewards/accuracies": 0.930232584476471, |
|
"eval_rewards/chosen": 2.1610984802246094, |
|
"eval_rewards/margins": 3.3138630390167236, |
|
"eval_rewards/rejected": -1.152764081954956, |
|
"eval_runtime": 76.6954, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.121, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9869281045751634, |
|
"grad_norm": 4.388978004455566, |
|
"learning_rate": 4.130434782608695e-06, |
|
"logits/chosen": -0.12727566063404083, |
|
"logits/rejected": -0.12193255126476288, |
|
"logps/chosen": -20.929378509521484, |
|
"logps/rejected": -50.021419525146484, |
|
"loss": 0.3231, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.967181921005249, |
|
"rewards/margins": 2.968064785003662, |
|
"rewards/rejected": -1.000882863998413, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0915032679738563, |
|
"grad_norm": 3.108682870864868, |
|
"learning_rate": 4.021739130434782e-06, |
|
"logits/chosen": -0.15400271117687225, |
|
"logits/rejected": -0.14889715611934662, |
|
"logps/chosen": -20.020977020263672, |
|
"logps/rejected": -49.0496940612793, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.0844578742980957, |
|
"rewards/margins": 2.970615863800049, |
|
"rewards/rejected": -0.8861583471298218, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0915032679738563, |
|
"eval_logits/chosen": -0.10228858143091202, |
|
"eval_logits/rejected": -0.09614047408103943, |
|
"eval_logps/chosen": -25.265357971191406, |
|
"eval_logps/rejected": -58.75029373168945, |
|
"eval_loss": 0.17103791236877441, |
|
"eval_rewards/accuracies": 0.9593023061752319, |
|
"eval_rewards/chosen": 1.5140167474746704, |
|
"eval_rewards/margins": 3.3812272548675537, |
|
"eval_rewards/rejected": -1.8672102689743042, |
|
"eval_runtime": 76.7398, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.121, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.196078431372549, |
|
"grad_norm": 1.1581649780273438, |
|
"learning_rate": 3.91304347826087e-06, |
|
"logits/chosen": -0.10600709915161133, |
|
"logits/rejected": -0.09994350373744965, |
|
"logps/chosen": -28.068603515625, |
|
"logps/rejected": -62.65039825439453, |
|
"loss": 0.1286, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2790197134017944, |
|
"rewards/margins": 3.5068142414093018, |
|
"rewards/rejected": -2.2277944087982178, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.3006535947712417, |
|
"grad_norm": 7.228103160858154, |
|
"learning_rate": 3.804347826086957e-06, |
|
"logits/chosen": -0.1612926423549652, |
|
"logits/rejected": -0.1542719006538391, |
|
"logps/chosen": -17.81163215637207, |
|
"logps/rejected": -55.7554817199707, |
|
"loss": 0.266, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.290220260620117, |
|
"rewards/margins": 3.835573673248291, |
|
"rewards/rejected": -1.5453532934188843, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.3006535947712417, |
|
"eval_logits/chosen": -0.08572749048471451, |
|
"eval_logits/rejected": -0.07834314554929733, |
|
"eval_logps/chosen": -16.123777389526367, |
|
"eval_logps/rejected": -52.568172454833984, |
|
"eval_loss": 0.2138950228691101, |
|
"eval_rewards/accuracies": 0.930232584476471, |
|
"eval_rewards/chosen": 2.4281749725341797, |
|
"eval_rewards/margins": 3.677172899246216, |
|
"eval_rewards/rejected": -1.2489980459213257, |
|
"eval_runtime": 76.6691, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.122, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.4052287581699345, |
|
"grad_norm": 2.1655898094177246, |
|
"learning_rate": 3.695652173913043e-06, |
|
"logits/chosen": -0.1236579641699791, |
|
"logits/rejected": -0.11640377342700958, |
|
"logps/chosen": -18.28099822998047, |
|
"logps/rejected": -55.26251983642578, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.2774600982666016, |
|
"rewards/margins": 3.7921690940856934, |
|
"rewards/rejected": -1.5147093534469604, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.5098039215686274, |
|
"grad_norm": 3.522686243057251, |
|
"learning_rate": 3.5869565217391305e-06, |
|
"logits/chosen": -0.12028801441192627, |
|
"logits/rejected": -0.11475691944360733, |
|
"logps/chosen": -20.220806121826172, |
|
"logps/rejected": -50.0861701965332, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.0429701805114746, |
|
"rewards/margins": 3.0199804306030273, |
|
"rewards/rejected": -0.9770105481147766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.5098039215686274, |
|
"eval_logits/chosen": -0.08792821317911148, |
|
"eval_logits/rejected": -0.08158135414123535, |
|
"eval_logps/chosen": -18.402149200439453, |
|
"eval_logps/rejected": -51.57028579711914, |
|
"eval_loss": 0.18542896211147308, |
|
"eval_rewards/accuracies": 0.9418604373931885, |
|
"eval_rewards/chosen": 2.2003378868103027, |
|
"eval_rewards/margins": 3.3495473861694336, |
|
"eval_rewards/rejected": -1.1492092609405518, |
|
"eval_runtime": 76.7548, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.12, |
|
"step": 240 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 80, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|