{ "best_metric": 0.9069767594337463, "best_model_checkpoint": "./llama3/27-06-24-Weni-ZeroShot-Agents-Llama3-4.0.37-DPO_Experiment with DPO and Llama3 8B, zeroshot 4.0.37-2_max_steps-570_batch_16_2024-06-27_ppid_9/checkpoint-80", "epoch": 0.8366013071895425, "eval_steps": 20, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10457516339869281, "grad_norm": 3.655749559402466, "learning_rate": 3.3333333333333337e-06, "logits/chosen": -0.22948360443115234, "logits/rejected": -0.22978875041007996, "logps/chosen": -39.710899353027344, "logps/rejected": -39.52346420288086, "loss": 0.662, "rewards/accuracies": 0.625, "rewards/chosen": 0.11001075804233551, "rewards/margins": 0.06790003925561905, "rewards/rejected": 0.04211071878671646, "step": 10 }, { "epoch": 0.20915032679738563, "grad_norm": 2.4934024810791016, "learning_rate": 5.978260869565218e-06, "logits/chosen": -0.22906163334846497, "logits/rejected": -0.22876068949699402, "logps/chosen": -30.569751739501953, "logps/rejected": -33.86491012573242, "loss": 0.5549, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.9981435537338257, "rewards/margins": 0.38557058572769165, "rewards/rejected": 0.6125729084014893, "step": 20 }, { "epoch": 0.20915032679738563, "eval_logits/chosen": -0.19793638586997986, "eval_logits/rejected": -0.197507843375206, "eval_logps/chosen": -26.848573684692383, "eval_logps/rejected": -35.13609313964844, "eval_loss": 0.43225446343421936, "eval_rewards/accuracies": 0.8081395626068115, "eval_rewards/chosen": 1.355695366859436, "eval_rewards/margins": 0.861485481262207, "eval_rewards/rejected": 0.4942099153995514, "eval_runtime": 76.6712, "eval_samples_per_second": 2.23, "eval_steps_per_second": 1.122, "step": 20 }, { "epoch": 0.3137254901960784, "grad_norm": 1.9067373275756836, "learning_rate": 5.869565217391305e-06, "logits/chosen": -0.23883526027202606, "logits/rejected": -0.2374078780412674, "logps/chosen": -30.109268188476562, "logps/rejected": -43.55142593383789, "loss": 0.3732, "rewards/accuracies": 0.84375, "rewards/chosen": 1.0676757097244263, "rewards/margins": 1.4297826290130615, "rewards/rejected": -0.3621070086956024, "step": 30 }, { "epoch": 0.41830065359477125, "grad_norm": 3.629997730255127, "learning_rate": 5.760869565217392e-06, "logits/chosen": -0.2035980522632599, "logits/rejected": -0.20044592022895813, "logps/chosen": -28.863727569580078, "logps/rejected": -50.26195526123047, "loss": 0.3276, "rewards/accuracies": 0.84375, "rewards/chosen": 1.200588345527649, "rewards/margins": 2.2083239555358887, "rewards/rejected": -1.0077354907989502, "step": 40 }, { "epoch": 0.41830065359477125, "eval_logits/chosen": -0.16349415481090546, "eval_logits/rejected": -0.15954892337322235, "eval_logps/chosen": -24.61951446533203, "eval_logps/rejected": -48.91069412231445, "eval_loss": 0.34378084540367126, "eval_rewards/accuracies": 0.8895348906517029, "eval_rewards/chosen": 1.5786010026931763, "eval_rewards/margins": 2.4618515968322754, "eval_rewards/rejected": -0.8832504749298096, "eval_runtime": 76.7395, "eval_samples_per_second": 2.228, "eval_steps_per_second": 1.121, "step": 40 }, { "epoch": 0.5228758169934641, "grad_norm": 2.5461719036102295, "learning_rate": 5.652173913043479e-06, "logits/chosen": -0.16493651270866394, "logits/rejected": -0.16144290566444397, "logps/chosen": -24.76712989807129, "logps/rejected": -46.993003845214844, "loss": 0.3648, "rewards/accuracies": 0.875, "rewards/chosen": 1.5913469791412354, "rewards/margins": 2.25089955329895, "rewards/rejected": -0.6595526933670044, "step": 50 }, { "epoch": 0.6274509803921569, "grad_norm": 2.0358729362487793, "learning_rate": 5.543478260869566e-06, "logits/chosen": -0.2202371060848236, "logits/rejected": -0.21734721958637238, "logps/chosen": -26.78680419921875, "logps/rejected": -47.03938674926758, "loss": 0.3448, "rewards/accuracies": 0.831250011920929, "rewards/chosen": 1.3999578952789307, "rewards/margins": 2.0680160522460938, "rewards/rejected": -0.6680583357810974, "step": 60 }, { "epoch": 0.6274509803921569, "eval_logits/chosen": -0.17944073677062988, "eval_logits/rejected": -0.1763658970594406, "eval_logps/chosen": -27.3469295501709, "eval_logps/rejected": -48.937984466552734, "eval_loss": 0.2700035870075226, "eval_rewards/accuracies": 0.9069767594337463, "eval_rewards/chosen": 1.3058594465255737, "eval_rewards/margins": 2.191838026046753, "eval_rewards/rejected": -0.8859787583351135, "eval_runtime": 76.7029, "eval_samples_per_second": 2.229, "eval_steps_per_second": 1.121, "step": 60 }, { "epoch": 0.7320261437908496, "grad_norm": 1.7749762535095215, "learning_rate": 5.4347826086956525e-06, "logits/chosen": -0.1932402402162552, "logits/rejected": -0.19045117497444153, "logps/chosen": -26.827350616455078, "logps/rejected": -46.88459014892578, "loss": 0.2909, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": 1.3978151082992554, "rewards/margins": 2.0628795623779297, "rewards/rejected": -0.6650643348693848, "step": 70 }, { "epoch": 0.8366013071895425, "grad_norm": 2.9730594158172607, "learning_rate": 5.326086956521739e-06, "logits/chosen": -0.15139932930469513, "logits/rejected": -0.14830470085144043, "logps/chosen": -26.229084014892578, "logps/rejected": -47.40288162231445, "loss": 0.2844, "rewards/accuracies": 0.875, "rewards/chosen": 1.493841290473938, "rewards/margins": 2.2263193130493164, "rewards/rejected": -0.7324780225753784, "step": 80 }, { "epoch": 0.8366013071895425, "eval_logits/chosen": -0.15539461374282837, "eval_logits/rejected": -0.15173271298408508, "eval_logps/chosen": -30.73025894165039, "eval_logps/rejected": -54.970027923583984, "eval_loss": 0.262494832277298, "eval_rewards/accuracies": 0.9069767594337463, "eval_rewards/chosen": 0.967526912689209, "eval_rewards/margins": 2.4567105770111084, "eval_rewards/rejected": -1.4891836643218994, "eval_runtime": 76.69, "eval_samples_per_second": 2.23, "eval_steps_per_second": 1.121, "step": 80 } ], "logging_steps": 10, "max_steps": 570, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 80, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }