|
{ |
|
"best_metric": 0.9069767594337463, |
|
"best_model_checkpoint": "./llama3/27-06-24-Weni-ZeroShot-Agents-Llama3-4.0.37-DPO_Experiment with DPO and Llama3 8B, zeroshot 4.0.37-2_max_steps-570_batch_16_2024-06-27_ppid_9/checkpoint-80", |
|
"epoch": 0.8366013071895425, |
|
"eval_steps": 20, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10457516339869281, |
|
"grad_norm": 3.655749559402466, |
|
"learning_rate": 3.3333333333333337e-06, |
|
"logits/chosen": -0.22948360443115234, |
|
"logits/rejected": -0.22978875041007996, |
|
"logps/chosen": -39.710899353027344, |
|
"logps/rejected": -39.52346420288086, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11001075804233551, |
|
"rewards/margins": 0.06790003925561905, |
|
"rewards/rejected": 0.04211071878671646, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20915032679738563, |
|
"grad_norm": 2.4934024810791016, |
|
"learning_rate": 5.978260869565218e-06, |
|
"logits/chosen": -0.22906163334846497, |
|
"logits/rejected": -0.22876068949699402, |
|
"logps/chosen": -30.569751739501953, |
|
"logps/rejected": -33.86491012573242, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.9981435537338257, |
|
"rewards/margins": 0.38557058572769165, |
|
"rewards/rejected": 0.6125729084014893, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20915032679738563, |
|
"eval_logits/chosen": -0.19793638586997986, |
|
"eval_logits/rejected": -0.197507843375206, |
|
"eval_logps/chosen": -26.848573684692383, |
|
"eval_logps/rejected": -35.13609313964844, |
|
"eval_loss": 0.43225446343421936, |
|
"eval_rewards/accuracies": 0.8081395626068115, |
|
"eval_rewards/chosen": 1.355695366859436, |
|
"eval_rewards/margins": 0.861485481262207, |
|
"eval_rewards/rejected": 0.4942099153995514, |
|
"eval_runtime": 76.6712, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.122, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 1.9067373275756836, |
|
"learning_rate": 5.869565217391305e-06, |
|
"logits/chosen": -0.23883526027202606, |
|
"logits/rejected": -0.2374078780412674, |
|
"logps/chosen": -30.109268188476562, |
|
"logps/rejected": -43.55142593383789, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.0676757097244263, |
|
"rewards/margins": 1.4297826290130615, |
|
"rewards/rejected": -0.3621070086956024, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.41830065359477125, |
|
"grad_norm": 3.629997730255127, |
|
"learning_rate": 5.760869565217392e-06, |
|
"logits/chosen": -0.2035980522632599, |
|
"logits/rejected": -0.20044592022895813, |
|
"logps/chosen": -28.863727569580078, |
|
"logps/rejected": -50.26195526123047, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.200588345527649, |
|
"rewards/margins": 2.2083239555358887, |
|
"rewards/rejected": -1.0077354907989502, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41830065359477125, |
|
"eval_logits/chosen": -0.16349415481090546, |
|
"eval_logits/rejected": -0.15954892337322235, |
|
"eval_logps/chosen": -24.61951446533203, |
|
"eval_logps/rejected": -48.91069412231445, |
|
"eval_loss": 0.34378084540367126, |
|
"eval_rewards/accuracies": 0.8895348906517029, |
|
"eval_rewards/chosen": 1.5786010026931763, |
|
"eval_rewards/margins": 2.4618515968322754, |
|
"eval_rewards/rejected": -0.8832504749298096, |
|
"eval_runtime": 76.7395, |
|
"eval_samples_per_second": 2.228, |
|
"eval_steps_per_second": 1.121, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5228758169934641, |
|
"grad_norm": 2.5461719036102295, |
|
"learning_rate": 5.652173913043479e-06, |
|
"logits/chosen": -0.16493651270866394, |
|
"logits/rejected": -0.16144290566444397, |
|
"logps/chosen": -24.76712989807129, |
|
"logps/rejected": -46.993003845214844, |
|
"loss": 0.3648, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.5913469791412354, |
|
"rewards/margins": 2.25089955329895, |
|
"rewards/rejected": -0.6595526933670044, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 2.0358729362487793, |
|
"learning_rate": 5.543478260869566e-06, |
|
"logits/chosen": -0.2202371060848236, |
|
"logits/rejected": -0.21734721958637238, |
|
"logps/chosen": -26.78680419921875, |
|
"logps/rejected": -47.03938674926758, |
|
"loss": 0.3448, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.3999578952789307, |
|
"rewards/margins": 2.0680160522460938, |
|
"rewards/rejected": -0.6680583357810974, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"eval_logits/chosen": -0.17944073677062988, |
|
"eval_logits/rejected": -0.1763658970594406, |
|
"eval_logps/chosen": -27.3469295501709, |
|
"eval_logps/rejected": -48.937984466552734, |
|
"eval_loss": 0.2700035870075226, |
|
"eval_rewards/accuracies": 0.9069767594337463, |
|
"eval_rewards/chosen": 1.3058594465255737, |
|
"eval_rewards/margins": 2.191838026046753, |
|
"eval_rewards/rejected": -0.8859787583351135, |
|
"eval_runtime": 76.7029, |
|
"eval_samples_per_second": 2.229, |
|
"eval_steps_per_second": 1.121, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7320261437908496, |
|
"grad_norm": 1.7749762535095215, |
|
"learning_rate": 5.4347826086956525e-06, |
|
"logits/chosen": -0.1932402402162552, |
|
"logits/rejected": -0.19045117497444153, |
|
"logps/chosen": -26.827350616455078, |
|
"logps/rejected": -46.88459014892578, |
|
"loss": 0.2909, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.3978151082992554, |
|
"rewards/margins": 2.0628795623779297, |
|
"rewards/rejected": -0.6650643348693848, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8366013071895425, |
|
"grad_norm": 2.9730594158172607, |
|
"learning_rate": 5.326086956521739e-06, |
|
"logits/chosen": -0.15139932930469513, |
|
"logits/rejected": -0.14830470085144043, |
|
"logps/chosen": -26.229084014892578, |
|
"logps/rejected": -47.40288162231445, |
|
"loss": 0.2844, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.493841290473938, |
|
"rewards/margins": 2.2263193130493164, |
|
"rewards/rejected": -0.7324780225753784, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8366013071895425, |
|
"eval_logits/chosen": -0.15539461374282837, |
|
"eval_logits/rejected": -0.15173271298408508, |
|
"eval_logps/chosen": -30.73025894165039, |
|
"eval_logps/rejected": -54.970027923583984, |
|
"eval_loss": 0.262494832277298, |
|
"eval_rewards/accuracies": 0.9069767594337463, |
|
"eval_rewards/chosen": 0.967526912689209, |
|
"eval_rewards/margins": 2.4567105770111084, |
|
"eval_rewards/rejected": -1.4891836643218994, |
|
"eval_runtime": 76.69, |
|
"eval_samples_per_second": 2.23, |
|
"eval_steps_per_second": 1.121, |
|
"step": 80 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 80, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|