{
  "exp_name": "ipykernel_launcher",
  "seed": 0,
  "log_with": null,
  "task_name": null,
  "model_name": "meta-llama/Llama-2-7b-hf",
  "query_dataset": "imdb",
  "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb",
  "remove_unused_columns": true,
  "tracker_kwargs": {},
  "accelerator_kwargs": {},
  "project_kwargs": {},
  "tracker_project_name": "trl",
  "push_to_hub_if_best_kwargs": {},
  "steps": 20000,
  "learning_rate": 2.94e-05,
  "adap_kl_ctrl": true,
  "init_kl_coef": 0.2,
  "kl_penalty": "kl",
  "target": 6,
  "horizon": 10000,
  "gamma": 1,
  "lam": 0.95,
  "cliprange": 0.2,
  "cliprange_value": 0.2,
  "vf_coef": 0.1,
  "batch_size": 16,
  "forward_batch_size": null,
  "mini_batch_size": 4,
  "gradient_accumulation_steps": 1,
  "world_size": 1,
  "ppo_epochs": 100,
  "max_grad_norm": null,
  "optimize_cuda_cache": null,
  "optimize_device_cache": false,
  "early_stopping": false,
  "target_kl": 1,
  "compare_steps": 1,
  "ratio_threshold": 10.0,
  "use_score_scaling": false,
  "use_score_norm": false,
  "score_clip": null,
  "whiten_rewards": false,
  "is_encoder_decoder": false,
  "is_peft_model": true,
  "backward_batch_size": 4,
  "global_backward_batch_size": 4,
  "global_batch_size": 16
}