{"training": false, "model_name": "PPO", "track": false, "wandb_project_name": "tao", "wandb_entity": null, "env_id": "CartPole-v1", "capture_video": false, "total_timesteps": 500000, "learning_rate": 0.00025, "num_envs": 4, "num_steps": 128, "anneal_lr": true, "gamma": 0.99, "gae_lambda": 0.95, "num_minibatches": 4, "update_epochs": 4, "norm_adv": true, "clip_range": 0.2, "entropy_coef": 0.01, "vf_coef": 0.5, "max_grad_norm": 0.5, "target_kl": null, "seed": 1, "run_name": "CartPole-v1__PPO__1__500000", "device": "cpu", "torch_deterministic": true, "batch_size": 512, "minibatch_size": 128}