# Experiment configuration for a PPO run on SpaceInvadersNoFrameskip-v4.
# NOTE(review): the key names and 'cfg_type' suggest this is consumed by an
# external RL framework's PPO policy; the exact meaning of each field is
# defined by that consumer, not here -- confirm against its documentation.
exp_config = {
    # --- algorithm selection / runtime ---
    "type": "ppo",
    "on_policy": True,
    "cuda": True,
    "action_space": "discrete",
    # --- return / advantage estimation ---
    "discount_factor": 0.99,
    "gae_lambda": 0.95,
    # --- optimisation ---
    "epoch_per_collect": 1,
    "batch_size": 320,
    "learning_rate": 0.001,
    # Two-element schedule spec; presumably (step/epoch count, decay factor)
    # -- TODO confirm the order and units with the consumer.
    "lr_scheduler": [2000, 0.1],
    "weight_decay": 0,
    # --- loss terms ---
    "value_weight": 0.5,
    "entropy_weight": 0.01,
    "clip_ratio": 0.2,
    "adv_norm": True,
    "value_norm": "baseline",
    "ppo_param_init": True,
    "grad_norm": 0.5,
    # --- data collection / evaluation ---
    # Same value as 'batch_size' above.
    "n_sample": 320,
    "unroll_len": 1,
    "deterministic_eval": True,
    # --- network sizes ---
    "model": {
        "encoder_hidden_size_list": [64, 64, 128],
        "actor_head_hidden_size": 128,
        "critic_head_hidden_size": 128,
    },
    # --- identification ---
    "cfg_type": "PPOFPolicyDict",
    "env_id": "SpaceInvadersNoFrameskip-v4",
    "exp_name": "SpaceInvadersNoFrameskip-v4-PPO",
}