# Hyperparameter set serialized as a Python OrderedDict.
#
# Structure: the tag below calls collections.OrderedDict with one positional
# argument, a list of [key, value] pairs. Pair order is the dict's order.
#
# SECURITY: the python/object/apply tag is rejected by safe YAML loaders
# (e.g. PyYAML's yaml.safe_load). Loading this file requires a loader that can
# construct arbitrary Python objects, so only load it from a trusted source.
#
# NOTE(review): the keys look like Stable-Baselines3 / RL Zoo recurrent-PPO
# settings — confirm against the code that consumes this file.
!!python/object/apply:collections.OrderedDict
- - [batch_size, 128]
  - [clip_range, 0.2]
  - [ent_coef, 0.0]
  # Ordered list of wrappers, each a single-key mapping of
  # "dotted.path.ClassName" -> keyword arguments.
  - - env_wrapper
    - - gym.wrappers.resize_observation.ResizeObservation:
          shape: 64
      - gym.wrappers.gray_scale_observation.GrayScaleObservation:
          keep_dim: true
  - [frame_stack, 2]
  - [gae_lambda, 0.95]
  - [gamma, 0.99]
  # Stored as a string, not a number.
  # NOTE(review): presumably a linear schedule starting at 1e-4 — confirm.
  - [learning_rate, lin_1e-4]
  - [max_grad_norm, 0.5]
  - [n_envs, 8]
  - [n_epochs, 10]
  - [n_steps, 512]
  # Kept as a float (4000000.0), exactly as dumped.
  - [n_timesteps, 4000000.0]
  # Stored as a *string* containing a Python dict literal, not a YAML mapping.
  # NOTE(review): presumably evaluated by the consumer — verify.
  - - normalize
    - '{''norm_obs'': False, ''norm_reward'': True}'
  - [policy, CnnLstmPolicy]
  # Stored as a *string* containing a Python expression (it references `nn`).
  # NOTE(review): presumably evaluated by the consumer with `nn` in scope — verify.
  - - policy_kwargs
    - "dict(log_std_init=-2, ortho_init=False, enable_critic_lstm=False, activation_fn=nn.GELU, lstm_hidden_size=128, )"
  - [sde_sample_freq, 4]
  - [use_sde, true]
  - [vf_coef, 0.5]