# Hyperparameter configuration stored as an ordered list of [key, value] pairs.
#
# Structure: the document node carries a PyYAML Python-specific tag whose
# argument list holds ONE positional argument -- a sequence of two-item
# sequences -- so loading it yields collections.OrderedDict([[key, value], ...])
# with the keys in the order written below.
#
# SECURITY NOTE: `!!python/object/apply` makes the loader call a Python
# callable. yaml.safe_load() rejects this tag; the file has to be read with a
# loader that allows Python object tags, so only load it from a trusted source.
#
# NOTE(review): the key names (ent_coef, tau, use_sde, ...) look like
# Stable-Baselines3 off-policy algorithm settings -- confirm against whatever
# consumes this file before changing any value.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 512
  - - buffer_size
    - 50000
  - - ent_coef
    - 0.1
  - - gamma
    - 0.9999
  - - gradient_steps
    - 32
  - - learning_rate
    - 0.0003
  - - learning_starts
    - 0
  # Stored as a float (50000.0), not an integer; kept exactly as recorded.
  - - n_timesteps
    - 50000.0
  - - policy
    - MlpPolicy
  # This value is a plain YAML string that happens to look like a Python
  # expression; presumably the consumer evaluates it -- verify.
  - - policy_kwargs
    - dict(log_std_init=-3.67, net_arch=[64, 64])
  - - tau
    - 0.01
  - - train_freq
    - 32
  - - use_sde
    - true