# Build the A2C agent bound to the already-created `env`.
# NOTE(review): the parameter meanings below assume the Stable-Baselines
# A2C constructor -- confirm against the import at the top of the file.
model = A2C(
    policy="MlpPolicy",   # policy selected by its registered string name
    env=env,
    n_steps=256,          # presumably rollout length per update -- verify
    learning_rate=0.001,
    gamma=0.99,           # presumably the reward discount factor
    verbose=1,
)