"""Train an A2C agent on CartPole-v1, save it, then render the trained policy."""

import gym

from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env  # imported but not used below

# A single (non-vectorized) environment created directly through gym.
env = gym.make("CartPole-v1")

model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=25000)

# Save right after training so that interrupting the (long) rendering loop
# below does not discard the trained model.
model.save("a2c_Cart_Pole")

obs = env.reset()
try:
    for i in range(10000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        # Stepping an episode that has already ended is invalid; start a new
        # episode as soon as the environment reports termination.
        if dones:
            obs = env.reset()
finally:
    # Release the render window / environment resources even if the loop is
    # interrupted (e.g. Ctrl-C) or an exception is raised.
    env.close()