"""Train an A2C agent on CartPole-v1, then render rollouts of the trained policy."""

import gym
from stable_baselines3 import A2C

# Set up the CartPole environment.
env = gym.make("CartPole-v1")

# Everything that uses the environment runs inside try/finally so the
# environment (and any render window it opened) is released even when
# training or a rollout is interrupted or raises.
try:
    # Reset the environment and get the initial observation.
    observation = env.reset()

    print('observation space:', env.observation_space)
    print('action space:', env.action_space)
    print('threshold: ', env.spec.reward_threshold)

    # Build the agent with the "MlpPolicy" policy and train it.
    model = A2C("MlpPolicy", env, verbose=1)
    print(model)
    model.learn(total_timesteps=25000)

    # Roll out 500 episodes with the trained model, rendering every step.
    # NOTE(review): the reset()/step() unpacking below follows the legacy gym
    # API (reset -> observation, step -> 4-tuple); confirm the installed gym
    # version matches before upgrading dependencies.
    for _ in range(500):
        observation = env.reset()
        done = False
        while not done:
            # predict() returns (action, next recurrent state); the state is
            # not needed here, so it is discarded.
            action, _state = model.predict(observation)
            observation, reward, done, info = env.step(action)
            env.render()
finally:
    env.close()