import gym | |
from stable_baselines3 import A2C | |
env = gym.make("CartPole-v1") | |
model = A2C("MlpPolicy", env, verbose=1) | |
model.learn(total_timesteps=10000) | |
vec_env = model.get_env() | |
obs = vec_env.reset() | |
for i in range(1000): | |
action, _state = model.predict(obs, deterministic=True) | |
obs, reward, done, info = vec_env.step(action) | |
vec_env.render() |