"""Train an A2C agent on CartPole-v1, then render the learned policy.

The script prints the environment's observation space, action space and
reward threshold, trains an ``MlpPolicy`` A2C model, and finally plays a
number of rendered episodes using the trained model.
"""
import gym
from stable_baselines3 import A2C

# Number of environment steps used for training.
TOTAL_TIMESTEPS = 25000
# Number of rendered episodes played after training.
EVAL_EPISODES = 500

# Create the CartPole environment.
env = gym.make("CartPole-v1")

try:
    print('observation space:', env.observation_space)
    print('action space:', env.action_space)
    print('threshold: ', env.spec.reward_threshold)

    model = A2C("MlpPolicy", env, verbose=1)
    print(model)
    model.learn(total_timesteps=TOTAL_TIMESTEPS)

    # NOTE(review): the loop below assumes the pre-0.26 gym API, where
    # reset() returns only the observation and step() returns a 4-tuple
    # -- confirm against the installed gym version.
    for _ in range(EVAL_EPISODES):
        observation = env.reset()
        done = False
        while not done:
            # predict() returns (action, state); the state is unused here.
            action, _state = model.predict(observation)
            observation, reward, done, info = env.step(action)
            env.render()
finally:
    # Always release the environment (and its render window), even if
    # training or evaluation is interrupted.
    env.close()