# a2c_sb3_cartpole / a2c_sb3_cartpole.py
# Uploaded by Younes-hands-on-rl (commit d446d2e, 678 bytes).
import gym
from stable_baselines3 import A2C
# Set up the CartPole environment.
# No explicit env.reset() is done here: the rollout loop further down resets
# the environment before it reads any observation, so an initial reset whose
# result is immediately discarded would be redundant.
env = gym.make("CartPole-v1")

# Show the observation/action spaces and the reward threshold stored in the
# environment spec.
print('observation space:', env.observation_space)
print('action space:', env.action_space)
print('threshold: ', env.spec.reward_threshold)

# Build an A2C agent using the "MlpPolicy" policy on this environment
# (verbose=1 enables Stable-Baselines3's informational output).
model = A2C("MlpPolicy", env, verbose=1)
print(model)

# Train the agent for 25,000 environment timesteps.
model.learn(total_timesteps=25000)
# Roll out the trained policy for 500 episodes, rendering every step.
try:
    for _ in range(500):
        # Start each episode from a fresh initial observation.
        observation = env.reset()
        done = False
        while not done:
            # predict() returns (action, state); the state is only meaningful
            # for recurrent policies, so it is ignored here.
            action, _state = model.predict(observation)
            observation, reward, done, info = env.step(action)
            env.render()
finally:
    # Always release the environment's resources (including any render
    # window), even when the long rendering run is interrupted.
    env.close()