# Streamlit demo: watch pretrained PPO agents play Atari 2600 games live.
# (Web-scrape artifacts that preceded the source -- build-status banners,
# commit hashes, file size, and a line-number gutter -- removed so the
# file parses as Python.)
import cv2
import streamlit as st
import time
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.env_util import make_atari_env
# Page title rendered at the top of the Streamlit app.
st.subheader("Atari 2600 Deep RL Environments Live AI")
# NOTE: deliberately NOT wrapped in @st.cache -- the env object is not cacheable.
def load_env(env_name):
    """Build a single-instance Atari VecEnv for *env_name*, stacked 4 frames deep."""
    return VecFrameStack(make_atari_env(env_name, n_envs=1), n_stack=4)
# NOTE: deliberately NOT wrapped in @st.cache -- the loaded model is not cacheable.
def load_model(env_name):
    """Fetch the pretrained PPO checkpoint for *env_name* from the Hub and load it.

    The schedule callables are overridden with zeros so checkpoints saved by
    older stable-baselines3 versions deserialize cleanly for inference.
    """
    overrides = {
        "learning_rate": 0.0,
        "lr_schedule": lambda _: 0.0,
        "clip_range": lambda _: 0.0,
    }
    checkpoint_path = load_from_hub(
        f"ThomasSimonini/ppo-{env_name}",
        f"ppo-{env_name}.zip",
    )
    return PPO.load(checkpoint_path, custom_objects=overrides)
# Intro blurb and a pointer to the rl-baselines3-zoo benchmark table.
st.write("In game theory and optimization Nash Equilibrium loss minimization starts playing randomly but then by understanding ratios of action success to action-reward with an action (observe, decide/predict, act and then observe outcome the Deep RL agents go from 50% efficiency to 98-99% efficiency based on quality of decision without making mistakes. A good reference to environments is here https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/benchmark.md")
#st.write("Deep RL models: https://huggingface.co/sb3")

# Environments with a matching pretrained checkpoint on the Hub; the
# commented-out entries are candidates that have not been enabled yet.
ENV_CHOICES = (
    "SeaquestNoFrameskip-v4",
    "QbertNoFrameskip-v4",
    "SpaceInvadersNoFrameskip-v4",
    "PongNoFrameskip-v4",
    #"AsteroidsNoFrameskip-v4",
    #"BeamRiderNoFrameskip-v4",
    #"BreakoutNoFrameskip-v4 ",
    #"EnduroNoFrameskip-v4",
    #"MsPacmanNoFrameskip-v4",
    #"RoadRunnerNoFrameskip-v4",
    #"Swimmer-v3",
    #"Walker2d-v3",
)

env_name = st.selectbox("Select environment", ENV_CHOICES)
num_episodes = st.slider("Number of Episodes", 1, 20, 5)

env = load_env(env_name)
model = load_model(env_name)
obs = env.reset()
# Playback loop: st.empty() provides a single placeholder, so each st.image
# call inside the `with` block overwrites the previous frame, animating the
# episode in place instead of stacking images down the page.
with st.empty():
    for _ in range(num_episodes):
        obs = env.reset()
        done = False
        while not done:
            # Old gym render API -- assumes this SB3/gym version accepts
            # mode="rgb_array" on a VecEnv (TODO confirm against pinned deps).
            frame = env.render(mode="rgb_array")
            st.image(frame, width=400)
            action, _states = model.predict(obs)
            # model.predict on a VecEnv observation already returns a batched
            # action array of shape (n_envs,); the original wrapped it in an
            # extra list, producing a (1, 1)-shaped action batch. Pass it
            # through directly.
            obs, reward, done, info = env.step(action)
            # Throttle rendering so the episode is watchable.
            time.sleep(0.1)