Update app.py
app.py CHANGED
@@ -9,8 +9,7 @@ from stable_baselines3.common.env_util import make_atari_env
 from stable_baselines3.common.vec_env import VecFrameStack
 from stable_baselines3.common.env_util import make_atari_env
 
-
-st.title("Atari Environments Live Model")
+st.subheader("Atari 2600 Deep RL Environments Live AI")
 
 # @st.cache This is not cachable :(
 def load_env(env_name):
@@ -18,7 +17,6 @@ def load_env(env_name):
     env = VecFrameStack(env, n_stack=4)
     return env
 
-
 # @st.cache This is not cachable :(
 def load_model(env_name):
     custom_objects = {
@@ -26,20 +24,15 @@ def load_model(env_name):
         "lr_schedule": lambda _: 0.0,
         "clip_range": lambda _: 0.0,
     }
-
     checkpoint = load_from_hub(
         f"ThomasSimonini/ppo-{env_name}",
         f"ppo-{env_name}.zip",
     )
-
     model = PPO.load(checkpoint, custom_objects=custom_objects)
-
     return model
 
-st.write("In game theory and optimization, Nash-equilibrium loss minimization starts out playing randomly; then, by learning the ratio of action successes to action rewards (observe, decide/predict, act, then observe the outcome), Deep RL agents go from 50% efficiency to 98-99% efficiency, depending on decision quality, without making mistakes.")
-
-st.write("list of agent environments https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/benchmark.md")
-st.write("Deep RL models: https://huggingface.co/sb3")
+st.write("In game theory and optimization, Nash-equilibrium loss minimization starts out playing randomly; then, by learning the ratio of action successes to action rewards (observe, decide/predict, act, then observe the outcome), Deep RL agents go from 50% efficiency to 98-99% efficiency, depending on decision quality, without making mistakes. A good reference to environments is here: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/benchmark.md")
+#st.write("Deep RL models: https://huggingface.co/sb3")
 
 env_name = st.selectbox(
     "Select environment",
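For orientation (not part of the commit), here is a minimal sketch of how the load_env()/load_model() pattern above plugs into a Streamlit rollout loop. The environment id, the 200-step loop, and the frame-streaming calls are illustrative assumptions, written against the gym-era Stable-Baselines3 API this Space targets; the helper logic and the custom_objects override come straight from the diff.

import streamlit as st
from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# Illustrative id; any Atari id with a matching ppo-<env> checkpoint works.
env_name = "SpaceInvadersNoFrameskip-v4"

# Same construction as load_env(): one Atari env, four stacked frames.
env = make_atari_env(env_name, n_envs=1)
env = VecFrameStack(env, n_stack=4)

# Same construction as load_model(): fetch the checkpoint from the Hub and
# override the schedule callables so checkpoints pickled by older SB3
# versions still deserialize (they are unused at inference time anyway).
custom_objects = {
    "lr_schedule": lambda _: 0.0,
    "clip_range": lambda _: 0.0,
}
checkpoint = load_from_hub(f"ThomasSimonini/ppo-{env_name}", f"ppo-{env_name}.zip")
model = PPO.load(checkpoint, custom_objects=custom_objects)

# Roll the policy and stream frames into the page.
obs = env.reset()
frame = st.empty()
for _ in range(200):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = env.step(action)
    frame.image(env.render(mode="rgb_array"), caption=env_name)

As the in-code comments note, neither the VecEnv nor the loaded PPO model is cacheable with @st.cache, so the app presumably rebuilds both on each rerun.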