awacke1 committed
Commit 709da65
1 Parent(s): cc4348c

Update app.py

Files changed (1)
  1. app.py +3 -10
app.py CHANGED
@@ -9,8 +9,7 @@ from stable_baselines3.common.env_util import make_atari_env
 from stable_baselines3.common.vec_env import VecFrameStack
 from stable_baselines3.common.env_util import make_atari_env
 
-
-st.title("Atari Environments Live Model")
+st.subheader("Atari 2600 Deep RL Environments Live AI")
 
 # @st.cache This is not cachable :(
 def load_env(env_name):
@@ -18,7 +17,6 @@ def load_env(env_name):
     env = VecFrameStack(env, n_stack=4)
     return env
 
-
 # @st.cache This is not cachable :(
 def load_model(env_name):
     custom_objects = {
@@ -26,20 +24,15 @@ def load_model(env_name):
         "lr_schedule": lambda _: 0.0,
         "clip_range": lambda _: 0.0,
     }
-
     checkpoint = load_from_hub(
         f"ThomasSimonini/ppo-{env_name}",
         f"ppo-{env_name}.zip",
     )
-
     model = PPO.load(checkpoint, custom_objects=custom_objects)
-
     return model
 
-st.write("In game theory and optimization, Nash equilibrium loss minimization starts with the agent playing randomly; by learning the ratio of action success to action reward over each cycle (observe, decide/predict, act, then observe the outcome), Deep RL agents go from about 50% efficiency to 98-99% efficiency as their decision quality improves.")
-
-st.write("List of agent environments: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/benchmark.md")
-st.write("Deep RL models: https://huggingface.co/sb3")
+st.write("In game theory and optimization, Nash equilibrium loss minimization starts with the agent playing randomly; by learning the ratio of action success to action reward over each cycle (observe, decide/predict, act, then observe the outcome), Deep RL agents go from about 50% efficiency to 98-99% efficiency as their decision quality improves. A good reference for the environments is https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/benchmark.md")
+#st.write("Deep RL models: https://huggingface.co/sb3")
 
 env_name = st.selectbox(
     "Select environment",