Megalino111 committed on
Commit
5f67c1a
1 Parent(s): 55a7968

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -2
README.md CHANGED
@@ -30,8 +30,74 @@ TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  ...
37
  ```
 
30
 
31
 
32
  ```python
33
+ import os
34
+
35
+ import gymnasium as gym
36
+ import panda_gym
37
+
38
+ from huggingface_sb3 import load_from_hub, package_to_hub
39
+
40
+ from stable_baselines3 import A2C
41
+ from stable_baselines3.common.evaluation import evaluate_policy
42
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
43
+ from stable_baselines3.common.env_util import make_vec_env
44
+
45
+ from huggingface_hub import notebook_login
46
+
47
+
48
+ env_id = "PandaReachDense-v3"
49
+
50
+ # Create the env
51
+ env = gym.make(env_id)
52
+
53
+ # Get the state space and action space
54
+ s_size = env.observation_space.shape
55
+ a_size = env.action_space
56
+
57
+ print("_____OBSERVATION SPACE_____ \n")
58
+ print("The State Space is: ", s_size)
59
+ print("Sample observation", env.observation_space.sample()) # Get a random observation
60
+
61
+ print("\n _____ACTION SPACE_____ \n")
62
+ print("The Action Space is: ", a_size)
63
+ print("Action Space Sample", env.action_space.sample()) # Take a random action
64
+
65
+ env = make_vec_env(env_id, n_envs=4)
66
+
67
+ env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)
68
+
69
+ model = A2C("MultiInputPolicy", env, verbose=1) # Create the A2C model and try to find the best parameters
70
+
71
+ model.learn(1_000_000)
72
+
73
+ # Save the model and VecNormalize statistics when saving the agent
74
+ model.save("a2c-PandaReachDense-v3")
75
+ env.save("vec_normalize.pkl")
76
+
77
+
78
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
79
+
80
+ # Load the saved statistics
81
+ eval_env = DummyVecEnv([lambda: gym.make("PandaReachDense-v3")])
82
+ eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)
83
+
84
+ # We need to override the render_mode
85
+ eval_env.render_mode = "rgb_array"
86
+
87
+ # do not update them at test time
88
+ eval_env.training = False
89
+ # reward normalization is not needed at test time
90
+ eval_env.norm_reward = False
91
+
92
+ # Load the agent
93
+ model = A2C.load("a2c-PandaReachDense-v3")
94
+
95
+ mean_reward, std_reward = evaluate_policy(model, eval_env)
96
+
97
+ print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
98
+
99
+
100
+
101
 
102
  ...
103
  ```