Toffee0705 committed on
Commit
7652da6
1 Parent(s): 1fc1f4d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +64 -2
README.md CHANGED
@@ -30,8 +30,70 @@ TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  ...
37
  ```
 
30
 
31
 
32
  ```python
33
+ import gymnasium
34
+
35
+ from huggingface_sb3 import load_from_hub, package_to_hub
36
+ from huggingface_hub import notebook_login # Log in to our Hugging Face account so that we can upload models to the Hub.
37
+
38
+ from stable_baselines3 import PPO
39
+ from stable_baselines3.common.evaluation import evaluate_policy
40
+ from stable_baselines3.common.env_util import make_vec_env
41
+
42
+ import gymnasium as gym
43
+
44
+ # First, we create our environment called LunarLander-v2
45
+ env = gym.make("LunarLander-v2")
46
+
47
+ # Then we reset this environment
48
+ observation, info = env.reset()
49
+
50
+ for _ in range(20):
51
+ # Take a random action
52
+ action = env.action_space.sample()
53
+ print("Action taken:", action)
54
+
55
+ # Do this action in the environment and get
56
+ # observation, reward, terminated, truncated and info
57
+ observation, reward, terminated, truncated, info = env.step(action)
58
+
59
+ # If the game is terminated (in our case, we landed or crashed) or truncated (timed out)
60
+ if terminated or truncated:
61
+ # Reset the environment
62
+ print("Environment is reset")
63
+ observation, info = env.reset()
64
+
65
+ env.close()
66
+
67
+ # Create the environment
68
+ env = make_vec_env('LunarLander-v2', n_envs=16)
69
+
70
+ model = PPO(
71
+ policy = 'MlpPolicy',
72
+ env = env,
73
+ n_steps = 1024,
74
+ batch_size = 64,
75
+ n_epochs = 4,
76
+ gamma = 0.999,
77
+ gae_lambda = 0.98,
78
+ ent_coef = 0.01,
79
+ verbose=1)
80
+
81
+ # TODO: Train it for 1,000,000 timesteps
82
+ model.learn(total_timesteps=1000000)
83
+
84
+ # TODO: Specify file name for model and save the model to file
85
+ model_name = "ppo-LunarLander-v2"
86
+ model.save(model_name)
87
+
88
+ # TODO: Evaluate the agent
89
+ # Create a new environment for evaluation
90
+ eval_env = gym.make("LunarLander-v2")
91
+
92
+ # Evaluate the model with 10 evaluation episodes and deterministic=True
93
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
94
+
95
+ # Print the results
96
+ print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
97
 
98
  ...
99
  ```