daenielkim-66 committed
Commit d57e89d
Parent(s): 0d8b86a
Update README.md

README.md CHANGED

@@ -43,64 +43,26 @@ Reinforcement Learning Tips and Tricks: https://stable-baselines3.readthedocs.io
 
 A Github Training Framework : https://github.com/DLR-RM/rl-baselines3-zoo
 
-Poe (GPT-4): I tried to follow what this was saying, but I had a hard time understanding it.
 ```
-import gym
-import itertools
 from stable_baselines3 import A2C
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3.common.evaluation import evaluate_policy
-from stable_baselines3.common.callbacks import EvalCallback
-from stable_baselines3.common.env_checker import check_env
-from stable_baselines3.common.vec_env import VecNormalize
-```
-
-### Next, load and prepare your environment:
-```
-env = gym.make('your_environment_name')  # Replace with the name of your environment
-env = DummyVecEnv([lambda: env])
-env = VecNormalize(env, norm_obs=True, norm_reward=False, clip_obs=10.)
-```
-
-### Now, define a function to train and evaluate your A2C agent:
-```
-def train_and_evaluate(hyperparameters):
-    model = A2C("MlpPolicy", env, verbose=0, **hyperparameters)
-
-    eval_env = gym.make('your_evaluation_environment_name')  # Replace with the name of your evaluation environment
-    eval_env = DummyVecEnv([lambda: eval_env])
-    eval_env = VecNormalize(eval_env, norm_obs=True, norm_reward=False, clip_obs=10.)
-
-    model.learn(total_timesteps=10_000)  # training budget per combination; adjust for your problem
-    mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=10)
-
-    return mean_reward
-```
-
-### Now, we can define the hyperparameters grid and start the grid search:
-```
-hyperparameters_grid = {
-    'gamma': [0.99, 0.95],
-    'learning_rate': [0.001, 0.0001],
-    'ent_coef': [0.01, 0.1],
-    # Add other hyperparameters of interest
-}
-
-best_reward = float('-inf')
-best_hyperparameters = None
-
-# Try every combination of the grid values
-for values in itertools.product(*hyperparameters_grid.values()):
-    hyperparameters = dict(zip(hyperparameters_grid.keys(), values))
-    mean_reward = train_and_evaluate(hyperparameters)
-
-    if mean_reward > best_reward:
-        best_reward = mean_reward
-        best_hyperparameters = hyperparameters
-
-print("Best hyperparameters:", best_hyperparameters)
-```
-In this grid search, we specify a range of values for each hyperparameter of interest. The train_and_evaluate function trains the A2C agent with the given hyperparameters and evaluates its performance, and we keep the combination that achieves the highest mean reward.
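One pitfall in the removed version above: the evaluation environment gets its own fresh VecNormalize wrapper, so its observation statistics never match the ones the agent was trained under, and the reported mean_reward can be misleading. A minimal sketch of one way to keep the two wrappers in sync, assuming the `env` and `eval_env` objects from that snippet (sync_envs_normalization is the helper EvalCallback uses internally):

```
from stable_baselines3.common.vec_env import sync_envs_normalization

# Copy the running observation statistics from the training wrapper
# into the evaluation wrapper before each call to evaluate_policy
sync_envs_normalization(env, eval_env)
eval_env.training = False  # freeze the statistics during evaluation
```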
 
 A Github Training Framework : https://github.com/DLR-RM/rl-baselines3-zoo
 
+Poe (GPT-4): Showed me how to use Optuna to do automated hyperparameter optimization, but I was still trying to understand how it worked and couldn't get it to run properly.
 ```
+import optuna
 from stable_baselines3 import A2C
+from stable_baselines3.common.env_util import make_vec_env
+
+env = make_vec_env('your_environment_name', n_envs=1)  # Replace with the name of your environment
+
+def optimize_agent(trial):
+    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1)  # suggest_float(..., log=True) in newer Optuna
+    gamma = trial.suggest_uniform('gamma', 0.8, 0.9999)
+    gae_lambda = trial.suggest_uniform('gae_lambda', 0.8, 0.99)
+    n_steps = trial.suggest_int('n_steps', 5, 20)
+
+    model = A2C('MlpPolicy', env, verbose=0, learning_rate=learning_rate, gamma=gamma, gae_lambda=gae_lambda, n_steps=n_steps)
+    model.learn(total_timesteps=5000)
+    # Score the trial by the rewards collected in the last rollout;
+    # .sum() returns a scalar, which is what an Optuna objective must return
+    rewards = float(model.rollout_buffer.rewards.sum())
+
+    return rewards
+
+study = optuna.create_study(direction='maximize')
+study.optimize(optimize_agent, n_trials=100)
+
+print('Best hyperparameters:', study.best_params)
+```
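Since the likely trouble here is the objective (the rollout buffer only holds the rewards of the most recent rollout, which is a noisy score), a more conventional setup is to rate each trial with evaluate_policy on a separate evaluation environment. A minimal sketch under the same assumptions as the snippet above (the hypothetical 'your_environment_name', deliberately small budgets so trials stay cheap):

```
import optuna
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1, log=True)
    gamma = trial.suggest_float('gamma', 0.8, 0.9999)

    env = make_vec_env('your_environment_name', n_envs=1)       # fresh envs per trial
    eval_env = make_vec_env('your_environment_name', n_envs=1)

    model = A2C('MlpPolicy', env, verbose=0, learning_rate=learning_rate, gamma=gamma)
    model.learn(total_timesteps=5000)

    # Mean episodic reward over a few full evaluation episodes
    mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=5)
    return mean_reward

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
print('Best hyperparameters:', study.best_params)
```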