SuperSecureHuman committed on
Commit
a511f27
1 Parent(s): 605ac64

Add logs and notebook

Main.ipynb ADDED
@@ -0,0 +1,423 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f3f1b89",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:35:55.593757Z",
+ "start_time": "2022-05-06T15:35:54.206954Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "from stable_baselines3 import TD3\n",
+ "from stable_baselines3.common.evaluation import evaluate_policy\n",
+ "from stable_baselines3.common.env_util import make_vec_env\n",
+ "\n",
+ "import wandb\n",
+ "from wandb.integration.sb3 import WandbCallback\n",
+ "from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "config = {\n",
+ " \"policy_type\": \"MlpPolicy\",\n",
+ " \"env_name\": \"BipedalWalker-v3\",\n",
+ "}"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "run = wandb.init(\n",
+ " project=\"BiPedalWalker-v3\",\n",
+ " config=config,\n",
+ " sync_tensorboard=True, # auto-upload sb3's tensorboard metrics\n",
+ " monitor_gym=True, # auto-upload the videos of agents playing the game\n",
+ " save_code=True, # optional\n",
+ ")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35ccb2df",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T11:52:04.640671Z",
+ "start_time": "2022-05-06T11:52:00.907411Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "# First, we create our environment called LunarLander-v2\n",
+ "env = gym.make(\"BipedalWalker-v3\")\n",
+ "\n",
+ "# Then we reset this environment\n",
+ "observation = env.reset()\n",
+ "\n",
+ "for _ in range(200):\n",
+ " # Take a random action\n",
+ " action = env.action_space.sample()\n",
+ " print(\"Action taken:\", action)\n",
+ " env.render()\n",
+ "\n",
+ "\n",
+ " # Do this action in the environment and get\n",
+ " # next_state, reward, done and info\n",
+ " observation, reward, done, info = env.step(action)\n",
+ " \n",
+ " # If the game is done (in our case we land, crashed or timeout)\n",
+ " if done:\n",
+ " # Reset the environment\n",
+ " print(\"Environment is reset\")\n",
+ " observation = env.reset()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b6a4ef9",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T11:52:07.357076Z",
+ "start_time": "2022-05-06T11:52:07.349795Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "env.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db2d1377",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T12:11:02.520195Z",
+ "start_time": "2022-05-06T12:11:02.491149Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "env = make_vec_env(\"BipedalWalker-v3\", n_envs=32)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
+ "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a774b23f",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T12:18:14.514611Z",
+ "start_time": "2022-05-06T12:18:14.497888Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = TD3(\n",
+ " \"MlpPolicy\",\n",
+ " env,\n",
+ " learning_rate=0.0001,\n",
+ " batch_size=128,\n",
+ " gamma=0.999,\n",
+ " train_freq=32,\n",
+ " gradient_steps=32,\n",
+ " tensorboard_log='model_log/',\n",
+ " verbose=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "env_id = 'BipedalWalker-v3'"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "model.learn(total_timesteps=50000000, callback=[WandbCallback(), eval_callback])"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "model.save('300-Trained.zip')"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2e07af6",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:36:15.322985Z",
+ "start_time": "2022-05-06T15:36:10.718319Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = TD3.load('30M_Trained.zip')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "07d151f7",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:36:41.652903Z",
+ "start_time": "2022-05-06T15:36:22.118438Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "eval_env = gym.make(\"BipedalWalker-v3\")\n",
+ "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1, deterministic=True, render=True)\n",
+ "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")\n",
+ "eval_env.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de40c367",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e027a847",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:40:59.811143Z",
+ "start_time": "2022-05-06T15:40:59.670690Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "from stable_baselines3 import PPO\n",
+ "from stable_baselines3.common.vec_env import DummyVecEnv\n",
+ "from stable_baselines3.common.env_util import make_vec_env\n",
+ "\n",
+ "from huggingface_sb3 import package_to_hub\n",
+ "\n",
+ "# PLACE the variables you've just defined two cells above\n",
317
+ "# Define the name of the environment\n",
318
+ "env_id = \"BipedalWalker-v3\"\n",
319
+ "\n",
320
+ "# TODO: Define the model architecture we used\n",
321
+ "model_architecture = \"TD3\"\n",
322
+ "model_name = \"TD3_BipedalWalker-v3\"\n",
323
+ "\n",
324
+ "## Define a repo_id\n",
325
+ "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
326
+ "## CHANGE WITH YOUR REPO ID\n",
327
+ "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
328
+ "\n",
329
+ "## Define the commit message\n",
330
+ "commit_message = \"Upload score 300 trained bipedal walker\"\n",
331
+ "\n",
332
+ "# Create the evaluation env\n",
333
+ "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
334
+ "\n",
335
+ "# PLACE the package_to_hub function you've just filled here\n",
336
+ "package_to_hub(model=model, # Our trained model\n",
337
+ " model_name=model_name, # The name of our trained model \n",
338
+ " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n",
339
+ " env_id=env_id, # Name of the environment\n",
340
+ " eval_env=eval_env, # Evaluation Environment\n",
341
+ " repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
342
+ " commit_message=commit_message)\n"
343
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "eval_env.close()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.12"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "varInspector": {
+ "cols": {
+ "lenName": 16,
+ "lenType": 16,
+ "lenVar": 40
+ },
+ "kernels_config": {
+ "python": {
+ "delete_cmd_postfix": "",
+ "delete_cmd_prefix": "del ",
+ "library": "var_list.py",
+ "varRefreshCmd": "print(var_dic_list())"
+ },
+ "r": {
+ "delete_cmd_postfix": ") ",
+ "delete_cmd_prefix": "rm(",
+ "library": "var_list.r",
+ "varRefreshCmd": "cat(var_dic_list()) "
+ }
+ },
+ "types_to_exclude": [
+ "module",
+ "function",
+ "builtin_function_or_method",
+ "instance",
+ "_Feature"
+ ],
+ "window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
log/TD3_1/events.out.tfevents.1651891393.predator.3587258.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85fe860e44b27b167f97cf937be48caf03f27880ebfda093c23636b268980543
+ size 1960658
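
For reference, a minimal sketch of how the model pushed by this notebook could be loaded back from the Hub and re-evaluated. It assumes the same huggingface_sb3 and stable_baselines3 packages used above, and assumes that package_to_hub stored the checkpoint as TD3_BipedalWalker-v3.zip in the SuperSecureHuman/BipedalWalker-v3-TD3 repo (the filename is inferred from the model_name used in the notebook, not confirmed by this commit):

import gym

from huggingface_sb3 import load_from_hub
from stable_baselines3 import TD3
from stable_baselines3.common.evaluation import evaluate_policy

# Download the checkpoint from the Hub.
# The filename is an assumption based on model_name = "TD3_BipedalWalker-v3" above.
checkpoint = load_from_hub(
    repo_id="SuperSecureHuman/BipedalWalker-v3-TD3",
    filename="TD3_BipedalWalker-v3.zip",
)

# Load the TD3 agent and evaluate it on a fresh environment
model = TD3.load(checkpoint)
eval_env = gym.make("BipedalWalker-v3")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
eval_env.close()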