bonadio committed on
Commit
5d5dd44
1 Parent(s): ab0f626

DQN cartpole v1

Browse files
.gitignore CHANGED
@@ -8,3 +8,5 @@ dist/
8
  *.egg-info/
9
  build/
10
  __pycache__/
 
 
 
8
  *.egg-info/
9
  build/
10
  __pycache__/
11
+ data/
12
+ alt/
DQN_v1.ipynb ADDED
@@ -0,0 +1,872 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "nwaAZRu1NTiI"
7
+ },
8
+ "source": [
9
+ "# DQN\n",
10
+ "\n",
11
+ "#### This version implements DQN with Keras\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {
18
+ "id": "DDf1gLC2NTiK"
19
+ },
20
+ "outputs": [],
21
+ "source": [
22
+ "# !pip install -r ./requirements.txt\n",
23
+ "!pip install stable_baselines3[extra]\n",
24
+ "!pip install huggingface_sb3\n"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 2,
30
+ "metadata": {
31
+ "id": "LNXxxKojNTiL"
32
+ },
33
+ "outputs": [
34
+ {
35
+ "name": "stderr",
36
+ "output_type": "stream",
37
+ "text": [
38
+ "2022-12-21 23:28:04.436066: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
39
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
40
+ "\n"
41
+ ]
42
+ }
43
+ ],
44
+ "source": [
45
+ "import tensorflow as tf\n",
46
+ "from tensorflow.keras import layers\n",
47
+ "from tensorflow.keras.utils import to_categorical\n",
48
+ "import gym\n",
49
+ "from gym import spaces\n",
50
+ "from gym.utils import seeding\n",
51
+ "from gym import wrappers\n",
52
+ "\n",
53
+ "from tqdm.notebook import tqdm\n",
54
+ "from collections import deque\n",
55
+ "import numpy as np\n",
56
+ "import random\n",
57
+ "from matplotlib import pyplot as plt\n",
58
+ "\n",
59
+ "import io\n",
60
+ "import base64\n",
61
+ "from IPython.display import HTML, Video\n"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 16,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "class DQN:\n",
71
+ " def __init__(self, env=None, replay_buffer_size=1000, action_size=2):\n",
72
+ " self.replay_buffer = deque(maxlen=replay_buffer_size)\n",
73
+ "\n",
74
+ " self.action_size = action_size\n",
75
+ "\n",
76
+ " # Hyperparameters\n",
77
+ " self.gamma = 0.95 # Discount rate\n",
78
+ " self.epsilon = 1.0 # Exploration rate\n",
79
+ " self.epsilon_min = 0.05 # Minimal exploration rate (epsilon-greedy)\n",
80
+ " self.epsilon_decay = 0.90 # Decay rate for epsilon\n",
81
+ " self.update_rate = 200 # Number of steps until updating the target network\n",
82
+ " self.batch_size = 100\n",
83
+ " self.learning_rate = 0.001\n",
84
+ " \n",
85
+ " # Construct DQN models\n",
86
+ " self.model = self._build_model()\n",
87
+ " self.target_model = self._build_model()\n",
88
+ " self.target_model.set_weights(self.model.get_weights())\n",
89
+ " self.model.summary()\n",
90
+ " self.env = env\n",
91
+ " self.action_size = action_size\n",
92
+ "\n",
93
+ " def _build_model(self):\n",
94
+ " model = tf.keras.Sequential()\n",
95
+ " \n",
96
+ " model.add(tf.keras.Input(shape=(4,)))\n",
97
+ " # FC Layers\n",
98
+ " model.add(layers.Dense(24, activation='relu'))\n",
99
+ " model.add(layers.Dense(24, activation='relu'))\n",
100
+ " model.add(layers.Dense(self.action_size, activation='linear'))\n",
101
+ " \n",
102
+ " optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)\n",
103
+ " model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])\n",
104
+ " return model\n",
105
+ "\n",
106
+ "\n",
107
+ " #\n",
108
+ " # Trains the model using randomly selected experiences in the replay memory\n",
109
+ " #\n",
110
+ " def _train(self):\n",
111
+ " minibatch = random.sample(self.replay_buffer, self.batch_size)\n",
112
+ " \n",
113
+ " for state, action, reward, next_state, done in minibatch:\n",
114
+ " \n",
115
+ " if not done:\n",
116
+ " model_predict = self.model.predict(np.array([next_state]), verbose=0)\n",
117
+ " max_action = np.argmax(model_predict[0])\n",
118
+ " target = (reward + self.gamma * self.target_model.predict(np.array([next_state]), verbose=0)[0][max_action])\n",
119
+ " else:\n",
120
+ " target = reward\n",
121
+ " \n",
122
+ " # Construct the target vector as follows:\n",
123
+ " # 1. Use the current model to output the Q-value predictions\n",
124
+ " target_f = self.model.predict(np.array([state]), verbose=0)\n",
125
+ " \n",
126
+ " # 2. Rewrite the chosen action value with the computed target\n",
127
+ " target_f[0][action] = target\n",
128
+ " \n",
129
+ " # 3. Use vectors in the objective computation\n",
130
+ " history = self.model.fit(np.array([state]), target_f, epochs=1, verbose=0)\n",
131
+ " print(f\"Loss: {history.history['loss']} \")\n",
132
+ " \n",
133
+ " if self.epsilon > self.epsilon_min:\n",
134
+ " self.epsilon *= self.epsilon_decay\n",
135
+ " #\n",
136
+ " # Batched variant of _train: samples one minibatch from the replay memory and trains on it with a single batched predict/fit\n",
137
+ " #\n",
138
+ " def _train_b(self):\n",
139
+ " \n",
140
+ " # state, action, reward, next_state, done \n",
141
+ " # create the targets \n",
142
+ " mb_arr = np.array(random.sample(self.replay_buffer, self.batch_size), dtype=object)\n",
143
+ "\n",
144
+ " next_state_arr = np.stack(mb_arr[:,3])\n",
145
+ " target_model_predict = self.target_model.predict(next_state_arr, verbose=0)\n",
146
+ " max_action_arr = np.argmax(target_model_predict, axis=1)\n",
147
+ " q_targets = []\n",
148
+ " for idx,val in enumerate(zip(target_model_predict, max_action_arr)):\n",
149
+ " row, col = val\n",
150
+ " # if done\n",
151
+ " if mb_arr[idx,4] == True:\n",
152
+ " q_targets.append(mb_arr[idx,2])\n",
153
+ " else:\n",
154
+ " q_targets.append(row[col])\n",
155
+ "\n",
156
+ " q_targets = np.array(q_targets)\n",
157
+ " reward_arr = np.stack(mb_arr[:,2])\n",
158
+ " # targets Yj\n",
159
+ " target_arr = (reward_arr + self.gamma * q_targets)\n",
160
+ "\n",
161
+ " # Perform gradient step\n",
162
+ " state_arr = np.stack(mb_arr[:,0])\n",
163
+ " model_predict = self.model.predict(state_arr, verbose=0)\n",
164
+ " action_arr = np.stack(mb_arr[:,1])\n",
165
+ " f_targets=[]\n",
166
+ " for idx, val in enumerate(zip(action_arr, target_arr)):\n",
167
+ " act, targ = val\n",
168
+ " model_predict[idx][act] = targ\n",
169
+ "\n",
170
+ " history = self.model.fit(state_arr, model_predict, epochs=1, verbose=0)\n",
171
+ " print(f\"Loss: {history.history['loss']} \")\n",
172
+ " # update epsilon\n",
173
+ " if self.epsilon > self.epsilon_min:\n",
174
+ " self.epsilon *= self.epsilon_decay\n",
175
+ "\n",
176
+ " def learn(self, total_steps=None):\n",
177
+ "\n",
178
+ " state = self.env.reset()\n",
179
+ " total_reward = 0\n",
180
+ " rewards = []\n",
181
+ " for current_step in tqdm(range(total_steps)):\n",
182
+ "\n",
183
+ " # e-greedy\n",
184
+ " if np.random.rand() <= self.epsilon:\n",
185
+ " action = random.randrange(self.action_size)\n",
186
+ " else:\n",
187
+ " model_predict = self.model.predict(np.array([state]), verbose=0)\n",
188
+ " action = np.argmax(model_predict[0])\n",
189
+ "\n",
190
+ " # step\n",
191
+ " next_state, reward, done, info = self.env.step(action)\n",
192
+ " total_reward += reward\n",
193
+ " # add to buffer\n",
194
+ " self.replay_buffer.append((state, action, reward, next_state, done))\n",
195
+ "\n",
196
+ " if done:\n",
197
+ " rewards.append(total_reward)\n",
198
+ " total_reward = 0\n",
199
+ " state = self.env.reset()\n",
200
+ "\n",
201
+ " if current_step>10 and current_step % self.update_rate == 0:\n",
202
+ " print(f\"epsilon:{self.epsilon} step:{current_step} mean_reward {np.mean(rewards)} \")\n",
203
+ " self._train()\n",
204
+ " # update target\n",
205
+ " self.target_model.set_weights(self.model.get_weights())\n",
206
+ " \n",
207
+ " #\n",
208
+ " # Loads a saved model\n",
209
+ " #\n",
210
+ " def load(self, name):\n",
211
+ " self.model.load_weights(name)\n",
212
+ "\n",
213
+ " #\n",
214
+ " # Saves parameters of a trained model\n",
215
+ " #\n",
216
+ " def save(self, name):\n",
217
+ " self.model.save_weights(name)\n",
218
+ "\n",
219
+ " def play(self, state):\n",
220
+ " return np.argmax(self.model.predict(np.array([state]), verbose=0)[0])"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "code",
225
+ "execution_count": null,
226
+ "metadata": {},
227
+ "outputs": [],
228
+ "source": [
229
+ "env = gym.make('CartPole-v1')\n",
230
+ "\n",
231
+ "model = DQN(env=env, replay_buffer_size=10_000, action_size=2)\n",
232
+ "model.learn(total_steps=20_000)\n",
233
+ "env.close()"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": null,
239
+ "metadata": {},
240
+ "outputs": [],
241
+ "source": [
242
+ "# env = gym.make('CartPole-v1')\n",
243
+ "\n",
244
+ "# model = DQN(env=env, replay_buffer_size=10_000, action_size=2)\n",
245
+ "\n",
246
+ "# state = model.env.reset()\n",
247
+ "# for i in range(100):\n",
248
+ "# random_action = env.action_space.sample()\n",
249
+ "# next_state, reward, done, info = model.env.step(random_action)\n",
250
+ "# model.replay_buffer.append((state, random_action, reward, next_state, done))\n",
251
+ "# if done:\n",
252
+ "# state = model.env.reset()\n",
253
+ "# else:\n",
254
+ "# state = next_state\n",
255
+ "\n",
256
+ "# minibatch = random.sample(model.replay_buffer, 10)\n",
257
+ "# mb = np.array(minibatch, dtype=object)\n",
258
+ "# print(mb[:,0])\n",
259
+ "# np.stack(mb[:,0])\n"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": 6,
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": [
268
+ "model.save(\"./m1.h5\")"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": 7,
274
+ "metadata": {},
275
+ "outputs": [
276
+ {
277
+ "name": "stdout",
278
+ "output_type": "stream",
279
+ "text": [
280
+ "Model: \"sequential_2\"\n",
281
+ "_________________________________________________________________\n",
282
+ " Layer (type) Output Shape Param # \n",
283
+ "=================================================================\n",
284
+ " dense_6 (Dense) (None, 128) 640 \n",
285
+ " \n",
286
+ " dense_7 (Dense) (None, 64) 8256 \n",
287
+ " \n",
288
+ " dense_8 (Dense) (None, 2) 130 \n",
289
+ " \n",
290
+ "=================================================================\n",
291
+ "Total params: 9,026\n",
292
+ "Trainable params: 9,026\n",
293
+ "Non-trainable params: 0\n",
294
+ "_________________________________________________________________\n",
295
+ "1.0 {}\n"
296
+ ]
297
+ }
298
+ ],
299
+ "source": [
300
+ "eval_env = gym.make('CartPole-v1')\n",
301
+ "model = DQN(env=eval_env, replay_buffer_size=10_000, action_size=2)\n",
302
+ "model.load(\"./m1.h5\")\n",
303
+ "eval_env = wrappers.Monitor(eval_env, \"./alt/gym-results\", force=True)\n",
304
+ "state = eval_env.reset()\n",
305
+ "for _ in range(1000):\n",
306
+ " action = model.play(state)\n",
307
+ " observation, reward, done, info = eval_env.step(action)\n",
308
+ " # print(info)\n",
309
+ " state = observation\n",
310
+ " if done: \n",
311
+ " print(reward, info)\n",
312
+ " break\n",
313
+ "eval_env.close()"
314
+ ]
315
+ }
316
+ ],
317
+ "metadata": {
318
+ "colab": {
319
+ "provenance": []
320
+ },
321
+ "kernelspec": {
322
+ "display_name": "Python 3.8.13 ('rl2')",
323
+ "language": "python",
324
+ "name": "python3"
325
+ },
326
+ "language_info": {
327
+ "codemirror_mode": {
328
+ "name": "ipython",
329
+ "version": 3
330
+ },
331
+ "file_extension": ".py",
332
+ "mimetype": "text/x-python",
333
+ "name": "python",
334
+ "nbconvert_exporter": "python",
335
+ "pygments_lexer": "ipython3",
336
+ "version": "3.8.13"
337
+ },
338
+ "orig_nbformat": 4,
339
+ "vscode": {
340
+ "interpreter": {
341
+ "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1"
342
+ }
343
+ },
344
+ "widgets": {
345
+ "application/vnd.jupyter.widget-state+json": {
346
+ "01a2dbcb714e40148b41c761fcf43147": {
347
+ "model_module": "@jupyter-widgets/base",
348
+ "model_module_version": "1.2.0",
349
+ "model_name": "LayoutModel",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/base",
352
+ "_model_module_version": "1.2.0",
353
+ "_model_name": "LayoutModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "LayoutView",
358
+ "align_content": null,
359
+ "align_items": null,
360
+ "align_self": null,
361
+ "border": null,
362
+ "bottom": null,
363
+ "display": null,
364
+ "flex": null,
365
+ "flex_flow": null,
366
+ "grid_area": null,
367
+ "grid_auto_columns": null,
368
+ "grid_auto_flow": null,
369
+ "grid_auto_rows": null,
370
+ "grid_column": null,
371
+ "grid_gap": null,
372
+ "grid_row": null,
373
+ "grid_template_areas": null,
374
+ "grid_template_columns": null,
375
+ "grid_template_rows": null,
376
+ "height": null,
377
+ "justify_content": null,
378
+ "justify_items": null,
379
+ "left": null,
380
+ "margin": null,
381
+ "max_height": null,
382
+ "max_width": null,
383
+ "min_height": null,
384
+ "min_width": null,
385
+ "object_fit": null,
386
+ "object_position": null,
387
+ "order": null,
388
+ "overflow": null,
389
+ "overflow_x": null,
390
+ "overflow_y": null,
391
+ "padding": null,
392
+ "right": null,
393
+ "top": null,
394
+ "visibility": null,
395
+ "width": null
396
+ }
397
+ },
398
+ "20b0f38ec3234ff28a62a286cd57b933": {
399
+ "model_module": "@jupyter-widgets/controls",
400
+ "model_module_version": "1.5.0",
401
+ "model_name": "PasswordModel",
402
+ "state": {
403
+ "_dom_classes": [],
404
+ "_model_module": "@jupyter-widgets/controls",
405
+ "_model_module_version": "1.5.0",
406
+ "_model_name": "PasswordModel",
407
+ "_view_count": null,
408
+ "_view_module": "@jupyter-widgets/controls",
409
+ "_view_module_version": "1.5.0",
410
+ "_view_name": "PasswordView",
411
+ "continuous_update": true,
412
+ "description": "Token:",
413
+ "description_tooltip": null,
414
+ "disabled": false,
415
+ "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147",
416
+ "placeholder": "​",
417
+ "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce",
418
+ "value": ""
419
+ }
420
+ },
421
+ "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": {
422
+ "model_module": "@jupyter-widgets/controls",
423
+ "model_module_version": "1.5.0",
424
+ "model_name": "VBoxModel",
425
+ "state": {
426
+ "_dom_classes": [],
427
+ "_model_module": "@jupyter-widgets/controls",
428
+ "_model_module_version": "1.5.0",
429
+ "_model_name": "VBoxModel",
430
+ "_view_count": null,
431
+ "_view_module": "@jupyter-widgets/controls",
432
+ "_view_module_version": "1.5.0",
433
+ "_view_name": "VBoxView",
434
+ "box_style": "",
435
+ "children": [
436
+ "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da",
437
+ "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933",
438
+ "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77",
439
+ "IPY_MODEL_f1675c09d16a4251b403f9c56255f168",
440
+ "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2"
441
+ ],
442
+ "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d"
443
+ }
444
+ },
445
+ "2dc5fa9aa3334dfcbdee9c238f2ef60b": {
446
+ "model_module": "@jupyter-widgets/controls",
447
+ "model_module_version": "1.5.0",
448
+ "model_name": "DescriptionStyleModel",
449
+ "state": {
450
+ "_model_module": "@jupyter-widgets/controls",
451
+ "_model_module_version": "1.5.0",
452
+ "_model_name": "DescriptionStyleModel",
453
+ "_view_count": null,
454
+ "_view_module": "@jupyter-widgets/base",
455
+ "_view_module_version": "1.2.0",
456
+ "_view_name": "StyleView",
457
+ "description_width": ""
458
+ }
459
+ },
460
+ "3e753b0212644990b558c68853ff2041": {
461
+ "model_module": "@jupyter-widgets/base",
462
+ "model_module_version": "1.2.0",
463
+ "model_name": "LayoutModel",
464
+ "state": {
465
+ "_model_module": "@jupyter-widgets/base",
466
+ "_model_module_version": "1.2.0",
467
+ "_model_name": "LayoutModel",
468
+ "_view_count": null,
469
+ "_view_module": "@jupyter-widgets/base",
470
+ "_view_module_version": "1.2.0",
471
+ "_view_name": "LayoutView",
472
+ "align_content": null,
473
+ "align_items": null,
474
+ "align_self": null,
475
+ "border": null,
476
+ "bottom": null,
477
+ "display": null,
478
+ "flex": null,
479
+ "flex_flow": null,
480
+ "grid_area": null,
481
+ "grid_auto_columns": null,
482
+ "grid_auto_flow": null,
483
+ "grid_auto_rows": null,
484
+ "grid_column": null,
485
+ "grid_gap": null,
486
+ "grid_row": null,
487
+ "grid_template_areas": null,
488
+ "grid_template_columns": null,
489
+ "grid_template_rows": null,
490
+ "height": null,
491
+ "justify_content": null,
492
+ "justify_items": null,
493
+ "left": null,
494
+ "margin": null,
495
+ "max_height": null,
496
+ "max_width": null,
497
+ "min_height": null,
498
+ "min_width": null,
499
+ "object_fit": null,
500
+ "object_position": null,
501
+ "order": null,
502
+ "overflow": null,
503
+ "overflow_x": null,
504
+ "overflow_y": null,
505
+ "padding": null,
506
+ "right": null,
507
+ "top": null,
508
+ "visibility": null,
509
+ "width": null
510
+ }
511
+ },
512
+ "3fa248114ac24656ba74923936a94d2d": {
513
+ "model_module": "@jupyter-widgets/base",
514
+ "model_module_version": "1.2.0",
515
+ "model_name": "LayoutModel",
516
+ "state": {
517
+ "_model_module": "@jupyter-widgets/base",
518
+ "_model_module_version": "1.2.0",
519
+ "_model_name": "LayoutModel",
520
+ "_view_count": null,
521
+ "_view_module": "@jupyter-widgets/base",
522
+ "_view_module_version": "1.2.0",
523
+ "_view_name": "LayoutView",
524
+ "align_content": null,
525
+ "align_items": "center",
526
+ "align_self": null,
527
+ "border": null,
528
+ "bottom": null,
529
+ "display": "flex",
530
+ "flex": null,
531
+ "flex_flow": "column",
532
+ "grid_area": null,
533
+ "grid_auto_columns": null,
534
+ "grid_auto_flow": null,
535
+ "grid_auto_rows": null,
536
+ "grid_column": null,
537
+ "grid_gap": null,
538
+ "grid_row": null,
539
+ "grid_template_areas": null,
540
+ "grid_template_columns": null,
541
+ "grid_template_rows": null,
542
+ "height": null,
543
+ "justify_content": null,
544
+ "justify_items": null,
545
+ "left": null,
546
+ "margin": null,
547
+ "max_height": null,
548
+ "max_width": null,
549
+ "min_height": null,
550
+ "min_width": null,
551
+ "object_fit": null,
552
+ "object_position": null,
553
+ "order": null,
554
+ "overflow": null,
555
+ "overflow_x": null,
556
+ "overflow_y": null,
557
+ "padding": null,
558
+ "right": null,
559
+ "top": null,
560
+ "visibility": null,
561
+ "width": "50%"
562
+ }
563
+ },
564
+ "42d140b838b844819bc127afc1b7bc84": {
565
+ "model_module": "@jupyter-widgets/controls",
566
+ "model_module_version": "1.5.0",
567
+ "model_name": "DescriptionStyleModel",
568
+ "state": {
569
+ "_model_module": "@jupyter-widgets/controls",
570
+ "_model_module_version": "1.5.0",
571
+ "_model_name": "DescriptionStyleModel",
572
+ "_view_count": null,
573
+ "_view_module": "@jupyter-widgets/base",
574
+ "_view_module_version": "1.2.0",
575
+ "_view_name": "StyleView",
576
+ "description_width": ""
577
+ }
578
+ },
579
+ "90c874e91b304ee1a7ef147767ac00ce": {
580
+ "model_module": "@jupyter-widgets/controls",
581
+ "model_module_version": "1.5.0",
582
+ "model_name": "DescriptionStyleModel",
583
+ "state": {
584
+ "_model_module": "@jupyter-widgets/controls",
585
+ "_model_module_version": "1.5.0",
586
+ "_model_name": "DescriptionStyleModel",
587
+ "_view_count": null,
588
+ "_view_module": "@jupyter-widgets/base",
589
+ "_view_module_version": "1.2.0",
590
+ "_view_name": "StyleView",
591
+ "description_width": ""
592
+ }
593
+ },
594
+ "9d847f9a7d47458d8cd57d9b599e47c6": {
595
+ "model_module": "@jupyter-widgets/base",
596
+ "model_module_version": "1.2.0",
597
+ "model_name": "LayoutModel",
598
+ "state": {
599
+ "_model_module": "@jupyter-widgets/base",
600
+ "_model_module_version": "1.2.0",
601
+ "_model_name": "LayoutModel",
602
+ "_view_count": null,
603
+ "_view_module": "@jupyter-widgets/base",
604
+ "_view_module_version": "1.2.0",
605
+ "_view_name": "LayoutView",
606
+ "align_content": null,
607
+ "align_items": null,
608
+ "align_self": null,
609
+ "border": null,
610
+ "bottom": null,
611
+ "display": null,
612
+ "flex": null,
613
+ "flex_flow": null,
614
+ "grid_area": null,
615
+ "grid_auto_columns": null,
616
+ "grid_auto_flow": null,
617
+ "grid_auto_rows": null,
618
+ "grid_column": null,
619
+ "grid_gap": null,
620
+ "grid_row": null,
621
+ "grid_template_areas": null,
622
+ "grid_template_columns": null,
623
+ "grid_template_rows": null,
624
+ "height": null,
625
+ "justify_content": null,
626
+ "justify_items": null,
627
+ "left": null,
628
+ "margin": null,
629
+ "max_height": null,
630
+ "max_width": null,
631
+ "min_height": null,
632
+ "min_width": null,
633
+ "object_fit": null,
634
+ "object_position": null,
635
+ "order": null,
636
+ "overflow": null,
637
+ "overflow_x": null,
638
+ "overflow_y": null,
639
+ "padding": null,
640
+ "right": null,
641
+ "top": null,
642
+ "visibility": null,
643
+ "width": null
644
+ }
645
+ },
646
+ "a02224a43d8d4af3bd31d326540d25da": {
647
+ "model_module": "@jupyter-widgets/controls",
648
+ "model_module_version": "1.5.0",
649
+ "model_name": "HTMLModel",
650
+ "state": {
651
+ "_dom_classes": [],
652
+ "_model_module": "@jupyter-widgets/controls",
653
+ "_model_module_version": "1.5.0",
654
+ "_model_name": "HTMLModel",
655
+ "_view_count": null,
656
+ "_view_module": "@jupyter-widgets/controls",
657
+ "_view_module_version": "1.5.0",
658
+ "_view_name": "HTMLView",
659
+ "description": "",
660
+ "description_tooltip": null,
661
+ "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284",
662
+ "placeholder": "​",
663
+ "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b",
664
+ "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
665
+ }
666
+ },
667
+ "a2cfb91cf66447d7899292854bd64a07": {
668
+ "model_module": "@jupyter-widgets/base",
669
+ "model_module_version": "1.2.0",
670
+ "model_name": "LayoutModel",
671
+ "state": {
672
+ "_model_module": "@jupyter-widgets/base",
673
+ "_model_module_version": "1.2.0",
674
+ "_model_name": "LayoutModel",
675
+ "_view_count": null,
676
+ "_view_module": "@jupyter-widgets/base",
677
+ "_view_module_version": "1.2.0",
678
+ "_view_name": "LayoutView",
679
+ "align_content": null,
680
+ "align_items": null,
681
+ "align_self": null,
682
+ "border": null,
683
+ "bottom": null,
684
+ "display": null,
685
+ "flex": null,
686
+ "flex_flow": null,
687
+ "grid_area": null,
688
+ "grid_auto_columns": null,
689
+ "grid_auto_flow": null,
690
+ "grid_auto_rows": null,
691
+ "grid_column": null,
692
+ "grid_gap": null,
693
+ "grid_row": null,
694
+ "grid_template_areas": null,
695
+ "grid_template_columns": null,
696
+ "grid_template_rows": null,
697
+ "height": null,
698
+ "justify_content": null,
699
+ "justify_items": null,
700
+ "left": null,
701
+ "margin": null,
702
+ "max_height": null,
703
+ "max_width": null,
704
+ "min_height": null,
705
+ "min_width": null,
706
+ "object_fit": null,
707
+ "object_position": null,
708
+ "order": null,
709
+ "overflow": null,
710
+ "overflow_x": null,
711
+ "overflow_y": null,
712
+ "padding": null,
713
+ "right": null,
714
+ "top": null,
715
+ "visibility": null,
716
+ "width": null
717
+ }
718
+ },
719
+ "c1a82965ae26479a98e4fdbde1e64ec2": {
720
+ "model_module": "@jupyter-widgets/controls",
721
+ "model_module_version": "1.5.0",
722
+ "model_name": "HTMLModel",
723
+ "state": {
724
+ "_dom_classes": [],
725
+ "_model_module": "@jupyter-widgets/controls",
726
+ "_model_module_version": "1.5.0",
727
+ "_model_name": "HTMLModel",
728
+ "_view_count": null,
729
+ "_view_module": "@jupyter-widgets/controls",
730
+ "_view_module_version": "1.5.0",
731
+ "_view_name": "HTMLView",
732
+ "description": "",
733
+ "description_tooltip": null,
734
+ "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6",
735
+ "placeholder": "​",
736
+ "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84",
737
+ "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
738
+ }
739
+ },
740
+ "caef095934ec47bbb8b64eab22049284": {
741
+ "model_module": "@jupyter-widgets/base",
742
+ "model_module_version": "1.2.0",
743
+ "model_name": "LayoutModel",
744
+ "state": {
745
+ "_model_module": "@jupyter-widgets/base",
746
+ "_model_module_version": "1.2.0",
747
+ "_model_name": "LayoutModel",
748
+ "_view_count": null,
749
+ "_view_module": "@jupyter-widgets/base",
750
+ "_view_module_version": "1.2.0",
751
+ "_view_name": "LayoutView",
752
+ "align_content": null,
753
+ "align_items": null,
754
+ "align_self": null,
755
+ "border": null,
756
+ "bottom": null,
757
+ "display": null,
758
+ "flex": null,
759
+ "flex_flow": null,
760
+ "grid_area": null,
761
+ "grid_auto_columns": null,
762
+ "grid_auto_flow": null,
763
+ "grid_auto_rows": null,
764
+ "grid_column": null,
765
+ "grid_gap": null,
766
+ "grid_row": null,
767
+ "grid_template_areas": null,
768
+ "grid_template_columns": null,
769
+ "grid_template_rows": null,
770
+ "height": null,
771
+ "justify_content": null,
772
+ "justify_items": null,
773
+ "left": null,
774
+ "margin": null,
775
+ "max_height": null,
776
+ "max_width": null,
777
+ "min_height": null,
778
+ "min_width": null,
779
+ "object_fit": null,
780
+ "object_position": null,
781
+ "order": null,
782
+ "overflow": null,
783
+ "overflow_x": null,
784
+ "overflow_y": null,
785
+ "padding": null,
786
+ "right": null,
787
+ "top": null,
788
+ "visibility": null,
789
+ "width": null
790
+ }
791
+ },
792
+ "eaba3f1de4444aabadfea2a3dadb1d80": {
793
+ "model_module": "@jupyter-widgets/controls",
794
+ "model_module_version": "1.5.0",
795
+ "model_name": "DescriptionStyleModel",
796
+ "state": {
797
+ "_model_module": "@jupyter-widgets/controls",
798
+ "_model_module_version": "1.5.0",
799
+ "_model_name": "DescriptionStyleModel",
800
+ "_view_count": null,
801
+ "_view_module": "@jupyter-widgets/base",
802
+ "_view_module_version": "1.2.0",
803
+ "_view_name": "StyleView",
804
+ "description_width": ""
805
+ }
806
+ },
807
+ "ee4a21bedc504171ad09d205d634b528": {
808
+ "model_module": "@jupyter-widgets/controls",
809
+ "model_module_version": "1.5.0",
810
+ "model_name": "ButtonStyleModel",
811
+ "state": {
812
+ "_model_module": "@jupyter-widgets/controls",
813
+ "_model_module_version": "1.5.0",
814
+ "_model_name": "ButtonStyleModel",
815
+ "_view_count": null,
816
+ "_view_module": "@jupyter-widgets/base",
817
+ "_view_module_version": "1.2.0",
818
+ "_view_name": "StyleView",
819
+ "button_color": null,
820
+ "font_weight": ""
821
+ }
822
+ },
823
+ "f1675c09d16a4251b403f9c56255f168": {
824
+ "model_module": "@jupyter-widgets/controls",
825
+ "model_module_version": "1.5.0",
826
+ "model_name": "ButtonModel",
827
+ "state": {
828
+ "_dom_classes": [],
829
+ "_model_module": "@jupyter-widgets/controls",
830
+ "_model_module_version": "1.5.0",
831
+ "_model_name": "ButtonModel",
832
+ "_view_count": null,
833
+ "_view_module": "@jupyter-widgets/controls",
834
+ "_view_module_version": "1.5.0",
835
+ "_view_name": "ButtonView",
836
+ "button_style": "",
837
+ "description": "Login",
838
+ "disabled": false,
839
+ "icon": "",
840
+ "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07",
841
+ "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528",
842
+ "tooltip": ""
843
+ }
844
+ },
845
+ "f6c845330d6743c0b35c2c7ad834de77": {
846
+ "model_module": "@jupyter-widgets/controls",
847
+ "model_module_version": "1.5.0",
848
+ "model_name": "CheckboxModel",
849
+ "state": {
850
+ "_dom_classes": [],
851
+ "_model_module": "@jupyter-widgets/controls",
852
+ "_model_module_version": "1.5.0",
853
+ "_model_name": "CheckboxModel",
854
+ "_view_count": null,
855
+ "_view_module": "@jupyter-widgets/controls",
856
+ "_view_module_version": "1.5.0",
857
+ "_view_name": "CheckboxView",
858
+ "description": "Add token as git credential?",
859
+ "description_tooltip": null,
860
+ "disabled": false,
861
+ "indent": true,
862
+ "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041",
863
+ "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80",
864
+ "value": true
865
+ }
866
+ }
867
+ }
868
+ }
869
+ },
870
+ "nbformat": 4,
871
+ "nbformat_minor": 0
872
+ }
fin_rl_qlearning_v1.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
fin_rl_qlearning_v2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
fin_rl_qlearning_v4.ipynb ADDED
The diff for this file is too large to render. See raw diff