bonadio committed
Commit
7cf1db5
1 Parent(s): b13dc5d

qlearning_v1-6 using PRB reward

README.md CHANGED
@@ -9,4 +9,5 @@
9
  # Q-learning
10
 
11
  Now using q-learning with a custom environment
12
- fin_rl_qlearning_v1.ipynb
 
9
  # Q-learning
10
 
11
  Now using q-learning with a custom environment
12
+ fin_rl_qlearning_v1.ipynb
13
+
fin_rl_PPO_v3.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
fin_rl_qlearning_v1-3.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
fin_rl_qlearning_v1-4.ipynb CHANGED
@@ -837,59 +837,9 @@
837
  },
838
  {
839
  "cell_type": "code",
840
- "execution_count": 22,
841
  "metadata": {},
842
- "outputs": [
843
- {
844
- "data": {
845
- "text/plain": [
846
- "[(1, 4, 8.645273113640826, 11.755207560128675, 3.1099344464878484),\n",
847
- " (5, 7, 11.171935380608351, 9.6441605861055, 1.527774794502852),\n",
848
- " (9, 13, 10.295810456549015, 8.282581743389205, 2.01322871315981),\n",
849
- " (14, 16, 9.137837754794978, 11.628818062612993, 2.490980307818015),\n",
850
- " (17, 19, 11.608355073821109, 9.857818739078045, 1.7505363347430638),\n",
851
- " (21, 26, 10.057416515635682, 8.528742398078922, 1.5286741175567595),\n",
852
- " (27, 32, 9.74902775472201, 9.670699802197579, 0.0783279525244307),\n",
853
- " (33, 35, 9.78769038301581, 10.300434254468536, 0.5127438714527255),\n",
854
- " (36, 38, 9.60217789374197, 8.250852689557544, 1.3513252041844268),\n",
855
- " (39, 40, 8.971004003020443, 10.395078818094584, 1.4240748150741407),\n",
856
- " (42, 44, 11.673729766919589, 9.944263607173118, 1.7294661597464707),\n",
857
- " (45, 46, 9.636292392244973, 9.991269928983368, 0.35497753673839405),\n",
858
- " (47, 51, 10.360163262862152, 8.430825035963325, 1.9293382268988264),\n",
859
- " (52, 54, 9.554103757397273, 11.742814826136176, 2.188711068738902),\n",
860
- " (55, 57, 11.403617185189336, 9.71178561119097, 1.6918315739983658),\n",
861
- " (58, 59, 9.736562354250092, 10.197672768283018, 0.46111041403292674),\n",
862
- " (60, 64, 10.337689357211167, 8.817539963222865, 1.5201493939883015),\n",
863
- " (65, 69, 10.199032988393924, 10.042850712802524, 0.15618227559139974),\n",
864
- " (70, 71, 9.63107312063706, 9.925392109060335, 0.2943189884232744),\n",
865
- " (72, 76, 10.338918119518627, 8.35252995759046, 1.9863881619281667),\n",
866
- " (77, 83, 9.365049578445921, 9.693865537165568, -0.3288159587196464),\n",
867
- " (84, 89, 10.138407662002876, 8.67933961768415, 1.4590680443187267),\n",
868
- " (90, 95, 10.000361732237739, 9.63951969965352, 0.3608420325842179),\n",
869
- " (96, 97, 9.861816695922618, 10.30627224611643, 0.4444555501938119),\n",
870
- " (98, 107, 10.232441825356547, 9.836950696890277, 0.3954911284662703),\n",
871
- " (108, 109, 9.661180634141186, 10.074844279839029, 0.4136636456978433),\n",
872
- " (110, 114, 10.368921355130126, 8.558052792767318, 1.810868562362808),\n",
873
- " (115, 120, 9.801685702157227, 9.662201837344819, 0.1394838648124086),\n",
874
- " (121, 123, 9.80253246200774, 10.288038598865318, 0.4855061368575786),\n",
875
- " (124, 127, 9.567447370545475, 9.01427913814844, 0.5531682323970344),\n",
876
- " (128, 132, 10.44659531472739, 9.919971609023063, 0.5266237057043259),\n",
877
- " (133, 134, 9.639891825308629, 10.008971159712608, 0.36907933440397933),\n",
878
- " (135, 139, 10.363750373990332, 8.455059573520186, 1.9086908004701453),\n",
879
- " (140, 145, 9.605625627674012, 9.699404019036715, -0.0937783913627026),\n",
880
- " (146, 148, 9.749444489037334, 10.32917728184808, 0.5797327928107467),\n",
881
- " (149, 152, 9.695479843750826, 8.857054399227408, 0.838425444523418),\n",
882
- " (153, 157, 10.25168805575754, 10.015435969355776, 0.23625208640176432),\n",
883
- " (158, 159, 9.63115885189913, 9.942821850118737, 0.31166299821960664),\n",
884
- " (160, 164, 10.345751755784269, 8.371449341610663, 1.9743024141736054),\n",
885
- " (165, 170, 9.414760057555497, 9.751117491956888, -0.3363574344013909)]"
886
- ]
887
- },
888
- "execution_count": 22,
889
- "metadata": {},
890
- "output_type": "execute_result"
891
- }
892
- ],
893
  "source": [
894
  "env_test._trade_tick_history"
895
  ]
 
837
  },
838
  {
839
  "cell_type": "code",
840
+ "execution_count": null,
841
  "metadata": {},
842
+ "outputs": [],
843
  "source": [
844
  "env_test._trade_tick_history"
845
  ]
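
The cleared output above listed `env_test._trade_tick_history` as `(open_tick, close_tick, open_price, close_price, profit)` tuples. A minimal sketch, assuming that tuple layout, of a hypothetical helper that reduces such a history to the same win-rate and profit statistics the notebooks' `evaluate_agent` reports:

```python
import numpy as np

def summarize_trades(trade_tick_history):
    # each entry: (open_tick, close_tick, open_price, close_price, profit)
    profits = np.array([t[4] for t in trade_tick_history])
    if len(profits) == 0:
        return {"n_trades": 0, "total_profit": 0.0, "win_rate": 0.0}
    return {
        "n_trades": len(profits),
        "total_profit": float(profits.sum()),
        "win_rate": np.count_nonzero(profits > 0) / len(profits),
    }

# usage: summarize_trades(env_test._trade_tick_history)
```
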
fin_rl_qlearning_v1-5.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
fin_rl_qlearning_v1-6.ipynb ADDED
@@ -0,0 +1,1295 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "nwaAZRu1NTiI"
7
+ },
8
+ "source": [
9
+ "# Q-learning \n",
10
+ "\n",
11
+        "#### This version implements Q-learning with a custom environment (1-day timeframe) on synthetic data; the Q-table is backed by SQLite, so several features can be added to the state\n",
12
+ "\n",
13
+ "##### Experiments\n",
14
+        "- Change the reward function and observe the effect on trading \n"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "metadata": {
21
+ "id": "DDf1gLC2NTiK"
22
+ },
23
+ "outputs": [],
24
+ "source": [
25
+ "# !pip install -r ./requirements.txt\n",
26
+ "# !pip install stable_baselines3\n",
27
+ "# !pip install yfinance\n",
28
+ "# !pip install talib-binary\n",
29
+ "# !pip install huggingface_sb3\n"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "metadata": {
36
+ "id": "LNXxxKojNTiL"
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "import gym\n",
41
+ "from gym import spaces\n",
42
+ "from gym.utils import seeding\n",
43
+ "\n",
44
+ "import talib as ta\n",
45
+ "from tqdm.notebook import tqdm\n",
46
+ "\n",
47
+ "import yfinance as yf\n",
48
+ "import pandas as pd\n",
49
+ "import numpy as np\n",
50
+ "from matplotlib import pyplot as plt\n",
51
+ "import timeit\n",
52
+ "import sqlite3\n"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+        "def get_synthetic_data(tf, start_date, end_date, plot=True, add_noise=None):\n",
62
+ " df = pd.date_range(start=start_date, end=end_date, freq=tf)\n",
63
+ " df = df.to_frame()\n",
64
+ "\n",
65
+ " df['v1'] = np.arange(len(df.index))\n",
66
+ " df[['Open','High','Low','Close','Volume']] = 0.0\n",
67
+ " df = df.drop([0], axis=1)\n",
68
+ "\n",
69
+ " df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x/3)+10 )\n",
70
+ " # df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x)+10 + np.sin(x/2) )\n",
71
+ " if add_noise is not None: # could be 0.5\n",
72
+ " noise = np.random.normal(0, add_noise, len(df))\n",
73
+ " df[\"Close\"] += noise\n",
74
+ "\n",
75
+ " if plot:\n",
76
+ " plt.figure(figsize=(15,6))\n",
77
+ " df['Close'].tail(30).plot()\n",
78
+ "\n",
79
+ " df[\"Open\"]=df[\"Close\"].shift(1)\n",
80
+ " df = df.dropna()\n",
81
+ " x = 1.5\n",
82
+ " df[\"High\"] = np.where( df[\"Close\"] > df['Open'], df[\"Close\"]+x, df[\"Open\"]+x )\n",
83
+ " df[\"Low\"] = np.where( df[\"Close\"] < df['Open'], df[\"Close\"]-x, df[\"Open\"]-x )\n",
84
+ " df[\"Volume\"] = 10\n",
85
+ " return df"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "metadata": {
92
+ "id": "dmAuEhZZNTiL"
93
+ },
94
+ "outputs": [],
95
+ "source": [
96
+ "# Get data\n",
97
+ "eth_usd = yf.Ticker(\"ETH-USD\")\n",
98
+ "eth = eth_usd.history(period=\"max\")\n",
99
+ "\n",
100
+ "btc_usd = yf.Ticker(\"BTC-USD\")\n",
101
+ "btc = btc_usd.history(period=\"max\")\n",
102
+ "print(len(btc))\n",
103
+ "print(len(eth))\n",
104
+ "\n",
105
+ "btc_train = eth[-3015:-200]\n",
106
+ "# btc_test = eth[-200:]\n",
107
+ "eth_train = eth[-1864:-200]\n",
108
+ "eth_test = eth[-200:]\n",
109
+ "# len(eth_train)"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "# use synthetic data\n",
119
+        "# synthetic_data = get_synthetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2015-02-05\", add_noise=None)\n",
120
+        "synthetic_data = get_synthetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2023-01-01\", add_noise=None)\n",
121
+ "eth_train = synthetic_data[-1864:-200]\n",
122
+ "eth_test = synthetic_data[-200:]\n"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": null,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "\n",
132
+ "class Qtable:\n",
133
+ " def __init__(self):\n",
134
+ " self.conn = sqlite3.connect(':memory:')\n",
135
+ " self.cursor = self.conn.cursor()\n",
136
+ "\n",
137
+ " def create_table(self):\n",
138
+ " columns = [(\"s_position\", \"INTEGER\"),(\"s_di\", \"INTEGER\"), (\"s_mfi\", \"INTEGER\"), (\"s_stock_d\", \"INTEGER\"),(\"s_adx\", \"INTEGER\"), (\"action\", \"INTEGER\"), (\"qvalue\", \"REAL\")]\n",
139
+ " columns_string = \", \".join([f\"{name} {data_type}\" for name, data_type in columns])\n",
140
+ " columns_keys = \"(s_position, s_di, s_mfi, s_stock_d, s_adx, action)\"\n",
141
+ " query = f\"CREATE TABLE IF NOT EXISTS QTABLE ({columns_string}, PRIMARY KEY {columns_keys})\"\n",
142
+ " self.cursor.execute(query)\n",
143
+ " self.conn.commit()\n",
144
+ "\n",
145
+ " def set_q_value(self, state, action, qvalue):\n",
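+        "        # upsert: insert the (state, action) row, or overwrite its qvalue when the primary key already exists\n",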
146
+ " query = f\"INSERT INTO QTABLE (s_position, s_di, s_mfi, s_stock_d, s_adx, action, qvalue) VALUES (?,?,?,?,?,?,?) ON CONFLICT (s_position, s_di, s_mfi, s_stock_d, s_adx, action) DO UPDATE SET qvalue=?\"\n",
147
+ " self.cursor.execute(query,state.tolist()+[action]+[qvalue]+[qvalue])\n",
148
+ " self.conn.commit()\n",
149
+ "\n",
150
+ " def get_q_value(self, state, action):\n",
151
+ " self.cursor.execute(\"SELECT qvalue from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=? and action=?\",state.tolist()+[action])\n",
152
+ " rows = self.cursor.fetchall()\n",
153
+ " if len(rows) > 0:\n",
154
+ " return rows[0][0]\n",
155
+ " return None\n",
156
+ "\n",
157
+ " def get_max_q_value(self, state):\n",
158
+ " self.cursor.execute(\"SELECT max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n",
159
+ " rows = self.cursor.fetchall()\n",
160
+ " if len(rows) > 0:\n",
161
+ " return rows[0][0]\n",
162
+ " return None\n",
163
+ "\n",
164
+ " def get_max_action(self, state):\n",
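+        "        # SQLite pairs the bare 'action' column with max(qvalue), returning the action of the best row; for an unseen state it yields (None, None)\n",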
165
+ " self.cursor.execute(\"SELECT action, max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n",
166
+ " rows = self.cursor.fetchall()\n",
167
+ " if len(rows) > 0:\n",
168
+ " return rows[0][0]\n",
169
+ " return None\n",
170
+ "\n",
171
+ " def getall(self):\n",
172
+ " self.cursor.execute(\"SELECT * from QTABLE \")\n",
173
+ " return self.cursor.fetchall()\n",
174
+ " \n",
175
+ " "
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "metadata": {},
182
+ "outputs": [],
183
+ "source": [
184
+ "def initialize_q_table():\n",
185
+ " # s_ state variables\n",
186
+ " qtable = Qtable()\n",
187
+ " qtable.create_table() \n",
188
+ " return qtable"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": null,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "# Policy\n",
198
+ "\n",
199
+ "def greedy_policy(Qtable, state):\n",
200
+        "    # Exploitation: take the action with the highest state-action value\n",
201
+        "    # for a state with no stored values, get_max_action returns None (the DO_NOTHING fallback below is disabled)\n",
202
+ " action = Qtable.get_max_action(state)\n",
203
+ " # if action is None:\n",
204
+ " # action = 2\n",
205
+ " # action = np.argmax(Qtable[state])\n",
206
+ " return action\n",
207
+ "\n",
208
+ "\n",
209
+ "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
210
+ " # Randomly generate a number between 0 and 1\n",
211
+ " random_num = np.random.uniform(size=1)\n",
212
+        "    # if random_num is greater than epsilon --> exploitation\n",
213
+ " if random_num > epsilon:\n",
214
+ " # Take the action with the highest value given a state\n",
215
+ " # np.argmax can be useful here\n",
216
+ " action = greedy_policy(Qtable, state)\n",
217
+ " # else --> exploration\n",
218
+ " else:\n",
219
+ " # action = np.random.random_integers(4,size=1)[0]\n",
220
+ " action = env.action_space.sample()\n",
221
+ " \n",
222
+ " return action"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "metadata": {
229
+ "id": "wlC-EdLENTiN"
230
+ },
231
+ "outputs": [],
232
+ "source": [
233
+ "\n",
234
+ "def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n",
235
+ " state_history = []\n",
236
+ "# np.random.seed(42)\n",
237
+ " for episode in range(n_training_episodes):\n",
238
+ " # Reduce epsilon (because we need less and less exploration)\n",
239
+ " epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n",
240
+ " # Reset the environment\n",
241
+ " state = env.reset()\n",
242
+ " step = 0\n",
243
+ " done = False\n",
244
+ "\n",
245
+ " # repeat\n",
246
+ " for step in range(max_steps):\n",
247
+ " # Choose the action At using epsilon greedy policy\n",
248
+ " action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
249
+ "\n",
250
+ " # Take action At and observe Rt+1 and St+1\n",
251
+ " # Take the action (a) and observe the outcome state(s') and reward (r)\n",
252
+ " new_state, reward, done, info = env.step(action)\n",
253
+ "\n",
254
+ " # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n",
255
+ " # Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * ( np.max(Qtable[new_state]) ) - Qtable[state][action] )\n",
256
+ " qvalue = Qtable.get_q_value(state, action)\n",
257
+ " if qvalue is None:\n",
258
+ " qvalue = 0\n",
259
+ "\n",
260
+ " q_max_state = Qtable.get_max_q_value(new_state)\n",
261
+ " if q_max_state is None:\n",
262
+ " q_max_state = 0\n",
263
+ " \n",
264
+ " n_qvalue = qvalue + learning_rate * (reward + gamma * ( q_max_state ) - qvalue )\n",
265
+ " Qtable.set_q_value(state, action, n_qvalue)\n",
266
+ "\n",
267
+ " # If done, finish the episode\n",
268
+ " if done:\n",
269
+ " break\n",
270
+ " \n",
271
+ " # Our next state is the new state\n",
272
+ " state = new_state\n",
273
+ "\n",
274
+ " state_history.append(state) \n",
275
+ "\n",
276
+ " return Qtable, state_history"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": null,
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "def evaluate_agent(env, max_steps, n_eval_episodes, Q, random=False):\n",
286
+ " \"\"\"\n",
287
+        "    Evaluate the agent for ``n_eval_episodes`` episodes and return the mean/std of reward and profit plus the fraction of profitable trades.\n",
288
+ " :param env: The evaluation environment\n",
289
+        "    :param n_eval_episodes: Number of episodes to evaluate the agent\n",
290
+ " :param Q: The Q-table\n",
291
+        "    :param random: If True, sample random actions instead of following the greedy policy\n",
292
+ " \"\"\"\n",
293
+ " episode_positive_perc_trades = []\n",
294
+ " episode_rewards = []\n",
295
+ " episode_profits = []\n",
296
+ " for episode in tqdm(range(n_eval_episodes), disable=random):\n",
297
+ " state = env.reset()\n",
298
+ " step = 0\n",
299
+ " done = False\n",
300
+ " total_rewards_ep = 0\n",
301
+ " total_profit_ep = 0\n",
302
+ " \n",
303
+ " for step in range(max_steps):\n",
304
+        "            # Take the action (index) that has the maximum expected future reward given that state\n",
305
+ " if random:\n",
306
+ " action = env.action_space.sample()\n",
307
+ " else:\n",
308
+ " action = greedy_policy(Q, state)\n",
309
+ "\n",
310
+ " new_state, reward, done, info = env.step(action)\n",
311
+ " total_rewards_ep += reward\n",
312
+ " \n",
313
+ " if done:\n",
314
+ " break\n",
315
+ " state = new_state\n",
316
+ "\n",
317
+ " if len(env._trade_history) > 0:\n",
318
+ " episode_positive_perc_trades.append(np.count_nonzero(np.array(env._trade_history) > 0)/len(env._trade_history))\n",
319
+ " episode_rewards.append(total_rewards_ep)\n",
320
+ " episode_profits.append(env.history['total_profit'][-1])\n",
321
+ " # print(env.history)\n",
322
+ " # env.render()\n",
323
+ " # assert 0\n",
324
+ "\n",
325
+ " mean_reward = np.mean(episode_rewards)\n",
326
+ " std_reward = np.std(episode_rewards)\n",
327
+ " mean_profit = np.mean(episode_profits)\n",
328
+ " std_profit = np.std(episode_profits)\n",
329
+ " positive_perc_trades = np.mean(episode_positive_perc_trades)\n",
330
+ "\n",
331
+ " return mean_reward, std_reward, mean_profit, std_profit, positive_perc_trades"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": null,
337
+ "metadata": {},
338
+ "outputs": [],
339
+ "source": [
340
+ "from enum import Enum\n",
341
+ "class Actions(Enum):\n",
342
+ " Sell = 0\n",
343
+ " Buy = 1\n",
344
+ " Do_nothing = 2\n",
345
+ "\n",
346
+ "class CustTradingEnv(gym.Env):\n",
347
+ "\n",
348
+ " def __init__(self, df, max_steps=0, random_start=True):\n",
349
+ " self.seed(seed=43)\n",
350
+ " self.df = df\n",
351
+ " self.prices, self.signal_features = self._process_data()\n",
352
+ "\n",
353
+ " # spaces\n",
354
+ " self.action_space = spaces.Discrete(3)\n",
355
+ " self.observation_space = spaces.Box(low=0, high=1999, shape=(1,) , dtype=np.float64)\n",
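+        "        # note: this Box matches the older flat 0-1999 state encoding kept in the comments below; _get_observation actually returns a 5-element vector and the space is not enforced\n",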
356
+ "\n",
357
+ " # episode\n",
358
+ " self._start_tick = 0\n",
359
+ " self._end_tick = 0\n",
360
+ " self._done = None\n",
361
+ " self._current_tick = None\n",
362
+ " self._last_trade_tick = None\n",
363
+ " self._position = None\n",
364
+ " self._position_history = None\n",
365
+ " self._total_reward = None\n",
366
+ " self._total_profit = None\n",
367
+ " self._first_rendering = None\n",
368
+ " self.history = None\n",
369
+ " self._max_steps = max_steps\n",
370
+ " self._start_episode_tick = None\n",
371
+ " self._trade_history = None\n",
372
+ " self._trade_tick_history = None\n",
373
+ " self._random_start = random_start\n",
374
+ " self._action_history = None\n",
375
+ "\n",
376
+ " def reset(self):\n",
377
+ " self._done = False\n",
378
+ " if self._random_start:\n",
379
+ " self._start_episode_tick = np.random.randint(1,high=len(self.df)- self._max_steps )\n",
380
+ " self._end_tick = self._start_episode_tick + self._max_steps\n",
381
+ " else:\n",
382
+ " self._start_episode_tick = 1\n",
383
+ " self._end_tick = len(self.df)-1\n",
384
+ " # self._start_episode_tick = np.random.randint(1,len(self.df)- self._max_steps )\n",
385
+ " # self._end_tick = self._start_episode_tick + self._max_steps\n",
386
+ " self._current_tick = self._start_episode_tick\n",
387
+ " self._last_trade_tick = self._current_tick - 1\n",
388
+ " self._position = 0\n",
389
+ " self._action_history = [-1] * (len(self.prices)) \n",
390
+ " # self._position_history = (self.window_size * [None]) + [self._position]\n",
391
+ " self._total_reward = 0.\n",
392
+ " self._total_profit = 0.\n",
393
+ " self._trade_history = []\n",
394
+ " self._trade_tick_history = []\n",
395
+ " self.history = {}\n",
396
+ " return self._get_observation()\n",
397
+ "\n",
398
+ "\n",
399
+ " def step(self, action):\n",
400
+ " self._done = False\n",
401
+ " self._current_tick += 1\n",
402
+ "\n",
403
+ " if self._current_tick == self._end_tick:\n",
404
+ " self._done = True\n",
405
+ "\n",
406
+ " self._do_act(action)\n",
407
+ " step_reward = self._calculate_reward(action)\n",
408
+ " self._total_reward += step_reward\n",
409
+ "\n",
410
+ " observation = self._get_observation()\n",
411
+ " info = dict(\n",
412
+ " total_reward = self._total_reward,\n",
413
+ " total_profit = self._total_profit,\n",
414
+ " position = self._position,\n",
415
+ " action = action\n",
416
+ " )\n",
417
+ " self._update_history(info)\n",
418
+ "\n",
419
+ " return observation, step_reward, self._done, info\n",
420
+ "\n",
421
+ " def seed(self, seed=None):\n",
422
+ " self.np_random, seed = seeding.np_random(seed)\n",
423
+ " return [seed]\n",
424
+ " \n",
425
+ " def _get_observation(self):\n",
426
+ " if self._position > 0:\n",
427
+ " position = 1\n",
428
+ " elif self._position < 0:\n",
429
+ " position = -1\n",
430
+ " else:\n",
431
+ " position = 0\n",
432
+ " return np.concatenate( [[position], self.signal_features[self._current_tick]] )\n",
433
+ "\n",
434
+ " def _update_history(self, info):\n",
435
+ " if not self.history:\n",
436
+ " self.history = {key: [] for key in info.keys()}\n",
437
+ "\n",
438
+ " for key, value in info.items():\n",
439
+ " self.history[key].append(value)\n",
440
+ "\n",
441
+ "\n",
442
+ " def render(self, mode='human'):\n",
443
+ " window_ticks = np.arange(len(self.prices))\n",
444
+ " prices = self.prices\n",
445
+ " # prices = self.prices[self._start_episode_tick:self._end_tick+1]\n",
446
+ " plt.plot(prices)\n",
447
+ "\n",
448
+ " open_buy = []\n",
449
+ " close_buy = []\n",
450
+ " open_sell = []\n",
451
+ " close_sell = []\n",
452
+ " do_nothing = []\n",
453
+ " penalty = []\n",
454
+ " action_not_in_table = []\n",
455
+ "\n",
456
+ " for i, tick in enumerate(window_ticks):\n",
457
+ " if self._action_history[i] == 1:\n",
458
+ " open_buy.append(tick)\n",
459
+ " elif self._action_history[i] == 2 :\n",
460
+ " close_buy.append(tick)\n",
461
+ " elif self._action_history[i] == 3 :\n",
462
+ " open_sell.append(tick)\n",
463
+ " elif self._action_history[i] == 4 :\n",
464
+ " close_sell.append(tick)\n",
465
+ " elif self._action_history[i] == 0 :\n",
466
+ " do_nothing.append(tick)\n",
467
+ " elif self._action_history[i] == 5 :\n",
468
+ " penalty.append(tick)\n",
469
+ " elif self._action_history[i] == 6 :\n",
470
+ " action_not_in_table.append(tick)\n",
471
+ "\n",
472
+ " plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n",
473
+ " plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n",
474
+ " plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n",
475
+ " plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n",
476
+ " \n",
477
+ " plt.plot(do_nothing, prices[do_nothing], 'oc')\n",
478
+ " plt.plot(penalty, prices[penalty], 'yo')\n",
479
+ "\n",
480
+ " plt.plot(action_not_in_table, prices[action_not_in_table], 'ob')\n",
481
+ "\n",
482
+ " plt.suptitle(\n",
483
+ " \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n",
484
+ " \"Total Profit: %.6f\" % self._total_profit\n",
485
+ " )\n",
486
+ "\n",
487
+ " def _do_bin(self,df):\n",
488
+ " df = pd.cut(df,bins=np.arange(0,105,5),labels=False, include_lowest=True)\n",
489
+ " return df\n",
490
+ "\n",
491
+        "    # Our state is encoded with 4 features: MFI, Stochastic (D line only), ADX and DI+/DI-\n",
492
+        "    # MFI, Stochastic and ADX each range from 0-100 and are binned by _do_bin into width-5 bins, ex:\n",
493
+        "    # an MFI of 25 lands in bin 4 (np.arange(0,105,5) yields 20 bins, labelled 0-19)\n",
494
+        "    # DI is 1 if DI+ is above DI-, otherwise 0\n",
495
+        "    # \n",
496
+        "    # that gives a state space of 20(MFI) * 20(STOCH) * 20(ADX) * 2(DI) = 16000 states\n",
497
+        "    # ex: DI MFI STOCH ADX = 1, 45.2, 25.4, 90.1 is binned to the state vector (1, 9, 5, 18)\n",
498
+ " def _process_data(self):\n",
499
+ " timeperiod = 14\n",
500
+ " self.df = self.df.copy()\n",
501
+ " \n",
502
+ " self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
503
+ " self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n",
504
+ " _, self.df['stock_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n",
505
+ " self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
506
+ " self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
507
+ " self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n",
508
+ " self.df = self.df.dropna()\n",
509
+ " self.df['mfi'] = self._do_bin(self.df['mfi_r'])\n",
510
+ " self.df['stock_d'] = self._do_bin(self.df['stock_d_r'])\n",
511
+ " self.df['adx'] = self._do_bin(self.df['adx_r'])\n",
512
+ "\n",
513
+ " # self.df['state'] = self.df['di']*1000+ self.df['mfi']*100 + self.df['stock_d']*10 + self.df['adx']\n",
514
+ "\n",
515
+ " prices = self.df.loc[:, 'Close'].to_numpy()\n",
516
+ " # signal_features = self.df.loc[:, 'state'].to_numpy()\n",
517
+ " signal_features = self.df.loc[:, ['di', 'mfi', 'stock_d','adx']].to_numpy()\n",
518
+ "\n",
519
+ " return prices, signal_features\n",
520
+ "\n",
521
+ "\n",
522
+ " def _do_act(self, action):\n",
523
+ " if action is None:\n",
524
+ " self._action_history[self._current_tick-1]=6\n",
525
+ "\n",
526
+ " current_price = self.prices[self._current_tick]\n",
527
+ " last_price = self.prices[self._current_tick - 1]\n",
528
+ " price_diff = current_price - last_price\n",
529
+ "\n",
530
+ " # OPEN BUY - 1\n",
531
+ " if action == Actions.Buy.value and self._position == 0:\n",
532
+ " self._position = last_price\n",
533
+ " # step_reward += price_diff\n",
534
+ " self._last_trade_tick = self._current_tick - 1\n",
535
+ " self._action_history[self._current_tick-1]=1\n",
536
+ "\n",
537
+ " # CLOSE BUY - 2\n",
538
+ " elif action == Actions.Sell.value and self._position > 0:\n",
539
+ " self._position = 0\n",
540
+ " profit = self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
541
+ " self._total_profit += profit\n",
542
+ " self._action_history[self._current_tick-1]=2\n",
543
+ " self._trade_history.append(profit)\n",
544
+ " self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n",
545
+ "\n",
546
+ " elif action == Actions.Buy.value and self._position > 0:\n",
547
+ " self._action_history[self._current_tick-1]=5\n",
548
+ "\n",
549
+ " # OPEN SELL - 3\n",
550
+ " elif action == Actions.Sell.value and self._position == 0:\n",
551
+ " self._position = -1 * last_price\n",
552
+ " self._last_trade_tick = self._current_tick - 1\n",
553
+ " self._action_history[self._current_tick-1]=3\n",
554
+ "\n",
555
+ " # CLOSE SELL - 4\n",
556
+ " elif action == Actions.Buy.value and self._position < 0:\n",
557
+ " self._position = 0\n",
558
+ " profit = -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
559
+ " self._total_profit += profit\n",
560
+ " self._action_history[self._current_tick-1]=4\n",
561
+ " self._trade_history.append(profit)\n",
562
+ " self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n",
563
+ "\n",
564
+ " elif action == Actions.Sell.value and self._position < 0:\n",
565
+ " self._action_history[self._current_tick-1]=5\n",
566
+ "\n",
567
+ " # DO NOTHING - 0\n",
568
+ " elif action == Actions.Do_nothing.value and self._position > 0:\n",
569
+ " self._action_history[self._current_tick-1]=0\n",
570
+ " elif action == Actions.Do_nothing.value and self._position < 0:\n",
571
+ " self._action_history[self._current_tick-1]=0\n",
572
+ " elif action == Actions.Do_nothing.value and self._position == 0:\n",
573
+ " self._action_history[self._current_tick-1]=0\n",
574
+ "\n",
575
+ " \n",
576
+ " def _calculate_reward(self, action):\n",
577
+ " current_price = self.prices[self._current_tick]\n",
578
+ " last_price = self.prices[self._current_tick - 1]\n",
579
+ " price_diff = current_price - last_price\n",
580
+ "\n",
581
+ " if not self.history:\n",
582
+ " return 0\n",
583
+ "\n",
584
+ " # simple strategy, reward when close the buy or sell\n",
585
+ " # closed buy\n",
586
+ " if self._position == 0 and self.history['position'][-1] > 0 :\n",
587
+ " return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
588
+ " \n",
589
+ " # close sell\n",
590
+ " if self._position == 0 and self.history['position'][-1] < 0:\n",
591
+ " return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
592
+ "\n",
593
+ "\n",
594
+        "    # # reward when opening the buy or sell (DOESN'T WORK)\n",
595
+ " # # open buy\n",
596
+ " # if self._position > 0 and self.history['position'][-1] == 0 :\n",
597
+ " # return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
598
+ " \n",
599
+ " # # open sell\n",
600
+ " # if self._position < 0 and self.history['position'][-1] == 0:\n",
601
+ " # return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
602
+ "\n",
603
+ " # # PRB\n",
604
+ " # return price_diff * self._position\n",
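+        "    # # (per-step reward: the price move scaled by the held position, as referenced in the commit message)\n",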
605
+ "\n",
606
+ "\n",
607
+ " return 0\n",
608
+ "\n"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": null,
614
+ "metadata": {},
615
+ "outputs": [],
616
+ "source": [
617
+ "# Training parameters\n",
618
+ "n_training_episodes = 20000 # Total training episodes\n",
619
+ "learning_rate = 0.2 # Learning rate\n",
620
+ "\n",
621
+ "# Environment parameters\n",
622
+ "max_steps = 20 # Max steps per episode\n",
623
+ "gamma = 0.95 # Discounting rate\n",
624
+ "\n",
625
+ "# Exploration parameters\n",
626
+ "max_epsilon = 1.0 # Exploration probability at start\n",
627
+ "# max_epsilon = 1.0 # Exploration probability at start\n",
628
+ "min_epsilon = 0.05 # Minimum exploration probability \n",
629
+ "# min_epsilon = 0.05 # Minimum exploration probability \n",
630
+ "decay_rate = 0.0005 # Exponential decay rate for exploration prob"
631
+ ]
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "execution_count": null,
636
+ "metadata": {
637
+ "colab": {
638
+ "base_uri": "https://localhost:8080/"
639
+ },
640
+ "id": "REhmfLkYNTiN",
641
+ "outputId": "cf676f6d-83df-43f5-89fe-3258e0041d9d"
642
+ },
643
+ "outputs": [],
644
+ "source": [
645
+ "# create env\n",
646
+ "env = CustTradingEnv(df=eth_train, max_steps=max_steps, random_start=True)\n",
647
+ "Qtable_trading = initialize_q_table()"
648
+ ]
649
+ },
650
+ {
651
+ "cell_type": "code",
652
+ "execution_count": null,
653
+ "metadata": {},
654
+ "outputs": [],
655
+ "source": [
656
+ "\n",
657
+ "# train \n",
658
+ "Qtable_trading, state_history = train(n_training_episodes, min_epsilon, max_epsilon, \n",
659
+ " decay_rate, env, max_steps, Qtable_trading, learning_rate, gamma )\n",
660
+ "\n",
661
+ "len(Qtable_trading.getall())\n"
662
+ ]
663
+ },
664
+ {
665
+ "cell_type": "code",
666
+ "execution_count": null,
667
+ "metadata": {},
668
+ "outputs": [],
669
+ "source": [
670
+ "# Qtable_trading.getall()"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "code",
675
+ "execution_count": null,
676
+ "metadata": {},
677
+ "outputs": [],
678
+ "source": [
679
+ "max_steps = 60 \n",
680
+ "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n",
681
+ "n_eval_episodes = 1000\n",
682
+ "\n",
683
+ "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)"
684
+ ]
685
+ },
686
+ {
687
+ "cell_type": "code",
688
+ "execution_count": null,
689
+ "metadata": {},
690
+ "outputs": [],
691
+ "source": [
692
+ "plt.figure(figsize=(15,6))\n",
693
+ "plt.cla()\n",
694
+ "env_test.render()"
695
+ ]
696
+ },
697
+ {
698
+ "cell_type": "code",
699
+ "execution_count": null,
700
+ "metadata": {},
701
+ "outputs": [],
702
+ "source": [
703
+ "# trade sequential\n",
704
+ "max_steps = len(eth_test)\n",
705
+ "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n",
706
+ "n_eval_episodes = 1\n",
707
+ "\n",
708
+ "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)"
709
+ ]
710
+ },
711
+ {
712
+ "cell_type": "code",
713
+ "execution_count": null,
714
+ "metadata": {},
715
+ "outputs": [],
716
+ "source": [
717
+ "plt.figure(figsize=(15,6))\n",
718
+ "plt.cla()\n",
719
+ "env_test.render()"
720
+ ]
721
+ },
722
+ {
723
+ "cell_type": "code",
724
+ "execution_count": null,
725
+ "metadata": {},
726
+ "outputs": [],
727
+ "source": [
728
+ "# env_test._trade_tick_history\n",
729
+ "# Qtable_trading.getall()[:10]"
730
+ ]
731
+ },
732
+ {
733
+ "cell_type": "code",
734
+ "execution_count": null,
735
+ "metadata": {},
736
+ "outputs": [],
737
+ "source": []
738
+ }
739
+ ],
740
+ "metadata": {
741
+ "colab": {
742
+ "provenance": []
743
+ },
744
+ "kernelspec": {
745
+ "display_name": "Python 3.8.13 ('rl2')",
746
+ "language": "python",
747
+ "name": "python3"
748
+ },
749
+ "language_info": {
750
+ "codemirror_mode": {
751
+ "name": "ipython",
752
+ "version": 3
753
+ },
754
+ "file_extension": ".py",
755
+ "mimetype": "text/x-python",
756
+ "name": "python",
757
+ "nbconvert_exporter": "python",
758
+ "pygments_lexer": "ipython3",
759
+ "version": "3.8.13"
760
+ },
761
+ "orig_nbformat": 4,
762
+ "vscode": {
763
+ "interpreter": {
764
+ "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1"
765
+ }
766
+ },
767
+ "widgets": {
768
+ "application/vnd.jupyter.widget-state+json": {
769
+ "01a2dbcb714e40148b41c761fcf43147": {
770
+ "model_module": "@jupyter-widgets/base",
771
+ "model_module_version": "1.2.0",
772
+ "model_name": "LayoutModel",
773
+ "state": {
774
+ "_model_module": "@jupyter-widgets/base",
775
+ "_model_module_version": "1.2.0",
776
+ "_model_name": "LayoutModel",
777
+ "_view_count": null,
778
+ "_view_module": "@jupyter-widgets/base",
779
+ "_view_module_version": "1.2.0",
780
+ "_view_name": "LayoutView",
781
+ "align_content": null,
782
+ "align_items": null,
783
+ "align_self": null,
784
+ "border": null,
785
+ "bottom": null,
786
+ "display": null,
787
+ "flex": null,
788
+ "flex_flow": null,
789
+ "grid_area": null,
790
+ "grid_auto_columns": null,
791
+ "grid_auto_flow": null,
792
+ "grid_auto_rows": null,
793
+ "grid_column": null,
794
+ "grid_gap": null,
795
+ "grid_row": null,
796
+ "grid_template_areas": null,
797
+ "grid_template_columns": null,
798
+ "grid_template_rows": null,
799
+ "height": null,
800
+ "justify_content": null,
801
+ "justify_items": null,
802
+ "left": null,
803
+ "margin": null,
804
+ "max_height": null,
805
+ "max_width": null,
806
+ "min_height": null,
807
+ "min_width": null,
808
+ "object_fit": null,
809
+ "object_position": null,
810
+ "order": null,
811
+ "overflow": null,
812
+ "overflow_x": null,
813
+ "overflow_y": null,
814
+ "padding": null,
815
+ "right": null,
816
+ "top": null,
817
+ "visibility": null,
818
+ "width": null
819
+ }
820
+ },
821
+ "20b0f38ec3234ff28a62a286cd57b933": {
822
+ "model_module": "@jupyter-widgets/controls",
823
+ "model_module_version": "1.5.0",
824
+ "model_name": "PasswordModel",
825
+ "state": {
826
+ "_dom_classes": [],
827
+ "_model_module": "@jupyter-widgets/controls",
828
+ "_model_module_version": "1.5.0",
829
+ "_model_name": "PasswordModel",
830
+ "_view_count": null,
831
+ "_view_module": "@jupyter-widgets/controls",
832
+ "_view_module_version": "1.5.0",
833
+ "_view_name": "PasswordView",
834
+ "continuous_update": true,
835
+ "description": "Token:",
836
+ "description_tooltip": null,
837
+ "disabled": false,
838
+ "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147",
839
+ "placeholder": "​",
840
+ "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce",
841
+ "value": ""
842
+ }
843
+ },
844
+ "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": {
845
+ "model_module": "@jupyter-widgets/controls",
846
+ "model_module_version": "1.5.0",
847
+ "model_name": "VBoxModel",
848
+ "state": {
849
+ "_dom_classes": [],
850
+ "_model_module": "@jupyter-widgets/controls",
851
+ "_model_module_version": "1.5.0",
852
+ "_model_name": "VBoxModel",
853
+ "_view_count": null,
854
+ "_view_module": "@jupyter-widgets/controls",
855
+ "_view_module_version": "1.5.0",
856
+ "_view_name": "VBoxView",
857
+ "box_style": "",
858
+ "children": [
859
+ "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da",
860
+ "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933",
861
+ "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77",
862
+ "IPY_MODEL_f1675c09d16a4251b403f9c56255f168",
863
+ "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2"
864
+ ],
865
+ "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d"
866
+ }
867
+ },
868
+ "2dc5fa9aa3334dfcbdee9c238f2ef60b": {
869
+ "model_module": "@jupyter-widgets/controls",
870
+ "model_module_version": "1.5.0",
871
+ "model_name": "DescriptionStyleModel",
872
+ "state": {
873
+ "_model_module": "@jupyter-widgets/controls",
874
+ "_model_module_version": "1.5.0",
875
+ "_model_name": "DescriptionStyleModel",
876
+ "_view_count": null,
877
+ "_view_module": "@jupyter-widgets/base",
878
+ "_view_module_version": "1.2.0",
879
+ "_view_name": "StyleView",
880
+ "description_width": ""
881
+ }
882
+ },
883
+ "3e753b0212644990b558c68853ff2041": {
884
+ "model_module": "@jupyter-widgets/base",
885
+ "model_module_version": "1.2.0",
886
+ "model_name": "LayoutModel",
887
+ "state": {
888
+ "_model_module": "@jupyter-widgets/base",
889
+ "_model_module_version": "1.2.0",
890
+ "_model_name": "LayoutModel",
891
+ "_view_count": null,
892
+ "_view_module": "@jupyter-widgets/base",
893
+ "_view_module_version": "1.2.0",
894
+ "_view_name": "LayoutView",
895
+ "align_content": null,
896
+ "align_items": null,
897
+ "align_self": null,
898
+ "border": null,
899
+ "bottom": null,
900
+ "display": null,
901
+ "flex": null,
902
+ "flex_flow": null,
903
+ "grid_area": null,
904
+ "grid_auto_columns": null,
905
+ "grid_auto_flow": null,
906
+ "grid_auto_rows": null,
907
+ "grid_column": null,
908
+ "grid_gap": null,
909
+ "grid_row": null,
910
+ "grid_template_areas": null,
911
+ "grid_template_columns": null,
912
+ "grid_template_rows": null,
913
+ "height": null,
914
+ "justify_content": null,
915
+ "justify_items": null,
916
+ "left": null,
917
+ "margin": null,
918
+ "max_height": null,
919
+ "max_width": null,
920
+ "min_height": null,
921
+ "min_width": null,
922
+ "object_fit": null,
923
+ "object_position": null,
924
+ "order": null,
925
+ "overflow": null,
926
+ "overflow_x": null,
927
+ "overflow_y": null,
928
+ "padding": null,
929
+ "right": null,
930
+ "top": null,
931
+ "visibility": null,
932
+ "width": null
933
+ }
934
+ },
935
+ "3fa248114ac24656ba74923936a94d2d": {
936
+ "model_module": "@jupyter-widgets/base",
937
+ "model_module_version": "1.2.0",
938
+ "model_name": "LayoutModel",
939
+ "state": {
940
+ "_model_module": "@jupyter-widgets/base",
941
+ "_model_module_version": "1.2.0",
942
+ "_model_name": "LayoutModel",
943
+ "_view_count": null,
944
+ "_view_module": "@jupyter-widgets/base",
945
+ "_view_module_version": "1.2.0",
946
+ "_view_name": "LayoutView",
947
+ "align_content": null,
948
+ "align_items": "center",
949
+ "align_self": null,
950
+ "border": null,
951
+ "bottom": null,
952
+ "display": "flex",
953
+ "flex": null,
954
+ "flex_flow": "column",
955
+ "grid_area": null,
956
+ "grid_auto_columns": null,
957
+ "grid_auto_flow": null,
958
+ "grid_auto_rows": null,
959
+ "grid_column": null,
960
+ "grid_gap": null,
961
+ "grid_row": null,
962
+ "grid_template_areas": null,
963
+ "grid_template_columns": null,
964
+ "grid_template_rows": null,
965
+ "height": null,
966
+ "justify_content": null,
967
+ "justify_items": null,
968
+ "left": null,
969
+ "margin": null,
970
+ "max_height": null,
971
+ "max_width": null,
972
+ "min_height": null,
973
+ "min_width": null,
974
+ "object_fit": null,
975
+ "object_position": null,
976
+ "order": null,
977
+ "overflow": null,
978
+ "overflow_x": null,
979
+ "overflow_y": null,
980
+ "padding": null,
981
+ "right": null,
982
+ "top": null,
983
+ "visibility": null,
984
+ "width": "50%"
985
+ }
986
+ },
987
+ "42d140b838b844819bc127afc1b7bc84": {
988
+ "model_module": "@jupyter-widgets/controls",
989
+ "model_module_version": "1.5.0",
990
+ "model_name": "DescriptionStyleModel",
991
+ "state": {
992
+ "_model_module": "@jupyter-widgets/controls",
993
+ "_model_module_version": "1.5.0",
994
+ "_model_name": "DescriptionStyleModel",
995
+ "_view_count": null,
996
+ "_view_module": "@jupyter-widgets/base",
997
+ "_view_module_version": "1.2.0",
998
+ "_view_name": "StyleView",
999
+ "description_width": ""
1000
+ }
1001
+ },
1002
+ "90c874e91b304ee1a7ef147767ac00ce": {
1003
+ "model_module": "@jupyter-widgets/controls",
1004
+ "model_module_version": "1.5.0",
1005
+ "model_name": "DescriptionStyleModel",
1006
+ "state": {
1007
+ "_model_module": "@jupyter-widgets/controls",
1008
+ "_model_module_version": "1.5.0",
1009
+ "_model_name": "DescriptionStyleModel",
1010
+ "_view_count": null,
1011
+ "_view_module": "@jupyter-widgets/base",
1012
+ "_view_module_version": "1.2.0",
1013
+ "_view_name": "StyleView",
1014
+ "description_width": ""
1015
+ }
1016
+ },
1017
+ "9d847f9a7d47458d8cd57d9b599e47c6": {
1018
+ "model_module": "@jupyter-widgets/base",
1019
+ "model_module_version": "1.2.0",
1020
+ "model_name": "LayoutModel",
1021
+ "state": {
1022
+ "_model_module": "@jupyter-widgets/base",
1023
+ "_model_module_version": "1.2.0",
1024
+ "_model_name": "LayoutModel",
1025
+ "_view_count": null,
1026
+ "_view_module": "@jupyter-widgets/base",
1027
+ "_view_module_version": "1.2.0",
1028
+ "_view_name": "LayoutView",
1029
+ "align_content": null,
1030
+ "align_items": null,
1031
+ "align_self": null,
1032
+ "border": null,
1033
+ "bottom": null,
1034
+ "display": null,
1035
+ "flex": null,
1036
+ "flex_flow": null,
1037
+ "grid_area": null,
1038
+ "grid_auto_columns": null,
1039
+ "grid_auto_flow": null,
1040
+ "grid_auto_rows": null,
1041
+ "grid_column": null,
1042
+ "grid_gap": null,
1043
+ "grid_row": null,
1044
+ "grid_template_areas": null,
1045
+ "grid_template_columns": null,
1046
+ "grid_template_rows": null,
1047
+ "height": null,
1048
+ "justify_content": null,
1049
+ "justify_items": null,
1050
+ "left": null,
1051
+ "margin": null,
1052
+ "max_height": null,
1053
+ "max_width": null,
1054
+ "min_height": null,
1055
+ "min_width": null,
1056
+ "object_fit": null,
1057
+ "object_position": null,
1058
+ "order": null,
1059
+ "overflow": null,
1060
+ "overflow_x": null,
1061
+ "overflow_y": null,
1062
+ "padding": null,
1063
+ "right": null,
1064
+ "top": null,
1065
+ "visibility": null,
1066
+ "width": null
1067
+ }
1068
+ },
1069
+ "a02224a43d8d4af3bd31d326540d25da": {
1070
+ "model_module": "@jupyter-widgets/controls",
1071
+ "model_module_version": "1.5.0",
1072
+ "model_name": "HTMLModel",
1073
+ "state": {
1074
+ "_dom_classes": [],
1075
+ "_model_module": "@jupyter-widgets/controls",
1076
+ "_model_module_version": "1.5.0",
1077
+ "_model_name": "HTMLModel",
1078
+ "_view_count": null,
1079
+ "_view_module": "@jupyter-widgets/controls",
1080
+ "_view_module_version": "1.5.0",
1081
+ "_view_name": "HTMLView",
1082
+ "description": "",
1083
+ "description_tooltip": null,
1084
+ "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284",
1085
+ "placeholder": "​",
1086
+ "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b",
1087
+ "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
1088
+ }
1089
+ },
1090
+ "a2cfb91cf66447d7899292854bd64a07": {
1091
+ "model_module": "@jupyter-widgets/base",
1092
+ "model_module_version": "1.2.0",
1093
+ "model_name": "LayoutModel",
1094
+ "state": {
1095
+ "_model_module": "@jupyter-widgets/base",
1096
+ "_model_module_version": "1.2.0",
1097
+ "_model_name": "LayoutModel",
1098
+ "_view_count": null,
1099
+ "_view_module": "@jupyter-widgets/base",
1100
+ "_view_module_version": "1.2.0",
1101
+ "_view_name": "LayoutView",
1102
+ "align_content": null,
1103
+ "align_items": null,
1104
+ "align_self": null,
1105
+ "border": null,
1106
+ "bottom": null,
1107
+ "display": null,
1108
+ "flex": null,
1109
+ "flex_flow": null,
1110
+ "grid_area": null,
1111
+ "grid_auto_columns": null,
1112
+ "grid_auto_flow": null,
1113
+ "grid_auto_rows": null,
1114
+ "grid_column": null,
1115
+ "grid_gap": null,
1116
+ "grid_row": null,
1117
+ "grid_template_areas": null,
1118
+ "grid_template_columns": null,
1119
+ "grid_template_rows": null,
1120
+ "height": null,
1121
+ "justify_content": null,
1122
+ "justify_items": null,
1123
+ "left": null,
1124
+ "margin": null,
1125
+ "max_height": null,
1126
+ "max_width": null,
1127
+ "min_height": null,
1128
+ "min_width": null,
1129
+ "object_fit": null,
1130
+ "object_position": null,
1131
+ "order": null,
1132
+ "overflow": null,
1133
+ "overflow_x": null,
1134
+ "overflow_y": null,
1135
+ "padding": null,
1136
+ "right": null,
1137
+ "top": null,
1138
+ "visibility": null,
1139
+ "width": null
1140
+ }
1141
+ },
1142
+ "c1a82965ae26479a98e4fdbde1e64ec2": {
1143
+ "model_module": "@jupyter-widgets/controls",
1144
+ "model_module_version": "1.5.0",
1145
+ "model_name": "HTMLModel",
1146
+ "state": {
1147
+ "_dom_classes": [],
1148
+ "_model_module": "@jupyter-widgets/controls",
1149
+ "_model_module_version": "1.5.0",
1150
+ "_model_name": "HTMLModel",
1151
+ "_view_count": null,
1152
+ "_view_module": "@jupyter-widgets/controls",
1153
+ "_view_module_version": "1.5.0",
1154
+ "_view_name": "HTMLView",
1155
+ "description": "",
1156
+ "description_tooltip": null,
1157
+ "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6",
1158
+ "placeholder": "​",
1159
+ "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84",
1160
+ "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
1161
+ }
1162
+ },
1163
+ "caef095934ec47bbb8b64eab22049284": {
1164
+ "model_module": "@jupyter-widgets/base",
1165
+ "model_module_version": "1.2.0",
1166
+ "model_name": "LayoutModel",
1167
+ "state": {
1168
+ "_model_module": "@jupyter-widgets/base",
1169
+ "_model_module_version": "1.2.0",
1170
+ "_model_name": "LayoutModel",
1171
+ "_view_count": null,
1172
+ "_view_module": "@jupyter-widgets/base",
1173
+ "_view_module_version": "1.2.0",
1174
+ "_view_name": "LayoutView",
1175
+ "align_content": null,
1176
+ "align_items": null,
1177
+ "align_self": null,
1178
+ "border": null,
1179
+ "bottom": null,
1180
+ "display": null,
1181
+ "flex": null,
1182
+ "flex_flow": null,
1183
+ "grid_area": null,
1184
+ "grid_auto_columns": null,
1185
+ "grid_auto_flow": null,
1186
+ "grid_auto_rows": null,
1187
+ "grid_column": null,
1188
+ "grid_gap": null,
1189
+ "grid_row": null,
1190
+ "grid_template_areas": null,
1191
+ "grid_template_columns": null,
1192
+ "grid_template_rows": null,
1193
+ "height": null,
1194
+ "justify_content": null,
1195
+ "justify_items": null,
1196
+ "left": null,
1197
+ "margin": null,
1198
+ "max_height": null,
1199
+ "max_width": null,
1200
+ "min_height": null,
1201
+ "min_width": null,
1202
+ "object_fit": null,
1203
+ "object_position": null,
1204
+ "order": null,
1205
+ "overflow": null,
1206
+ "overflow_x": null,
1207
+ "overflow_y": null,
1208
+ "padding": null,
1209
+ "right": null,
1210
+ "top": null,
1211
+ "visibility": null,
1212
+ "width": null
1213
+ }
1214
+ },
1215
+ "eaba3f1de4444aabadfea2a3dadb1d80": {
1216
+ "model_module": "@jupyter-widgets/controls",
1217
+ "model_module_version": "1.5.0",
1218
+ "model_name": "DescriptionStyleModel",
1219
+ "state": {
1220
+ "_model_module": "@jupyter-widgets/controls",
1221
+ "_model_module_version": "1.5.0",
1222
+ "_model_name": "DescriptionStyleModel",
1223
+ "_view_count": null,
1224
+ "_view_module": "@jupyter-widgets/base",
1225
+ "_view_module_version": "1.2.0",
1226
+ "_view_name": "StyleView",
1227
+ "description_width": ""
1228
+ }
1229
+ },
1230
+ "ee4a21bedc504171ad09d205d634b528": {
1231
+ "model_module": "@jupyter-widgets/controls",
1232
+ "model_module_version": "1.5.0",
1233
+ "model_name": "ButtonStyleModel",
1234
+ "state": {
1235
+ "_model_module": "@jupyter-widgets/controls",
1236
+ "_model_module_version": "1.5.0",
1237
+ "_model_name": "ButtonStyleModel",
1238
+ "_view_count": null,
1239
+ "_view_module": "@jupyter-widgets/base",
1240
+ "_view_module_version": "1.2.0",
1241
+ "_view_name": "StyleView",
1242
+ "button_color": null,
1243
+ "font_weight": ""
1244
+ }
1245
+ },
1246
+ "f1675c09d16a4251b403f9c56255f168": {
1247
+ "model_module": "@jupyter-widgets/controls",
1248
+ "model_module_version": "1.5.0",
1249
+ "model_name": "ButtonModel",
1250
+ "state": {
1251
+ "_dom_classes": [],
1252
+ "_model_module": "@jupyter-widgets/controls",
1253
+ "_model_module_version": "1.5.0",
1254
+ "_model_name": "ButtonModel",
1255
+ "_view_count": null,
1256
+ "_view_module": "@jupyter-widgets/controls",
1257
+ "_view_module_version": "1.5.0",
1258
+ "_view_name": "ButtonView",
1259
+ "button_style": "",
1260
+ "description": "Login",
1261
+ "disabled": false,
1262
+ "icon": "",
1263
+ "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07",
1264
+ "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528",
1265
+ "tooltip": ""
1266
+ }
1267
+ },
1268
+ "f6c845330d6743c0b35c2c7ad834de77": {
1269
+ "model_module": "@jupyter-widgets/controls",
1270
+ "model_module_version": "1.5.0",
1271
+ "model_name": "CheckboxModel",
1272
+ "state": {
1273
+ "_dom_classes": [],
1274
+ "_model_module": "@jupyter-widgets/controls",
1275
+ "_model_module_version": "1.5.0",
1276
+ "_model_name": "CheckboxModel",
1277
+ "_view_count": null,
1278
+ "_view_module": "@jupyter-widgets/controls",
1279
+ "_view_module_version": "1.5.0",
1280
+ "_view_name": "CheckboxView",
1281
+ "description": "Add token as git credential?",
1282
+ "description_tooltip": null,
1283
+ "disabled": false,
1284
+ "indent": true,
1285
+ "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041",
1286
+ "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80",
1287
+ "value": true
1288
+ }
1289
+ }
1290
+ }
1291
+ }
1292
+ },
1293
+ "nbformat": 4,
1294
+ "nbformat_minor": 0
1295
+ }
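
A side note on the exploration schedule in the notebook above: `train` computes `epsilon = min_epsilon + (max_epsilon - min_epsilon) * exp(-decay_rate * episode)`, so with the notebook's parameters (1.0 down to 0.05, `decay_rate = 0.0005`) epsilon only drops below 0.1 after roughly 6,000 of the 20,000 episodes. A quick sketch, reusing that formula unchanged, to visualize it:

```python
import numpy as np
from matplotlib import pyplot as plt

episodes = np.arange(20000)
# same decay formula as in train()
epsilon = 0.05 + (1.0 - 0.05) * np.exp(-0.0005 * episodes)

plt.figure(figsize=(15, 6))
plt.plot(episodes, epsilon)
plt.xlabel("episode")
plt.ylabel("epsilon")
plt.title("epsilon-greedy exploration schedule")
plt.show()
```
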
fin_rl_qlearning_v1-7.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
todo_next.txt ADDED
@@ -0,0 +1,2 @@
1
+ - Test PPO with today's and yesterday's TI
2
+ - Test log return as reward (see the sketch below)
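
For the second TODO item, a minimal sketch of what a log-return reward could look like; the function name and wiring are assumptions for illustration, not code from this repo:

```python
import numpy as np

def log_return_reward(prices, tick, position):
    # signed log return between consecutive ticks: positive when the held
    # position (long > 0, short < 0) moves with the price, zero when flat
    if position == 0:
        return 0.0
    return float(np.sign(position) * np.log(prices[tick] / prices[tick - 1]))
```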