Q-learning in fin_rl_qlearning_v1.ipynb: a few adjustments
Browse files
fin_rl_qlearning_v1.ipynb
CHANGED
@@ -88,7 +88,7 @@
|
|
88 |
" return action\n",
|
89 |
"\n",
|
90 |
"\n",
|
91 |
-
"def epsilon_greedy_policy(Qtable, state, epsilon):\n",
|
92 |
" # Randomly generate a number between 0 and 1\n",
|
93 |
" random_num = np.random.uniform(size=1)\n",
|
94 |
" # if random_num > greater than epsilon --> exploitation\n",
|
@@ -124,7 +124,7 @@
|
|
124 |
" # repeat\n",
|
125 |
" for step in range(max_steps):\n",
|
126 |
" # Choose the action At using epsilon greedy policy\n",
|
127 |
-
" action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
|
128 |
"\n",
|
129 |
" # Take action At and observe Rt+1 and St+1\n",
|
130 |
" # Take the action (a) and observe the outcome state(s') and reward (r)\n",
|
|
|
88 |
" return action\n",
|
89 |
"\n",
|
90 |
"\n",
|
91 |
+
"def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
|
92 |
" # Randomly generate a number between 0 and 1\n",
|
93 |
" random_num = np.random.uniform(size=1)\n",
|
94 |
" # if random_num > greater than epsilon --> exploitation\n",
|
|
|
124 |
" # repeat\n",
|
125 |
" for step in range(max_steps):\n",
|
126 |
" # Choose the action At using epsilon greedy policy\n",
|
127 |
+
" action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
|
128 |
"\n",
|
129 |
" # Take action At and observe Rt+1 and St+1\n",
|
130 |
" # Take the action (a) and observe the outcome state(s') and reward (r)\n",
|