bonadio committed on
Commit
ab0f626
1 Parent(s): a39f235

Q-learning in fin_rl_qlearning_v1.ipynb: a few adjustments

Browse files
Files changed (1) hide show
  1. fin_rl_qlearning_v1.ipynb +2 -2
fin_rl_qlearning_v1.ipynb CHANGED
@@ -88,7 +88,7 @@
88
  " return action\n",
89
  "\n",
90
  "\n",
91
- "def epsilon_greedy_policy(Qtable, state, epsilon):\n",
92
  " # Randomly generate a number between 0 and 1\n",
93
  " random_num = np.random.uniform(size=1)\n",
94
  " # if random_num > greater than epsilon --> exploitation\n",
@@ -124,7 +124,7 @@
124
  " # repeat\n",
125
  " for step in range(max_steps):\n",
126
  " # Choose the action At using epsilon greedy policy\n",
127
- " action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
128
  "\n",
129
  " # Take action At and observe Rt+1 and St+1\n",
130
  " # Take the action (a) and observe the outcome state(s') and reward (r)\n",
 
88
  " return action\n",
89
  "\n",
90
  "\n",
91
+ "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
92
  " # Randomly generate a number between 0 and 1\n",
93
  " random_num = np.random.uniform(size=1)\n",
94
  " # if random_num > greater than epsilon --> exploitation\n",
 
124
  " # repeat\n",
125
  " for step in range(max_steps):\n",
126
  " # Choose the action At using epsilon greedy policy\n",
127
+ " action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
128
  "\n",
129
  " # Take action At and observe Rt+1 and St+1\n",
130
  " # Take the action (a) and observe the outcome state(s') and reward (r)\n",