bonadio
/

rl-fin

bonadio committed on Dec 17, 2022

Commit

ab0f626

•

1 Parent(s): a39f235

Q-learning in fin_rl_qlearning_v1.ipynb: a few adjustments

Files changed (1) hide show

fin_rl_qlearning_v1.ipynb CHANGED Viewed

@@ -88,7 +88,7 @@
         "  return action\n",
         "\n",
         "\n",
-        "def epsilon_greedy_policy(Qtable, state, epsilon):\n",
         "  # Randomly generate a number between 0 and 1\n",
         "  random_num = np.random.uniform(size=1)\n",
         "  # if random_num > greater than epsilon --> exploitation\n",
@@ -124,7 +124,7 @@
         "    # repeat\n",
         "    for step in range(max_steps):\n",
         "      # Choose the action At using epsilon greedy policy\n",
-        "      action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
         "\n",
         "      # Take action At and observe Rt+1 and St+1\n",
         "      # Take the action (a) and observe the outcome state(s') and reward (r)\n",

         "  return action\n",
         "\n",
         "\n",
+        "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
         "  # Randomly generate a number between 0 and 1\n",
         "  random_num = np.random.uniform(size=1)\n",
         "  # if random_num > greater than epsilon --> exploitation\n",
         "    # repeat\n",
         "    for step in range(max_steps):\n",
         "      # Choose the action At using epsilon greedy policy\n",
+        "      action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
         "\n",
         "      # Take action At and observe Rt+1 and St+1\n",
         "      # Take the action (a) and observe the outcome state(s') and reward (r)\n",