{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "nwaAZRu1NTiI" }, "source": [ "# Q-learning \n", "\n", "#### This version implements q-learning using a custom enviroment 1 day, with synthetic data, this version implements qtable with SQLITE so you can add several features in the state \n", "\n", "##### Experiments\n", "- Change the reward function and see the results on trading \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "DDf1gLC2NTiK" }, "outputs": [], "source": [ "# !pip install -r ./requirements.txt\n", "# !pip install stable_baselines3\n", "# !pip install yfinance\n", "# !pip install talib-binary\n", "# !pip install huggingface_sb3\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LNXxxKojNTiL" }, "outputs": [], "source": [ "import gym\n", "from gym import spaces\n", "from gym.utils import seeding\n", "\n", "import talib as ta\n", "from tqdm.notebook import tqdm\n", "\n", "import yfinance as yf\n", "import pandas as pd\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import timeit\n", "import sqlite3\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_syntetic_data(tf, start_date, end_date, plot=True, add_noise=None):\n", " df = pd.date_range(start=start_date, end=end_date, freq=tf)\n", " df = df.to_frame()\n", "\n", " df['v1'] = np.arange(len(df.index))\n", " df[['Open','High','Low','Close','Volume']] = 0.0\n", " df = df.drop([0], axis=1)\n", "\n", " df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x/3)+10 )\n", " # df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x)+10 + np.sin(x/2) )\n", " if add_noise is not None: # could be 0.5\n", " noise = np.random.normal(0, add_noise, len(df))\n", " df[\"Close\"] += noise\n", "\n", " if plot:\n", " plt.figure(figsize=(15,6))\n", " df['Close'].tail(30).plot()\n", "\n", " df[\"Open\"]=df[\"Close\"].shift(1)\n", " df = df.dropna()\n", " x = 1.5\n", " df[\"High\"] = np.where( df[\"Close\"] > df['Open'], df[\"Close\"]+x, df[\"Open\"]+x )\n", " df[\"Low\"] = np.where( df[\"Close\"] < df['Open'], df[\"Close\"]-x, df[\"Open\"]-x )\n", " df[\"Volume\"] = 10\n", " return df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dmAuEhZZNTiL" }, "outputs": [], "source": [ "# Get data\n", "eth_usd = yf.Ticker(\"ETH-USD\")\n", "eth = eth_usd.history(period=\"max\")\n", "\n", "btc_usd = yf.Ticker(\"BTC-USD\")\n", "btc = btc_usd.history(period=\"max\")\n", "print(len(btc))\n", "print(len(eth))\n", "\n", "btc_train = eth[-3015:-200]\n", "# btc_test = eth[-200:]\n", "eth_train = eth[-1864:-200]\n", "eth_test = eth[-200:]\n", "# len(eth_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# use synthetic data\n", "# synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2015-02-05\", add_noise=None)\n", "synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2023-01-01\", add_noise=None)\n", "eth_train = synthetic_data[-1864:-200]\n", "eth_test = synthetic_data[-200:]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "class Qtable:\n", " def __init__(self):\n", " self.conn = sqlite3.connect(':memory:')\n", " self.cursor = self.conn.cursor()\n", "\n", " def create_table(self):\n", " columns = [(\"s_position\", \"INTEGER\"),(\"s_di\", \"INTEGER\"), (\"s_mfi\", \"INTEGER\"), (\"s_stock_d\", \"INTEGER\"),(\"s_adx\", \"INTEGER\"), (\"action\", 
\"INTEGER\"), (\"qvalue\", \"REAL\")]\n", " columns_string = \", \".join([f\"{name} {data_type}\" for name, data_type in columns])\n", " columns_keys = \"(s_position, s_di, s_mfi, s_stock_d, s_adx, action)\"\n", " query = f\"CREATE TABLE IF NOT EXISTS QTABLE ({columns_string}, PRIMARY KEY {columns_keys})\"\n", " self.cursor.execute(query)\n", " self.conn.commit()\n", "\n", " def set_q_value(self, state, action, qvalue):\n", " query = f\"INSERT INTO QTABLE (s_position, s_di, s_mfi, s_stock_d, s_adx, action, qvalue) VALUES (?,?,?,?,?,?,?) ON CONFLICT (s_position, s_di, s_mfi, s_stock_d, s_adx, action) DO UPDATE SET qvalue=?\"\n", " self.cursor.execute(query,state.tolist()+[action]+[qvalue]+[qvalue])\n", " self.conn.commit()\n", "\n", " def get_q_value(self, state, action):\n", " self.cursor.execute(\"SELECT qvalue from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=? and action=?\",state.tolist()+[action])\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def get_max_q_value(self, state):\n", " self.cursor.execute(\"SELECT max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def get_max_action(self, state):\n", " self.cursor.execute(\"SELECT action, max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def getall(self):\n", " self.cursor.execute(\"SELECT * from QTABLE \")\n", " return self.cursor.fetchall()\n", " \n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def initialize_q_table():\n", " # s_ state variables\n", " qtable = Qtable()\n", " qtable.create_table() \n", " return qtable" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Policy\n", "\n", "def greedy_policy(Qtable, state):\n", " # Exploitation: take the action with the highest state, action value\n", " # if we dont have a state with values return DO_NOTHING \n", " action = Qtable.get_max_action(state)\n", " # if action is None:\n", " # action = 2\n", " # action = np.argmax(Qtable[state])\n", " return action\n", "\n", "\n", "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n", " # Randomly generate a number between 0 and 1\n", " random_num = np.random.uniform(size=1)\n", " # if random_num > greater than epsilon --> exploitation\n", " if random_num > epsilon:\n", " # Take the action with the highest value given a state\n", " # np.argmax can be useful here\n", " action = greedy_policy(Qtable, state)\n", " # else --> exploration\n", " else:\n", " # action = np.random.random_integers(4,size=1)[0]\n", " action = env.action_space.sample()\n", " \n", " return action" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wlC-EdLENTiN" }, "outputs": [], "source": [ "\n", "def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n", " state_history = []\n", "# np.random.seed(42)\n", " for episode in range(n_training_episodes):\n", " # Reduce epsilon (because we need less and less exploration)\n", " epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n", " # Reset the environment\n", " state = 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Policy\n", "\n", "def greedy_policy(Qtable, state):\n", "    # Exploitation: take the action with the highest state-action value.\n", "    # If the state has no entries yet, None is returned and the environment\n", "    # records it as \"action not in table\".\n", "    action = Qtable.get_max_action(state)\n", "    return action\n", "\n", "\n", "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n", "    # Randomly generate a number between 0 and 1\n", "    random_num = np.random.uniform(size=1)\n", "    # if random_num is greater than epsilon --> exploitation\n", "    if random_num > epsilon:\n", "        # Take the action with the highest value given a state\n", "        action = greedy_policy(Qtable, state)\n", "    # else --> exploration\n", "    else:\n", "        action = env.action_space.sample()\n", "\n", "    return action" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "wlC-EdLENTiN" }, "outputs": [], "source": [ "\n", "def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n", "    state_history = []\n", "    # np.random.seed(42)\n", "    for episode in range(n_training_episodes):\n", "        # Reduce epsilon (because we need less and less exploration)\n", "        epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n", "        # Reset the environment\n", "        state = env.reset()\n", "        step = 0\n", "        done = False\n", "\n", "        # repeat\n", "        for step in range(max_steps):\n", "            # Choose the action At using the epsilon-greedy policy\n", "            action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n", "\n", "            # Take the action (a) and observe the outcome state (s') and reward (r)\n", "            new_state, reward, done, info = env.step(action)\n", "\n", "            # Update Q(s,a) := Q(s,a) + lr * (R(s,a) + gamma * max Q(s',a') - Q(s,a));\n", "            # entries missing from the table default to 0\n", "            qvalue = Qtable.get_q_value(state, action)\n", "            if qvalue is None:\n", "                qvalue = 0\n", "\n", "            q_max_state = Qtable.get_max_q_value(new_state)\n", "            if q_max_state is None:\n", "                q_max_state = 0\n", "\n", "            n_qvalue = qvalue + learning_rate * (reward + gamma * q_max_state - qvalue)\n", "            Qtable.set_q_value(state, action, n_qvalue)\n", "\n", "            # If done, finish the episode\n", "            if done:\n", "                break\n", "\n", "            # Our next state is the new state\n", "            state = new_state\n", "\n", "        state_history.append(state)\n", "\n", "    return Qtable, state_history" ] },
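{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Worked example of the tabular update in train(), with made-up numbers:\n", "# Q(s,a) <- Q(s,a) + lr * (r + gamma * max_a' Q(s',a') - Q(s,a))\n", "q, r, q_max_next = 0.0, 1.0, 0.5  # hypothetical current value, reward, bootstrap\n", "lr, g = 0.2, 0.95                 # same learning_rate / gamma as used below\n", "print(q + lr * (r + g * q_max_next - q))  # 0.2 * (1.0 + 0.475) = 0.295" ] },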
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def evaluate_agent(env, max_steps, n_eval_episodes, Q, random=False):\n", "    \"\"\"\n", "    Evaluate the agent for ``n_eval_episodes`` episodes and return the mean and std\n", "    of the reward and of the profit, plus the fraction of winning trades.\n", "    :param env: The evaluation environment\n", "    :param max_steps: Maximum steps per episode\n", "    :param n_eval_episodes: Number of episodes to evaluate the agent\n", "    :param Q: The Q-table\n", "    :param random: If True, sample random actions instead of using Q\n", "    \"\"\"\n", "    episode_positive_perc_trades = []\n", "    episode_rewards = []\n", "    episode_profits = []\n", "    for episode in tqdm(range(n_eval_episodes), disable=random):\n", "        state = env.reset()\n", "        step = 0\n", "        done = False\n", "        total_rewards_ep = 0\n", "        total_profit_ep = 0\n", "\n", "        for step in range(max_steps):\n", "            # Take the action (index) that has the maximum expected future reward given that state\n", "            if random:\n", "                action = env.action_space.sample()\n", "            else:\n", "                action = greedy_policy(Q, state)\n", "\n", "            new_state, reward, done, info = env.step(action)\n", "            total_rewards_ep += reward\n", "\n", "            if done:\n", "                break\n", "            state = new_state\n", "\n", "        if len(env._trade_history) > 0:\n", "            episode_positive_perc_trades.append(np.count_nonzero(np.array(env._trade_history) > 0)/len(env._trade_history))\n", "        episode_rewards.append(total_rewards_ep)\n", "        episode_profits.append(env.history['total_profit'][-1])\n", "\n", "    mean_reward = np.mean(episode_rewards)\n", "    std_reward = np.std(episode_rewards)\n", "    mean_profit = np.mean(episode_profits)\n", "    std_profit = np.std(episode_profits)\n", "    positive_perc_trades = np.mean(episode_positive_perc_trades)\n", "\n", "    return mean_reward, std_reward, mean_profit, std_profit, positive_perc_trades" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from enum import Enum\n", "class Actions(Enum):\n", "    Sell = 0\n", "    Buy = 1\n", "    Do_nothing = 2\n", "\n", "class CustTradingEnv(gym.Env):\n", "\n", "    def __init__(self, df, max_steps=0, random_start=True):\n", "        self.seed(seed=43)\n", "        self.df = df\n", "        self.prices, self.signal_features = self._process_data()\n", "\n", "        # spaces: the observation is (position, di, mfi, stock_d, adx)\n", "        self.action_space = spaces.Discrete(3)\n", "        self.observation_space = spaces.Box(low=-1, high=19, shape=(5,), dtype=np.float64)\n", "\n", "        # episode\n", "        self._start_tick = 0\n", "        self._end_tick = 0\n", "        self._done = None\n", "        self._current_tick = None\n", "        self._last_trade_tick = None\n", "        self._position = None\n", "        self._position_history = None\n", "        self._total_reward = None\n", "        self._total_profit = None\n", "        self._first_rendering = None\n", "        self.history = None\n", "        self._max_steps = max_steps\n", "        self._start_episode_tick = None\n", "        self._trade_history = None\n", "        self._trade_tick_history = None\n", "        self._random_start = random_start\n", "        self._action_history = None\n", "\n", "    def reset(self):\n", "        self._done = False\n", "        if self._random_start:\n", "            self._start_episode_tick = np.random.randint(1, high=len(self.df) - self._max_steps)\n", "            self._end_tick = self._start_episode_tick + self._max_steps\n", "        else:\n", "            self._start_episode_tick = 1\n", "            self._end_tick = len(self.df) - 1\n", "        self._current_tick = self._start_episode_tick\n", "        self._last_trade_tick = self._current_tick - 1\n", "        self._position = 0\n", "        # Action codes: 1 open buy, 2 close buy, 3 open sell, 4 close sell,\n", "        # 0 do nothing, 5 invalid repeat (penalty), 6 action not in Q-table, -1 unvisited\n", "        self._action_history = [-1] * len(self.prices)\n", "        self._total_reward = 0.\n", "        self._total_profit = 0.\n", "        self._trade_history = []\n", "        self._trade_tick_history = []\n", "        self.history = {}\n", "        return self._get_observation()\n", "\n", "\n", "    def step(self, action):\n", "        self._done = False\n", "        self._current_tick += 1\n", "\n", "        if self._current_tick == self._end_tick:\n", "            self._done = True\n", "\n", "        self._do_act(action)\n", "        step_reward = self._calculate_reward(action)\n", "        self._total_reward += step_reward\n", "\n", "        observation = self._get_observation()\n", "        info = dict(\n", "            total_reward = self._total_reward,\n", "            total_profit = self._total_profit,\n", "            position = self._position,\n", "            action = action\n", "        )\n", "        self._update_history(info)\n", "\n", "        return observation, step_reward, self._done, info\n", "\n", "    def seed(self, seed=None):\n", "        self.np_random, seed = seeding.np_random(seed)\n", "        return [seed]\n", "\n", "    def _get_observation(self):\n", "        # Encode the open position as -1 (short), 0 (flat) or 1 (long)\n", "        if self._position > 0:\n", "            position = 1\n", "        elif self._position < 0:\n", "            position = -1\n", "        else:\n", "            position = 0\n", "        return np.concatenate([[position], self.signal_features[self._current_tick]])\n", "\n", "    def _update_history(self, info):\n", "        if not self.history:\n", "            self.history = {key: [] for key in info.keys()}\n", "\n", "        for key, value in info.items():\n", "            self.history[key].append(value)\n", "\n", "\n",
"    def render(self, mode='human'):\n", "        window_ticks = np.arange(len(self.prices))\n", "        prices = self.prices\n", "        # prices = self.prices[self._start_episode_tick:self._end_tick+1]\n", "        plt.plot(prices)\n", "\n", "        open_buy = []\n", "        close_buy = []\n", "        open_sell = []\n", "        close_sell = []\n", "        do_nothing = []\n", "        penalty = []\n", "        action_not_in_table = []\n", "\n", "        for i, tick in enumerate(window_ticks):\n", "            if self._action_history[i] == 1:\n", "                open_buy.append(tick)\n", "            elif self._action_history[i] == 2:\n", "                close_buy.append(tick)\n", "            elif self._action_history[i] == 3:\n", "                open_sell.append(tick)\n", "            elif self._action_history[i] == 4:\n", "                close_sell.append(tick)\n", "            elif self._action_history[i] == 0:\n", "                do_nothing.append(tick)\n", "            elif self._action_history[i] == 5:\n", "                penalty.append(tick)\n", "            elif self._action_history[i] == 6:\n", "                action_not_in_table.append(tick)\n", "\n", "        plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n", "        plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n", "        plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n", "        plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n", "\n", "        plt.plot(do_nothing, prices[do_nothing], 'oc')\n", "        plt.plot(penalty, prices[penalty], 'yo')\n", "\n", "        plt.plot(action_not_in_table, prices[action_not_in_table], 'ob')\n", "\n", "        plt.suptitle(\n", "            \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n", "            \"Total Profit: %.6f\" % self._total_profit\n", "        )\n", "\n", "    def _do_bin(self, df):\n", "        # 0-100 oscillators are cut into width-5 bins labelled 0..19\n", "        df = pd.cut(df, bins=np.arange(0, 105, 5), labels=False, include_lowest=True)\n", "        return df\n", "\n", "    # The state is encoded with 4 features: MFI, Stochastic (only the D line), ADX and DI+/DI-.\n", "    # MFI, Stochastic and ADX range over 0-100 and are binned into 20 width-5 bins,\n", "    # e.g. an MFI of 25.4 lands in bin 5; DI is set to 1 if DI+ is over DI-, otherwise 0.\n", "    # That gives 20(MFI) * 20(STOCH) * 20(ADX) * 2(DI) = 16000 indicator states,\n", "    # e.g. DI MFI STOCH ADX = 1, 45.2, 25.4, 90.1 is binned to the state (1, 9, 5, 18)\n", "    def _process_data(self):\n", "        timeperiod = 14\n", "        self.df = self.df.copy()\n", "\n", "        self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", "        self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'], self.df['Volume'], timeperiod=timeperiod)\n", "        _, self.df['stock_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n", "        self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", "        self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", "        self.df['di'] = np.where(self.df['p_di'] > self.df['m_di'], 1, 0)\n", "        self.df = self.df.dropna()\n", "        self.df['mfi'] = self._do_bin(self.df['mfi_r'])\n", "        self.df['stock_d'] = self._do_bin(self.df['stock_d_r'])\n", "        self.df['adx'] = self._do_bin(self.df['adx_r'])\n", "\n", "        prices = self.df.loc[:, 'Close'].to_numpy()\n", "        signal_features = self.df.loc[:, ['di', 'mfi', 'stock_d', 'adx']].to_numpy()\n", "\n", "        return prices, signal_features\n", "\n", "\n", "    def _do_act(self, action):\n", "        # action is None when the greedy policy found no entry for the state\n", "        if action is None:\n", "            self._action_history[self._current_tick-1] = 6\n", "\n", "        current_price = self.prices[self._current_tick]\n", "        last_price = self.prices[self._current_tick - 1]\n", "        price_diff = current_price - last_price\n", "\n", "        # OPEN BUY - 1\n", "        if action == Actions.Buy.value and self._position == 0:\n", "            self._position = last_price\n", "            self._last_trade_tick = self._current_tick - 1\n", "            self._action_history[self._current_tick-1] = 1\n", "\n", "        # CLOSE BUY - 2\n", "        elif action == Actions.Sell.value and self._position > 0:\n", "            self._position = 0\n", "            profit = self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]\n", "            self._total_profit += profit\n", "            self._action_history[self._current_tick-1] = 2\n", "            self._trade_history.append(profit)\n", "            self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n", "\n", "        # PENALTY - 5: buying while already long\n", "        elif action == Actions.Buy.value and self._position > 0:\n", "            self._action_history[self._current_tick-1] = 5\n", "\n", "        # OPEN SELL - 3\n", "        elif action == Actions.Sell.value and self._position == 0:\n", "            self._position = -1 * last_price\n", "            self._last_trade_tick = self._current_tick - 1\n", "            self._action_history[self._current_tick-1] = 3\n", "\n", "        # CLOSE SELL - 4\n", "        elif action == Actions.Buy.value and self._position < 0:\n", "            self._position = 0\n", "            profit = -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick])\n", "            self._total_profit += profit\n", "            self._action_history[self._current_tick-1] = 4\n", "            self._trade_history.append(profit)\n", "            self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n", "\n", "        # PENALTY - 5: selling while already short\n", "        elif action == Actions.Sell.value and self._position < 0:\n", "            self._action_history[self._current_tick-1] = 5\n", "\n", "        # DO NOTHING - 0\n", "        elif action == Actions.Do_nothing.value:\n", "            self._action_history[self._current_tick-1] = 0\n", "\n", "\n", "    def _calculate_reward(self, action):\n", "        current_price = self.prices[self._current_tick]\n", "        last_price = self.prices[self._current_tick - 1]\n", "        price_diff = current_price - last_price\n", "\n", "        if not self.history:\n", "            return 0\n", "\n", "        # simple strategy: reward only when the buy or sell is closed\n", "        # closed buy\n", "        if self._position == 0 and self.history['position'][-1] > 0:\n", "            return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]\n", "\n", "        # closed sell\n", "        if self._position == 0 and self.history['position'][-1] < 0:\n", "            return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick])\n", "\n", "        # # reward when opening the buy or sell (DOES NOT WORK)\n", "        # # open buy\n", "        # if self._position > 0 and self.history['position'][-1] == 0:\n", "        #     return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]\n", "\n", "        # # open sell\n", "        # if self._position < 0 and self.history['position'][-1] == 0:\n", "        #     return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick])\n", "\n", "        # # price-diff reward scaled by the open position\n", "        # return price_diff * self._position\n", "\n", "        return 0\n" ] },
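{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch of the binning used by _do_bin (the _demo_vals values are made up):\n", "# a 0-100 oscillator is cut into width-5 bins labelled 0..19.\n", "_demo_vals = pd.Series([0.0, 25.4, 45.2, 90.1, 100.0])\n", "pd.cut(_demo_vals, bins=np.arange(0, 105, 5), labels=False, include_lowest=True)  # 0, 5, 9, 18, 19" ] },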
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Training parameters\n", "n_training_episodes = 20000  # Total training episodes\n", "learning_rate = 0.2          # Learning rate\n", "\n", "# Environment parameters\n", "max_steps = 20               # Max steps per episode\n", "gamma = 0.95                 # Discount rate\n", "\n", "# Exploration parameters\n", "max_epsilon = 1.0            # Exploration probability at start\n", "min_epsilon = 0.05           # Minimum exploration probability\n", "decay_rate = 0.0005          # Exponential decay rate for exploration prob" ] },
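{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Quick look at the exploration schedule train() will follow for the\n", "# parameters above (purely illustrative; nothing here feeds the training).\n", "episodes = np.arange(n_training_episodes)\n", "eps = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episodes)\n", "plt.figure(figsize=(8, 3))\n", "plt.plot(episodes, eps)\n", "plt.xlabel('episode')\n", "plt.ylabel('epsilon')\n", "plt.show()" ] },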
"execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "REhmfLkYNTiN", "outputId": "cf676f6d-83df-43f5-89fe-3258e0041d9d" }, "outputs": [], "source": [ "# create env\n", "env = CustTradingEnv(df=eth_train, max_steps=max_steps, random_start=True)\n", "Qtable_trading = initialize_q_table()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "# train \n", "Qtable_trading, state_history = train(n_training_episodes, min_epsilon, max_epsilon, \n", " decay_rate, env, max_steps, Qtable_trading, learning_rate, gamma )\n", "\n", "len(Qtable_trading.getall())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Qtable_trading.getall()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "max_steps = 60 \n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n", "n_eval_episodes = 1000\n", "\n", "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15,6))\n", "plt.cla()\n", "env_test.render()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# trade sequential\n", "max_steps = len(eth_test)\n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n", "n_eval_episodes = 1\n", "\n", "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15,6))\n", "plt.cla()\n", "env_test.render()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# env_test._trade_tick_history\n", "# Qtable_trading.getall()[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.13 ('rl2')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "01a2dbcb714e40148b41c761fcf43147": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": 
null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "20b0f38ec3234ff28a62a286cd57b933": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "PasswordModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147", "placeholder": "​", "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce", "value": "" } }, "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "VBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da", "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933", "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77", "IPY_MODEL_f1675c09d16a4251b403f9c56255f168", "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2" ], "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d" } }, "2dc5fa9aa3334dfcbdee9c238f2ef60b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3e753b0212644990b558c68853ff2041": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3fa248114ac24656ba74923936a94d2d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "42d140b838b844819bc127afc1b7bc84": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "90c874e91b304ee1a7ef147767ac00ce": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9d847f9a7d47458d8cd57d9b599e47c6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a02224a43d8d4af3bd31d326540d25da": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284", "placeholder": "​", "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "a2cfb91cf66447d7899292854bd64a07": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c1a82965ae26479a98e4fdbde1e64ec2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6", "placeholder": "​", "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" } }, "caef095934ec47bbb8b64eab22049284": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eaba3f1de4444aabadfea2a3dadb1d80": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ee4a21bedc504171ad09d205d634b528": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ButtonStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "f1675c09d16a4251b403f9c56255f168": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ButtonModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07", "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528", "tooltip": "" } }, "f6c845330d6743c0b35c2c7ad834de77": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "CheckboxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041", "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80", "value": true } } } } }, "nbformat": 4, "nbformat_minor": 0 }