{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "nwaAZRu1NTiI" }, "source": [ "# Policy Gradient\n", "\n", "\n", "#### This version implements Policy Gradient using a custom enviroment (Unit 4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install talib-binary\n", "!pip install yfinance" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "id": "LNXxxKojNTiL" }, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow.keras import layers, Model, Input\n", "from tensorflow.keras.utils import to_categorical\n", "import tensorflow.keras.backend as K\n", "\n", "import gym\n", "from gym import spaces\n", "from gym.utils import seeding\n", "from gym import wrappers\n", "\n", "from tqdm.notebook import tqdm\n", "from collections import deque\n", "import numpy as np\n", "import random\n", "from matplotlib import pyplot as plt\n", "from sklearn.preprocessing import MinMaxScaler\n", "import joblib\n", "import talib as ta\n", "import yfinance as yf\n", "import pandas as pd\n", "\n", "import io\n", "import base64\n", "from IPython.display import HTML, Video\n" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# custom model to be able to run a custom loss with parameters\n", "class CustomModel(tf.keras.Model):\n", " def custom_loss(self,y, y_pred, d_returns):\n", " log_like = y * K.log(y_pred)\n", " # K.print_tensor(d_returns)\n", " return K.sum(-log_like * d_returns )\n", " \n", " def train_step(self, data):\n", " # Unpack the data. Its structure depends on your model and\n", " # on what you pass to `fit()`.\n", " if len(data) == 3:\n", " x, y, sample_weight = data\n", " else:\n", " sample_weight = None\n", " x, y = data\n", "\n", " # check if we passed the d_return\n", " if isinstance(x, tuple):\n", " x, d_return = x\n", "\n", " with tf.GradientTape() as tape:\n", " y_pred = self(x, training=True) # Forward pass\n", " # Compute the loss value.\n", " y = tf.cast(y, tf.float32)\n", " loss = self.custom_loss(y, y_pred, d_return)\n", "\n", " # Compute gradients\n", " trainable_vars = self.trainable_variables\n", " gradients = tape.gradient(loss, trainable_vars)\n", "\n", " # Update weights\n", " self.optimizer.apply_gradients(zip(gradients, trainable_vars))\n", "\n", " # Update the metrics.\n", " # Metrics are configured in `compile()`.\n", " self.compiled_metrics.update_state(y, y_pred, sample_weight=sample_weight)\n", "\n", " # Return a dict mapping metric names to current value.\n", " # Note that it will include the loss (tracked in self.metrics).\n", " return {m.name: m.result() for m in self.metrics}" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "class Policy:\n", " def __init__(self, env=None, action_size=2):\n", "\n", " self.action_size = action_size\n", "\n", " # Hyperparameters\n", " self.gamma = 0.95 # Discount rate\n", "\n", " self.learning_rate = 1e-3\n", " \n", " # Construct DQN models\n", " self.env = env\n", " self.action_size = action_size\n", " self.action_space = [i for i in range(action_size)]\n", " print(\"action space\",self.action_space)\n", " # self.saved_log_probs = None\n", " self.model= self._build_model()\n", " self.model.summary()\n", "\n", "\n", " def _build_model(self):\n", " x = Input(shape=(4,), name='x_input')\n", " # y_true = Input( shape=(2,), name='y_true' )\n", " d_returns = Input(shape=[1], name='d_returns')\n", "\n", " l = layers.Dense(16, activation = 'relu')(x)\n", " l = 
" l = layers.Dense(16, activation = 'relu')(l)\n", " y_pred = layers.Dense(self.action_size, activation = 'softmax', name='y_pred')(l)\n", " \n", " optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)\n", "\n", " # model_train = Model( inputs=[x], outputs=[y_pred], name='train_only' )\n", " model_train = CustomModel( inputs=x, outputs=y_pred, name='train_only' )\n", " # model_predict = Model( inputs=x, outputs=y_pred, name='predict_only' )\n", " model_train.compile(loss=None, optimizer=optimizer, metrics = ['accuracy'])\n", " # use run_eagerly to print values inside the loss function to debug\n", " # model_train.compile(loss=None, optimizer=optimizer, metrics = ['accuracy'], run_eagerly = True)\n", "\n", " return model_train\n", "\n", " def act(self, state):\n", " probs = self.model.predict(np.array([state]), verbose=0)[0]\n", " action = np.random.choice(self.action_space, p=probs)\n", "\n", " return action\n", "\n", " # This implements the REINFORCE training loop\n", " def learn(self, n_training_episodes=None, max_t=None, print_every=100):\n", " # Helps us calculate the score during training\n", " scores_deque = deque(maxlen=100)\n", " scores = []\n", " # Line 3 of pseudocode\n", " for i_episode in range(1, n_training_episodes+1):\n", " # saved_log_probs = []\n", " saved_actions = []\n", " saved_state = []\n", " rewards = []\n", " state = self.env.reset()\n", " # Line 4 of pseudocode\n", " for t in range(max_t):\n", " saved_state.append(state)\n", " action = self.act(state)\n", " # action, log_prob = self.act(state)\n", " # saved_log_probs.append(log_prob)\n", " saved_actions.append(action)\n", " state, reward, done, _ = self.env.step(action)\n", " rewards.append(reward)\n", " if done:\n", " break \n", " scores_deque.append(sum(rewards))\n", " scores.append(sum(rewards))\n", " \n", " # Line 6 of pseudocode: calculate the return\n", " returns = deque(maxlen=max_t) \n", " n_steps = len(rewards) \n", " # Compute the discounted returns at each timestep,\n", " # as \n", " # the sum of the reward at time t and the gamma-discounted return from the next timestep (G_(t+1))\n", " #\n", " # In O(N) time, where N is the number of time steps\n", " # (this definition of the discounted return G_t follows the definition of this quantity \n", " # shown at page 44 of Sutton&Barto 2017 2nd draft)\n", " # G_t = r_(t+1) + gamma*r_(t+2) + gamma^2*r_(t+3) + ...\n", " \n", " # Given this formulation, the returns at each timestep t can be computed \n", " # by re-using the computed future returns G_(t+1) to compute the current return G_t\n", " # G_t = r_(t+1) + gamma*G_(t+1)\n", " # G_(t-1) = r_t + gamma*G_t\n", " # (this follows a dynamic programming approach, with which we memoize solutions in order \n", " # to avoid computing them multiple times)\n", " \n", " # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)\n", " # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...\n", " \n", " \n", " ## Given the above, we calculate the returns at timestep t as: \n", " # gamma * return[t+1] + reward[t]\n", " #\n", " ## We compute this starting from the last timestep to the first, in order\n", " ## to employ the formula presented above and avoid redundant computations that would be needed \n", " ## if we were to do it from first to last.\n", " \n", " ## Hence, the queue \"returns\" will hold the returns in chronological order, from t=0 to t=n_steps-1,\n", " ## thanks to the appendleft() function, which appends at position 0 in constant time O(1);\n", " ## a normal python list would instead 
require O(N) to do this.\n", " for t in range(n_steps)[::-1]:\n", " disc_return_t = (returns[0] if len(returns)>0 else 0)\n", " returns.appendleft( self.gamma*disc_return_t + rewards[t] ) \n", " \n", " ## standardization of the returns is employed to make training more stable\n", " eps = np.finfo(np.float32).eps.item()\n", " ## eps is the smallest representable float, which is \n", " # added to the standard deviation of the returns to avoid numerical instabilities \n", " returns = np.array(returns)\n", " returns = (returns - returns.mean()) / (returns.std() + eps)\n", " # self.saved_log_probs = saved_log_probs\n", " \n", " # Line 7:\n", " saved_state = np.array(saved_state)\n", " # print(\"Saved state\", saved_state, saved_state.shape)\n", " saved_actions = np.array(to_categorical(saved_actions, num_classes=self.action_size))\n", " # print(\"Saved actions\", saved_actions, saved_actions.shape)\n", " returns = returns.reshape(-1,1)\n", " # print(\"Returns\", returns, returns.shape)\n", " # this is the trick part, we send a tuple so the CustomModel is able to split the x and use \n", " # the returns inside to calculate the custom loss\n", " self.model.train_on_batch(x=(saved_state,returns), y=saved_actions)\n", "\n", " # policy_loss = []\n", " # for action, log_prob, disc_return in zip(saved_actions, saved_log_probs, returns):\n", " # policy_loss.append(-log_prob * disc_return)\n", " # policy_loss = torch.cat(policy_loss).sum()\n", " \n", " # # Line 8: PyTorch prefers gradient descent \n", " # optimizer.zero_grad()\n", " # policy_loss.backward()\n", " # optimizer.step()\n", " \n", " if i_episode % print_every == 0:\n", " print('Episode {}\\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))\n", " \n", " return scores\n", "\n", "\n", " #\n", " # Loads a saved model\n", " #\n", " def load(self, name):\n", " self.model = tf.keras.models.load_model(name)\n", " # self.scaler = joblib.load(name+\".scaler\") \n", "\n", " #\n", " # Saves parameters of a trained model\n", " #\n", " def save(self, name):\n", " self.model.save(name)\n", " # joblib.dump(self.scaler, name+\".scaler\") \n", "\n", " def play(self, state):\n", " # state = self._get_scaled_state(state)\n", " return np.argmax(self.model.predict(np.array([state]), verbose=0)[0])" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "from enum import Enum\n", "class Actions(Enum):\n", " Sell = 0\n", " Buy = 1\n", " Do_nothing = 2\n", "\n", "class CustTradingEnv(gym.Env):\n", "\n", " def __init__(self, df, max_steps=0, seed=8, random_start=True, scaler=None):\n", " self.seed(seed=seed)\n", " self.df = df\n", " if scaler is None:\n", " self.scaler = MinMaxScaler()\n", " else:\n", " self.scaler = scaler\n", " self.prices, self.signal_features = self._process_data()\n", "\n", " # spaces\n", " self.action_space = spaces.Discrete(3)\n", " self.observation_space = spaces.Box(low=0, high=1, shape=(1,) , dtype=np.float64)\n", "\n", " # episode\n", " self._start_tick = 0\n", " self._end_tick = 0\n", " self._done = None\n", " self._current_tick = None\n", " self._last_trade_tick = None\n", " self._position = None\n", " self._position_history = None\n", " self._total_reward = None\n", " self._total_profit = None\n", " self._first_rendering = None\n", " self.history = None\n", " self._max_steps = max_steps\n", " self._start_episode_tick = None\n", " self._trade_history = None\n", " self._random_start = random_start\n", "\n", "\n", " def reset(self):\n", " self._done = False\n", " if 
self._random_start:\n", " self._start_episode_tick = np.random.randint(1,high=len(self.df)- self._max_steps )\n", " self._end_tick = self._start_episode_tick + self._max_steps\n", " else:\n", " self._start_episode_tick = 1\n", " self._end_tick = len(self.df)-1\n", "\n", " self._current_tick = self._start_episode_tick\n", " self._last_trade_tick = self._current_tick - 1\n", " self._position = 0\n", " self._position_history = []\n", " # self._position_history = (self.window_size * [None]) + [self._position]\n", " self._total_reward = 0.\n", " self._total_profit = 0.\n", " self._trade_history = []\n", " self.history = {}\n", " return self._get_observation()\n", "\n", "\n", " def step(self, action):\n", " self._done = False\n", " self._current_tick += 1\n", "\n", " if self._current_tick == self._end_tick:\n", " self._done = True\n", "\n", " step_reward = self._calculate_reward(action)\n", " self._total_reward += step_reward\n", "\n", " observation = self._get_observation()\n", " info = dict(\n", " total_reward = self._total_reward,\n", " total_profit = self._total_profit,\n", " position = self._position,\n", " action = action\n", " )\n", " self._update_history(info)\n", "\n", " return observation, step_reward, self._done, info\n", "\n", " def seed(self, seed=None):\n", " self.np_random, seed = seeding.np_random(seed)\n", " return [seed]\n", " \n", " def _get_observation(self):\n", " return self.signal_features[self._current_tick]\n", "\n", " def _update_history(self, info):\n", " if not self.history:\n", " self.history = {key: [] for key in info.keys()}\n", "\n", " for key, value in info.items():\n", " self.history[key].append(value)\n", "\n", "\n", " def render(self, mode='human'):\n", " window_ticks = np.arange(len(self._position_history))\n", " prices = self.prices[self._start_episode_tick:self._end_tick+1]\n", " plt.plot(prices)\n", "\n", " open_buy = []\n", " close_buy = []\n", " open_sell = []\n", " close_sell = []\n", " do_nothing = []\n", "\n", " for i, tick in enumerate(window_ticks):\n", " if self._position_history[i] == 1:\n", " open_buy.append(tick)\n", " elif self._position_history[i] == 2 :\n", " close_buy.append(tick)\n", " elif self._position_history[i] == 3 :\n", " open_sell.append(tick)\n", " elif self._position_history[i] == 4 :\n", " close_sell.append(tick)\n", " elif self._position_history[i] == 0 :\n", " do_nothing.append(tick)\n", "\n", " plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n", " plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n", " plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n", " plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n", " \n", " plt.plot(do_nothing, prices[do_nothing], 'yo')\n", "\n", " plt.suptitle(\n", " \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n", " \"Total Profit: %.6f\" % self._total_profit\n", " )\n", "\n", " def _calculate_reward(self, action):\n", " step_reward = 0\n", "\n", " current_price = self.prices[self._current_tick]\n", " last_price = self.prices[self._current_tick - 1]\n", " price_diff = current_price - last_price\n", "\n", " penalty = -1 * last_price * 0.01\n", " # OPEN BUY - 1\n", " if action == Actions.Buy.value and self._position == 0:\n", " self._position = 1\n", " step_reward += price_diff\n", " self._last_trade_tick = self._current_tick - 1\n", " self._position_history.append(1)\n", "\n", " elif action == Actions.Buy.value and self._position > 0:\n", " step_reward += penalty\n", " self._position_history.append(-1)\n", " # CLOSE SELL - 4\n", " elif action == 
Actions.Buy.value and self._position < 0:\n", " self._position = 0\n", " step_reward += -1 * (self.prices[self._current_tick -1] - self.prices[self._last_trade_tick]) \n", " self._total_profit += step_reward\n", " self._position_history.append(4)\n", " self._trade_history.append(step_reward)\n", "\n", " # OPEN SELL - 3\n", " elif action == Actions.Sell.value and self._position == 0:\n", " self._position = -1\n", " step_reward += -1 * price_diff\n", " self._last_trade_tick = self._current_tick - 1\n", " self._position_history.append(3)\n", " # CLOSE BUY - 2\n", " elif action == Actions.Sell.value and self._position > 0:\n", " self._position = 0\n", " step_reward += self.prices[self._current_tick -1] - self.prices[self._last_trade_tick] \n", " self._total_profit += step_reward\n", " self._position_history.append(2)\n", " self._trade_history.append(step_reward)\n", " elif action == Actions.Sell.value and self._position < 0:\n", " step_reward += penalty\n", " self._position_history.append(-1)\n", "\n", " # DO NOTHING - 0\n", " elif action == Actions.Do_nothing.value and self._position > 0:\n", " step_reward += price_diff\n", " self._position_history.append(0)\n", " elif action == Actions.Do_nothing.value and self._position < 0:\n", " step_reward += -1 * price_diff\n", " self._position_history.append(0)\n", " elif action == Actions.Do_nothing.value and self._position == 0:\n", " step_reward += -1 * abs(price_diff)\n", " self._position_history.append(0)\n", "\n", " return step_reward\n", "\n", " def get_scaler(self):\n", " return self.scaler\n", "\n", " def set_scaler(self, scaler):\n", " self.scaler = scaler\n", " \n", " def _process_data(self):\n", " timeperiod = 14\n", " self.df = self.df.copy()\n", " \n", " self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n", " _, self.df['stoch_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n", " self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n", "\n", " self.df = self.df.dropna()\n", " # self.df['di_s']=self.df['di']\n", " # self.df['mfi_s']=self.df['mfi_r']\n", " # self.df['stoch_d_s']=self.df['stoch_d_r']\n", " # self.df['adx_s']=self.df['adx_r']\n", "\n", " self.df[['di_s','mfi_s','stoch_d_s','adx_s']] = self.scaler.fit_transform(self.df[['di','mfi_r','stoch_d_r','adx_r']])\n", "\n", " def f1(row):\n", " row['state'] = [row['di_s'], row['mfi_s'], row['stoch_d_s'], row['adx_s']]\n", " return row\n", "\n", " self.df = self.df.apply(f1, axis=1 )\n", "\n", " prices = self.df.loc[:, 'Close'].to_numpy()\n", " # print(self.df.head(30))\n", "\n", " signal_features = np.stack(self.df.loc[:, 'state'].to_numpy())\n", "\n", " return prices, signal_features" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3067\n", "1918\n" ] } ], "source": [ "# Get data\n", "eth_usd = yf.Ticker(\"ETH-USD\")\n", "eth = eth_usd.history(period=\"max\")\n", "\n", "btc_usd = yf.Ticker(\"BTC-USD\")\n", "btc = btc_usd.history(period=\"max\")\n", "print(len(btc))\n", "print(len(eth))\n", "\n", 
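"\n", "# Train/test split: hold out the most recent 200 days as an out-of-sample test window and use the\n", "# window just before it for training (only the ETH splits are used by the training and evaluation cells below).\n",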
"btc_train = eth[-3015:-200]\n", "# btc_test = eth[-200:]\n", "eth_train = eth[-1864:-200]\n", "eth_test = eth[-200:]\n", "# len(eth_train)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "action space [0, 1, 2]\n", "Model: \"train_only\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " x_input (InputLayer) [(None, 4)] 0 \n", " \n", " dense_22 (Dense) (None, 16) 80 \n", " \n", " dense_23 (Dense) (None, 16) 272 \n", " \n", " y_pred (Dense) (None, 3) 51 \n", " \n", "=================================================================\n", "Total params: 403\n", "Trainable params: 403\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Episode 100\tAverage Score: -180.05\n", "Episode 200\tAverage Score: -164.72\n", "Episode 300\tAverage Score: -81.03\n", "Episode 400\tAverage Score: -117.40\n", "Episode 500\tAverage Score: -182.76\n", "Episode 600\tAverage Score: -92.27\n", "Episode 700\tAverage Score: -207.78\n", "Episode 800\tAverage Score: -232.02\n", "Episode 900\tAverage Score: -29.72\n", "Episode 1000\tAverage Score: -44.37\n", "Episode 1100\tAverage Score: -60.61\n", "Episode 1200\tAverage Score: -67.30\n", "Episode 1300\tAverage Score: -36.28\n", "Episode 1400\tAverage Score: -60.42\n", "Episode 1500\tAverage Score: -93.99\n", "Episode 1600\tAverage Score: -70.92\n", "Episode 1700\tAverage Score: -88.01\n", "Episode 1800\tAverage Score: -21.69\n", "Episode 1900\tAverage Score: -66.15\n", "Episode 2000\tAverage Score: -96.49\n", "Episode 2100\tAverage Score: -33.40\n", "Episode 2200\tAverage Score: -25.62\n", "Episode 2300\tAverage Score: -46.25\n", "Episode 2400\tAverage Score: -63.88\n", "Episode 2500\tAverage Score: -29.43\n", "Episode 2600\tAverage Score: -19.85\n", "Episode 2700\tAverage Score: -53.53\n", "Episode 2800\tAverage Score: -42.98\n", "Episode 2900\tAverage Score: -50.12\n", "Episode 3000\tAverage Score: -27.25\n" ] } ], "source": [ "# create env\n", "max_steps = 20 \n", "env = CustTradingEnv(df=eth_train, max_steps=max_steps)\n", "\n", "model = Policy(env=env, action_size=3)\n", "# model.learn(total_steps=6_000)\n", "\n", "model.learn(n_training_episodes=3000, max_t=20, print_every=100)\n", "# model.learn(n_training_episodes=1000, max_t=1000, print_every=100)\n", "env.close()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.save(\"./alt/fin_rl_policy_gradient_v1\")\n", "joblib.dump(env.get_scaler(),\"./alt/fin_rl_policy_gradient_v1.h5_scaler\")\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "def evaluate_agent(env, max_steps, n_eval_episodes, model, random=False):\n", " \"\"\"\n", " Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n", " :param env: The evaluation environment\n", " :param n_eval_episodes: Number of episode to evaluate the agent\n", " :param model: The DQN model\n", " \"\"\"\n", " episode_rewards = []\n", " episode_profits = []\n", " for episode in tqdm(range(n_eval_episodes), disable=random):\n", " state = env.reset()\n", " step = 0\n", " done = False\n", " total_rewards_ep = 0\n", " total_profit_ep = 0\n", " \n", " for step in range(max_steps):\n", " # Take the action (index) that have the maximum expected future 
reward given that state\n", " if random:\n", " action = env.action_space.sample()\n", " else:\n", " action = model.play(state)\n", " # print(action)\n", " \n", " new_state, reward, done, info = env.step(action)\n", " total_rewards_ep += reward\n", " \n", " if done:\n", " break\n", " state = new_state\n", "\n", " episode_rewards.append(total_rewards_ep)\n", " episode_profits.append(env.history['total_profit'][-1])\n", " # print(env.history)\n", " # env.render()\n", " # assert 0\n", "\n", " mean_reward = np.mean(episode_rewards)\n", " std_reward = np.std(episode_rewards)\n", " mean_profit = np.mean(episode_profits)\n", " std_profit = np.std(episode_profits)\n", "\n", " return mean_reward, std_reward, mean_profit, std_profit" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "max_steps = 20 \n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True, scaler=env.get_scaler())\n", "n_eval_episodes = 1000\n", "\n", "evaluate_agent(env_test, max_steps, n_eval_episodes, model)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15,6))\n", "plt.cla()\n", "env_l.render()\n" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(-156.66986416870117,\n", " 394.94783990529805,\n", " 4.957175903320312,\n", " 211.59187866264426)" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Evaluate random actions over n_eval_episodes episodes\n", "max_steps = 20 \n", "env_test_rand = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True, scaler=env.get_scaler())\n", "n_eval_episodes = 1000\n", "\n", "evaluate_agent(env_test_rand, max_steps, n_eval_episodes, model, random=True)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean profit 3.7792178955078124\n" ] } ], "source": [ "# trade sequentially with random actions \n", "max_steps = len(eth_test)\n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False, scaler=env.get_scaler())\n", "n_eval_episodes = 1\n", "\n", "all_profit=[]\n", "for i in range(1000):\n", " _,_,profit,_=evaluate_agent(env_test, max_steps, n_eval_episodes, model, random=True)\n", " all_profit.append(profit)\n", "print(f\"Mean profit {np.mean(all_profit)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Results\n", "\n", "| Model | 1000 trades 20 steps | Sequential trading | 1000 trades 20 steps random actions | Sequential random|\n", "|------------|----------------------|--------------------|-------------------------------------|------------------|\n", "|Q-learning | 113.14 | 563.67 | -18.10 | 39.30 |\n", "|DQN | 87.62 | 381.17 | 4.95 | 3.77 |\n", "|Policy Gradient | | | | |\n", "\n", "\n", "#### Actions are: Buy/Sell/Hold 1 ETH \n", "1000 trades 20 steps - Made 1000 episodes, 20 trades each episode, result is the mean return of each episode 
\n", "\n", "Sequential trading (175 days)- Trade the test set sequentially from start to end day \n", "\n", "1000 trades 20 steps random actions - Made 1000 episodes, 20 trades each episode taking random actions \n", "\n", "Sequential random (175 days)- Trade the test set sequentially from start to end day with random actions " ] }, { "cell_type": "markdown", "metadata": {}, "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.13 ('rl2')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "01a2dbcb714e40148b41c761fcf43147": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "20b0f38ec3234ff28a62a286cd57b933": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "PasswordModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147", "placeholder": "​", "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce", "value": "" } }, "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "VBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da", "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933", "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77", "IPY_MODEL_f1675c09d16a4251b403f9c56255f168", "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2" ], "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d" } }, "2dc5fa9aa3334dfcbdee9c238f2ef60b": { "model_module": 
"@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3e753b0212644990b558c68853ff2041": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3fa248114ac24656ba74923936a94d2d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "42d140b838b844819bc127afc1b7bc84": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "90c874e91b304ee1a7ef147767ac00ce": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, 
"9d847f9a7d47458d8cd57d9b599e47c6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a02224a43d8d4af3bd31d326540d25da": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284", "placeholder": "​", "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "a2cfb91cf66447d7899292854bd64a07": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c1a82965ae26479a98e4fdbde1e64ec2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6", "placeholder": "​", "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" } }, "caef095934ec47bbb8b64eab22049284": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eaba3f1de4444aabadfea2a3dadb1d80": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ee4a21bedc504171ad09d205d634b528": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ButtonStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "f1675c09d16a4251b403f9c56255f168": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ButtonModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07", "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528", "tooltip": "" } }, "f6c845330d6743c0b35c2c7ad834de77": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "CheckboxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041", "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80", "value": true } } } } }, "nbformat": 4, "nbformat_minor": 0 }