{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yfinance as yf\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
    "from sklearn.compose import ColumnTransformer\n",
    "import joblib\n",
    "import keras\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of most recent rows held out as the test window.\n",
    "TEST_DAYS = 40"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "# True: load the pre-built frame from nifty_data.pkl.\n",
    "# False: download ^NSEI with yfinance and derive the features below.\n",
    "INDICATOR_DATASET = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sessions whose absolute close-to-close move reaches this percentage are dropped as outliers.\n",
    "MAX_ABS_DAILY_CHANGE_PCT = 3.5\n",
    "\n",
    "if INDICATOR_DATASET:\n",
    "    # NOTE(review): joblib.load unpickles arbitrary objects -- only load a file you trust.\n",
    "    d = joblib.load('nifty_data.pkl')\n",
    "else:\n",
    "    d = yf.download(\n",
    "        tickers=\"^NSEI\",\n",
    "        period='max',\n",
    "        interval='1d',\n",
    "        progress=False,\n",
    "        timeout=10\n",
    "    )\n",
    "    # Some yfinance releases return (field, ticker) MultiIndex columns even for a\n",
    "    # single ticker; flatten to the field names so d['Open'] etc. are Series.\n",
    "    if isinstance(d.columns, pd.MultiIndex):\n",
    "        d.columns = d.columns.get_level_values(0)\n",
    "\n",
    "    # Label from raw prices: floor(today's Open / next session's Close).\n",
    "    # The last row has no next session, so its target is NaN and is dropped below.\n",
    "    d['target'] = np.floor(d['Open'] / d['Close'].shift(-1))\n",
    "\n",
    "    # Replace raw prices with day-over-day percentage changes (first row becomes NaN).\n",
    "    price_cols = ['High', 'Low', 'Open', 'Close']\n",
    "    d[price_cols] = d[price_cols].pct_change() * 100\n",
    "    d['change'] = d['Close'].abs()\n",
    "\n",
    "    # Drop outlier sessions and incomplete rows. Chaining .dropna() builds a new\n",
    "    # frame instead of mutating a slice in place (the cause of SettingWithCopyWarning).\n",
    "    d = d.loc[d['change'] < MAX_ABS_DAILY_CHANGE_PCT].dropna()\n",
    "\n",
    "# Kept at top level: an expression nested inside if/else is never rendered as cell output.\n",
    "d.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocessBeforeScaling(df):\n",
    "    \"\"\"Return a copy of `df` with the OHLC columns as day-over-day percentage changes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame containing 'High', 'Low', 'Open' and 'Close' columns.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        A new frame in which those four columns hold `pct_change() * 100`\n",
    "        (so their first row is NaN); every other column is carried over as is.\n",
    "        The input frame is left untouched -- use the return value.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If any of the four price columns is missing.\n",
    "    \"\"\"\n",
    "    price_cols = ['High', 'Low', 'Open', 'Close']\n",
    "    out = df.copy()\n",
    "    out[price_cols] = out[price_cols].pct_change() * 100\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out the most recent TEST_DAYS rows; copy so later edits never touch a view of `d`.\n",
    "test_dataset = d.tail(TEST_DAYS).copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Trim the hold-out window plus one extra row from the training frame.\n",
    "# NOTE(review): the extra row is presumably dropped because its target depends on the\n",
    "# first hold-out Close -- confirm this gap is intended.\n",
    "# Not idempotent: re-running this cell trims `d` again.\n",
    "d = d.iloc[:-(TEST_DAYS + 1)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns that are not model inputs; names absent from `d` are skipped (errors='ignore').\n",
    "drop_cols = ['target']\n",
    "if not INDICATOR_DATASET:\n",
    "    drop_cols += ['Adj Close', 'Volume', 'change']\n",
    "\n",
    "x = d.drop(columns=drop_cols, errors='ignore')\n",
    "y = d['target']"
   ]
  },
  { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Open | \n", "High | \n", "Low | \n", "Close | \n", "
---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " |
2007-09-18 | \n", "-0.538904 | \n", "0.060452 | \n", "-0.029006 | \n", "1.146926 | \n", "
2007-09-20 | \n", "4.056922 | \n", "0.461070 | \n", "3.755835 | \n", "0.321187 | \n", "
2007-09-21 | \n", "0.382274 | \n", "1.992293 | \n", "0.265831 | \n", "1.895715 | \n", "
2007-09-24 | \n", "1.771525 | \n", "1.759781 | \n", "2.185388 | \n", "1.956577 | \n", "
2007-09-25 | \n", "2.107650 | \n", "0.258037 | \n", "0.847607 | \n", "0.134826 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2022-12-02 | \n", "-0.633474 | \n", "-0.559364 | \n", "-0.740220 | \n", "-0.618740 | \n", "
2022-12-05 | \n", "-0.175175 | \n", "-0.284047 | \n", "-0.256715 | \n", "0.026482 | \n", "
2022-12-06 | \n", "-0.635167 | \n", "-0.393512 | \n", "-0.072341 | \n", "-0.311751 | \n", "
2022-12-07 | \n", "0.205365 | \n", "0.071833 | \n", "-0.266446 | \n", "-0.441190 | \n", "
2022-12-08 | \n", "-0.364829 | \n", "-0.231948 | \n", "0.046139 | \n", "0.263191 | \n", "
3632 rows × 4 columns
\n", "\n", " | Open | \n", "High | \n", "Low | \n", "Close | \n", "Adj Close | \n", "Volume | \n", "
---|---|---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2023-02-01 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "17616.300781 | \n", "512900 | \n", "
2023-02-02 | \n", "-1.653417 | \n", "-1.771062 | \n", "0.533318 | \n", "-0.033494 | \n", "17610.400391 | \n", "490100 | \n", "
2023-02-03 | \n", "1.168289 | \n", "1.225794 | \n", "0.792448 | \n", "1.383560 | \n", "17854.050781 | \n", "424100 | \n", "
2023-02-06 | \n", "0.546226 | \n", "-0.260777 | \n", "0.649165 | \n", "-0.501013 | \n", "17764.599609 | \n", "282500 | \n", "
2023-02-07 | \n", "-0.159672 | \n", "-0.070405 | \n", "-0.258775 | \n", "-0.242615 | \n", "17721.500000 | \n", "0 | \n", "