{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n", "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 3.81it/s]\n", "Getting release dates: 100%|██████████| 8/8 [00:01<00:00, 5.02it/s]\n", "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 7994.86it/s]\n", "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 1141.77it/s]\n", "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n" ] } ], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "from datasets import load_dataset\n",
 "from model_day import get_data\n",
 "\n",
 "OHLC_COLUMNS = ['Open', 'High', 'Low', 'Close']\n",
 "\n",
 "# raw_data is used below for its 'CurrentGap' column.\n",
 "raw_data, df_final, final_date = get_data()\n",
 "\n",
 "# Each record's 'text' field holds one comma-separated row: Datetime,Open,High,Low,Close.\n",
 "spx_dataset = load_dataset(\"boomsss/spx_intra\", split='train')\n",
 "rows = [record['text'].split(',') for record in spx_dataset]\n",
 "\n",
 "fr = pd.DataFrame(columns=['Datetime'] + OHLC_COLUMNS, data=rows)\n",
 "\n",
 "# Parse the timestamps, then mark them as New York time.\n",
 "fr['Datetime'] = pd.to_datetime(fr['Datetime']).dt.tz_localize('America/New_York')\n",
 "fr = fr.set_index('Datetime')\n",
 "\n",
 "# The split leaves every field as a string; convert the prices to numbers.\n",
 "fr[OHLC_COLUMNS] = fr[OHLC_COLUMNS].apply(pd.to_numeric)"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "gap_data = raw_data['CurrentGap']\n",
 "gap_data = gap_data.reset_index()\n",
 "gap_data.columns = ['Date','CurrentGap']"
 ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [
 "# .copy() makes data an independent frame, so adding columns to it later does not raise SettingWithCopyWarning.\n",
 "data = fr.loc['2007-04-28':].copy()"
 ] }, {
"cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OpenHighLowClose
Datetime
2007-04-30 09:00:00-04:001494.071494.071494.071494.07
2007-04-30 09:30:00-04:001494.071495.361491.921493.42
2007-04-30 10:00:00-04:001493.911496.011492.171495.20
2007-04-30 10:30:00-04:001495.231497.161494.821497.00
2007-04-30 11:00:00-04:001496.891496.931495.711496.05
\n", "
" ], "text/plain": [ " Open High Low Close\n", "Datetime \n", "2007-04-30 09:00:00-04:00 1494.07 1494.07 1494.07 1494.07\n", "2007-04-30 09:30:00-04:00 1494.07 1495.36 1491.92 1493.42\n", "2007-04-30 10:00:00-04:00 1493.91 1496.01 1492.17 1495.20\n", "2007-04-30 10:30:00-04:00 1495.23 1497.16 1494.82 1497.00\n", "2007-04-30 11:00:00-04:00 1496.89 1496.93 1495.71 1496.05" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\850442621.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data['Date'] = pd.to_datetime(data.index.date)\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\850442621.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data['HourMin'] = [f'{str(h).zfill(2)}{str(m).zfill(2)}' for h,m in zip(data.index.hour, data.index.minute)]\n" ] } ], "source": [
 "# Calendar date of each bar as a tz-naive timestamp.\n",
 "data['Date'] = pd.to_datetime(data.index.date)\n",
 "# Time of day of each bar as a zero-padded 'HHMM' string.\n",
 "data['HourMin'] = data.index.strftime('%H%M')\n",
 "# data = data.merge(gap_data, how = 'left', on ='Date')"
 ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Open float64\n", "High float64\n", "Low float64\n", "Close float64\n", "Date 
datetime64[ns]\n", "HourMin object\n", "dtype: object" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dtypes" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\3315939868.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data1['RowNumber'] = data1.groupby('Date').cumcount() + 1\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\3315939868.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data1['HighBar'] = data1['RowNumber'].where(data1.index.isin(high_idx)) > 0\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\3315939868.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data1['LowBar'] = data1['RowNumber'].where(data1.index.isin(low_idx)) > 0\n" ] } ], "source": [
 "# Bar labels that are excluded from the analysis.\n",
 "faulty = ['0900', '1600', '1630', '1700']\n",
 "# .copy() gives data1 its own data, so the column assignments below do not raise SettingWithCopyWarning.\n",
 "data1 = data.loc[~data['HourMin'].isin(faulty)].copy()\n",
 "\n",
 "# 1-based position of each bar within its date.\n",
 "data1['RowNumber'] = data1.groupby('Date').cumcount() + 1\n",
 "\n",
 "# Flag the bar that holds each date's highest High (first one on ties).\n",
 "# RowNumber is always >= 1, so the former `RowNumber.where(mask) > 0` is just `mask`.\n",
 "high_idx = data1.groupby('Date')['High'].idxmax()\n",
 "data1['HighBar'] = data1.index.isin(high_idx)\n",
 "\n",
 "# Flag the bar that holds each date's lowest Low.\n",
 "# This used to take idxmin() of 'High', which marked the bar with the lowest High instead.\n",
 "low_idx = data1.groupby('Date')['Low'].idxmin()\n",
 "data1['LowBar'] = data1.index.isin(low_idx)"
 ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\1433357630.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", " closes = data1.groupby('Date')['Date','Close'].tail(1)\n" ] } ], "source": [
 "# First bar's Open and last bar's Close for every date.\n",
 "# Columns are selected with a list; selecting with a bare tuple is deprecated.\n",
 "opens = data1.groupby('Date')[['Date','Open']].head(1)\n",
 "closes = data1.groupby('Date')[['Date','Close']].tail(1)"
 ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [
 "df_gaps = closes.merge(opens, on = 'Date')"
 ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [
 "df_gaps['PrevClose'] = df_gaps['Close'].shift(1)"
 ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [
 "df_gaps['CurrentGap'] = ((df_gaps['Open'] - df_gaps['PrevClose']) / df_gaps['PrevClose'])"
 ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OpenHighLowCloseDateHourMinRowNumberHighBarLowBarCurrentGap
01494.071495.361491.921493.422007-04-3009301FalseFalseNaN
11493.911496.011492.171495.202007-04-3010002FalseFalseNaN
21495.231497.161494.821497.002007-04-3010303TrueFalseNaN
31496.891496.931495.711496.052007-04-3011004FalseFalseNaN
41496.101496.221493.631494.002007-04-3011305FalseFalseNaN
51493.991495.331493.731495.332007-04-3012006FalseFalseNaN
61495.551496.621495.291495.292007-04-3012307FalseFalseNaN
71495.011496.831495.011495.452007-04-3013008FalseFalseNaN
81495.421496.181494.481495.092007-04-3013309FalseFalseNaN
91495.221495.681492.811493.082007-04-30140010FalseFalseNaN
101493.051493.491489.311489.752007-04-30143011FalseFalseNaN
111490.171490.651487.551487.922007-04-30150012FalseFalseNaN
121487.871488.081482.311482.312007-04-30153013FalseTrueNaN
131483.001486.141482.261482.522007-05-0109301FalseFalse0.000465
141484.311485.001476.701478.152007-05-0110002FalseFalse0.000465
151477.861482.041477.411481.932007-05-0110303FalseFalse0.000465
161482.071482.721480.421481.942007-05-0111004FalseFalse0.000465
171481.771483.991480.611480.612007-05-0111305FalseFalse0.000465
181480.621482.151480.361480.702007-05-0112006FalseFalse0.000465
191480.721482.131480.331480.442007-05-0112307FalseFalse0.000465
201480.361480.361477.781479.682007-05-0113008FalseTrue0.000465
211479.611483.421479.551482.702007-05-0113309FalseFalse0.000465
221482.771484.671481.831484.162007-05-01140010FalseFalse0.000465
231484.101487.271483.971486.152007-05-01143011TrueFalse0.000465
241486.111486.811484.271484.272007-05-01150012FalseFalse0.000465
251484.051486.501483.971486.122007-05-01153013FalseFalse0.000465
261486.451491.901486.451491.472007-05-0209301FalseTrue0.000222
271491.761494.481491.761493.762007-05-0210002FalseFalse0.000222
281493.761497.561493.561496.942007-05-0210303FalseFalse0.000222
291496.921497.011495.681495.982007-05-0211004FalseFalse0.000222
301495.871496.081495.111495.582007-05-0211305FalseFalse0.000222
311495.691497.131495.151496.962007-05-0212006FalseFalse0.000222
321497.001497.321495.721496.262007-05-0212307FalseFalse0.000222
331496.491498.381496.491497.722007-05-0213008FalseFalse0.000222
341497.521498.371497.321497.882007-05-0213309FalseFalse0.000222
351497.821499.101497.111498.972007-05-02140010TrueFalse0.000222
361498.991499.001497.591497.762007-05-02143011FalseFalse0.000222
371497.731497.751495.641496.652007-05-02150012FalseFalse0.000222
381496.861496.971494.751495.772007-05-02153013FalseFalse0.000222
391496.021499.231496.021498.702007-05-0309301FalseTrue0.000167
401499.341500.511497.041499.222007-05-0310002FalseFalse0.000167
411498.841502.911498.311502.632007-05-0310303FalseFalse0.000167
421502.711502.921501.031502.092007-05-0311004FalseFalse0.000167
431502.081502.151500.541501.992007-05-0311305FalseFalse0.000167
441501.871502.451498.921498.922007-05-0312006FalseFalse0.000167
451498.851501.091498.781500.682007-05-0312307FalseFalse0.000167
461500.671501.421499.221499.832007-05-0313008FalseFalse0.000167
471499.861501.851499.511501.182007-05-0313309FalseFalse0.000167
481501.221502.351500.811501.722007-05-03140010FalseFalse0.000167
491502.211502.711500.551502.712007-05-03143011FalseFalse0.000167
\n", "
" ], "text/plain": [ " Open High Low Close Date HourMin RowNumber HighBar \\\n", "0 1494.07 1495.36 1491.92 1493.42 2007-04-30 0930 1 False \n", "1 1493.91 1496.01 1492.17 1495.20 2007-04-30 1000 2 False \n", "2 1495.23 1497.16 1494.82 1497.00 2007-04-30 1030 3 True \n", "3 1496.89 1496.93 1495.71 1496.05 2007-04-30 1100 4 False \n", "4 1496.10 1496.22 1493.63 1494.00 2007-04-30 1130 5 False \n", "5 1493.99 1495.33 1493.73 1495.33 2007-04-30 1200 6 False \n", "6 1495.55 1496.62 1495.29 1495.29 2007-04-30 1230 7 False \n", "7 1495.01 1496.83 1495.01 1495.45 2007-04-30 1300 8 False \n", "8 1495.42 1496.18 1494.48 1495.09 2007-04-30 1330 9 False \n", "9 1495.22 1495.68 1492.81 1493.08 2007-04-30 1400 10 False \n", "10 1493.05 1493.49 1489.31 1489.75 2007-04-30 1430 11 False \n", "11 1490.17 1490.65 1487.55 1487.92 2007-04-30 1500 12 False \n", "12 1487.87 1488.08 1482.31 1482.31 2007-04-30 1530 13 False \n", "13 1483.00 1486.14 1482.26 1482.52 2007-05-01 0930 1 False \n", "14 1484.31 1485.00 1476.70 1478.15 2007-05-01 1000 2 False \n", "15 1477.86 1482.04 1477.41 1481.93 2007-05-01 1030 3 False \n", "16 1482.07 1482.72 1480.42 1481.94 2007-05-01 1100 4 False \n", "17 1481.77 1483.99 1480.61 1480.61 2007-05-01 1130 5 False \n", "18 1480.62 1482.15 1480.36 1480.70 2007-05-01 1200 6 False \n", "19 1480.72 1482.13 1480.33 1480.44 2007-05-01 1230 7 False \n", "20 1480.36 1480.36 1477.78 1479.68 2007-05-01 1300 8 False \n", "21 1479.61 1483.42 1479.55 1482.70 2007-05-01 1330 9 False \n", "22 1482.77 1484.67 1481.83 1484.16 2007-05-01 1400 10 False \n", "23 1484.10 1487.27 1483.97 1486.15 2007-05-01 1430 11 True \n", "24 1486.11 1486.81 1484.27 1484.27 2007-05-01 1500 12 False \n", "25 1484.05 1486.50 1483.97 1486.12 2007-05-01 1530 13 False \n", "26 1486.45 1491.90 1486.45 1491.47 2007-05-02 0930 1 False \n", "27 1491.76 1494.48 1491.76 1493.76 2007-05-02 1000 2 False \n", "28 1493.76 1497.56 1493.56 1496.94 2007-05-02 1030 3 False \n", "29 1496.92 1497.01 1495.68 1495.98 
2007-05-02 1100 4 False \n", "30 1495.87 1496.08 1495.11 1495.58 2007-05-02 1130 5 False \n", "31 1495.69 1497.13 1495.15 1496.96 2007-05-02 1200 6 False \n", "32 1497.00 1497.32 1495.72 1496.26 2007-05-02 1230 7 False \n", "33 1496.49 1498.38 1496.49 1497.72 2007-05-02 1300 8 False \n", "34 1497.52 1498.37 1497.32 1497.88 2007-05-02 1330 9 False \n", "35 1497.82 1499.10 1497.11 1498.97 2007-05-02 1400 10 True \n", "36 1498.99 1499.00 1497.59 1497.76 2007-05-02 1430 11 False \n", "37 1497.73 1497.75 1495.64 1496.65 2007-05-02 1500 12 False \n", "38 1496.86 1496.97 1494.75 1495.77 2007-05-02 1530 13 False \n", "39 1496.02 1499.23 1496.02 1498.70 2007-05-03 0930 1 False \n", "40 1499.34 1500.51 1497.04 1499.22 2007-05-03 1000 2 False \n", "41 1498.84 1502.91 1498.31 1502.63 2007-05-03 1030 3 False \n", "42 1502.71 1502.92 1501.03 1502.09 2007-05-03 1100 4 False \n", "43 1502.08 1502.15 1500.54 1501.99 2007-05-03 1130 5 False \n", "44 1501.87 1502.45 1498.92 1498.92 2007-05-03 1200 6 False \n", "45 1498.85 1501.09 1498.78 1500.68 2007-05-03 1230 7 False \n", "46 1500.67 1501.42 1499.22 1499.83 2007-05-03 1300 8 False \n", "47 1499.86 1501.85 1499.51 1501.18 2007-05-03 1330 9 False \n", "48 1501.22 1502.35 1500.81 1501.72 2007-05-03 1400 10 False \n", "49 1502.21 1502.71 1500.55 1502.71 2007-05-03 1430 11 False \n", "\n", " LowBar CurrentGap \n", "0 False NaN \n", "1 False NaN \n", "2 False NaN \n", "3 False NaN \n", "4 False NaN \n", "5 False NaN \n", "6 False NaN \n", "7 False NaN \n", "8 False NaN \n", "9 False NaN \n", "10 False NaN \n", "11 False NaN \n", "12 True NaN \n", "13 False 0.000465 \n", "14 False 0.000465 \n", "15 False 0.000465 \n", "16 False 0.000465 \n", "17 False 0.000465 \n", "18 False 0.000465 \n", "19 False 0.000465 \n", "20 True 0.000465 \n", "21 False 0.000465 \n", "22 False 0.000465 \n", "23 False 0.000465 \n", "24 False 0.000465 \n", "25 False 0.000465 \n", "26 True 0.000222 \n", "27 False 0.000222 \n", "28 False 0.000222 \n", "29 False 
0.000222 \n", "30 False 0.000222 \n", "31 False 0.000222 \n", "32 False 0.000222 \n", "33 False 0.000222 \n", "34 False 0.000222 \n", "35 False 0.000222 \n", "36 False 0.000222 \n", "37 False 0.000222 \n", "38 False 0.000222 \n", "39 True 0.000167 \n", "40 False 0.000167 \n", "41 False 0.000167 \n", "42 False 0.000167 \n", "43 False 0.000167 \n", "44 False 0.000167 \n", "45 False 0.000167 \n", "46 False 0.000167 \n", "47 False 0.000167 \n", "48 False 0.000167 \n", "49 False 0.000167 " ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1 = data1.merge(df_gaps[['Date','CurrentGap']], how = 'left', on = 'Date')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEWCAYAAABollyxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVfUlEQVR4nO3dfbBkdX3n8ffHQdDIBnmYqMswDChmxYdCHTGWWTUJ4Bh3B2sD62BMIHFrNqkllZS7a8YyCwZjhZjdSlU2ZAMKFaJr0GiVe1cGCeEhyYYgMzwIO5CRYUSYqUQJwyaVQMCB7/7RB2zaO9y+c0/33P7xflV1zTm/c05/f2fu6U+fPuf06VQVkqR2Pe9Ad0CSNFkGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4w460B0YddRRR9WaNWsOdDckaabccsstf1tVK+ebtuyCfs2aNWzduvVAd0OSZkqSb+5rmoduJKlxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY1bdl+YejZrNl256GXuu/DdE+iJJM0O9+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0bK+iTrEuyPcmOJJvmmf7BJHcluSPJtUmOHZr2RJLbu8dcn52XJC1swV+YSrICuAg4FdgFbEkyV1V3Dc12G7C2qh5J8vPAJ4D3dtMeraqT+u22JGlc4+zRnwzsqKqdVfU4cAVw+vAMVXV9VT3Sjd4ErOq3m5Kk/TVO0B8NPDA0vqtr25cPAFcNjb8gydYkNyV5z+K7KElail5/HDzJ+4G1wNuHmo+tqt1JjgeuS3JnVd07stxGYCPA6tWr++ySJD3njbNHvxs4Zmh8Vdf2DElOAT4CrK+qx55qr6rd3b87gRuA148uW1WXVNXaqlq7cuXKRa2AJOnZjRP0W4ATkhyX5GBgA/CMq2eSvB64mEHIf3uo/fAkh3TDRwFvBYZP4kqSJmzBQzdVtTfJucDVwArgsqraluQCYGtVzQG/CRwK/FESgPuraj3wKuDiJE8yeFO5cORqHUnShI11jL6qNgObR9rOGxo+ZR/L3Qi8dikdlCQtjd+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJalyvvzAlSRr
fmk1XLnqZ+y5896KXcY9ekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxo0V9EnWJdmeZEeSTfNM/2CSu5LckeTaJMcOTTs7yT3d4+w+Oy9JWtiCQZ9kBXAR8C7gROCsJCeOzHYbsLaqXgd8AfhEt+wRwPnAm4GTgfOTHN5f9yVJCxlnj/5kYEdV7ayqx4ErgNOHZ6iq66vqkW70JmBVN/xO4Jqq2lNVDwPXAOv66bokaRzjBP3RwAND47u6tn35AHDVfi4rSepZr78Zm+T9wFrg7YtcbiOwEWD16tV9dkmSnvPG2aPfDRwzNL6qa3uGJKcAHwHWV9Vji1m2qi6pqrVVtXblypXj9l2SNIZxgn4LcEKS45IcDGwA5oZnSPJ64GIGIf/toUlXA6clObw7CXta1yZJmpIFD91U1d4k5zII6BXAZVW1LckFwNaqmgN+EzgU+KMkAPdX1fqq2pPkYwzeLAAuqKo9E1kTSdK8xjpGX1Wbgc0jbecNDZ/yLMteBly2vx2UJC2N34yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS48YK+iTrkmxPsiPJpnmmvy3JrUn2JjljZNoTSW7vHnN9dVySNJ6DFpohyQrgIuBUYBewJclcVd01NNv9wDnAf5rnKR6tqpOW3lVJ0v5YMOiBk4EdVbUTIMkVwOnA00FfVfd1056cQB8lSUswzqGbo4EHhsZ3dW3jekGSrUluSvKexXROkrR04+zRL9WxVbU7yfHAdUnurKp7h2dIshHYCLB69eopdEmSnjvG2aPfDRwzNL6qaxtLVe3u/t0J3AC8fp55LqmqtVW1duXKleM+tSRpDOME/RbghCTHJTkY2ACMdfVMksOTHNINHwW8laFj+5KkyVsw6KtqL3AucDVwN/D5qtqW5IIk6wGSvCnJLuBM4OIk27rFXwVsTfI14HrgwpGrdSRJEzbWMfqq2gxsHmk7b2h4C4NDOqPL3Qi8dol9lCQtgd+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuPGCvok65JsT7IjyaZ5pr8tya1J9iY5Y2Ta2Unu6R5n99VxSdJ4Fgz6JCuAi4B3AScCZyU5cWS2+4FzgM+OLHsEcD7wZuBk4Pwkhy+925KkcY2zR38ysKOqdlbV48AVwOnDM1TVfVV1B/DkyLLvBK6pqj1V9TBwDbCuh35LksY0TtAfDTwwNL6raxvHUpaVJPVgWZyMTbIxydYkWx988MED3R1Jaso4Qb8bOGZofFXXNo6xlq2qS6pqbVWtXbly5ZhPLUkaxzhBvwU4IclxSQ4GNgBzYz7/1cBpSQ7vTsKe1rVJkqZkwaCvqr3AuQwC+m7g81W1LckFSdYDJHlTkl3AmcDFSbZ1y+4BPsbgzWILcEHXJkmakoPGmamqNgObR9rOGxrewuCwzHzLXgZctoQ+SpKWYFmcjJUkTY5BL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcWN+Mfa5Zs+nKRS9z34XvnkBPJGnp3KOXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc4vTEnSiNa+NOkevSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjfM6+gOktet0JS1f7tFLUuPGCvok65JsT7IjyaZ5ph+
S5HPd9K8mWdO1r0nyaJLbu8fv9dx/SdICFjx0k2QFcBFwKrAL2JJkrqruGprtA8DDVfWKJBuA3wDe2027t6pO6rfbkqRxjXOM/mRgR1XtBEhyBXA6MBz0pwMf7Ya/APxOkvTYT0ny3NZ+GufQzdHAA0Pju7q2eeepqr3A3wFHdtOOS3Jbkj9N8i+X2F9J0iJN+qqbvwZWV9VDSd4IfCnJq6vq74dnSrIR2AiwevXqCXdJkp5bxtmj3w0cMzS+qmubd54kBwGHAQ9V1WNV9RBAVd0C3Au8crRAVV1SVWurau3KlSsXvxaSpH0aZ49+C3BCkuMYBPoG4H0j88wBZwN/CZwBXFdVlWQlsKeqnkhyPHACsLO33mtBHtOUtGDQV9XeJOcCVwMrgMuqaluSC4CtVTUHXAp8OskOYA+DNwOAtwEXJPkO8CTwc1W1ZxIrIkma31jH6KtqM7B5pO28oeF/As6cZ7kvAl9cYh8lSUvgN2MlqXEGvSQ1zqCXpMYZ9JLUOG9TLKkXXsq7fLlHL0mNM+glqXEGvSQ1zmP0UuM8di6DXr2YVpgYWtLieehGkhpn0EtS4zx0I43wMJRa4x69JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxYwV9knVJtifZkWTTPNMPSfK5bvpXk6wZmvbhrn17knf22HdJ0hgWDPokK4CLgHcBJwJnJTlxZLYPAA9X1SuA3wJ+o1v2RGAD8GpgHfC73fNJkqZknD36k4EdVbWzqh4HrgBOH5nndODybvgLwI8lSdd+RVU9VlXfAHZ0zydJmpJxgv5o4IGh8V1d27zzVNVe4O+AI8dcVpI0QamqZ58hOQNYV1X/rhv/KeDNVXXu0Dz/t5tnVzd+L/Bm4KPATVX1ma79UuCqqvrCSI2NwMZu9AeB7Ytcj6OAv13kMvvDOsuzhnWWbw3rTK/GsVW1cr4JB42x8G7gmKHxVV3bfPPsSnIQcBjw0JjLUlWXAJeM0Zd5JdlaVWv3d3nrzHYN6yzfGtZZHjXGOXSzBTghyXFJDmZwcnVuZJ454Oxu+Azguhp8VJgDNnRX5RwHnADc3E/XJUnjWHCPvqr2JjkXuBpYAVxWVduSXABsrao54FLg00l2AHsYvBnQzfd54C5gL/AfquqJCa2LJGke4xy6oao2A5tH2s4bGv4n4Mx9LPtx4ONL6OM49vuwj3WaqGGd5VvDOsugxoInYyVJs81bIEhS4wx6SWqcQS9JjRvrZOxyk+QwBvfOeepbtruBq6vq/x2wTknSMjVzJ2OT/DRwPvDHfPfLV6uAU4Ffrao/6LHWVN5QpljnXzC4/9BwnbmqunuWajRaZ+LbQIPbs6+bMc3ioZuPAG+sqp+vql/rHj8HrAV+pa8i3RvKrcA7gO/rHj8C3NJNm7U6v8zghnRh8KW1m7vhP5zv1tPLtUajdSa+DTS4Pfu6WYyqmqkH8HXgsHnaDwPu6bHOduDF87QfDnx9But8HXj+PO0H9/X/No0ajdaZ+DbQ4Pbs62YRj1k8Rv9x4NYkf8x374y5msGhm4/1WCfAfMe1nuymzVqdJ4F/DnxzpP1l3bRZqdFinWlsA61tz75uFmHmgr6qLk8yB7yT7x7PugH4cFU93GOpab2hTKvOLwHXJrlnpM4rgHP3tdAyrNFinWlsA61tz75uFmHmTsbOJ8kRVbVnAs97OM98Q3nqZE+fbyjTrPM8Bj/8MlxnS/V4/6Fp1Gi0zsS3gQa3Z1834z7/rAV9krcCn2LwkeZngV8DjmdwPOvfVtVf9lzvJQz951fVt/p8/n3UnNQbV/jejenm6nEj6DZYqurJ7m6nrwHu63t9prEuXZ2prE9Xa6rb2qS2s+65fd0srsZkt7O+TlpM68HgjPRrgbcwuDH/D3ftbwD+osc6JwE3AXcD1wB/AvxV1/aGHuu8tauxjcGPtVw
D3MvgI9xbeqxzGoOfcryKwRvlp4CvdG2n9VTjPcC3gL9mcKnYV4FrGfyy2L+epXWZ8vpMfFub4nbm62YZbme9/GdM8wHcNjR898i0W3usczuDX9Iabf8h4Gs91pnWG9fdwJp52o8b/X9cyt8GeGn3nH8P/GDXfiyDW1rPzLpMeX0mvq1NcTvzdbMMt7OZOxnLM6/9//DItIN7rPOiqvrqaGNV3ZTkRT3WeX5V3QmQ5MGq+j9dnVuTvLDHOgcx2EMYtRt4fl9FqupvAJLcX1Xbu7ZvPvXRtCdTWReY2vpMY1ub1nbm62Y/THo7m8Wg/y9Jvq+qHqmqLz3VmOTlQG/figWuSnJl95xPnQk/BvhpBh/d+jKtN67LgC1JruCZ67OBwQ/H9CLJ86rqqfMnT7WtYAbXBaa2PtPY1qa1nfm62Q+T3s5m7mTsNCV5F/N/LXnzvpdadI31wJ9U1SMj7S8HfqKqPtFjrVcx//rc1dPzvwm4swY/RDPcvobBR+vP9FGne84TgfVMaF26GtNcnx9n/vXpZVub8nbm62Zxzz/x7Wzmgr77KHMO8BMM7nHzBINvlv1eVd1w4HomaT5JfqCqvt1KnVk0i/e6uZTBlwl+Hbge+HLX9itJfqGvIkkOS3JhkruT7EnyUDd8YZIXT6DOX02yzgJ9uGqWaiT5/iS/nuTTSc4amfa7E6rzvgnWeWmS/5HkoiRHJvlokjuSfD7JyyZY484+a3R1jhh9ADcnObwbnlSdIydUZ93Q8GFJPtX9bT7bXULad40XJ7m09xozuEd/R1W9bmj8pqr6oSSHALdX1at6qnM1cB1w+dCJkpcy+DTxo1V12oTrnA38WI913rCvScCXq2rJL/Zp1OjqfBG4h8Elez8LfAd4X1U9luTWqtpXP5Zrna8AVwIvAt4H/E/gswwuuzulqk6fhRpdnSf53q/yr2JwQrOq6vgZq/P03znJp4C/AT4J/Bvg7VX1nlmo0cslSNN8ALcAL6/vXkr1Z0PT7uqxzvb9mbaM6zzB4A3l+nkej85Kja7O7SPjHwH+AjiSni+xnVKd24aG73+2PiznGt1z/UcGJ11fO9T2jb6e/wDUuXVoeHR76OtvM/Eas3jVzX8Grk/yGIOrhjYAJFnJ4DBOX76Z5EMM9rS/1dV4CYM9+geebcFlWudu4N9X1T2jE5L0VWcaNQAOGbpKgar6eJLdwJ8Bh85gneFDqKNXjq2YoRpU1X9L8jngt7q/+fnMf/OxmagD/ECSDzL4VPr9SVJdAtPfoe+J15i5Y/RVdR2D+1tczuCj6E9mcD/no6rqQz2Wei+DPbc/7Y6d72Fw87QjgDNnsM5H2fffu69zG9OoAfC/gR8dbqiq32ewl/f4DNb5X0kO7Z7/6d9USPIKBrfjnZUadM+/q6rOZLAdX8PgXvG9m1KdTwL/jMEb++XAUfD04dXbZ6ZG3x91Jv0APsTgm2S/DLy/e2zq/kM2TakPP2Od5VfDOsuvBvBC4DWt1JnVv80snoz9OvDqqvrOSPvBwLaqOmEKfbi/qlZbZ3nVsM7yrWGdA1tjFo/RT+XHIJLcsa9JQC+XPLVWp6V1aa1OS+vSWp1p1JjFoP8lpvNjEC9hcC7g4ZH2ADda54DVsM7yrWGdZVpj5oK+qr6S5JVM/scgvgwcWlW3j05IcoN1DlgN6yzfGtZZpjVm7hi9JGlxZu7ySknS4hj0ktQ4g15NS/IPI+PnJPmdHp+/knxmaPygJA8m+XI3vj7Jpr7qSftj5k7GSstBkoOqai/wj8Brkrywqh4FTmVwcQAAVTUHzB2gbkqAe/R6DkuyJsl13S1hr02yumv//SRnDM33D92/70jy50nmgOEfndgMvLsbPgv4w6Fln/4E0T3vbye5McnO4RrSJBn0at0Lk9z+1AO4YGjaf2dwM7nXMbht72+P8XxvAH6xql451HYFsCHJC4DXAd/zm6lDXgb8MPCvgAvHXw1p/3noRq17tKpOemokyTnA2m7
0LQzu+Q3waWCcn5+7uaq+MdxQVXdk8LNvZzHYu382X6rB3TDvSk8/KiEtxKCXvtdeuk+7Gfx05fAPNP/jPpaZA/4r8A4GdyPdl8eGhrP/XZTG56EbPZfdSPd7BsBPAn/eDd8HvLEbXg88f4znugz41aq6s88OSn0w6PVc9gvAz3Q3lfop4Be79k8Cb0/yNQaHd/a1F/+0GtwbfZxj/NLUeQsESWqce/SS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxv1/Yhd40bzY07AAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# Fraction of bars at each time of day that were flagged as the day's high bar.\n",
 "ax = data1.groupby('HourMin')['HighBar'].mean().plot(kind='bar')\n",
 "ax.set(xlabel='Hour and Minute', ylabel='Probability of HOD Occurrence', title='Distribution of HOD Occurrence');"
 ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "import pandas as pd\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Per-time-of-day HOD frequency, ordered by 'HourMin', then accumulated.\n",
 "hod_prob = data1.groupby('HourMin')['HighBar'].mean()\n",
 "hod_prob_sorted = hod_prob.sort_index()\n",
 "cumulative_prob = hod_prob_sorted.cumsum()\n",
 "\n",
 "# Draw the cumulative curve on the current axes and label it in one call.\n",
 "plt.plot(cumulative_prob.index, cumulative_prob.values)\n",
 "plt.gca().set(\n",
 "    xlabel='Hour and Minute',\n",
 "    ylabel='Cumulative Probability of HOD Occurrence',\n",
 "    title='Cumulative Distribution of HOD Occurrence',\n",
 ")\n",
 "plt.xticks(rotation=45)\n",
 "plt.grid()\n",
 "plt.tight_layout()\n",
 "plt.show()"
 ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# Fraction of bars at each time of day that were flagged as the day's low bar.\n",
 "ax = data1.groupby('HourMin')['LowBar'].mean().plot(kind='bar')\n",
 "ax.set(xlabel='Hour and Minute', ylabel='Probability of LOD Occurrence', title='Distribution of LOD Occurrence');"
 ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\1950565345.py:28: UserWarning: FixedFormatter should only be used together with FixedLocator\n", " axs[0, 0].set_xticklabels(hod_prob.index, rotation=45)\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\1950565345.py:42: UserWarning: FixedFormatter should only be used together with FixedLocator\n", " axs[0, 1].set_xticklabels(lod_prob.index, rotation=45)\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\1950565345.py:57: UserWarning: FixedFormatter should only be used together with FixedLocator\n", " axs[1, 0].set_xticklabels(cumulative_hod.index, rotation=45)\n", "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_24704\\1950565345.py:71: UserWarning: FixedFormatter should only be used together with FixedLocator\n", " axs[1, 1].set_xticklabels(cumulative_lod.index, rotation=45)\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "import pandas as pd\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "\n",
 "def plot_occurrence(ax, prob, event, cumulative=False, color=None):\n",
 "    \"\"\"Draw one panel of the HOD/LOD occurrence figure.\n",
 "\n",
 "    ax: matplotlib Axes to draw on.\n",
 "    prob: Series indexed by 'HourMin' whose values are fractions (1.0 == 100%).\n",
 "    event: text used in the title and y-axis label, e.g. 'HOD' or 'LOD'.\n",
 "    cumulative: draw a line and prefix the labels with 'Cumulative' instead of drawing bars.\n",
 "    color: optional matplotlib color; None keeps matplotlib's default.\n",
 "    \"\"\"\n",
 "    prefix = 'Cumulative ' if cumulative else ''\n",
 "    if cumulative:\n",
 "        ax.plot(prob.index, prob.values, color=color)\n",
 "    else:\n",
 "        ax.bar(prob.index, prob.values, color=color)\n",
 "    ax.set_xlabel('Hour and Minute')\n",
 "    ax.set_ylabel(f'{prefix}Probability of {event} Occurrence')\n",
 "    ax.set_title(f'{prefix}Distribution of {event} Occurrence')\n",
 "    # Rotate the tick labels in place; set_xticklabels() without fixed tick\n",
 "    # locations emits a FixedFormatter UserWarning.\n",
 "    ax.tick_params(axis='x', labelrotation=45)\n",
 "    ax.grid()\n",
 "    # Format the y axis as whole percentages\n",
 "    ax.yaxis.set_major_formatter('{:.0%}'.format)\n",
 "    # Add a data label at every bar / point\n",
 "    for x, y in zip(prob.index, prob.values):\n",
 "        ax.text(x, y, f'{y:.0%}', ha='center', va='bottom')\n",
 "\n",
 "\n",
 "# Fraction of bars at each 'HourMin' flagged as High of Day (HOD) / Low of Day (LOD)\n",
 "hod_prob = data1.groupby('HourMin')['HighBar'].mean()\n",
 "lod_prob = data1.groupby('HourMin')['LowBar'].mean()\n",
 "\n",
 "# Sort by 'HourMin' before accumulating\n",
 "hod_prob_sorted = hod_prob.sort_index()\n",
 "lod_prob_sorted = lod_prob.sort_index()\n",
 "\n",
 "# Cumulative distributions\n",
 "cumulative_hod = hod_prob_sorted.cumsum()\n",
 "cumulative_lod = lod_prob_sorted.cumsum()\n",
 "\n",
 "# 2x2 grid: bar distributions on the top row, cumulative curves on the bottom row\n",
 "fig, axs = plt.subplots(2, 2, figsize=(16, 8))\n",
 "\n",
 "plot_occurrence(axs[0, 0], hod_prob, 'HOD')\n",
 "plot_occurrence(axs[0, 1], lod_prob, 'LOD', color='orange')\n",
 "plot_occurrence(axs[1, 0], cumulative_hod, 'HOD', cumulative=True)\n",
 "plot_occurrence(axs[1, 1], cumulative_lod, 'LOD', cumulative=True, color='orange')\n",
 "\n",
 "plt.tight_layout()\n",
 "plt.show()\n"
 ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Open', 'High', 'Low', 'Close', 'Date', 'HourMin', 'CurrentGap',\n", " 'RowNumber', 'HighBar', 'LowBar'],\n", " dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1.columns" ] }, { "cell_type": "code", 
"execution_count": 15, "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Number of equal-frequency buckets that pd.qcut splits 'CurrentGap' into\n",
 "N_GAP_BUCKETS = 4\n",
 "\n",
 "# Expects a DataFrame 'data1' with a numeric 'CurrentGap' column.\n",
 "# assign() returns a new frame instead of writing a column into what may be a slice\n",
 "# of another DataFrame, which is what triggered the SettingWithCopyWarning here.\n",
 "# Rows whose 'CurrentGap' is NaN get a NaN category.\n",
 "data1 = data1.assign(CurrentGapCat=pd.qcut(data1['CurrentGap'], N_GAP_BUCKETS))\n"
 ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OpenHighLowCloseDateHourMinCurrentGapRowNumberHighBarLowBarCurrentGapCat
01492.191492.911492.191492.872007-04-271200NaN1FalseTrueNaN
11492.541494.461492.301493.322007-04-271230NaN2FalseFalseNaN
21493.301494.451493.071494.412007-04-271300NaN3FalseFalseNaN
31494.601496.551494.311495.042007-04-271330NaN4FalseFalseNaN
41495.011496.971494.651495.852007-04-271400NaN5FalseFalseNaN
....................................
571684565.234567.314562.064564.472023-07-191330-0.0024849FalseFalse(-0.0757, -0.00219]
571694564.454565.744557.484560.632023-07-191400-0.00248410FalseTrue(-0.0757, -0.00219]
571704560.614568.094560.614567.862023-07-191430-0.00248411FalseFalse(-0.0757, -0.00219]
571714567.824574.664567.614572.092023-07-191500-0.00248412FalseFalse(-0.0757, -0.00219]
571724572.044572.044565.594565.652023-07-191530-0.00248413FalseFalse(-0.0757, -0.00219]
\n", "

52956 rows × 11 columns

\n", "
" ], "text/plain": [ " Open High Low Close Date HourMin CurrentGap \\\n", "0 1492.19 1492.91 1492.19 1492.87 2007-04-27 1200 NaN \n", "1 1492.54 1494.46 1492.30 1493.32 2007-04-27 1230 NaN \n", "2 1493.30 1494.45 1493.07 1494.41 2007-04-27 1300 NaN \n", "3 1494.60 1496.55 1494.31 1495.04 2007-04-27 1330 NaN \n", "4 1495.01 1496.97 1494.65 1495.85 2007-04-27 1400 NaN \n", "... ... ... ... ... ... ... ... \n", "57168 4565.23 4567.31 4562.06 4564.47 2023-07-19 1330 -0.002484 \n", "57169 4564.45 4565.74 4557.48 4560.63 2023-07-19 1400 -0.002484 \n", "57170 4560.61 4568.09 4560.61 4567.86 2023-07-19 1430 -0.002484 \n", "57171 4567.82 4574.66 4567.61 4572.09 2023-07-19 1500 -0.002484 \n", "57172 4572.04 4572.04 4565.59 4565.65 2023-07-19 1530 -0.002484 \n", "\n", " RowNumber HighBar LowBar CurrentGapCat \n", "0 1 False True NaN \n", "1 2 False False NaN \n", "2 3 False False NaN \n", "3 4 False False NaN \n", "4 5 False False NaN \n", "... ... ... ... ... \n", "57168 9 False False (-0.0757, -0.00219] \n", "57169 10 False True (-0.0757, -0.00219] \n", "57170 11 False False (-0.0757, -0.00219] \n", "57171 12 False False (-0.0757, -0.00219] \n", "57172 13 False False (-0.0757, -0.00219] \n", "\n", "[52956 rows x 11 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "52956" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(data1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Open 0\n", "High 0\n", "Low 0\n", "Close 0\n", "Date 0\n", "HourMin 0\n", "CurrentGap 36490\n", "RowNumber 0\n", "HighBar 0\n", "LowBar 0\n", "CurrentGapCat 36490\n", "dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1.isnull().sum()" ] }, { "cell_type": "code", 
"execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[NaN, (0.000478, 0.00294], (0.00294, 0.0478], (-0.00219, 0.000478], (-0.0757, -0.00219]]\n", "Categories (4, interval[float64, right]): [(-0.0757, -0.00219] < (-0.00219, 0.000478] < (0.000478, 0.00294] < (0.00294, 0.0478]]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1['CurrentGapCat'].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "\n",
 "# Function to convert a float to a percentage string with desired precision\n",
 "def float_to_percent(value, precision=2):\n",
 "    \"\"\"Return `value` formatted as a percentage, e.g. 0.0123 -> '1.23%'.\"\"\"\n",
 "    return \"{:.{precision}%}\".format(value, precision=precision)\n",
 "\n",
 "\n",
 "# One (interval, (lower, upper)) pair per gap bucket, in category order.\n",
 "# Built from the categorical's categories instead of looping over every row:\n",
 "# rows without a 'CurrentGap' hold NaN, which has no .left/.right, so the\n",
 "# row-by-row version raised AttributeError on the first such row.\n",
 "tuples = [\n",
 "    (gap, (float_to_percent(gap.left, precision=2), float_to_percent(gap.right, precision=2)))\n",
 "    for gap in data1['CurrentGapCat'].cat.categories\n",
 "]\n",
 "\n",
 "# Create a 1x2 grid of subplots with shared Y-axis\n",
 "fig, axs = plt.subplots(1, 2, figsize=(15, 4), sharey=True)\n",
 "\n",
 "# Plot one cumulative HOD curve and one cumulative LOD curve per gap bucket\n",
 "for gap, (lower, upper) in tuples:\n",
 "    # Keep only the rows that fall in this bucket (NaN rows never match)\n",
 "    df_use = data1.loc[data1['CurrentGapCat'] == gap]\n",
 "\n",
 "    # Probability of High of Day (HOD) / Low of Day (LOD) occurrence for each 'HourMin'\n",
 "    hod_prob = df_use.groupby('HourMin')['HighBar'].mean()\n",
 "    lod_prob = df_use.groupby('HourMin')['LowBar'].mean()\n",
 "\n",
 "    # Cumulative distributions, ordered by 'HourMin'\n",
 "    cumulative_hod = hod_prob.sort_index().cumsum()\n",
 "    cumulative_lod = lod_prob.sort_index().cumsum()\n",
 "\n",
 "    # Use a plain string for the legend entry instead of the raw (lower, upper) tuple\n",
 "    lbl = f'{lower} to {upper}'\n",
 "    axs[0].plot(cumulative_hod.index, cumulative_hod.values, label=lbl)\n",
 "    axs[1].plot(cumulative_lod.index, cumulative_lod.values, label=lbl)\n",
 "\n",
 "# Left panel is HOD, right panel is LOD; both get the same decoration\n",
 "for ax, name in zip(axs, ('HOD', 'LOD')):\n",
 "    ax.set_xlabel('Hour and Minute')\n",
 "    ax.set_ylabel(f'Cumulative Probability of {name} Occurrence')\n",
 "    ax.set_title(f'Cumulative Distribution of {name} Occurrence')\n",
 "    # Rotate the existing tick labels; set_xticklabels() would relabel the ticks\n",
 "    # using only the last bucket's index.\n",
 "    ax.tick_params(axis='x', labelrotation=45)\n",
 "    ax.grid()\n",
 "    # Format labels as percentages\n",
 "    ax.yaxis.set_major_formatter('{:.0%}'.format)\n",
 "    ax.legend()\n",
 "\n",
 "plt.tight_layout()\n",
 "plt.show()\n"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data1.query('HourMin == \"0930\" & HighBar == True')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tuples" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py39", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }