{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import model_day\n", "import model_30m\n", "import model_1h\n", "import model_90m" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 3.22it/s]\n", "Getting release dates: 100%|██████████| 8/8 [00:02<00:00, 3.78it/s]\n", "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 3996.48it/s]\n", "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 888.11it/s]\n", "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 4.14it/s]\n", "Getting release dates: 100%|██████████| 8/8 [00:01<00:00, 4.32it/s]\n", "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 7985.35it/s]\n", "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n", "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 999.03it/s]\n", "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 4.55it/s]\n", "Getting release dates: 100%|██████████| 8/8 [00:02<00:00, 3.26it/s]\n", "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 3995.05it/s]\n", "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n", "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 930.93it/s]\n", "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 5.78it/s]\n", "Getting 
release dates: 100%|██████████| 8/8 [00:01<00:00, 5.24it/s]\n", "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 3996.00it/s]\n", "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n", "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 999.18it/s]\n" ] } ], "source": [ "# Each module's get_data() result unpacks into three items; only the middle one is used below.\n", "(\n", "    (_, df_final_day, _),\n", "    (_, df_final_30m, _),\n", "    (_, df_final_1h, _),\n", "    (_, df_final_90m, _),\n", ") = [module.get_data() for module in (model_day, model_30m, model_1h, model_90m)]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "LR Model: 100%|██████████| 1177/1177 [00:03<00:00, 391.99it/s]\n", "d:\\Projects\\gamedayspx\\model_day.py:63: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n", "CLF Model: 100%|██████████| 1077/1077 [00:08<00:00, 120.80it/s]\n", "LR Model: 100%|██████████| 1177/1177 [00:03<00:00, 367.13it/s]\n", "d:\\Projects\\gamedayspx\\model_30m.py:61: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n", "CLF Model: 100%|██████████| 1077/1077 [00:10<00:00, 105.72it/s]\n", "LR Model: 100%|██████████| 1177/1177 [00:03<00:00, 351.68it/s]\n", "d:\\Projects\\gamedayspx\\model_1h.py:60: SettingWithCopyWarning: 
\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n", "CLF Model: 100%|██████████| 1077/1077 [00:10<00:00, 102.81it/s]\n", "LR Model: 100%|██████████| 1177/1177 [00:03<00:00, 368.34it/s]\n", "d:\\Projects\\gamedayspx\\model_90m.py:60: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n", "CLF Model: 100%|██████████| 1077/1077 [00:10<00:00, 106.84it/s]\n" ] } ], "source": [ "# Run each module's walk-forward validation on its NaN-free frame; only the first of the\n", "# three returned items (the per-day results frame) is kept.\n", "(\n", "    (res_day, _, _),\n", "    (res_30m, _, _),\n", "    (res_1h, _, _),\n", "    (res_90m, _, _),\n", ") = [\n", "    module.walk_forward_validation_seq(frame.dropna(axis=0), 'Target_clf', 'Target', 100, 1)\n", "    for module, frame in (\n", "        (model_day, df_final_day),\n", "        (model_30m, df_final_30m),\n", "        (model_1h, df_final_1h),\n", "        (model_90m, df_final_90m),\n", "    )\n", "]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TruePredicted
index
2019-04-23False0.798423
2019-04-24False0.235411
2019-04-25True0.479671
2019-04-26True0.180924
2019-04-29True0.457531
.........
2023-07-26False0.863622
2023-07-27True0.875761
2023-07-28True0.506219
2023-07-31False0.273154
2023-08-01False0.238163
\n", "

1077 rows × 2 columns

\n", "
" ], "text/plain": [ " True Predicted\n", "index \n", "2019-04-23 False 0.798423\n", "2019-04-24 False 0.235411\n", "2019-04-25 True 0.479671\n", "2019-04-26 True 0.180924\n", "2019-04-29 True 0.457531\n", "... ... ...\n", "2023-07-26 False 0.863622\n", "2023-07-27 True 0.875761\n", "2023-07-28 True 0.506219\n", "2023-07-31 False 0.273154\n", "2023-08-01 False 0.238163\n", "\n", "[1077 rows x 2 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res_day" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Add per-day evaluation flags to each model's results frame (columns are added in place).\n", "for df in [res_day, res_30m, res_1h, res_90m]:\n", "    # High confidence = predicted value above 0.6 or at/below 0.4; vectorized instead of a per-row loop.\n", "    df['HighConfidence'] = (df['Predicted'] > 0.6) | (df['Predicted'] <= 0.4)\n", "    df['PredDirection'] = df['Predicted'] > 0.5\n", "    df['Correct'] = df['PredDirection'] == df['True']\n", "    df['RedDays'] = df['True'] == False\n", "    df['GreenDays'] = df['True'] == True" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TruePredictedHighConfidencePredDirectionRedDaysGreenDaysCorrect
index
2019-04-23False0.798423TrueTrueTrueFalseFalse
2019-04-24False0.235411TrueFalseTrueFalseTrue
2019-04-25True0.479671FalseFalseFalseTrueFalse
2019-04-26True0.180924TrueFalseFalseTrueFalse
2019-04-29True0.457531FalseFalseFalseTrueFalse
........................
2023-07-26False0.863622TrueTrueTrueFalseFalse
2023-07-27True0.875761TrueTrueFalseTrueTrue
2023-07-28True0.506219FalseTrueFalseTrueTrue
2023-07-31False0.273154TrueFalseTrueFalseTrue
2023-08-01False0.238163TrueFalseTrueFalseTrue
\n", "

1077 rows × 7 columns

\n", "
" ], "text/plain": [ " True Predicted HighConfidence PredDirection RedDays \\\n", "index \n", "2019-04-23 False 0.798423 True True True \n", "2019-04-24 False 0.235411 True False True \n", "2019-04-25 True 0.479671 False False False \n", "2019-04-26 True 0.180924 True False False \n", "2019-04-29 True 0.457531 False False False \n", "... ... ... ... ... ... \n", "2023-07-26 False 0.863622 True True True \n", "2023-07-27 True 0.875761 True True False \n", "2023-07-28 True 0.506219 False True False \n", "2023-07-31 False 0.273154 True False True \n", "2023-08-01 False 0.238163 True False True \n", "\n", " GreenDays Correct \n", "index \n", "2019-04-23 False False \n", "2019-04-24 False True \n", "2019-04-25 True False \n", "2019-04-26 True False \n", "2019-04-29 True False \n", "... ... ... \n", "2023-07-26 False False \n", "2023-07-27 True True \n", "2023-07-28 True True \n", "2023-07-31 False True \n", "2023-08-01 False True \n", "\n", "[1077 rows x 7 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res_day" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# Concatenate the per-model frames side by side in day, 30m, 1h, 90m order: the positional\n", "# column labels assigned to all_res1 further down (_day, _30m, _1h, _90m) rely on this order.\n", "# GreenDays/RedDays are taken once, from res_day.\n", "all_res = pd.concat(\n", "    [res.drop(columns=['GreenDays','RedDays']) for res in (res_day, res_30m, res_1h, res_90m)]\n", "    + [res_day[['GreenDays','RedDays']]],\n", "    axis=1,\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "all_res1 = all_res[['HighConfidence','PredDirection','Correct','GreenDays','RedDays']]" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HighConfidenceHighConfidenceHighConfidenceHighConfidencePredDirectionPredDirectionPredDirectionPredDirectionCorrectCorrectCorrectCorrectGreenDaysRedDays
2019-04-23TrueTrueTrueTrueTrueFalseTrueTrueFalseTrueFalseFalseFalseTrue
2019-04-24TrueTrueTrueTrueFalseFalseFalseFalseTrueTrueTrueTrueFalseTrue
2019-04-25FalseTrueTrueFalseFalseFalseTrueTrueFalseFalseTrueTrueTrueFalse
2019-04-26TrueTrueTrueTrueFalseTrueTrueTrueFalseTrueTrueTrueTrueFalse
2019-04-29FalseTrueTrueTrueFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
.............................................
2023-07-26TrueTrueTrueTrueTrueTrueTrueTrueFalseFalseFalseFalseFalseTrue
2023-07-27TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueFalse
2023-07-28FalseFalseTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueFalse
2023-07-31TrueTrueTrueTrueFalseFalseFalseFalseTrueTrueTrueTrueFalseTrue
2023-08-01TrueTrueTrueTrueFalseFalseFalseFalseTrueTrueTrueTrueFalseTrue
\n", "

1077 rows × 14 columns

\n", "
" ], "text/plain": [ " HighConfidence HighConfidence HighConfidence HighConfidence \\\n", "2019-04-23 True True True True \n", "2019-04-24 True True True True \n", "2019-04-25 False True True False \n", "2019-04-26 True True True True \n", "2019-04-29 False True True True \n", "... ... ... ... ... \n", "2023-07-26 True True True True \n", "2023-07-27 True True True True \n", "2023-07-28 False False True True \n", "2023-07-31 True True True True \n", "2023-08-01 True True True True \n", "\n", " PredDirection PredDirection PredDirection PredDirection \\\n", "2019-04-23 True False True True \n", "2019-04-24 False False False False \n", "2019-04-25 False False True True \n", "2019-04-26 False True True True \n", "2019-04-29 False False False False \n", "... ... ... ... ... \n", "2023-07-26 True True True True \n", "2023-07-27 True True True True \n", "2023-07-28 True True True True \n", "2023-07-31 False False False False \n", "2023-08-01 False False False False \n", "\n", " Correct Correct Correct Correct GreenDays RedDays \n", "2019-04-23 False True False False False True \n", "2019-04-24 True True True True False True \n", "2019-04-25 False False True True True False \n", "2019-04-26 False True True True True False \n", "2019-04-29 False False False False True False \n", "... ... ... ... ... ... ... 
\n", "2023-07-26 False False False False False True \n", "2023-07-27 True True True True True False \n", "2023-07-28 True True True True True False \n", "2023-07-31 True True True True False True \n", "2023-08-01 True True True True False True \n", "\n", "[1077 rows x 14 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_res1" ] },
{ "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# Labels are assigned positionally, so they must follow the order in which the per-model\n", "# frames were concatenated into all_res.\n", "all_res1.columns = [\n", "    'HighConfidence_day',\n", "    'HighConfidence_30m',\n", "    'HighConfidence_1h',\n", "    'HighConfidence_90m',\n", "    'PredDirection_day',\n", "    'PredDirection_30m',\n", "    'PredDirection_1h',\n", "    'PredDirection_90m',\n", "    'Correct_day',\n", "    'Correct_30m',\n", "    'Correct_1h',\n", "    'Correct_90m',\n", "    'GreenDays',\n", "    'RedDays'\n", "]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MODEL_SUFFIXES = ('day', '30m', '1h', '90m')\n", "\n", "\n", "def agreement_stats(results, predicted_green, high_confidence_only=False):\n", "    \"\"\"Score the days on which every model predicted the same direction.\n", "\n", "    Parameters\n", "    ----------\n", "    results : DataFrame\n", "        Frame with boolean PredDirection_<suffix> and HighConfidence_<suffix> columns for\n", "        each suffix in MODEL_SUFFIXES, plus boolean GreenDays and RedDays columns.\n", "    predicted_green : bool\n", "        True selects days where all models have PredDirection == True (scored against\n", "        GreenDays); False selects days where all have PredDirection == False (scored\n", "        against RedDays).\n", "    high_confidence_only : bool, default False\n", "        If True, additionally require every model's HighConfidence flag to be set.\n", "\n", "    Returns\n", "    -------\n", "    (hit_rate, hits, total) : tuple of (float, int, int)\n", "        hit_rate is hits / total, or NaN when no day matches the selection.\n", "    \"\"\"\n", "    direction_cols = [f'PredDirection_{suffix}' for suffix in MODEL_SUFFIXES]\n", "    unanimous = (results[direction_cols] == predicted_green).all(axis=1)\n", "    if high_confidence_only:\n", "        confidence_cols = [f'HighConfidence_{suffix}' for suffix in MODEL_SUFFIXES]\n", "        unanimous &= results[confidence_cols].all(axis=1)\n", "    outcome = results['GreenDays'] if predicted_green else results['RedDays']\n", "    hits = int(outcome[unanimous].sum())\n", "    total = int(unanimous.sum())\n", "    # Guard the empty selection so the cell reports NaN instead of raising.\n", "    hit_rate = hits / total if total else float('nan')\n", "    return hit_rate, hits, total" ] },
{ "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8133333333333334" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# When all models pred green, how often was it green?\n", "agreement_stats(all_res1, predicted_green=True)[0]" ] },
{ "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8638297872340426" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# When all models pred red, how often was it red?\n", "agreement_stats(all_res1, predicted_green=False)[0]" ] },
{ "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.8508474576271187\n", "251\n", "295\n" ] } ], "source": [ "# When all models are pred green with high confidence, how often was it green?\n", "green_rate, green_hits, green_total = agreement_stats(all_res1, predicted_green=True, high_confidence_only=True)\n", "print(green_rate)\n", "print(green_hits)\n", "print(green_total)" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9090909090909091\n", "150\n", "165\n" ] } ], "source": [ "# When all models are pred red with high confidence, how often was it red?\n", "red_rate, red_hits, red_total = agreement_stats(all_res1, predicted_green=False, high_confidence_only=True)\n", "print(red_rate)\n", "print(red_hits)\n", "print(red_total)" ] },
{ "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4271123491179202" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Share of all days on which the models were unanimous with high confidence\n", "# (computed from the two cells above instead of hard-coding their printed totals).\n", "(red_total + green_total) / len(all_res1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py39", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }