# gamedayspx / app.py
# Latest commit by boomsss: "making things faster" (86fa8c7)
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score
from pandas.tseries.offsets import BDay
import streamlit as st
from datetime import datetime
import pytz
import holidays
from getDailyData import get_daily
# Set the browser-tab title and icon for the app.
st.set_page_config(
    page_title="Gameday $SPX",
    # U+1F3AE (video game controller). Written as an escape so the icon
    # survives re-encoding of this file; the literal had been corrupted into
    # mojibake.
    page_icon="\U0001F3AE",
)
def convert_to_pst(utc_time):
    """Interpret a naive datetime as UTC and return it in US Pacific time.

    Args:
        utc_time: naive ``datetime`` (no tzinfo); it is localized to UTC here.

    Returns:
        Timezone-aware ``datetime`` in the 'America/Los_Angeles' zone.
    """
    source_zone = pytz.timezone('UTC')
    target_zone = pytz.timezone('America/Los_Angeles')
    # localize() attaches the UTC zone to the naive value; astimezone() then
    # shifts the wall-clock reading into Pacific time.
    return source_zone.localize(utc_time).astimezone(target_zone)
# Current UTC time as a *naive* datetime. convert_to_pst() localizes it itself,
# so tzinfo is stripped after asking for an aware "now". This replaces
# datetime.utcnow(), which is deprecated as of Python 3.12.
utc_now = datetime.now(pytz.utc).replace(tzinfo=None)
# The same instant expressed in US Pacific time (timezone-aware).
pst_now = convert_to_pst(utc_now)
def is_business_day(date):
    """Return True when ``date`` falls on Monday through Friday.

    Only the weekday is inspected; public holidays are not considered here.
    """
    weekday_index = date.weekday()  # Monday == 0 ... Sunday == 6
    return weekday_index <= 4
def get_us_holidays():
    """Return the ``holidays.US`` calendar for the current calendar year.

    The year is taken from ``datetime.now()`` (the process's local clock).
    """
    this_year = datetime.now().year
    return holidays.US(years=this_year)
def convert_to_datetime(time_str, pst_now):
    """Build a naive datetime from an 'HH:MM' string on ``pst_now``'s date.

    Args:
        time_str: wall-clock time formatted as ``%H:%M`` (e.g. ``'06:30'``).
        pst_now: datetime whose calendar date is used for the result.

    Returns:
        Naive ``datetime`` combining ``pst_now``'s date with the parsed time.
    """
    parsed = datetime.strptime(time_str, "%H:%M")
    today = pst_now.date()
    # Keep the parsed hour/minute and swap in today's calendar date.
    return parsed.replace(year=today.year, month=today.month, day=today.day)
def get_time(_times, _pst_now):
    """Flag which time slots are selectable for the given "now".

    Args:
        _times: iterable of 'HH:MM' strings.
        _pst_now: datetime used both as "today" and as the current time.

    Returns:
        List of bools, one per entry of ``_times``. A slot is True when its
        time of day is at or before ``_pst_now``'s time of day, or when the
        date is neither a weekday nor a US holiday; otherwise False.
    """
    # Fix: the original read the module-level ``pst_now`` here, silently
    # ignoring the ``_pst_now`` argument for the time comparison.
    now_time = _pst_now.time()
    us_holidays = get_us_holidays()

    flags = []
    for t in _times:
        slot = convert_to_datetime(t, _pst_now)
        if slot.time() <= now_time:
            # This slot's time has already passed today.
            flags.append(True)
        elif is_business_day(slot) or slot.date() in us_holidays:
            # NOTE(review): a holiday is handled like a trading weekday here
            # (future slots are disabled). Confirm whether holidays should
            # instead behave like weekends.
            flags.append(False)
        else:
            flags.append(True)
    return flags
# Half-hourly time slots offered by the app, as 'HH:MM' strings.
times_list = [
    '06:30', '07:00', '07:30', '08:00', '08:30', '09:00', '09:30',
    '10:00', '10:30', '11:00', '11:30', '12:00', '12:30',
]
# One boolean per slot, computed by get_time() against the current Pacific time.
idxs = get_time(times_list, pst_now)
def get_last_idx(arr):
    """Return the index of the last entry equal to ``True`` in ``arr``.

    Falls back to 0 when no entry matches (including when ``arr`` is empty).
    """
    for position in range(len(arr) - 1, -1, -1):
        # Equality (not identity) on purpose: same matching rule as list.index.
        if arr[position] == True:  # noqa: E712
            return position
    return 0
# Index into times_list of the slot used in Auto mode (last True flag in idxs).
idx_use = get_last_idx(idxs)

# Emoji are written as escapes: the literals in this file had been corrupted
# into mojibake by a wrong-codec round trip.
st.title('\U0001F3AE Gameday Model for $SPX')  # video game controller
st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')

# Seed the session defaults on first load only.
if "mode" not in st.session_state:
    st.session_state.mode = "Auto"
    st.session_state.option = times_list[idx_use]

# NOTE(review): indentation was lost in this copy of the file. The statements
# after the slider are placed inside the form on a best-effort reading;
# confirm against the deployed layout.
with st.form("choose_model"):
    t1, t2 = st.columns(2)
    with t1:
        mode = st.radio('Choose mode', options=['Auto', 'Manual'], horizontal=True, label_visibility='collapsed')
        # fisted hand + medium skin tone
        change_mode = st.form_submit_button('\U0001F44A\U0001F3FD Confirm Mode', use_container_width=True)
    with t2:
        # man running + medium skin tone (runner, skin tone, ZWJ, male sign, VS16)
        submitted = st.form_submit_button('\U0001F3C3\U0001F3FD\u200D\u2642\uFE0F Run', use_container_width=True)
        # broom
        cleared = st.form_submit_button('\U0001F9F9 Clear', use_container_width=True)

    # The slider is only interactive in Manual mode.
    option = st.select_slider(
        'Change mode to Manual, and select time \U0001F449\U0001F3FD Run.',
        times_list,
        disabled=mode == 'Auto',
    )

    # Auto mode overrides whatever the (disabled) slider holds.
    if mode == 'Auto':
        option = times_list[idx_use]

    if change_mode:
        notice = f"Changed to {mode}."
        if mode == 'Auto':
            notice += f" Model will run for {option}."
        st.info(notice)

    if cleared:
        st.cache_data.clear()

    # Defensive: every entry of times_list is non-empty, so this is not
    # expected to trigger with the current slot list.
    if option == '':
        st.info('No model selected.')
# Level touch/break statistics cast to float by both model variants.
_LEVEL_STAT_COLS = [
    'L1TouchPct', 'L2TouchPct', 'H1TouchPct', 'H2TouchPct',
    'L1BreakPct', 'L2BreakPct', 'H1BreakPct', 'H2BreakPct',
    'H1BreakTouchPct', 'H2BreakTouchPct', 'L1BreakTouchPct', 'L2BreakTouchPct',
]

# Column -> dtype applied to the prediction row of the 06:30 (open) model.
_DAY_MODEL_DTYPES = {
    'BigNewsDay': float,
    'Quarter': int,
    'Perf5Day': bool,
    'Perf5Day_n1': bool,
    'DaysGreen': float,
    'DaysRed': float,
    'CurrentGap': float,
    'RangePct': float,
    'RangePct_n1': float,
    'RangePct_n2': float,
    'OHLC4_VIX': float,
    'OHLC4_VIX_n1': float,
    'OHLC4_VIX_n2': float,
    'VIXOpen': bool,
    'VVIXOpen': bool,
    'OpenL1': float,
    'OpenL2': float,
    'OpenH1': float,
    'OpenH2': float,
    **dict.fromkeys(_LEVEL_STAT_COLS, float),
}

# Column -> dtype applied to the prediction row of the intraday models.
_INTRA_MODEL_DTYPES = {
    'BigNewsDay': float,
    'Quarter': int,
    'Perf5Day': bool,
    'Perf5Day_n1': bool,
    'DaysGreen': float,
    'DaysRed': float,
    'CurrentHigh30toClose': float,
    'CurrentLow30toClose': float,
    'CurrentClose30toClose': float,
    'CurrentRange30': float,
    'GapFill30': float,
    'CurrentGap': float,
    'RangePct': float,
    'RangePct_n1': float,
    'RangePct_n2': float,
    'OHLC4_VIX': float,
    'OHLC4_VIX_n1': float,
    'OHLC4_VIX_n2': float,
    'CurrentVIXTrend': bool,
    'SPX30IntraPerf': float,
    'VIX30IntraPerf': float,
    'VVIX30IntraPerf': float,
    **dict.fromkeys(_LEVEL_STAT_COLS, float),
    'H1BreakH2TouchPct': float,
    'L1BreakL2TouchPct': float,
    'GreenProbas': float,
    'OHLC4_Current_Trend': bool,
    'OHLC4_Trend': bool,
    'H1TouchGreenPct': float,
    'L1TouchRedPct': float,
}


def _build_new_pred(data, final_row, model_cols, dtypes):
    """Return row ``final_row`` of ``data`` as a one-row frame cast to ``dtypes``.

    Args:
        data: frame indexed so that ``data.loc[final_row, model_cols]`` selects
            the feature values of a single row.
        final_row: index label of the row to predict from.
        model_cols: feature columns expected by the model.
        dtypes: mapping of column name -> dtype; each listed column is cast.

    Raises:
        KeyError: if a column named in ``dtypes`` is not among ``model_cols``.
    """
    new_pred = pd.DataFrame(data.loc[final_row, model_cols]).T
    for col, dtype in dtypes.items():
        new_pred[col] = new_pred[col].astype(dtype)
    return new_pred


# NOTE(review): indentation was lost in this copy of the file, so this block is
# written at top level. If the deployed script nests it inside the
# ``with st.form(...)`` body, indent it accordingly.
if submitted:
    my_bar = st.progress(0)
    fname = f'performance_for_{option}_model.csv'

    if option == '06:30':
        # Star import supplies walk_forward_validation_seq, seq_predict_proba
        # and model_cols for the open model.
        from model_day_v2 import *
        fname = 'performance_for_open_model.csv'

        my_bar.progress(0.33, 'Loading data...')
        data, df_final, final_row = get_daily()

        my_bar.progress(0.66, "Training models...")

        def train_models():
            res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
            return res1, xgbr, seq2

        res1, xgbr, seq2 = train_models()

        my_bar.progress(0.99, "Getting new prediction...")
        my_bar.empty()

        new_pred = _build_new_pred(data, final_row, model_cols, _DAY_MODEL_DTYPES)
        seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
    else:
        # Star import supplies walk_forward_validation, seq_predict_proba and
        # model_cols for the intraday models.
        from model_intra_v2 import *
        idx = times_list.index(option)

        my_bar.progress(0.33, 'Loading data...')
        data, df_final, final_row = get_daily(mode='intra', periods_30m=idx)

        my_bar.progress(0.66, "Training models...")

        def train_models():
            res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 1)
            return res1, xgbr

        res1, xgbr = train_models()

        my_bar.progress(0.99, "Getting new prediction...")
        my_bar.empty()

        new_pred = _build_new_pred(data, final_row, model_cols, _INTRA_MODEL_DTYPES)
        seq_proba = seq_predict_proba(new_pred, xgbr)

    # The prediction targets the business day after the last feature row.
    curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

    # Emoji are written as escapes; the literals had been corrupted into
    # mojibake. Here: backhand index pointing down + skin tone, crystal ball.
    st.info(f'as of {option} on {curr_date} \U0001F447\U0001F3FD', icon="\U0001F52E")

    _q = 7          # number of equal-width buckets over historical predictions
    lo_thres = 0.4  # at or below: treated as a "red" call
    hi_thres = 0.6  # above: treated as a "green" call

    # Next-day close-to-close return, aligned to the row the prediction was
    # made from (hence the shift by -1).
    data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
    data['ClosePct'] = data['ClosePct'].shift(-1)
    res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)

    # Per-bucket hit rate and average/negative/positive next-day return.
    df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({
        'True': [np.mean, len, np.sum],
        'ClosePct': [np.mean,
                     lambda x: np.mean([r for r in x if r < 0]),
                     lambda x: np.mean([r for r in x if r > 0])]})
    df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf', 'AvgDown', 'AvgUp']
    for pct_col in ('AvgPerf', 'AvgDown', 'AvgUp'):
        df_probas[pct_col] = df_probas[pct_col].apply(lambda x: f'{x:.2%}')

    green_proba = seq_proba[0]
    red_proba = 1 - green_proba
    do_not_play = lo_thres < green_proba <= hi_thres

    # Locate the historical bucket holding the new prediction. pd.cut buckets
    # are (left, right], so the left edge is exclusive; the original test was
    # inclusive on both ends and could match two adjacent buckets.
    buckets = list(df_probas.index)
    matched = next((q for q in buckets if q.left < green_proba <= q.right), None)
    if matched is None:
        # The prediction lies outside everything seen historically. Previously
        # this left the stats as None and crashed while formatting; use the
        # nearest edge bucket and say so.
        matched = buckets[0] if green_proba <= buckets[0].left else buckets[-1]
        st.warning('Prediction is outside the range of historical predictions; showing the nearest bucket.')

    historical_proba = df_probas.loc[matched, 'PctGreen']
    red_hist_proba = 1 - historical_proba
    num_obs = df_probas.loc[matched, 'NumObs']
    mid = df_probas.loc[matched, 'AvgPerf']
    lo = df_probas.loc[matched, 'AvgDown']
    hi = df_probas.loc[matched, 'AvgUp']
    # Fix: the original label carried a stray closing parenthesis.
    intv = f'({matched.left:.03f}, {matched.right:.03f}]'

    # Replace the Interval index with readable labels for display.
    df_probas.index = [f'({q.left:.2f}, {q.right:.2f}]' for q in buckets]

    if do_not_play:
        text_cond = '\U0001F7E8'  # yellow square: no-action zone
        score = green_proba
        calib_score = historical_proba
    elif green_proba > red_proba:
        text_cond = '\U0001F7E9'  # green square: predicted green
        score = green_proba
        calib_score = historical_proba
    else:
        text_cond = '\U0001F7E5'  # red square: predicted red
        score = red_proba
        calib_score = red_hist_proba

    calib_score_fmt = f'{calib_score:.1%}'

    prev_close = data.loc[final_row, 'Close']
    curr_close = data['Close'].iloc[-1]
    curr_open = data['Open'].iloc[-1]
    curr_close30 = curr_open if option == '06:30' else data['CurrentClose30'].iloc[-2]

    top_of_fold = pd.DataFrame(
        index=['Results'],
        data={
            'Confidence': [f'{text_cond} {score:.1%}'],
            'Calib. Proba': [f'{text_cond} {calib_score_fmt}'],
            intv: [f'{num_obs}'],
            'Prev / Curr': [f'{prev_close:.2f} / {curr_close:.2f}'],
        })

    def _price_at(pct_text):
        """Apply a 'x.xx%' string to the previous close; return a rounded price string."""
        return f"{(1 + float(pct_text.strip('%')) / 100) * prev_close:.0f}"

    targets = pd.DataFrame(
        index=[
            f'Close @ {option} ({(curr_close30 / prev_close) - 1:.2%})',
            f'Low ({lo})',
            f'Mid ({mid})',
            f'High ({hi})',
        ],
        data=[
            [f"{curr_close30:.0f}"],
            [_price_at(lo)],
            [_price_at(mid)],
            [_price_at(hi)],
        ],
        columns=['Targets'])

    # Metrics over every historical prediction.
    res1_calib = res1.dropna(subset='CalibPredicted')
    roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
    roc_auc_score_calib = roc_auc_score(res1_calib['True'].astype(int), res1_calib['CalibPredicted'].values)
    precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
    recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
    len_all = len(res1)

    # Same metrics restricted to predictions outside the no-action zone.
    res2_filtered = res1.loc[(res1['Predicted'] > hi_thres) | (res1['Predicted'] <= lo_thres)]
    res2_calib = res2_filtered.dropna(subset='CalibPredicted')
    roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
    roc_auc_score_hi_calib = roc_auc_score(res2_calib['True'].astype(int), res2_calib['CalibPredicted'].values)
    precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
    recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
    len_hi = len(res2_filtered)

    df_performance = pd.DataFrame(
        index=['N', 'ROC AUC', 'Calib. AUC', 'Precision', 'Recall'],
        columns=['All', 'High Confidence'],
        data=[
            [len_all, len_hi],
            [roc_auc_score_all, roc_auc_score_hi],
            [roc_auc_score_calib, roc_auc_score_hi_calib],
            [precision_score_all, precision_score_hi],
            [recall_score_all, recall_score_hi],
        ],
    ).round(2)

    def _grade(t, p):
        """Classify one (actual, predicted) pair as 'hit', 'miss' or 'skip'."""
        called_red = p <= lo_thres
        called_green = p > hi_thres
        if (t == False and called_red) or (t == True and called_green):  # noqa: E712
            return 'hit'
        if (t == False and called_green) or (t == True and called_red):  # noqa: E712
            return 'miss'
        return 'skip'

    def get_acc(t, p):
        """Emoji verdict: check mark, cross mark, or yellow square (no action)."""
        return {'hit': '\u2705', 'miss': '\u274C', 'skip': '\U0001F7E8'}[_grade(t, p)]

    def get_acc_html(t, p):
        """HTML-entity verdict for the same classification as get_acc."""
        return {'hit': '&#9989;', 'miss': '&#10060;', 'skip': '&#11036;'}[_grade(t, p)]

    def get_acc_text(t, p):
        """Plain-text verdict for the same classification as get_acc."""
        return {'hit': 'Correct', 'miss': 'Incorrect', 'skip': 'No Action'}[_grade(t, p)]

    perf_daily = res1.copy()
    perf_daily['TargetDate'] = perf_daily.index + BDay(1)
    perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
    perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
    # Confidence in the called direction: red calls are flipped to 1 - p.
    # Uses lo_thres instead of the previously duplicated literal 0.4.
    perf_daily['ConfidenceScore'] = [1 - x if x <= lo_thres else x for x in perf_daily['Predicted']]
    perf_daily = perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy', 'AccuracyText', 'ConfidenceScore']]

    def convert_df(df):
        """Serialize ``df`` to CSV text for the download button."""
        return df.to_csv()

    csv = convert_df(perf_daily)

    # Level prices come from the last row; their statistics from the row before it.
    level_names = ['H2', 'H1', 'L1', 'L2']
    level_rows = []
    for lvl in level_names:
        level_rows.append([
            f"{data[lvl].iloc[-1]:.2f}",
            f"{data[lvl + 'BreakPct'].iloc[-2]:.1%}",
            f"{data[lvl + 'TouchPct'].iloc[-2]:.1%}",
            f"{data[lvl + 'BreakTouchPct'].iloc[-2]:.1%}",
        ])
    df_levels = pd.DataFrame(
        index=level_names,
        columns=['Level', 'BreakPct(100)', 'TouchPct(100)', 'BreakGivenTouch(100)'],
        data=level_rows,
    )

    # ---- Rendering ----
    top1, top2 = st.columns(2)
    with top1:
        st.dataframe(top_of_fold.T, use_container_width=True)
    with top2:
        st.dataframe(targets, use_container_width=True)

    # Tab icons: robot face, sparkles, books, bar chart.
    tab1, tab2, tab3, tab4 = st.tabs([
        "\U0001F916 Stats",
        "\u2728 New Data",
        "\U0001F4DA Historical",
        "\U0001F4CA Performance",
    ])

    with tab1:
        st.write(df_probas)
        st.write('\U0001F30A JC Levels')  # water wave
        st.write(df_levels)

    with tab2:
        st.subheader('Latest Data for Pred')
        st.write(new_pred)

    with tab3:
        st.subheader('Historical Data')
        st.write(df_final)

    with tab4:
        st.subheader('Performance')
        st.write(df_performance)
        st.text('Performance last 10 days (download for all)')
        st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']].iloc[-10:])
# Offer the full per-day performance table once a run has produced it.
# ``csv`` and ``fname`` are set by the run block that executes when
# ``submitted`` is true.
if submitted:
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
    )
    # U+26A0 U+FE0F (warning sign), written as escapes: the literals had been
    # corrupted into mojibake.
    st.caption('\u26A0\uFE0F Downloading the CSV will reload the page. \u26A0\uFE0F')