"""Streamlit front end for the "Gameday" $SPX model.

The script picks a 30-minute time slot (automatically from the current
Pacific time, or manually from a slider), trains the matching model via the
project's ``model_day_v2`` / ``model_intra_v2`` modules, and renders the new
prediction together with historical calibration and performance tables.
"""
from datetime import datetime

import holidays
import numpy as np
import pandas as pd
import pytz
import streamlit as st
from pandas.tseries.offsets import BDay
from sklearn.metrics import precision_score, recall_score, roc_auc_score

from getDailyData import get_daily

st.set_page_config(
    page_title="Gameday $SPX",
    page_icon="🎮"
)

# Feature columns grouped by the dtype they are cast to before prediction.
# Helper names are underscore-prefixed so the star imports further down
# cannot overwrite them.
_INT_COLS = ['Quarter']

_SHARED_FLOAT_COLS = [
    'BigNewsDay', 'DaysGreen', 'DaysRed', 'CurrentGap',
    'RangePct', 'RangePct_n1', 'RangePct_n2',
    'OHLC4_VIX', 'OHLC4_VIX_n1', 'OHLC4_VIX_n2',
    'L1TouchPct', 'L2TouchPct', 'H1TouchPct', 'H2TouchPct',
    'L1BreakPct', 'L2BreakPct', 'H1BreakPct', 'H2BreakPct',
    'H1BreakTouchPct', 'H2BreakTouchPct', 'L1BreakTouchPct', 'L2BreakTouchPct',
]
_SHARED_BOOL_COLS = ['Perf5Day', 'Perf5Day_n1']

# Columns specific to the 06:30 ("open") model.
_DAY_FLOAT_COLS = _SHARED_FLOAT_COLS + ['OpenL1', 'OpenL2', 'OpenH1', 'OpenH2']
_DAY_BOOL_COLS = _SHARED_BOOL_COLS + ['VIXOpen', 'VVIXOpen']

# Columns specific to the intraday models.
_INTRA_FLOAT_COLS = _SHARED_FLOAT_COLS + [
    'CurrentHigh30toClose', 'CurrentLow30toClose', 'CurrentClose30toClose',
    'CurrentRange30', 'GapFill30',
    'SPX30IntraPerf', 'VIX30IntraPerf', 'VVIX30IntraPerf',
    'H1BreakH2TouchPct', 'L1BreakL2TouchPct', 'GreenProbas',
    'H1TouchGreenPct', 'L1TouchRedPct',
]
_INTRA_BOOL_COLS = _SHARED_BOOL_COLS + [
    'CurrentVIXTrend', 'OHLC4_Current_Trend', 'OHLC4_Trend',
]


def convert_to_pst(utc_time):
    """Return naive UTC ``utc_time`` as an aware America/Los_Angeles datetime."""
    utc_timezone = pytz.timezone('UTC')
    pst_timezone = pytz.timezone('America/Los_Angeles')
    utc_time = utc_timezone.localize(utc_time)
    return utc_time.astimezone(pst_timezone)


# Current time, first in UTC and then converted to Pacific time.
utc_now = datetime.utcnow()
pst_now = convert_to_pst(utc_now)


def is_business_day(date):
    """Return True when ``date`` falls on Monday (0) through Friday (4)."""
    return date.weekday() < 5


def get_us_holidays():
    """Return the US holiday calendar for the current year."""
    return holidays.US(years=datetime.now().year)


def convert_to_datetime(time_str, pst_now):
    """Combine an ``"%H:%M"`` string with the date of ``pst_now`` (naive result)."""
    time_obj = datetime.strptime(time_str, "%H:%M")
    return datetime.combine(pst_now.date(), time_obj.time())


def get_time(_times, _pst_now):
    """Flag, for each ``"%H:%M"`` slot in ``_times``, whether it is selectable.

    A slot is flagged True when its time of day is at or before ``_pst_now``.
    A later slot is flagged False on a weekday or on a US holiday, and True
    otherwise (i.e. on a non-holiday weekend day).

    Returns a list of booleans aligned with ``_times``.
    """
    # Use the argument rather than the module-level ``pst_now`` so the
    # function honours what the caller passes in.
    pst_now_time = _pst_now.time()
    us_holidays = get_us_holidays()
    flags = []
    for t in _times:
        slot = convert_to_datetime(t, _pst_now)
        if slot.time() <= pst_now_time:
            flags.append(True)
            continue
        # NOTE(review): the holiday test only matters for weekend holidays,
        # because weekdays already satisfy the first operand. Confirm whether
        # "business day and not a holiday" was intended.
        upcoming = is_business_day(slot) or slot.date() in us_holidays
        flags.append(not upcoming)
    return flags


times_list = ['06:30', '07:00', '07:30', '08:00', '08:30', '09:00', '09:30',
              '10:00', '10:30', '11:00', '11:30', '12:00', '12:30']
idxs = get_time(times_list, pst_now)


def get_last_idx(arr):
    """Return the index of the last True in ``arr``, or 0 when there is none."""
    try:
        return len(arr) - 1 - arr[::-1].index(True)
    except ValueError:
        return 0


def get_acc_html(t, p):
    """Return a symbol for the outcome of prediction ``p`` against truth ``t``.

    Reads the module-level ``lo_thres`` / ``hi_thres``, which are assigned
    when a model run is submitted.
    """
    kind = _outcome(t, p, lo_thres, hi_thres)
    return {'hit': '✅', 'miss': '❌'}.get(kind, '⬜')


def _cast_columns(frame, float_cols, int_cols, bool_cols):
    """Cast the named columns of ``frame`` to float / int / bool in place."""
    for dtype, cols in ((float, float_cols), (int, int_cols), (bool, bool_cols)):
        for col in cols:
            frame[col] = frame[col].astype(dtype)
    return frame


def _mean_negative(values):
    """Mean of the strictly negative entries (NaN when there are none)."""
    return values[values < 0].mean()


def _mean_positive(values):
    """Mean of the strictly positive entries (NaN when there are none)."""
    return values[values > 0].mean()


def _outcome(t, p, lo, hi):
    """Classify a prediction as 'hit', 'miss' or 'skip'.

    ``p > hi`` is a green call and ``p <= lo`` is a red call; anything in
    between, or a truth value that is neither True nor False, is 'skip'.
    """
    called_green = p > hi
    called_red = p <= lo
    if not (called_green or called_red):
        return 'skip'
    if t == True:  # noqa: E712 - equality (not identity) so numpy bools match
        return 'hit' if called_green else 'miss'
    if t == False:  # noqa: E712
        return 'hit' if called_red else 'miss'
    return 'skip'


def _target_price(pct_text, base):
    """Turn a formatted percent string such as '0.53%' into a price string.

    Returns 'n/a' when ``pct_text`` is None (no matching history bucket).
    """
    if pct_text is None:
        return 'n/a'
    return f"{(1 + float(pct_text.strip('%')) / 100) * base:.0f}"


def _performance_metrics(results):
    """Return [N, ROC AUC, calibrated ROC AUC, precision, recall] for ``results``."""
    truth = results['True'].astype(int)
    calibrated = results.dropna(subset=['CalibPredicted'])
    called_green = results['Predicted'] > 0.5
    return [
        len(results),
        roc_auc_score(truth, results['Predicted'].values),
        roc_auc_score(calibrated['True'].astype(int), calibrated['CalibPredicted'].values),
        precision_score(truth, called_green),
        recall_score(truth, called_green),
    ]


idx_use = get_last_idx(idxs)

st.title('🎮 Gameday Model for $SPX')
st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')

if "mode" not in st.session_state:
    st.session_state.mode = "Auto"
    st.session_state.option = times_list[idx_use]

with st.form("choose_model"):
    t1, t2 = st.columns(2)
    with t1:
        mode = st.radio('Choose mode', options=['Auto', 'Manual'], horizontal=True,
                        label_visibility='collapsed')
        change_mode = st.form_submit_button('👊🏽 Confirm Mode', use_container_width=True)
    with t2:
        submitted = st.form_submit_button('🏃🏽‍♂️ Run', use_container_width=True)
        cleared = st.form_submit_button('🧹 Clear', use_container_width=True)

    option = st.select_slider(
        "Change mode to Manual, and select time 👉🏽 Run.",
        times_list,
        disabled=mode == 'Auto'
    )

    # In Auto mode the slider is disabled and the latest elapsed slot is used.
    if mode == 'Auto':
        option = times_list[idx_use]

    if change_mode:
        auto_note = f' Model will run for {option}.' if mode == 'Auto' else ''
        st.info(f"Changed to {mode}.{auto_note}")

    if cleared:
        st.cache_data.clear()

    if option == '':
        st.info('No model selected.')

    if submitted:
        my_bar = st.progress(0)
        fname = f'performance_for_{option}_model.csv'

        if option == '06:30':
            # The star import is expected to supply model_cols,
            # walk_forward_validation_seq and seq_predict_proba.
            from model_day_v2 import *  # noqa: F401,F403
            fname = 'performance_for_open_model.csv'

            my_bar.progress(0.33, 'Loading data...')
            data, df_final, final_row = get_daily()

            my_bar.progress(0.66, "Training models...")
            res1, xgbr, seq2 = walk_forward_validation_seq(
                df_final.dropna(), 'Target_clf', 'Target', 200, 1)

            my_bar.progress(0.99, "Getting new prediction...")
            my_bar.empty()

            # Last row of features, reshaped into a one-row frame.
            new_pred = pd.DataFrame(data.loc[final_row, model_cols]).T
            curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

            # The transposed row is not typed per column; restore the dtypes.
            new_pred = _cast_columns(new_pred, _DAY_FLOAT_COLS, _INT_COLS, _DAY_BOOL_COLS)
            seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
        else:
            # The star import is expected to supply model_cols,
            # walk_forward_validation and seq_predict_proba.
            from model_intra_v2 import *  # noqa: F401,F403
            idx = times_list.index(option)

            my_bar.progress(0.33, 'Loading data...')
            data, df_final, final_row = get_daily(mode='intra', periods_30m=idx)

            my_bar.progress(0.66, "Training models...")
            res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 1)

            my_bar.progress(0.99, "Getting new prediction...")
            my_bar.empty()

            # Last row of features, reshaped into a one-row frame.
            new_pred = pd.DataFrame(data.loc[final_row, model_cols]).T
            curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

            new_pred = _cast_columns(new_pred, _INTRA_FLOAT_COLS, _INT_COLS, _INTRA_BOOL_COLS)
            seq_proba = seq_predict_proba(new_pred, xgbr)

        st.info(f'as of {option} on {curr_date} 👇🏽', icon="🔮")

        _q = 7          # number of equal-width probability buckets
        lo_thres = 0.4  # at or below: red call
        hi_thres = 0.6  # above: green call

        # Next-day close-to-close return, aligned to the prediction date.
        data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
        data['ClosePct'] = data['ClosePct'].shift(-1)
        res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)

        # Historical outcomes bucketed by predicted probability.
        # 'mean'/'sum' and observed=False spell out what pandas currently does
        # for np.mean/np.sum and the default, without the deprecation warnings.
        buckets = pd.cut(res1['Predicted'], _q)
        df_probas = res1.groupby(buckets, observed=False).agg({
            'True': ['mean', len, 'sum'],
            'ClosePct': ['mean', _mean_negative, _mean_positive],
        })
        df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf', 'AvgDown', 'AvgUp']
        for pct_col in ('AvgPerf', 'AvgDown', 'AvgUp'):
            df_probas[pct_col] = df_probas[pct_col].apply(lambda x: f'{x:.2%}')

        green_proba = seq_proba[0]
        red_proba = 1 - green_proba
        do_not_play = lo_thres < green_proba <= hi_thres

        # Find the bucket holding the new prediction. pd.cut buckets are
        # right-closed, so interval membership is used rather than a
        # both-ends-inclusive comparison.
        bucket = None
        for q in df_probas.index:
            if green_proba in q:
                bucket = q
                break

        historical_proba = red_hist_proba = mid = lo = hi = None
        num_obs = 'n/a'
        intv = 'n/a'
        if bucket is not None:
            historical_proba = df_probas.loc[bucket, 'PctGreen']
            red_hist_proba = 1 - historical_proba
            num_obs = df_probas.loc[bucket, 'NumObs']
            mid = df_probas.loc[bucket, 'AvgPerf']
            lo = df_probas.loc[bucket, 'AvgDown']
            hi = df_probas.loc[bucket, 'AvgUp']
            intv = f'({bucket.left:.03f}, {bucket.right:.03f}]'
        else:
            # The new probability lies outside every historical bucket;
            # show placeholders instead of failing on None values.
            st.warning('No historical bucket matches this prediction; calibration shown as n/a.')

        df_probas.index = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]

        if do_not_play:
            # Between the thresholds: no call.
            text_cond = '🟨'
            score = green_proba
            calib_score = historical_proba
        elif green_proba > red_proba:
            # The day is predicted to be green.
            text_cond = '🟩'
            score = green_proba
            calib_score = historical_proba
        else:
            # The day is predicted to be red.
            text_cond = '🟥'
            score = red_proba
            calib_score = red_hist_proba

        calib_score_fmt = 'n/a' if calib_score is None else f'{calib_score:.1%}'

        prev_close = data.loc[final_row, 'Close']
        curr_close = data['Close'].iloc[-1]
        curr_open = data['Open'].iloc[-1]
        curr_close30 = curr_open if option == '06:30' else data['CurrentClose30'].iloc[-2]

        top_of_fold = pd.DataFrame(
            index=['Results'],
            data={
                'Confidence': [f'{text_cond} {score:.1%}'],
                'Calib. Proba': [f'{text_cond} {calib_score_fmt}'],
                f'{intv}': [f'{num_obs}'],
                'Prev / Curr': [f'{prev_close:.2f} / {curr_close:.2f}'],
            })

        lo_label = 'n/a' if lo is None else lo
        mid_label = 'n/a' if mid is None else mid
        hi_label = 'n/a' if hi is None else hi
        targets = pd.DataFrame(
            index=[
                f'Close @ {option} ({(curr_close30 / prev_close) - 1:.2%})',
                f'Low ({lo_label})',
                f'Mid ({mid_label})',
                f'High ({hi_label})',
            ],
            data=[
                [f"{curr_close30:.0f}"],
                [_target_price(lo, prev_close)],
                [_target_price(mid, prev_close)],
                [_target_price(hi, prev_close)],
            ],
            columns=['Targets'])

        # Metrics over all predictions and over the high-confidence subset
        # (predictions outside the lo/hi threshold band).
        res2_filtered = res1.loc[(res1['Predicted'] > hi_thres) | (res1['Predicted'] <= lo_thres)]
        metrics_all = _performance_metrics(res1)
        metrics_hi = _performance_metrics(res2_filtered)
        df_performance = pd.DataFrame(
            index=['N', 'ROC AUC', 'Calib. AUC', 'Precision', 'Recall'],
            columns=['All', 'High Confidence'],
            data=[list(pair) for pair in zip(metrics_all, metrics_hi)]
        ).round(2)

        def get_acc(t, p):
            """Return an emoji for the outcome of prediction ``p`` against truth ``t``."""
            kind = _outcome(t, p, lo_thres, hi_thres)
            return {'hit': '✅', 'miss': '❌'}.get(kind, '🟨')

        def get_acc_text(t, p):
            """Return a text label for the outcome of ``p`` against truth ``t``."""
            kind = _outcome(t, p, lo_thres, hi_thres)
            return {'hit': 'Correct', 'miss': 'Incorrect'}.get(kind, 'No Action')

        perf_daily = res1.copy()
        perf_daily['TargetDate'] = perf_daily.index + BDay(1)
        outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
        perf_daily['Accuracy'] = [get_acc(t, p) for t, p in outcome_pairs]
        perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in outcome_pairs]
        # Red calls are expressed as confidence in red (1 - p).
        perf_daily['ConfidenceScore'] = [
            1 - x if x <= lo_thres else x for x in perf_daily['Predicted']
        ]
        perf_daily = perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy',
                                 'AccuracyText', 'ConfidenceScore']]

        def convert_df(df):
            """Serialise ``df`` to CSV text for the download button."""
            return df.to_csv()

        csv = convert_df(perf_daily)

        # Level prices come from the last row; the break/touch rates come
        # from the row before it.
        level_rows = []
        for lvl in ['H2', 'H1', 'L1', 'L2']:
            level_price = data[lvl].iloc[-1]
            break_pct = data[lvl + 'BreakPct'].iloc[-2]
            touch_pct = data[lvl + 'TouchPct'].iloc[-2]
            break_touch_pct = data[lvl + 'BreakTouchPct'].iloc[-2]
            level_rows.append([
                f"{level_price:.2f}",
                f"{break_pct:.1%}",
                f"{touch_pct:.1%}",
                f"{break_touch_pct:.1%}",
            ])
        df_levels = pd.DataFrame(
            index=['H2', 'H1', 'L1', 'L2'],
            columns=['Level', 'BreakPct(100)', 'TouchPct(100)', 'BreakGivenTouch(100)'],
            data=level_rows
        )

        top1, top2 = st.columns(2)
        with top1:
            st.dataframe(top_of_fold.T, use_container_width=True)
        with top2:
            st.dataframe(targets, use_container_width=True)

        tab1, tab2, tab3, tab4 = st.tabs(
            ["🤖 Stats", "✨ New Data", "📚 Historical", "📊 Performance"])

        with tab1:
            st.write(df_probas)
            st.write('🌊 JC Levels')
            st.write(df_levels)

        with tab2:
            st.subheader('Latest Data for Pred')
            st.write(new_pred)

        with tab3:
            st.subheader('Historical Data')
            st.write(df_final)

        with tab4:
            st.subheader('Performance')
            st.write(df_performance)
            st.text('Performance last 10 days (download for all)')
            st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']].iloc[-10:])

# Rendered after the form block has closed.
if submitted:
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
    )
    st.caption('⚠️ Downloading the CSV will reload the page. ⚠️')