James McCool
Migrate from Google Sheets to MongoDB for data storage
0d43ef4
raw
history blame
21.9 kB
import numpy as np
import pandas as pd
import streamlit as st
import pymongo
st.set_page_config(layout="wide")
@st.cache_resource
def init_conn():
uri = st.secrets['mongo_uri']
client = pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)
db = client["NBA_DFS"]
return db
db = init_conn()
player_roo_format = {'Top_finish': '{:.2%}','Top_5_finish': '{:.2%}', 'Top_10_finish': '{:.2%}', '20+%': '{:.2%}', '4x%': '{:.2%}', '5x%': '{:.2%}',
'6x%': '{:.2%}','GPP%': '{:.2%}'}
st.markdown("""
<style>
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
padding: 4px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background-color: #FFD700;
color: white;
border-radius: 10px;
gap: 1px;
padding: 10px 20px;
font-weight: bold;
transition: all 0.3s ease;
}
.stTabs [aria-selected="true"] {
background-color: #DAA520;
color: white;
}
.stTabs [data-baseweb="tab"]:hover {
background-color: #DAA520;
cursor: pointer;
}
</style>""", unsafe_allow_html=True)
@st.cache_resource(ttl = 60)
def init_stat_load():
collection = db["Player_Range_Of_Outcomes"]
cursor = collection.find()
raw_display = pd.DataFrame(list(cursor))
raw_display = raw_display[['Player', 'Minutes Proj', 'Position', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%',
'Own', 'Small_Own', 'Large_Own', 'Cash_Own', 'CPT_Own', 'LevX', 'ValX', 'site', 'version', 'slate', 'timestamp', 'player_ID']]
raw_display = raw_display[['Player', 'Salary', 'Position', 'Team', 'Minutes', 'Median', 'Own', 'site', 'slate', 'timestamp']]
raw_display.replace("", 'Welp', inplace=True)
raw_display = raw_display.loc[raw_display['Player'] != 'Welp']
raw_display = raw_display.loc[raw_display['Median'] > 0]
raw_display = raw_display.apply(pd.to_numeric, errors='ignore')
proj_raw = raw_display.sort_values(by='Median', ascending=False)
timestamp = proj_raw['timestamp'].iloc[0]
return proj_raw, timestamp
@st.cache_data
def convert_df_to_csv(df):
return df.to_csv().encode('utf-8')
proj_raw, timestamp = init_stat_load()
t_stamp = f"Last Update: " + str(timestamp) + f" CST"
tab1, tab2 = st.tabs(['Pivot Finder', 'Uploads and Info'])
with tab2:
st.info("The Projections file can have any columns in any order, but must contain columns explicitly named: 'Player', 'Salary', 'Position', 'Team', 'Minutes', 'Median', 'Own'.")
col1, col2 = st.columns([1, 5])
with col1:
proj_file = st.file_uploader("Upload Projections File", key = 'proj_uploader')
if proj_file is not None:
try:
proj_dataframe = pd.read_csv(proj_file)
try:
proj_dataframe = proj_dataframe.replace(',','', regex=True)
proj_dataframe['Salary'] = proj_dataframe['Salary'].astype(int)
except:
pass
except:
proj_dataframe = pd.read_excel(proj_file)
try:
proj_dataframe = proj_dataframe.replace(',','', regex=True)
proj_dataframe['Salary'] = proj_dataframe['Salary'].astype(int)
except:
pass
with col2:
if proj_file is not None:
st.dataframe(proj_dataframe.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), use_container_width = True)
with tab1:
col1, col2 = st.columns([1, 9])
with col1:
st.info(t_stamp)
if st.button("Load/Reset Data", key='reset1'):
st.cache_data.clear()
proj_raw, timestamp = init_stat_load()
t_stamp = f"Last Update: " + str(timestamp) + f" CST"
for key in st.session_state.keys():
del st.session_state[key]
data_var1 = st.radio("Which data are you loading?", ('Paydirt', 'User'), key='data_var1')
site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='site_var1')
slate_var1 = st.radio("What slate are you working with?", ('Main Slate', 'Secondary Slate'), key='slate_var1')
if site_var1 == 'Draftkings':
if data_var1 == 'User':
raw_baselines = proj_dataframe
elif data_var1 != 'User':
raw_baselines = proj_raw[proj_raw['site'] == 'Draftkings']
if slate_var1 == 'Main Slate':
raw_baselines = raw_baselines[raw_baselines['slate'] == 'Main Slate']
elif slate_var1 == 'Secondary Slate':
raw_baselines = raw_baselines[raw_baselines['slate'] == 'Secondary Slate']
raw_baselines = raw_baselines.sort_values(by='Own', ascending=False)
elif site_var1 == 'Fanduel':
if data_var1 == 'User':
raw_baselines = proj_dataframe
elif data_var1 != 'User':
raw_baselines = proj_raw[proj_raw['site'] == 'Fanduel']
if slate_var1 == 'Main Slate':
raw_baselines = raw_baselines[raw_baselines['slate'] == 'Main Slate']
elif slate_var1 == 'Secondary Slate':
raw_baselines = raw_baselines[raw_baselines['slate'] == 'Secondary Slate']
raw_baselines = raw_baselines.sort_values(by='Own', ascending=False)
check_seq = st.radio("Do you want to check a single player or the top 10 in ownership?", ('Single Player', 'Top X Owned'), key='check_seq')
if check_seq == 'Single Player':
player_check = st.selectbox('Select player to create comps', options = raw_baselines['Player'].unique(), key='dk_player')
elif check_seq == 'Top X Owned':
top_x_var = st.number_input('How many players would you like to check?', min_value = 1, max_value = 10, value = 5, step = 1)
Salary_var = st.number_input('Acceptable +/- Salary range', min_value = 0, max_value = 1000, value = 300, step = 100)
Median_var = st.number_input('Acceptable +/- Median range', min_value = 0, max_value = 10, value = 3, step = 1)
pos_var1 = st.radio("Compare to all positions or specific positions?", ('All Positions', 'Specific Positions'), key='pos_var1')
if pos_var1 == 'Specific Positions':
pos_var_list = st.multiselect('Which positions would you like to include?', options = ['PG', 'SG', 'SF', 'PF', 'C'], key='pos_var_list')
elif pos_var1 == 'All Positions':
pos_var_list = ['PG', 'SG', 'SF', 'PF', 'C']
split_var1 = st.radio("Are you running the full slate or certain games?", ('Full Slate Run', 'Specific Games'), key='split_var1')
if split_var1 == 'Specific Games':
team_var1 = st.multiselect('Which teams would you like to include?', options = raw_baselines['Team'].unique(), key='team_var1')
elif split_var1 == 'Full Slate Run':
team_var1 = raw_baselines.Team.values.tolist()
with col2:
placeholder = st.empty()
displayholder = st.empty()
if st.button('Simulate appropriate pivots'):
with placeholder:
if site_var1 == 'Draftkings':
working_roo = raw_baselines
working_roo.replace('', 0, inplace=True)
if site_var1 == 'Fanduel':
working_roo = raw_baselines
working_roo.replace('', 0, inplace=True)
own_dict = dict(zip(working_roo.Player, working_roo.Own))
team_dict = dict(zip(working_roo.Player, working_roo.Team))
pos_dict = dict(zip(working_roo.Player, working_roo.Position))
min_dict = dict(zip(working_roo.Player, working_roo.Minutes))
total_sims = 1000
if check_seq == 'Single Player':
player_var = working_roo.loc[working_roo['Player'] == player_check]
player_var = player_var.reset_index()
working_roo = working_roo[working_roo['Position'].apply(lambda x: any(pos in x.split('/') for pos in pos_var_list))]
working_roo = working_roo[working_roo['Team'].isin(team_var1)]
working_roo = working_roo.loc[(working_roo['Salary'] >= player_var['Salary'][0] - Salary_var) & (working_roo['Salary'] <= player_var['Salary'][0] + Salary_var)]
working_roo = working_roo.loc[(working_roo['Median'] >= player_var['Median'][0] - Median_var) & (working_roo['Median'] <= player_var['Median'][0] + Median_var)]
flex_file = working_roo[['Player', 'Position', 'Salary', 'Median', 'Minutes']]
flex_file['Floor'] = (flex_file['Median'] * .25) + (flex_file['Minutes'] * .25)
flex_file['Ceiling'] = flex_file['Median'] + 10 + (flex_file['Minutes'] * .25)
flex_file['STD'] = (flex_file['Median']/4)
flex_file = flex_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
hold_file = flex_file.copy()
overall_file = flex_file.copy()
salary_file = flex_file.copy()
overall_players = overall_file[['Player']]
for x in range(0,total_sims):
salary_file[x] = salary_file['Salary']
salary_file=salary_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
salary_file = salary_file.div(1000)
for x in range(0,total_sims):
overall_file[x] = np.random.normal(overall_file['Median'],overall_file['STD'])
overall_file=overall_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
players_only = hold_file[['Player']]
raw_lineups_file = players_only
for x in range(0,total_sims):
maps_dict = {'proj_map':dict(zip(hold_file.Player,overall_file[x]))}
raw_lineups_file[x] = sum([raw_lineups_file['Player'].map(maps_dict['proj_map'])])
players_only[x] = raw_lineups_file[x].rank(ascending=False)
players_only=players_only.drop(['Player'], axis=1)
salary_4x_check = (overall_file - (salary_file*4))
salary_5x_check = (overall_file - (salary_file*5))
salary_6x_check = (overall_file - (salary_file*6))
gpp_check = (overall_file - ((salary_file*5)+10))
players_only['Average_Rank'] = players_only.mean(axis=1)
players_only['Top_finish'] = players_only[players_only == 1].count(axis=1)/total_sims
players_only['Top_5_finish'] = players_only[players_only <= 5].count(axis=1)/total_sims
players_only['Top_10_finish'] = players_only[players_only <= 10].count(axis=1)/total_sims
players_only['20+%'] = overall_file[overall_file >= 20].count(axis=1)/float(total_sims)
players_only['4x%'] = salary_4x_check[salary_4x_check >= 1].count(axis=1)/float(total_sims)
players_only['5x%'] = salary_5x_check[salary_5x_check >= 1].count(axis=1)/float(total_sims)
players_only['6x%'] = salary_6x_check[salary_6x_check >= 1].count(axis=1)/float(total_sims)
players_only['GPP%'] = salary_4x_check[gpp_check >= 1].count(axis=1)/float(total_sims)
players_only['Player'] = hold_file[['Player']]
final_outcomes = players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%']]
final_Proj = pd.merge(hold_file, final_outcomes, on="Player")
final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%']]
final_Proj['Own'] = final_Proj['Player'].map(own_dict)
final_Proj['Minutes Proj'] = final_Proj['Player'].map(min_dict)
final_Proj['Team'] = final_Proj['Player'].map(team_dict)
final_Proj['Own'] = final_Proj['Own'].astype('float')
final_Proj = final_Proj[['Player', 'Minutes Proj', 'Position', 'Team', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%', 'Own']]
final_Proj['Projection Rank'] = final_Proj.Top_finish.rank(pct = True)
final_Proj['Own Rank'] = final_Proj.Own.rank(pct = True)
final_Proj['LevX'] = (final_Proj['Projection Rank'] - final_Proj['Own Rank']) * 100
final_Proj['ValX'] = ((final_Proj[['4x%', '5x%']].mean(axis=1))*100) + final_Proj['LevX']
final_Proj['ValX'] = np.where(final_Proj['ValX'] > 100, 100, final_Proj['ValX'])
final_Proj['ValX'] = np.where(final_Proj['ValX'] < 0, 0, final_Proj['ValX'])
final_Proj = final_Proj[['Player', 'Minutes Proj', 'Position', 'Team', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%', 'Own', 'LevX', 'ValX']]
final_Proj = final_Proj.set_index('Player')
st.session_state.final_Proj = final_Proj.sort_values(by='Top_finish', ascending=False)
elif check_seq == 'Top X Owned':
if pos_var1 == 'Specific Positions':
raw_baselines = raw_baselines[raw_baselines['Position'].apply(lambda x: any(pos in x.split('/') for pos in pos_var_list))]
player_check = raw_baselines['Player'].head(top_x_var).tolist()
st.write(player_check)
final_proj_list = []
for players in player_check:
players_pos = pos_dict[players]
player_var = working_roo.loc[working_roo['Player'] == players]
player_var = player_var.reset_index()
working_roo_temp = working_roo[working_roo['Team'].isin(team_var1)]
working_roo_temp = working_roo_temp.loc[(working_roo_temp['Salary'] >= player_var['Salary'][0] - Salary_var) & (working_roo_temp['Salary'] <= player_var['Salary'][0] + Salary_var)]
working_roo_temp = working_roo_temp.loc[(working_roo_temp['Median'] >= player_var['Median'][0] - Median_var) & (working_roo_temp['Median'] <= player_var['Median'][0] + Median_var)]
flex_file = working_roo_temp[['Player', 'Position', 'Salary', 'Median', 'Minutes']]
flex_file['Floor'] = (flex_file['Median'] * .25) + (flex_file['Minutes'] * .25)
flex_file['Ceiling'] = flex_file['Median'] + 10 + (flex_file['Minutes'] * .25)
flex_file['STD'] = (flex_file['Median']/4)
flex_file = flex_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
hold_file = flex_file.copy()
overall_file = flex_file.copy()
salary_file = flex_file.copy()
overall_players = overall_file[['Player']]
for x in range(0,total_sims):
salary_file[x] = salary_file['Salary']
salary_file=salary_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
salary_file = salary_file.div(1000)
for x in range(0,total_sims):
overall_file[x] = np.random.normal(overall_file['Median'],overall_file['STD'])
overall_file=overall_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
players_only = hold_file[['Player']]
raw_lineups_file = players_only
for x in range(0,total_sims):
maps_dict = {'proj_map':dict(zip(hold_file.Player,overall_file[x]))}
raw_lineups_file[x] = sum([raw_lineups_file['Player'].map(maps_dict['proj_map'])])
players_only[x] = raw_lineups_file[x].rank(ascending=False)
players_only=players_only.drop(['Player'], axis=1)
salary_4x_check = (overall_file - (salary_file*4))
salary_5x_check = (overall_file - (salary_file*5))
salary_6x_check = (overall_file - (salary_file*6))
gpp_check = (overall_file - ((salary_file*5)+10))
players_only['Average_Rank'] = players_only.mean(axis=1)
players_only['Top_finish'] = players_only[players_only == 1].count(axis=1)/total_sims
players_only['Top_5_finish'] = players_only[players_only <= 5].count(axis=1)/total_sims
players_only['Top_10_finish'] = players_only[players_only <= 10].count(axis=1)/total_sims
players_only['20+%'] = overall_file[overall_file >= 20].count(axis=1)/float(total_sims)
players_only['4x%'] = salary_4x_check[salary_4x_check >= 1].count(axis=1)/float(total_sims)
players_only['5x%'] = salary_5x_check[salary_5x_check >= 1].count(axis=1)/float(total_sims)
players_only['6x%'] = salary_6x_check[salary_6x_check >= 1].count(axis=1)/float(total_sims)
players_only['GPP%'] = salary_4x_check[gpp_check >= 1].count(axis=1)/float(total_sims)
players_only['Player'] = hold_file[['Player']]
final_outcomes = players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%']]
final_Proj = pd.merge(hold_file, final_outcomes, on="Player")
final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%']]
final_Proj['Own'] = final_Proj['Player'].map(own_dict)
final_Proj['Minutes Proj'] = final_Proj['Player'].map(min_dict)
final_Proj['Team'] = final_Proj['Player'].map(team_dict)
final_Proj['Own'] = final_Proj['Own'].astype('float')
final_Proj['Projection Rank'] = final_Proj.Top_finish.rank(pct = True)
final_Proj['Own Rank'] = final_Proj.Own.rank(pct = True)
final_Proj['LevX'] = (final_Proj['Projection Rank'] - final_Proj['Own Rank']) * 100
final_Proj['ValX'] = ((final_Proj[['4x%', '5x%']].mean(axis=1))*100) + final_Proj['LevX']
final_Proj['ValX'] = np.where(final_Proj['ValX'] > 100, 100, final_Proj['ValX'])
final_Proj['ValX'] = np.where(final_Proj['ValX'] < 0, 0, final_Proj['ValX'])
final_Proj['Pivot_source'] = players
final_Proj = final_Proj[['Player', 'Pivot_source', 'Position', 'Team', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '4x%', '5x%', '6x%', 'GPP%', 'Own', 'LevX', 'ValX']]
final_Proj = final_Proj.sort_values(by='Top_finish', ascending=False)
final_proj_list.append(final_Proj)
st.write(f'finished run for {players}')
# Concatenate all the final_Proj dataframes
final_Proj_combined = pd.concat(final_proj_list)
final_Proj_combined = final_Proj_combined.sort_values(by='LevX', ascending=False)
final_Proj_combined = final_Proj_combined[final_Proj_combined['Player'] != final_Proj_combined['Pivot_source']]
st.session_state.final_Proj = final_Proj_combined.reset_index(drop=True) # Assign the combined dataframe back to final_Proj
placeholder.empty()
with displayholder.container():
if 'final_Proj' in st.session_state:
st.dataframe(st.session_state.final_Proj.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(player_roo_format, precision=2), use_container_width = True)
st.download_button(
label="Export Tables",
data=convert_df_to_csv(st.session_state.final_Proj),
file_name='NBA_pivot_export.csv',
mime='text/csv',
)
else:
st.write("Run some pivots my dude/dudette")