|
import streamlit as st |
|
st.set_page_config(layout="wide") |
|
import numpy as np |
|
import pandas as pd |
|
import time |
|
from fuzzywuzzy import process |
|
from collections import Counter |
|
|
|
|
|
from global_func.clean_player_name import clean_player_name |
|
from global_func.load_contest_file import load_contest_file |
|
from global_func.load_file import load_file |
|
from global_func.load_ss_file import load_ss_file |
|
from global_func.find_name_mismatches import find_name_mismatches |
|
from global_func.predict_dupes import predict_dupes |
|
from global_func.highlight_rows import highlight_changes, highlight_changes_winners, highlight_changes_losers |
|
from global_func.load_csv import load_csv |
|
from global_func.find_csv_mismatches import find_csv_mismatches |
|
|
|
player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'} |
|
|
|
tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"]) |
|
with tab1: |
|
if st.button('Clear data', key='reset1'): |
|
st.session_state.clear() |
|
sport_select = st.selectbox("Select Sport", ['MLB', 'NBA', 'NFL']) |
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
st.subheader("Contest File") |
|
st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.") |
|
Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls']) |
|
if 'Contest' in st.session_state: |
|
del st.session_state['Contest'] |
|
|
|
if Contest_file: |
|
contest_base, ownership_df, fpts_df, st.session_state['entry_list'] = load_contest_file(Contest_file, sport_select) |
|
contest_base = contest_base.dropna(how='all') |
|
contest_base = contest_base.reset_index(drop=True) |
|
if contest_base is not None: |
|
st.success('Contest file loaded successfully!') |
|
st.dataframe(contest_base.head(10)) |
|
|
|
with col2: |
|
st.subheader("Projections File") |
|
st.info("upload a projections file that has 'player_names', 'salary', 'median', 'ownership', and 'captain ownership' (Needed for Showdown) columns. Note that the salary for showdown needs to be the FLEX salary, not the captain salary.") |
|
|
|
|
|
upload_col, template_col = st.columns([3, 1]) |
|
|
|
with upload_col: |
|
projections_file = st.file_uploader("Upload Projections File (CSV or Excel)", type=['csv', 'xlsx', 'xls']) |
|
if 'projections_df' in st.session_state: |
|
del st.session_state['projections_df'] |
|
|
|
with template_col: |
|
|
|
template_df = pd.DataFrame(columns=['player_names', 'position', 'team', 'salary', 'median', 'ownership', 'captain ownership']) |
|
|
|
st.download_button( |
|
label="Template", |
|
data=template_df.to_csv(index=False), |
|
file_name="projections_template.csv", |
|
mime="text/csv" |
|
) |
|
|
|
if projections_file: |
|
export_projections, projections = load_file(projections_file) |
|
if projections is not None: |
|
st.success('Projections file loaded successfully!') |
|
st.dataframe(projections.head(10)) |
|
|
|
if Contest_file and projections_file: |
|
if 'Contest' not in st.session_state and 'projections_df' not in st.session_state: |
|
if contest_base is not None and projections is not None: |
|
st.subheader("Name Matching functions") |
|
st.session_state['Contest'], st.session_state['projections_df'], ownership_dict, actual_dict = find_name_mismatches(contest_base, projections, ownership_df, fpts_df) |
|
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int)) |
|
|
|
with tab2: |
|
if 'Contest' in st.session_state and 'projections_df' in st.session_state: |
|
col1, col2 = st.columns([1, 8]) |
|
excluded_cols = ['BaseName', 'EntryCount'] |
|
player_columns = [col for col in st.session_state['Contest'].columns if col not in excluded_cols] |
|
for col in player_columns: |
|
st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str) |
|
|
|
|
|
map_dict = { |
|
'pos_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])), |
|
'team_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])), |
|
'salary_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])), |
|
'proj_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])), |
|
'own_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])), |
|
'own_percent_rank': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))), |
|
'cpt_salary_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])), |
|
'cpt_proj_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)), |
|
'cpt_own_map': dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership'])) |
|
} |
|
|
|
working_df = st.session_state['Contest'].copy() |
|
|
|
with col1: |
|
with st.expander("Info and filters"): |
|
if st.button('Clear data', key='reset3'): |
|
st.session_state.clear() |
|
with st.form(key='filter_form'): |
|
type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown']) |
|
entry_parse_var = st.selectbox("Do you want to view a specific player(s) or a group of players?", ['All', 'Specific']) |
|
entry_names = st.multiselect("Select players", options=st.session_state['entry_list'], default=[]) |
|
submitted = st.form_submit_button("Submit") |
|
if submitted: |
|
if 'player_frame' in st.session_state: |
|
del st.session_state['player_frame'] |
|
if 'stack_frame' in st.session_state: |
|
del st.session_state['stack_frame'] |
|
|
|
if entry_parse_var == 'Specific' and entry_names: |
|
working_df = working_df[working_df['BaseName'].isin(entry_names)] |
|
|
|
|
|
st.write(actual_dict) |
|
if type_var == 'Classic': |
|
working_df['stack'] = working_df.apply( |
|
lambda row: Counter( |
|
map_dict['team_map'].get(player, '') for player in row[4:] |
|
if map_dict['team_map'].get(player, '') != '' |
|
).most_common(1)[0][0] if any(map_dict['team_map'].get(player, '') for player in row[4:]) else '', |
|
axis=1 |
|
) |
|
working_df['stack_size'] = working_df.apply( |
|
lambda row: Counter( |
|
map_dict['team_map'].get(player, '') for player in row[4:] |
|
if map_dict['team_map'].get(player, '') != '' |
|
).most_common(1)[0][1] if any(map_dict['team_map'].get(player, '') for player in row[4:]) else '', |
|
axis=1 |
|
) |
|
working_df['salary'] = working_df.apply(lambda row: sum(map_dict['salary_map'].get(player, 0) for player in row), axis=1) |
|
working_df['median'] = working_df.apply(lambda row: sum(map_dict['proj_map'].get(player, 0) for player in row), axis=1) |
|
working_df['actual_fpts'] = working_df.apply(lambda row: sum(actual_dict.get(player, 0) for player in row), axis=1) |
|
working_df['Own'] = working_df.apply(lambda row: sum(map_dict['own_map'].get(player, 0) for player in row), axis=1) |
|
working_df['actual_own'] = working_df.apply(lambda row: sum(ownership_dict.get(player, 0) for player in row), axis=1) |
|
working_df['sorted'] = working_df[player_columns].apply( |
|
lambda row: ','.join(sorted(row.values)), |
|
axis=1 |
|
) |
|
working_df['dupes'] = working_df.groupby('sorted').transform('size') |
|
working_df = working_df.reset_index() |
|
working_df['percentile_finish'] = working_df['index'].rank(pct=True) |
|
working_df = working_df.drop(['sorted', 'index'], axis=1) |
|
elif type_var == 'Showdown': |
|
working_df['stack'] = working_df.apply( |
|
lambda row: Counter( |
|
map_dict['team_map'].get(player, '') for player in row |
|
if map_dict['team_map'].get(player, '') != '' |
|
).most_common(1)[0][0] if any(map_dict['team_map'].get(player, '') for player in row) else '', |
|
axis=1 |
|
) |
|
working_df['stack_size'] = working_df.apply( |
|
lambda row: Counter( |
|
map_dict['team_map'].get(player, '') for player in row |
|
if map_dict['team_map'].get(player, '') != '' |
|
).most_common(1)[0][1] if any(map_dict['team_map'].get(player, '') for player in row) else '', |
|
axis=1 |
|
) |
|
working_df['salary'] = working_df.apply( |
|
lambda row: map_dict['cpt_salary_map'].get(row.iloc[0], 0) + |
|
sum(map_dict['salary_map'].get(player, 0) for player in row.iloc[1:]), |
|
axis=1 |
|
) |
|
working_df['median'] = working_df.apply( |
|
lambda row: map_dict['cpt_proj_map'].get(row.iloc[0], 0) + |
|
sum(map_dict['proj_map'].get(player, 0) for player in row.iloc[1:]), |
|
axis=1 |
|
) |
|
working_df['Own'] = working_df.apply( |
|
lambda row: map_dict['cpt_own_map'].get(row.iloc[0], 0) + |
|
sum(map_dict['own_map'].get(player, 0) for player in row.iloc[1:]), |
|
axis=1 |
|
) |
|
working_df['sorted'] = working_df[player_columns].apply( |
|
lambda row: row[0] + '|' + ','.join(sorted(row[1:].values)), |
|
axis=1 |
|
) |
|
working_df['dupes'] = working_df.groupby('sorted').transform('size') |
|
working_df = working_df.reset_index() |
|
working_df['percentile_finish'] = working_df['index'].rank(pct=True) |
|
working_df = working_df.drop(['sorted', 'index'], axis=1) |
|
|
|
|
|
if 'current_page' not in st.session_state: |
|
st.session_state.current_page = 1 |
|
|
|
|
|
rows_per_page = 500 |
|
total_rows = len(working_df) |
|
total_pages = (total_rows + rows_per_page - 1) // rows_per_page |
|
|
|
|
|
pagination_cols = st.columns([4, 1, 1, 1, 4]) |
|
with pagination_cols[1]: |
|
if st.button(f"Previous Page"): |
|
if st.session_state['current_page'] > 1: |
|
st.session_state.current_page -= 1 |
|
else: |
|
st.session_state.current_page = 1 |
|
if 'player_frame' in st.session_state: |
|
del st.session_state['player_frame'] |
|
if 'stack_frame' in st.session_state: |
|
del st.session_state['stack_frame'] |
|
|
|
with pagination_cols[3]: |
|
if st.button(f"Next Page"): |
|
st.session_state.current_page += 1 |
|
if 'player_frame' in st.session_state: |
|
del st.session_state['player_frame'] |
|
if 'stack_frame' in st.session_state: |
|
del st.session_state['stack_frame'] |
|
|
|
|
|
start_idx = (st.session_state.current_page - 1) * rows_per_page |
|
end_idx = min((st.session_state.current_page) * rows_per_page, total_rows) |
|
st.dataframe( |
|
working_df.iloc[start_idx:end_idx].style |
|
.background_gradient(axis=0) |
|
.background_gradient(cmap='RdYlGn') |
|
.format(precision=2), |
|
height=500, |
|
use_container_width=True, |
|
hide_index=True |
|
) |
|
|
|
with st.container(): |
|
tab1, tab2, tab3 = st.tabs(['Player Used Info', 'Stack Used Info', 'Duplication Info']) |
|
with tab1: |
|
if entry_parse_var == 'All': |
|
overall_players = pd.Series(list(working_df[player_columns].values.flatten())).value_counts() |
|
top_1per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01][player_columns].values.flatten())).value_counts() |
|
top_5per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05][player_columns].values.flatten())).value_counts() |
|
top_10per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10][player_columns].values.flatten())).value_counts() |
|
top_20per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20][player_columns].values.flatten())).value_counts() |
|
contest_len = len(working_df) |
|
len_1per = len(working_df[working_df['percentile_finish'] <= 0.01]) |
|
len_5per = len(working_df[working_df['percentile_finish'] <= 0.05]) |
|
len_10per = len(working_df[working_df['percentile_finish'] <= 0.10]) |
|
len_20per = len(working_df[working_df['percentile_finish'] <= 0.20]) |
|
each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%'] |
|
each_frame_set = [overall_players, top_1per_players, top_5per_players, top_10per_players, top_20per_players] |
|
each_len_set = [contest_len, len_1per, len_5per, len_10per, len_20per] |
|
player_count_var = 0 |
|
for each_set in each_frame_set: |
|
set_frame = each_set.to_frame().reset_index().rename(columns={'index': 'Player', 'count': 'Count'}) |
|
set_frame['Percent'] = set_frame['Count'] / each_len_set[player_count_var] |
|
set_frame = set_frame[['Player', 'Percent']] |
|
set_frame = set_frame.rename(columns={'Percent': f'Exposure {each_set_name[player_count_var]}'}) |
|
if 'player_frame' not in st.session_state: |
|
st.session_state['player_frame'] = set_frame |
|
else: |
|
st.session_state['player_frame'] = pd.merge(st.session_state['player_frame'], set_frame, on='Player', how='outer') |
|
player_count_var += 1 |
|
st.dataframe(st.session_state['player_frame']. |
|
sort_values(by='Exposure Overall', ascending=False). |
|
style.background_gradient(cmap='RdYlGn'). |
|
format(formatter='{:.2%}', subset=st.session_state['player_frame'].select_dtypes(include=['number']).columns), |
|
hide_index=True) |
|
else: |
|
overall_players = pd.Series(list(working_df[working_df['BaseName'].isin(entry_names)][player_columns].values.flatten())).value_counts() |
|
top_1per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01][player_columns].values.flatten())).value_counts() |
|
top_5per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05][player_columns].values.flatten())).value_counts() |
|
top_10per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10][player_columns].values.flatten())).value_counts() |
|
top_20per_players = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20][player_columns].values.flatten())).value_counts() |
|
contest_len = len(working_df) |
|
len_1per = len(working_df[working_df['percentile_finish'] <= 0.01]) |
|
len_5per = len(working_df[working_df['percentile_finish'] <= 0.05]) |
|
len_10per = len(working_df[working_df['percentile_finish'] <= 0.10]) |
|
len_20per = len(working_df[working_df['percentile_finish'] <= 0.20]) |
|
each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%'] |
|
each_frame_set = [overall_players, top_1per_players, top_5per_players, top_10per_players, top_20per_players] |
|
each_len_set = [contest_len, len_1per, len_5per, len_10per, len_20per] |
|
player_count_var = 0 |
|
for each_set in each_frame_set: |
|
set_frame = each_set.to_frame().reset_index().rename(columns={'index': 'Player', 'count': 'Count'}) |
|
set_frame['Percent'] = set_frame['Count'] / each_len_set[player_count_var] |
|
set_frame = set_frame[['Player', 'Percent']] |
|
set_frame = set_frame.rename(columns={'Percent': f'Exposure {each_set_name[player_count_var]}'}) |
|
if 'player_frame' not in st.session_state: |
|
st.session_state['player_frame'] = set_frame |
|
else: |
|
st.session_state['player_frame'] = pd.merge(st.session_state['player_frame'], set_frame, on='Player', how='outer') |
|
player_count_var += 1 |
|
st.dataframe(st.session_state['player_frame']. |
|
sort_values(by='Exposure Overall', ascending=False). |
|
style.background_gradient(cmap='RdYlGn'). |
|
format(formatter='{:.2%}', subset=st.session_state['player_frame'].select_dtypes(include=['number']).columns), |
|
hide_index=True) |
|
with tab2: |
|
if entry_parse_var == 'All': |
|
overall_stacks = pd.Series(list(working_df['stack'])).value_counts() |
|
top_1per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01]['stack'])).value_counts() |
|
top_5per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05]['stack'])).value_counts() |
|
top_10per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10]['stack'])).value_counts() |
|
top_20per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20]['stack'])).value_counts() |
|
stacks_contest_len = len(working_df) |
|
stacks_len_1per = len(working_df[working_df['percentile_finish'] <= 0.01]) |
|
stacks_len_5per = len(working_df[working_df['percentile_finish'] <= 0.05]) |
|
stacks_len_10per = len(working_df[working_df['percentile_finish'] <= 0.10]) |
|
stacks_len_20per = len(working_df[working_df['percentile_finish'] <= 0.20]) |
|
each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%'] |
|
each_stacks_set = [overall_stacks, top_1per_stacks, top_5per_stacks, top_10per_stacks, top_20per_stacks] |
|
each_stacks_len_set = [stacks_contest_len, stacks_len_1per, stacks_len_5per, stacks_len_10per, stacks_len_20per] |
|
stack_count_var = 0 |
|
for each_stack in each_stacks_set: |
|
stack_frame = each_stack.to_frame().reset_index().rename(columns={'index': 'Stack', 'count': 'Count'}) |
|
stack_frame['Percent'] = stack_frame['Count'] / each_stacks_len_set[stack_count_var] |
|
stack_frame = stack_frame[['Stack', 'Percent']] |
|
stack_frame = stack_frame.rename(columns={'Percent': f'Exposure {each_set_name[stack_count_var]}'}) |
|
if 'stack_frame' not in st.session_state: |
|
st.session_state['stack_frame'] = stack_frame |
|
else: |
|
st.session_state['stack_frame'] = pd.merge(st.session_state['stack_frame'], stack_frame, on='Stack', how='outer') |
|
stack_count_var += 1 |
|
st.dataframe(st.session_state['stack_frame']. |
|
sort_values(by='Exposure Overall', ascending=False). |
|
style.background_gradient(cmap='RdYlGn'). |
|
format(formatter='{:.2%}', subset=st.session_state['stack_frame'].select_dtypes(include=['number']).columns), |
|
hide_index=True) |
|
else: |
|
overall_stacks = pd.Series(list(working_df[working_df['BaseName'].isin(entry_names)]['stack'])).value_counts() |
|
top_1per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01]['stack'])).value_counts() |
|
top_5per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05]['stack'])).value_counts() |
|
top_10per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10]['stack'])).value_counts() |
|
top_20per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20]['stack'])).value_counts() |
|
stacks_contest_len = len(working_df) |
|
stacks_len_1per = len(working_df[working_df['percentile_finish'] <= 0.01]) |
|
stacks_len_5per = len(working_df[working_df['percentile_finish'] <= 0.05]) |
|
stacks_len_10per = len(working_df[working_df['percentile_finish'] <= 0.10]) |
|
stacks_len_20per = len(working_df[working_df['percentile_finish'] <= 0.20]) |
|
each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%'] |
|
each_stacks_set = [overall_stacks, top_1per_stacks, top_5per_stacks, top_10per_stacks, top_20per_stacks] |
|
each_stacks_len_set = [stacks_contest_len, stacks_len_1per, stacks_len_5per, stacks_len_10per, stacks_len_20per] |
|
stack_count_var = 0 |
|
for each_stack in each_stacks_set: |
|
stack_frame = each_stack.to_frame().reset_index().rename(columns={'index': 'Stack', 'count': 'Count'}) |
|
stack_frame['Percent'] = stack_frame['Count'] / each_stacks_len_set[stack_count_var] |
|
stack_frame = stack_frame[['Stack', 'Percent']] |
|
stack_frame = stack_frame.rename(columns={'Percent': f'Exposure {each_set_name[stack_count_var]}'}) |
|
if 'stack_frame' not in st.session_state: |
|
st.session_state['stack_frame'] = stack_frame |
|
else: |
|
st.session_state['stack_frame'] = pd.merge(st.session_state['stack_frame'], stack_frame, on='Stack', how='outer') |
|
stack_count_var += 1 |
|
st.dataframe(st.session_state['stack_frame']. |
|
sort_values(by='Exposure Overall', ascending=False). |
|
style.background_gradient(cmap='RdYlGn'). |
|
format(formatter='{:.2%}', subset=st.session_state['stack_frame'].select_dtypes(include=['number']).columns), |
|
hide_index=True) |
|
with tab3: |
|
st.write('holding') |
|
|