import streamlit as st
import pandas as pd

# Columns the uploaded contest table must contain (selected in this order).
_REQUIRED_COLUMNS = [
    'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
    'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team',
]

# Position label as it appears in the lineup string -> the output columns that
# label fills, in the order the label is encountered.
_MLB_SLOTS = {
    'P': ('SP1', 'SP2'),
    'C': ('C',),
    '1B': ('1B',),
    '2B': ('2B',),
    '3B': ('3B',),
    'SS': ('SS',),
    'OF': ('OF1', 'OF2', 'OF3'),
}

# Column order of the lineup part of the returned ``cleaned_df``.
_MLB_OUTPUT_COLUMNS = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']

# Placeholder written into a roster slot that has no player.
_EMPTY_SLOT = '-1'


def _parse_mlb_lineup(lineup):
    """Split one lineup string into a ``{slot column: player name}`` dict.

    The string is tokenised on whitespace. A token that is exactly one of the
    position labels in ``_MLB_SLOTS`` starts a new roster entry; every other
    token is appended to the current entry's player name. Matching whole
    tokens (rather than word boundaries) keeps names with initials such as
    ``J.P. Crawford`` or ``C.J. Abrams`` intact.

    Repeated labels fill their columns in order (first ``P`` -> ``SP1``,
    second ``P`` -> ``SP2``, ...). A label with no name after it consumes its
    slot and leaves it as ``'-1'``; labels beyond the available columns are
    ignored. Non-string input (e.g. NaN) yields all slots set to ``'-1'``.

    Limitation: a player name containing a standalone token equal to a
    position label cannot be distinguished from that label.
    """
    slots = dict.fromkeys(_MLB_OUTPUT_COLUMNS, _EMPTY_SLOT)
    if not isinstance(lineup, str):
        return slots

    # Each entry is (position label, list of name tokens).
    entries = []
    for token in lineup.split():
        if token in _MLB_SLOTS:
            entries.append((token, []))
        elif entries:
            entries[-1][1].append(token)
        # Tokens before the first position label belong to no slot; skip them.

    filled = dict.fromkeys(_MLB_SLOTS, 0)
    for label, name_tokens in entries:
        columns = _MLB_SLOTS[label]
        slot_index = filled[label]
        if slot_index >= len(columns):
            continue  # more entries for this label than roster slots
        filled[label] = slot_index + 1
        if name_tokens:
            slots[columns[slot_index]] = ' '.join(name_tokens)
    return slots


def load_contest_file(upload, sport):
    """Load a contest export and split it into lineup and player tables.

    Args:
        upload: ``None``, a pandas DataFrame, or a file-like object with a
            ``name`` attribute ending in ``.csv``, ``.xls`` or ``.xlsx``
            (matched case-insensitively). Any other object without a string
            ``name`` is used as the table directly, as before.
        sport: Sport identifier. Only ``'MLB'`` is supported; any other value
            is reported through ``st.error`` and ``None`` is returned.

    Returns:
        ``None`` when ``upload`` is ``None`` or when loading fails (the reason
        is shown via ``st.error``). Otherwise a 7-tuple:

        * ``cleaned_df`` - one row per input row with columns ``BaseName``,
          ``EntryCount``, ``SP1``, ``SP2``, ``C``, ``1B``, ``2B``, ``3B``,
          ``SS``, ``OF1``, ``OF2``, ``OF3``; empty slots hold ``'-1'``.
        * ``ownership_df`` - ``Player`` / ``Own`` (``Own`` as float).
        * ``fpts_df`` - ``Player`` / ``FPTS``.
        * ``salary_df`` - ``Player`` / ``Salary``.
        * ``team_df`` - ``Player`` / ``Team``.
        * ``pos_df`` - ``Player`` / ``Pos``.
        * ``entry_list`` - sorted list of the distinct non-null ``BaseName``
          values.
    """
    if upload is None:
        return None

    try:
        # --- Obtain the raw table -------------------------------------------
        if isinstance(upload, pd.DataFrame):
            raw_df = upload
        else:
            filename = getattr(upload, 'name', None)
            if not isinstance(filename, str):
                # Historical fallback: treat a nameless object as the table.
                raw_df = upload
            else:
                lowered = filename.lower()
                if lowered.endswith('.csv'):
                    raw_df = pd.read_csv(upload)
                elif lowered.endswith(('.xls', '.xlsx')):
                    raw_df = pd.read_excel(upload)
                else:
                    st.error('Please upload either a CSV or Excel file')
                    return None

        # The output layout below is MLB-specific; fail with a clear message
        # instead of a KeyError on missing roster columns.
        if sport != 'MLB':
            raise ValueError(
                f"Unsupported sport {sport!r}; only 'MLB' lineups can be parsed"
            )

        # Copy so the columns added below never write into the caller's frame.
        df = raw_df[_REQUIRED_COLUMNS].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # "Name (3/20)" -> BaseName "Name", EntryCount "3/20"; entries without
        # a "(n/m)" suffix default to "1/1".
        df['BaseName'] = df['EntryName'].str.replace(
            r'\s*\(\d+/\d+\)$', '', regex=True
        )
        df['EntryCount'] = (
            df['EntryName']
            .str.extract(r'\((\d+/\d+)\)', expand=False)
            .fillna('1/1')
        )

        # --- Parse every lineup string into roster slots ---------------------
        parsed_lineups = [_parse_mlb_lineup(lineup) for lineup in df['Lineup']]
        has_any_player = any(
            name != _EMPTY_SLOT
            for slots in parsed_lineups
            for name in slots.values()
        )
        if not has_any_player:
            st.error('No valid lineups found in the uploaded file')
            return None

        # --- Player-level tables ---------------------------------------------
        try:
            # Text ownership values: drop the '%' sign before converting.
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except AttributeError:
            # Column is already numeric, so it has no ``.str`` accessor.
            df['Own'] = df['Own'].astype(float)

        ownership_df = df[['Player', 'Own']]
        fpts_df = df[['Player', 'FPTS']]
        salary_df = df[['Player', 'Salary']]
        team_df = df[['Player', 'Team']]
        pos_df = df[['Player', 'Pos']]

        # --- Assemble the cleaned lineup table -------------------------------
        cleaned_df = df[['BaseName', 'EntryCount']].copy()
        for column in _MLB_OUTPUT_COLUMNS:
            # Positional assignment: independent of the frame's index labels.
            cleaned_df[column] = [slots[column] for slots in parsed_lineups]

        # Drop NaN before sorting; NaN mixed with str cannot be ordered.
        entry_list = sorted(set(df['BaseName'].dropna()))

        return (
            cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df,
            entry_list,
        )
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        st.error(f'Error loading file: {str(e)}')
        return None