Spaces:
Build error
Build error
| import os | |
| import pandas as pd | |
| import numpy as np | |
| from helper import ChampionConverter, process_kda_perfect | |
def _as_float(value, default=0.0):
    """Return ``value`` converted to float, or ``default`` when it is missing (NaN/None)."""
    return float(value) if pd.notna(value) else default


def _recent_score(row, champion):
    """Score ``champion`` from the ``most_champ_1..3`` slots of ``row`` (0 if absent)."""
    for slot in range(1, 4):
        if row.get(f'most_champ_{slot}') == champion:
            wr = _as_float(row[f'WR_{slot}'])
            kda = _as_float(row[f'KDA_{slot}'])
            wins = _as_float(row[f'W_{slot}'])
            losses = _as_float(row[f'L_{slot}'])
            games = wins + losses
            total_games = _as_float(row['total_games'], 20)

            # KDA is capped at 10 so it contributes at most 0.3.
            performance_quality = (
                (wr * 0.7) +
                (min(kda, 10) / 10 * 0.3)
            )
            games_factor = min(games / 5, 1.0)
            # A total of 0 games would otherwise raise ZeroDivisionError and
            # abort the whole feature build.
            games_ratio = games / total_games if total_games != 0 else 0.0

            # With at least 5 games, push clear outliers further apart.
            if games >= 5:
                if performance_quality < 0.4:
                    performance_quality *= 0.8
                elif performance_quality > 0.7:
                    performance_quality *= 1.2

            return (
                performance_quality * (0.7 + (0.3 * games_factor))
            ) * (1 + games_ratio * 0.2)
    return 0


def _weekly_score(row, champion):
    """Score ``champion`` from the ``7d_champ_1..3`` slots of ``row`` (0 if absent or no games)."""
    for slot in range(1, 4):
        if row.get(f'7d_champ_{slot}') == champion:
            weekly_wins = _as_float(row[f'7d_W_{slot}'])
            weekly_games = _as_float(row[f'7d_total_{slot}'])
            weekly_wr = _as_float(row[f'7d_WR_{slot}'])
            profile_wr = _as_float(row['win_rate'], 0.5)

            if weekly_games <= 0:
                return 0

            # Relative change of the weekly win rate against the profile win
            # rate, clamped to [-1, 1] below.
            wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
            weekly_intensity = min(weekly_games / 10, 1.0)
            win_ratio = weekly_wins / weekly_games

            weekly_performance = (
                (weekly_wr * 0.4) +
                (max(min(wr_trend, 1), -1) * 0.2) +
                (weekly_intensity * 0.2) +
                (win_ratio * 0.2)
            )
            if weekly_games >= 5:
                if weekly_performance < 0.4:
                    weekly_performance *= 0.8
                elif weekly_performance > 0.7:
                    weekly_performance *= 1.2

            return weekly_performance * (
                0.7 + (0.3 * min(weekly_games / 5, 1.0))
            )
    return 0


def _season_score(row, champion):
    """Score ``champion`` from the ``season_champ_1..7`` slots of ``row`` (0 if absent)."""
    for slot in range(1, 8):
        if row.get(f'season_champ_{slot}') == champion:
            wr = _as_float(row[f'wr_ssn_{slot}'])
            games = _as_float(row[f'games_ssn_{slot}'])
            kda = _as_float(row[f'kda_ssn_{slot}'])
            return (
                wr * 0.7 +
                (kda / 10) * 0.3
            ) * (games / 100)
    return 0


def _mastery_score(row, champion):
    """Score ``champion`` from the ``mastery_champ_1..16`` slots of ``row`` (0 if absent)."""
    for slot in range(1, 17):
        if row.get(f'mastery_champ_{slot}') == champion:
            return _as_float(row[f'm_lv_{slot}']) / 7
    return 0


def create_champion_features(merged_player_stats=None, meta_stats=None, weekly_meta=None, debug=None, consider_team_comp=True, test_mode=False):
    """
    Build one score column per champion for every row of the player data.

    For each row and each champion known to ``ChampionConverter``, five
    partial scores (recent, weekly, meta, season, mastery) are combined with
    fixed weights, scaled by a tier penalty, and optionally zeroed or reduced
    based on the team and opponent champion columns. The resulting columns
    are named after the champions and appended to the original columns.

    Args:
        merged_player_stats: Player DataFrame. Loaded from
            ``util/data/player_stats_merged.csv`` when None. Must contain a
            ``champion`` column.
        meta_stats: DataFrame with ``champion``, ``tier`` and
            ``counter1..3`` columns. Loaded from ``util/data/meta_stats.csv``
            when None.
        weekly_meta: DataFrame with ``champion``, ``rank``, ``games``,
            ``pick`` and ``ban`` columns. Loaded from
            ``util/data/weekly_meta_stats.csv`` when None.
        debug: Optional champion name; when set, per-row score details for
            that champion are collected and printed.
        consider_team_comp: When True, a champion already present in
            ``team_champ1..4`` or ``opp_champ1..5`` scores 0, and each
            opponent listed as a counter reduces the score by 10%.
        test_mode: When True, only the first 100 rows are processed.

    Returns:
        The feature DataFrame (``champion`` first, then the remaining
        original columns, then the champion score columns), or None if any
        exception occurred. The frame is also written to
        ``util/data/feature_eng_stats.csv`` after every batch and at the end.
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # NOTE(review): applied to caller-supplied frames as well, on the
        # assumption that process_kda_perfect is needed for the float()
        # conversion of the KDA columns below - confirm against helper.
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        # The per-batch saves and the final save share one file; create the
        # directory up front so the first batch save cannot fail on it.
        output_file = os.path.join("util", "data", "feature_eng_stats.csv")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        debug_data = []
        original_columns = merged_player_stats.columns.tolist()

        # Column name -> positional array; starts as a copy of the input.
        feature_dict = {
            col: merged_player_stats[col].values.copy()
            for col in merged_player_stats.columns
        }

        converter = ChampionConverter()
        all_champions = converter.champions

        # Multipliers applied when a champion's best (lowest) tier is 3-5.
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # champion -> list of tiers, and champion -> list of counters.
        tier_map = {}
        counter_map = {}
        for _, meta_row in meta_stats.iterrows():
            champ = meta_row['champion']
            tier = meta_row['tier']
            if pd.notna(tier):
                tier_map.setdefault(champ, []).append(tier)
            if pd.notna(meta_row['counter1']):
                counters = [meta_row['counter1'], meta_row['counter2'], meta_row['counter3']]
                counter_map.setdefault(champ, []).extend(
                    c for c in counters if pd.notna(c)
                )
        # A champion may appear on several rows; keep each counter once.
        for champ, counters in counter_map.items():
            counter_map[champ] = list(set(counters))

        # Move 'champion' column to the first position
        cols = ['champion'] + [col for col in merged_player_stats if col != 'champion']
        merged_player_stats = merged_player_stats[cols]

        weights = {
            'recent': 0.3,    # Last 20 games
            'weekly': 0.4,    # Last 7 days
            'meta': 0.2,      # Only from weekly_stats
            'season': 0.06,   # Current season
            'mastery': 0.04   # All-time mastery
        }

        # The meta score depends only on the champion, so compute it once
        # here instead of once per (row, champion) pair.
        meta_champions = weekly_meta['champion'].values
        meta_scores = {}
        for champion in all_champions:
            if champion in meta_champions:
                weekly_row = weekly_meta[weekly_meta['champion'] == champion].iloc[0]
                meta_scores[champion] = (
                    1 / weekly_row['rank'] * 0.5 +
                    weekly_row['games'] / 100 * 0.3 +
                    weekly_row['pick'] * 0.1 -
                    weekly_row['ban'] * 0.1
                )

        team_cols = [f'team_champ{i}' for i in range(1, 5)]
        opp_cols = [f'opp_champ{i}' for i in range(1, 6)]

        # Process rows in batches
        batch_size = 100
        total_rows = len(merged_player_stats)
        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start/total_rows*100:.2f}% complete)")

            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            for batch_idx, (_, row) in enumerate(batch_rows.iterrows()):
                # feature_dict arrays are positional, so address them by
                # position rather than by the DataFrame index label.
                row_pos = batch_start + batch_idx

                for champion in all_champions:
                    recent_score = _recent_score(row, champion)
                    weekly_score = _weekly_score(row, champion)
                    meta_score = meta_scores.get(champion, 0)
                    season_score = _season_score(row, champion)
                    mastery_score = _mastery_score(row, champion)

                    base_score = (
                        recent_score * weights['recent'] +
                        weekly_score * weights['weekly'] +
                        meta_score * weights['meta'] +
                        season_score * weights['season'] +
                        mastery_score * weights['mastery']
                    )
                    # Kept for the debug output only.
                    base_score_before_penalty = base_score

                    # Apply tier penalties
                    if champion in tier_map:
                        highest_tier = min(tier_map[champion])
                        if highest_tier in tier_penalties:
                            base_score *= tier_penalties[highest_tier]

                    counter_penalty = 0
                    countered_by = counter_map.get(champion, ())
                    if consider_team_comp:
                        # A champion already on the team cannot be picked.
                        for team_col in team_cols:
                            if team_col in row and pd.notna(row[team_col]):
                                if row[team_col] == champion:
                                    base_score = 0
                                    break

                        # Only check opponents if base_score isn't already 0
                        if base_score != 0:
                            for opp_col in opp_cols:
                                if opp_col in row and pd.notna(row[opp_col]):
                                    opp_champ = row[opp_col]
                                    if opp_champ == champion:
                                        base_score = 0
                                        break
                                    if opp_champ in countered_by:
                                        counter_penalty += 0.1
                            if counter_penalty > 0:
                                base_score = base_score * (1 - counter_penalty)

                    # Penalties above 100% would go negative; clamp at 0.
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    if debug == champion:
                        counter_list = []
                        for opp_col in opp_cols:
                            if opp_col in row and pd.notna(row[opp_col]):
                                if row[opp_col] in countered_by:
                                    counter_list.append(row[opp_col])

                        # NOTE(review): 'final_score_actual' reads the column
                        # of the row's own champion before the current batch
                        # has been written into feature_dict - confirm that
                        # this is the intended value.
                        debug_row = {
                            'champion': row['champion'],
                            'recent_score': recent_score,
                            'weekly_score': weekly_score,
                            'meta_score': meta_score,
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            'final_score_actual': feature_dict[row['champion']][row_pos] if row['champion'] in feature_dict else base_score,
                            'counter_list_debug': counter_list
                        }
                        debug_data.append(debug_row)

            # Update feature_dict with batch results
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Save progress after each batch (the same file is overwritten).
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},  # Original columns first
                **{champion: feature_dict[champion] for champion in all_champions}  # Then champion columns
            })
            batch_save_file = output_file
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

            # NOTE(review): assumed to be printed once per batch - confirm.
            if debug:
                # .get: a debug champion without counter data must not abort the run.
                print(f"{debug} is countered by: {counter_map.get(debug, [])}")

        # Process debug data if any
        if debug:
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        # Reuse the input's index so the axis=1 concat below pairs rows by
        # position even when the caller passed a non-default index.
        champion_features = pd.DataFrame(feature_dict, index=merged_player_stats.index)

        # Combine the original data with the new champion columns only.
        features = pd.concat([
            merged_player_stats[original_columns],
            champion_features[[col for col in champion_features.columns if col not in original_columns]]
        ], axis=1)

        # Move the champion column to be the first column
        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        features.to_csv(output_file, index=False)
        print(f"Saved features to {output_file}")

        return features

    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"\nError occurred: {str(e)}")
        return None
if __name__ == "__main__":
    # Script entry point: run the feature build in test mode on a dated
    # snapshot of the merged player stats, with debug output for Viktor.
    try:
        input_file = os.path.join("util", "data", "player_stats_merged_2025-01-05.csv")
        features = create_champion_features(
            merged_player_stats=pd.read_csv(input_file),
            debug='Viktor',
            consider_team_comp=True,
            test_mode=True,
        )

        # create_champion_features returns None when it caught an error.
        if features is None:
            print("\nProcessing failed or was interrupted.")
        else:
            print("\nProcessing completed successfully!")
            print(f"Generated features for {len(features)} rows")
    except Exception as e:
        print(f"\nFatal error: {e}")