File size: 3,771 Bytes
68208b0
 
 
 
e512b33
68208b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d42af28
68208b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d23b397
68208b0
d23b397
68208b0
 
 
 
 
 
 
 
 
d23b397
 
68208b0
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd

def analyze_player_combos(display_frame, excluded_cols, combo_size=2):
    """
    Analyze the most common player combinations in a DataFrame of lineups.

    Each row is treated as one lineup; every column not listed in
    ``excluded_cols`` is assumed to hold a player name. Duplicate players
    within a row are collapsed, and blank/NaN cells are ignored.

    Args:
        display_frame: DataFrame containing lineup data. Must include the
            stat columns 'median', 'Own', 'Dupes', 'Finish_percentile',
            'Lineup Edge', and 'Diversity' for the per-combo averages.
        excluded_cols: List of columns to exclude from player extraction.
        combo_size: Size of combinations to analyze (any size >= 1;
            previously only 2 and 3 were supported — other sizes silently
            returned an empty result).

    Returns:
        DataFrame with one row per combo (up to the top 100 by lineup
        count), with columns: 'Combo', 'Lineup Count', 'Exposure',
        'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %',
        'Avg Lineup Edge', 'Avg Diversity'.
    """
    # Columns holding player names.
    player_columns = [col for col in display_frame.columns if col not in excluded_cols]

    # Per-row set of unique, non-blank player names (positional order
    # matches the frame's row order, so indices line up with .iloc).
    player_sets = []
    for _, row in display_frame.iterrows():
        players = set()
        for col in player_columns:
            player = row[col]
            if pd.notna(player) and str(player).strip() != '':
                players.add(str(player))
        player_sets.append(players)

    # Count combos and remember which rows each combo came from, so the
    # averaging step below doesn't have to re-scan every row per combo
    # (the original did an O(top_combos x rows) membership re-check).
    combo_counter = Counter()
    combo_rows = {}  # combo tuple -> list of positional row indices
    for row_idx, player_set in enumerate(player_sets):
        if len(player_set) < combo_size:
            continue
        # sorted() gives a canonical tuple ordering for consistent hashing;
        # itertools.combinations generalizes the old hand-rolled 2/3 loops.
        for combo in combinations(sorted(player_set), combo_size):
            combo_counter[combo] += 1
            combo_rows.setdefault(combo, []).append(row_idx)

    combo_stats = []
    total_lineups = len(display_frame)

    for combo, count in combo_counter.most_common(100):  # top 100 combos
        exposure = count / total_lineups

        # Rows containing this combo were recorded during counting.
        rows = combo_rows.get(combo, [])

        if rows:
            subset = display_frame.iloc[rows]
            avg_median = subset['median'].mean()
            avg_own = subset['Own'].mean()
            avg_dupes = subset['Dupes'].mean()
            avg_finish = subset['Finish_percentile'].mean()
            avg_edge = subset['Lineup Edge'].mean()
            avg_diversity = subset['Diversity'].mean()
        else:
            # Defensive only: a counted combo always matches >= 1 row.
            avg_median = avg_own = avg_dupes = avg_finish = avg_edge = avg_diversity = 0

        combo_stats.append({
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': exposure,
            'Avg Median': avg_median,
            'Avg Own': avg_own,
            'Avg Dupes': avg_dupes,
            'Avg Finish %': avg_finish,
            'Avg Lineup Edge': avg_edge,
            'Avg Diversity': avg_diversity
        })

    return pd.DataFrame(combo_stats)