Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
579a535
1
Parent(s):
7eef51a
Enhance DataFrame optimization and vectorized calculations in app.py. Refine category conversion logic to exclude specific player columns and improve memory efficiency. Introduce a safe mapping function to handle NaN values for salary, median, and ownership calculations, ensuring better performance across various scenarios.
Browse files
app.py
CHANGED
|
@@ -133,8 +133,10 @@ def optimize_dataframe_dtypes(df):
|
|
| 133 |
"""Optimize DataFrame data types for memory efficiency"""
|
| 134 |
for col in df.columns:
|
| 135 |
if df[col].dtype == 'object':
|
| 136 |
-
#
|
| 137 |
-
|
|
|
|
|
|
|
| 138 |
df[col] = df[col].astype('category')
|
| 139 |
return df
|
| 140 |
|
|
@@ -206,53 +208,77 @@ def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_v
|
|
| 206 |
|
| 207 |
def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
| 208 |
"""Vectorized salary calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
| 210 |
# Captain + flex calculations
|
| 211 |
-
cpt_salaries = df.iloc[:, 0]
|
| 212 |
-
flex_salaries = sum(df.iloc[:, i]
|
| 213 |
return cpt_salaries + flex_salaries
|
| 214 |
elif type_var == 'Showdown':
|
| 215 |
if sport_var == 'GOLF':
|
| 216 |
-
return sum(df[col]
|
| 217 |
else:
|
| 218 |
-
cpt_salaries = df.iloc[:, 0]
|
| 219 |
-
flex_salaries = sum(df.iloc[:, i]
|
| 220 |
return cpt_salaries + flex_salaries
|
| 221 |
else:
|
| 222 |
# Classic non-CS2/LOL
|
| 223 |
-
return sum(df[col]
|
| 224 |
|
| 225 |
def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
| 226 |
"""Vectorized median calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
| 228 |
-
cpt_medians = df.iloc[:, 0]
|
| 229 |
-
flex_medians = sum(df.iloc[:, i]
|
| 230 |
return cpt_medians + flex_medians
|
| 231 |
elif type_var == 'Showdown':
|
| 232 |
if sport_var == 'GOLF':
|
| 233 |
-
return sum(df[col]
|
| 234 |
else:
|
| 235 |
-
cpt_medians = df.iloc[:, 0]
|
| 236 |
-
flex_medians = sum(df.iloc[:, i]
|
| 237 |
return cpt_medians + flex_medians
|
| 238 |
else:
|
| 239 |
-
return sum(df[col]
|
| 240 |
|
| 241 |
def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
| 242 |
"""Vectorized ownership calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
| 244 |
-
cpt_own = df.iloc[:, 0]
|
| 245 |
-
flex_own = sum(df.iloc[:, i]
|
| 246 |
return cpt_own + flex_own
|
| 247 |
elif type_var == 'Showdown':
|
| 248 |
if sport_var == 'GOLF':
|
| 249 |
-
return sum(df[col]
|
| 250 |
else:
|
| 251 |
-
cpt_own = df.iloc[:, 0]
|
| 252 |
-
flex_own = sum(df.iloc[:, i]
|
| 253 |
return cpt_own + flex_own
|
| 254 |
else:
|
| 255 |
-
return sum(df[col]
|
| 256 |
|
| 257 |
def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
|
| 258 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
|
|
|
| 133 |
"""Optimize DataFrame data types for memory efficiency"""
|
| 134 |
for col in df.columns:
|
| 135 |
if df[col].dtype == 'object':
|
| 136 |
+
# Only convert to category if there are many duplicates AND it's not a player column
|
| 137 |
+
# Player columns need to stay as object for mapping operations
|
| 138 |
+
excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
|
| 139 |
+
if col not in excluded_cols and df[col].nunique() / len(df) < 0.3:
|
| 140 |
df[col] = df[col].astype('category')
|
| 141 |
return df
|
| 142 |
|
|
|
|
| 208 |
|
| 209 |
def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized salary calculation to replace expensive apply operations.

    Each player column is translated through a lookup in ``map_dict`` and the
    translated columns are added together row by row.

    Args:
        df: DataFrame holding one lineup per row.
        player_columns: Names of the player columns. In the captain layouts
            only its length is used; the columns themselves are read by
            position (see the note in the body).
        map_dict: Must provide ``'salary_map'`` and, for captain layouts,
            ``'cpt_salary_map'``. Values are expected to be numeric
            (presumably player name -> salary; confirm against the caller).
        type_var: ``'Classic'`` or ``'Showdown'``; any other value is handled
            like a plain Classic contest.
        sport_var: Sport code. ``'CS2'``/``'LOL'`` switch Classic to the
            captain layout, ``'GOLF'`` switches Showdown to the flat layout.

    Returns:
        A numeric Series aligned with ``df.index``. If no column is summed in
        the flat layout (empty ``player_columns``) the builtin ``sum`` returns
        the int ``0``.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    def safe_map_and_fill(series, mapping, fill_value=0):
        """Map values to numbers; unmapped or non-numeric entries become fill_value."""
        # Cast to object BEFORE mapping. Mapping a categorical column can
        # yield another categorical, and casting afterwards left object-dtype
        # results whose sum was no longer a numeric column.
        mapped = series.astype(object).map(mapping)
        return pd.to_numeric(mapped, errors='coerce').fillna(fill_value)

    # Classic CS2/LOL and every Showdown sport except GOLF use the same
    # captain + flex arithmetic, so the two branches are merged.
    captain_layout = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )

    if captain_layout:
        # NOTE: columns are read by position, so this assumes the player
        # columns are the leading columns of df, captain first.
        cpt_salaries = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_salary_map'])
        flex_salaries = sum(
            safe_map_and_fill(df.iloc[:, i], map_dict['salary_map'])
            for i in range(1, len(player_columns))
        )
        return cpt_salaries + flex_salaries

    # Flat layout (Showdown GOLF, Classic non-CS2/LOL): every slot uses the
    # regular salary map.
    return sum(
        safe_map_and_fill(df[col], map_dict['salary_map'])
        for col in player_columns
    )
|
| 234 |
|
| 235 |
def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized median calculation to replace expensive apply operations.

    Each player column is translated through a projection lookup in
    ``map_dict`` and the translated columns are added together row by row.

    Args:
        df: DataFrame holding one lineup per row.
        player_columns: Names of the player columns. In the captain layouts
            only its length is used; the columns themselves are read by
            position (see the note in the body).
        map_dict: Must provide ``'proj_map'`` and, for captain layouts,
            ``'cpt_proj_map'``. Values are expected to be numeric
            (presumably player name -> median projection; confirm against
            the caller).
        type_var: ``'Classic'`` or ``'Showdown'``; any other value is handled
            like a plain Classic contest.
        sport_var: Sport code. ``'CS2'``/``'LOL'`` switch Classic to the
            captain layout, ``'GOLF'`` switches Showdown to the flat layout.

    Returns:
        A numeric Series aligned with ``df.index``. If no column is summed in
        the flat layout (empty ``player_columns``) the builtin ``sum`` returns
        the int ``0``.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    def safe_map_and_fill(series, mapping, fill_value=0):
        """Map values to numbers; unmapped or non-numeric entries become fill_value."""
        # Cast to object BEFORE mapping. Mapping a categorical column can
        # yield another categorical, and casting afterwards left object-dtype
        # results whose sum was no longer a numeric column.
        mapped = series.astype(object).map(mapping)
        return pd.to_numeric(mapped, errors='coerce').fillna(fill_value)

    # Classic CS2/LOL and every Showdown sport except GOLF use the same
    # captain + flex arithmetic, so the two branches are merged.
    captain_layout = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )

    if captain_layout:
        # NOTE: columns are read by position, so this assumes the player
        # columns are the leading columns of df, captain first.
        cpt_medians = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_proj_map'])
        flex_medians = sum(
            safe_map_and_fill(df.iloc[:, i], map_dict['proj_map'])
            for i in range(1, len(player_columns))
        )
        return cpt_medians + flex_medians

    # Flat layout (Showdown GOLF, Classic non-CS2/LOL): every slot uses the
    # regular projection map.
    return sum(
        safe_map_and_fill(df[col], map_dict['proj_map'])
        for col in player_columns
    )
|
| 258 |
|
| 259 |
def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized ownership calculation to replace expensive apply operations.

    Each player column is translated through an ownership lookup in
    ``map_dict`` and the translated columns are added together row by row.

    Args:
        df: DataFrame holding one lineup per row.
        player_columns: Names of the player columns. In the captain layouts
            only its length is used; the columns themselves are read by
            position (see the note in the body).
        map_dict: Must provide ``'own_map'`` and, for captain layouts,
            ``'cpt_own_map'``. Values are expected to be numeric
            (presumably player name -> ownership; confirm against the caller).
        type_var: ``'Classic'`` or ``'Showdown'``; any other value is handled
            like a plain Classic contest.
        sport_var: Sport code. ``'CS2'``/``'LOL'`` switch Classic to the
            captain layout, ``'GOLF'`` switches Showdown to the flat layout.

    Returns:
        A numeric Series aligned with ``df.index``. If no column is summed in
        the flat layout (empty ``player_columns``) the builtin ``sum`` returns
        the int ``0``.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    def safe_map_and_fill(series, mapping, fill_value=0):
        """Map values to numbers; unmapped or non-numeric entries become fill_value."""
        # Cast to object BEFORE mapping. Mapping a categorical column can
        # yield another categorical, and casting afterwards left object-dtype
        # results whose sum was no longer a numeric column.
        mapped = series.astype(object).map(mapping)
        return pd.to_numeric(mapped, errors='coerce').fillna(fill_value)

    # Classic CS2/LOL and every Showdown sport except GOLF use the same
    # captain + flex arithmetic, so the two branches are merged.
    captain_layout = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )

    if captain_layout:
        # NOTE: columns are read by position, so this assumes the player
        # columns are the leading columns of df, captain first.
        cpt_own = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_own_map'])
        flex_own = sum(
            safe_map_and_fill(df.iloc[:, i], map_dict['own_map'])
            for i in range(1, len(player_columns))
        )
        return cpt_own + flex_own

    # Flat layout (Showdown GOLF, Classic non-CS2/LOL): every slot uses the
    # regular ownership map.
    return sum(
        safe_map_and_fill(df[col], map_dict['own_map'])
        for col in player_columns
    )
|
| 282 |
|
| 283 |
def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
|
| 284 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|