DFS_Portfolio_Manager / global_func /stratification_function.py
James McCool
Refactor stratification logic to ensure proper selection of indices by adding break statements for clarity and control flow.
3ca2ebf
import pandas as pd
import numpy as np
def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude_cols: list, sport: str, sorting_choice: str, low_threshold: float, high_threshold: float) -> pd.DataFrame:
    """Select up to ``lineup_target`` rows spread evenly across a score range.

    The range runs from the ``low_threshold`` percentile to the
    ``high_threshold`` percentile of ``portfolio[sorting_choice]``. Evenly
    spaced target scores are laid across that range and, for each target, the
    row whose score is closest is selected (each row at most once). Because
    several targets can resolve to the same row, the number of targets is
    widened for up to nine passes until enough distinct rows are found.

    Args:
        portfolio: Frame containing the ``sorting_choice`` column.
        lineup_target: Desired number of rows in the result.
        exclude_cols: Unused by this function; kept for call compatibility.
        sport: Unused by this function; kept for call compatibility.
        sorting_choice: Name of the column to stratify on.
        low_threshold: Lower bound of the range, as a percentile in [0, 100].
        high_threshold: Upper bound of the range, as a percentile in [0, 100].

    Returns:
        The selected rows of ``portfolio``, sorted by ``sorting_choice`` in
        descending order. Fewer than ``lineup_target`` rows are returned when
        not enough distinct rows can be found, and an empty frame is returned
        when the score column has no non-missing values.

    Raises:
        KeyError: If ``sorting_choice`` is not a column of ``portfolio``.
        ValueError: If ``lineup_target`` is negative or a threshold lies
            outside [0, 100] (raised by NumPy / pandas).
    """
    scores = portfolio[sorting_choice]

    # Nothing to pick from: idxmin() cannot resolve a closest row.
    if scores.count() == 0:
        return portfolio.iloc[0:0]

    score_floor = scores.quantile(low_threshold / 100)
    score_ceiling = scores.quantile(high_threshold / 100)

    target_count = lineup_target
    selected_indices = []
    for attempt in range(1, 10):
        targets = np.linspace(score_floor, score_ceiling, target_count)
        selected_indices = _closest_unique_indices(scores, targets)
        if len(selected_indices) >= lineup_target:
            break
        # Too many targets collapsed onto the same rows; widen the grid.
        target_count += 5 * attempt

    if len(selected_indices) > lineup_target:
        # Thin the surplus evenly rather than keeping only the first rows,
        # which would discard the upper end of the score range. The step
        # between positions exceeds 1 here, so rounded positions are distinct.
        keep = np.round(
            np.linspace(0, len(selected_indices) - 1, lineup_target)
        ).astype(int)
        selected_indices = [selected_indices[pos] for pos in keep]

    # NOTE(review): the result is ordered descending for every sorting_choice,
    # including 'Finish_percentile' -- confirm that is the intended ordering.
    return portfolio.loc[selected_indices].sort_values(by=sorting_choice, ascending=False)


def _closest_unique_indices(scores: pd.Series, targets) -> list:
    """Return, in target order, the distinct index labels closest to each target."""
    seen = set()
    ordered = []
    for target in targets:
        closest_idx = (scores - target).abs().idxmin()
        if closest_idx not in seen:
            seen.add(closest_idx)
            ordered.append(closest_idx)
    return ordered