# Spaces: Running on CPU Upgrade
import math | |
import numpy as np | |
import pandas as pd | |
import plotly.express as px | |
# 1 | |
def _pair_count_table(frame, models):
    """Count rows of ``frame`` per (model_a, model_b) pair on a square ``models`` grid."""
    if frame.empty:
        # Nothing to count: return an all-zero grid without calling pivot_table.
        return pd.DataFrame(0, index=models, columns=models)
    counts = pd.pivot_table(frame, index="model_a", columns="model_b", aggfunc="size", fill_value=0)
    # Pairs that never occur in this subset must count as 0, not be missing.
    return counts.reindex(index=models, columns=models, fill_value=0)


def compute_pairwise_win_fraction(battles):
    """Return the fraction of battles each model wins against each other model.

    Args:
        battles: DataFrame with columns ``model_a``, ``model_b`` and ``win``.
            A ``win`` value of ``"model_a"`` or ``"model_b"`` names the winning
            side; any other value counts as a battle won by neither side.

    Returns:
        A square DataFrame where entry (row, col) is the number of times ``row``
        beat ``col`` (on either side) divided by the number of battles between
        the two. Pairs that never met (including the diagonal) are NaN. Rows
        and columns are ordered by descending mean win fraction.
    """
    # Every table is built on the same model grid. Otherwise adding the tables
    # would align on mismatched labels and turn real win counts into NaN
    # whenever a pair is absent from one of the subsets.
    models = pd.Index(
        pd.concat([battles["model_a"], battles["model_b"]]).dropna().unique()
    ).sort_values()

    # Times each model wins as Model A
    a_win_ptbl = _pair_count_table(battles[battles["win"] == "model_a"], models)
    # Times each model wins as Model B
    b_win_ptbl = _pair_count_table(battles[battles["win"] == "model_b"], models)
    # Number of A-B battles, regardless of outcome
    num_battles_ptbl = _pair_count_table(battles, models)

    # Wins of the row model over the column model, from either side, divided by
    # the total battles between the two. 0 / 0 yields NaN for unseen pairs.
    row_beats_col_freq = (a_win_ptbl + b_win_ptbl.T) / (num_battles_ptbl + num_battles_ptbl.T)

    # Arrange ordering according to proportion of wins (NaN entries are skipped)
    prop_wins = row_beats_col_freq.mean(axis=1).sort_values(ascending=False)
    model_names = list(prop_wins.keys())
    return row_beats_col_freq.loc[model_names, model_names]
def visualize_pairwise_win_fraction(battles, title):
    """Draw the pairwise win-fraction matrix as an annotated heatmap.

    Args:
        battles: battle records, passed unchanged to
            ``compute_pairwise_win_fraction``.
        title: title of the figure.

    Returns:
        The plotly figure; rows are labelled "Model A", columns "Model B".
    """
    win_matrix = compute_pairwise_win_fraction(battles)

    heatmap = px.imshow(
        win_matrix,
        title=title,
        text_auto=".2f",
        color_continuous_scale="RdBu",
    )

    # Column labels go on top; the title is placed near the bottom, centred.
    layout_options = {
        "xaxis_title": "Model B",
        "yaxis_title": "Model A",
        "xaxis_side": "top",
        "title_y": 0.07,
        "title_x": 0.5,
    }
    heatmap.update_layout(**layout_options)

    hover_text = "Model A: %{y}<br>Model B: %{x}<br>Fraction of A Wins: %{z}<extra></extra>"
    heatmap.update_traces(hovertemplate=hover_text)
    return heatmap
# 2 | |
def switch_model_a_b(df):
    """Return a copy of ``df`` with each battle's sides swapped with probability 0.5.

    For every swapped row, ``model_a`` and ``model_b`` are exchanged and a
    ``win`` value of ``"model_a"`` / ``"model_b"`` is flipped to the other
    side; any other ``win`` value is left untouched. ``df`` is not modified.

    Args:
        df: DataFrame with columns ``model_a``, ``model_b`` and ``win``.

    Returns:
        A new DataFrame with the same index and the swaps applied.
    """
    # One coin flip per row, drawn from numpy's global random state.
    # The mask is positional, so every row is decided independently even when
    # the index contains duplicate labels (label-based writes would rewrite
    # all rows sharing a label at once).
    flip = np.random.rand(len(df)) < 0.5

    model_a = df["model_a"].to_numpy()
    model_b = df["model_b"].to_numpy()
    win = df["win"].to_numpy()

    # The winner label as it reads after the two sides trade places.
    flipped_win = np.where(
        win == "model_a",
        "model_b",
        np.where(win == "model_b", "model_a", win),
    )

    df_switch = df.copy()
    df_switch["model_a"] = np.where(flip, model_b, model_a)
    df_switch["model_b"] = np.where(flip, model_a, model_b)
    df_switch["win"] = np.where(flip, flipped_win, win)
    return df_switch
def visualize_battle_count(battles, title):
    """Draw a heatmap of how many battles each pair of models has played.

    Args:
        battles: DataFrame with columns ``model_a`` and ``model_b``.
        title: title of the figure.

    Returns:
        The plotly figure. The matrix is symmetric (side assignment is
        ignored) and ordered by descending total battle count per model.
    """
    ptbl = pd.pivot_table(battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0)

    # A model that only ever appears on one side is missing from the other
    # axis; put both axes on the same labels so that adding the transpose does
    # not align those models to NaN.
    models = ptbl.index.union(ptbl.columns)
    ptbl = ptbl.reindex(index=models, columns=models, fill_value=0)

    # Count a pair regardless of which model was on which side.
    battle_counts = ptbl + ptbl.T
    ordering = battle_counts.sum().sort_values(ascending=False).index
    fig = px.imshow(battle_counts.loc[ordering, ordering], title=title, text_auto=True, width=600)
    fig.update_layout(
        xaxis_title="Model B",
        yaxis_title="Model A",
        xaxis_side="top",
        title_y=0.07,
        title_x=0.5,
    )
    fig.update_traces(hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Count: %{z}<extra></extra>")
    return fig
# 3 | |
def get_bootstrap_result(battles, func_compute_elo, num_round):
    """Collect the output of ``func_compute_elo`` over bootstrap resamples.

    Args:
        battles: DataFrame of battle records to resample.
        func_compute_elo: callable applied to each resample; its result
            becomes one row of the returned frame.
        num_round: number of resamples to draw.

    Returns:
        A DataFrame with one row per round, columns ordered by descending
        median value.
    """
    per_round = []
    for _ in range(num_round):
        # Same size as the input, drawn with replacement.
        resampled = battles.sample(frac=1.0, replace=True)
        per_round.append(func_compute_elo(resampled))

    table = pd.DataFrame(per_round)
    column_order = table.median().sort_values(ascending=False).index
    return table[column_order]
def visualize_bootstrap_scores(df, title):
    """Plot the median of each column of ``df`` with a 2.5%-97.5% quantile error bar.

    Args:
        df: DataFrame with one column per model (presumably one row per
            bootstrap round; confirm against the caller).
        title: title of the figure.

    Returns:
        A plotly scatter figure with models on the x axis, sorted by
        descending median, each point labelled with its median rounded to two
        decimals.
    """
    lower = df.quantile(0.025)
    median = df.quantile(0.5)
    upper = df.quantile(0.975)

    summary = pd.DataFrame({"lower": lower, "rating": median, "upper": upper})
    summary = summary.reset_index(names="model")
    summary = summary.sort_values("rating", ascending=False)

    # Plotly takes error bars as distances from the point, not absolute bounds.
    summary["error_y"] = summary["upper"] - summary["rating"]
    summary["error_y_minus"] = summary["rating"] - summary["lower"]
    summary["rating_rounded"] = summary["rating"].round(2)

    scatter = px.scatter(
        summary,
        x="model",
        y="rating",
        error_y="error_y",
        error_y_minus="error_y_minus",
        text="rating_rounded",
        title=title,
    )
    scatter.update_layout(xaxis_title="Model", yaxis_title="Rating")
    return scatter
# 4 | |
def visualize_rating_count(df, title):
    """Plot how many battles each model took part in, as a bar chart.

    Side effect: the figure is also written to ``model_counts.html`` in the
    current working directory.

    Args:
        df: DataFrame with columns ``model_a`` and ``model_b``.
        title: title of the figure.

    Returns:
        The plotly bar figure.
    """
    # Appearances on either side count towards a model's total.
    df_all_value_counts = pd.concat([df["model_a"], df["model_b"]]).value_counts()
    fig = px.bar(df_all_value_counts, title=title, text_auto=True)

    min_y = df_all_value_counts.min()
    max_y = df_all_value_counts.max()

    # Snap the visible range outwards to multiples of 100: the axis starts at
    # or below the smallest count and ends at or above the largest one.
    y_begin = math.floor(min_y / 100) * 100
    y_end = math.ceil(max_y / 100) * 100

    fig.update_layout(xaxis_title="model", yaxis_title="Rating Count", showlegend=False)
    fig.update_yaxes(range=[y_begin, y_end])

    # save the plot for the blog:
    fig.write_html("model_counts.html", full_html=False, include_plotlyjs="cdn")
    return fig