|
import pandas as pd |
|
import gradio as gr |
|
from typing import List |
|
from tabs.metrics import tool_metric_choices |
|
import plotly.express as px |
|
|
|
|
|
# Figure dimensions passed as ``width``/``height`` to the charts in this module.
WIDTH = 1000
HEIGHT = 600
|
|
|
|
|
def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Prepare raw tool requests for the weekly winning-rate computations.

    The result has ``request_time`` parsed with ``pd.to_datetime``, a
    ``request_month_year_week`` label (the weekly ``"W"`` period of
    ``request_time`` formatted with ``"%b-%d"``), and every row duplicated
    with ``market_creator`` set to ``"all"`` so that an aggregate series
    exists next to the per-market ones.

    Args:
        tools: Tool requests; must contain a ``request_time`` column.

    Returns:
        A new DataFrame with twice as many rows as ``tools``, ordered by
        ``request_time``. The input frame is not modified.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame is never
    # written to (the previous in-place column assignment mutated it).
    prepared = tools.assign(request_time=pd.to_datetime(tools["request_time"]))
    prepared = prepared.sort_values(by="request_time", ascending=True)

    # ``request_time`` is already datetime here; no second parse is needed.
    prepared["request_month_year_week"] = (
        prepared["request_time"].dt.to_period("W").dt.strftime("%b-%d")
    )

    # Duplicate every request under the synthetic "all" market creator.
    everything = prepared.assign(market_creator="all")

    combined = pd.concat([prepared, everything], ignore_index=True)
    return combined.sort_values(by="request_time", ascending=True)
|
|
|
|
|
def get_tool_winning_rate(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute weekly win/loss counts and the winning percentage per tool.

    A request counts when its tool is in ``inc_tools``, its ``error`` is not
    1, and both ``currentAnswer`` (after mapping ``"no"``/``"yes"`` to
    ``"No"``/``"Yes"``) and ``vote`` are ``"Yes"`` or ``"No"``. It is a win
    when ``currentAnswer`` equals ``vote``.

    Args:
        tools_df: Requests with ``tool``, ``error``, ``currentAnswer``,
            ``vote`` and ``request_month_year_week`` columns.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week) with string-named columns ``tool``,
        ``request_month_year_week``, ``"0"`` (losses), ``"1"`` (wins),
        ``win_perc`` and ``total_request``.
    """
    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    # Explicit copy: the assignments below must not target a slice of
    # ``tools_df`` (avoids SettingWithCopyWarning).
    valid = tools_df[selected].copy()
    valid["currentAnswer"] = valid["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    binary = ["Yes", "No"]
    resolved = valid["currentAnswer"].isin(binary) & valid["vote"].isin(binary)
    valid = valid[resolved].copy()
    valid["win"] = (valid["currentAnswer"] == valid["vote"]).astype(int)
    valid.columns = valid.columns.astype(str)

    wins = (
        valid.groupby(["tool", "request_month_year_week", "win"])
        .size()
        .unstack()
        .fillna(0)
        # ``unstack`` only creates the outcomes that occur in the data; make
        # sure both the loss (0) and win (1) columns exist so the lookups
        # below cannot raise KeyError when every request won or every one lost.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)

    wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
    return wins
|
|
|
|
|
def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute weekly win/loss counts and winning percentage per tool and market.

    A request counts when its tool is in ``inc_tools``, its ``error`` is not
    1, and both ``currentAnswer`` (after mapping ``"no"``/``"yes"`` to
    ``"No"``/``"Yes"``) and ``vote`` are ``"Yes"`` or ``"No"``. It is a win
    when ``currentAnswer`` equals ``vote``.

    Args:
        tools_df: Requests with ``tool``, ``error``, ``currentAnswer``,
            ``vote``, ``request_month_year_week`` and ``market_creator``
            columns.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week, market creator) with string-named columns
        ``tool``, ``request_month_year_week``, ``market_creator``, ``"0"``
        (losses), ``"1"`` (wins), ``win_perc`` and ``total_request``.
    """
    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    # Explicit copy: the assignments below must not target a slice of
    # ``tools_df`` (avoids SettingWithCopyWarning).
    valid = tools_df[selected].copy()
    valid["currentAnswer"] = valid["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    binary = ["Yes", "No"]
    resolved = valid["currentAnswer"].isin(binary) & valid["vote"].isin(binary)
    valid = valid[resolved].copy()
    valid["win"] = (valid["currentAnswer"] == valid["vote"]).astype(int)
    valid.columns = valid.columns.astype(str)

    wins = (
        valid.groupby(
            ["tool", "request_month_year_week", "market_creator", "win"], sort=False
        )
        .size()
        .unstack()
        .fillna(0)
        # ``unstack`` only creates the outcomes that occur in the data; make
        # sure both the loss (0) and win (1) columns exist so the lookups
        # below cannot raise KeyError when every request won or every one lost.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)

    return wins
|
|
|
|
|
def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool weekly results into one row per week.

    Losses (column ``"0"``), wins (column ``"1"``) and ``total_request`` are
    summed over all rows of a week, while ``win_perc`` is the plain mean of
    the rows' percentages. The count columns come back renamed to ``losses``
    and ``wins``.
    """
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    per_week = wins_df.groupby("request_month_year_week").agg(aggregations)
    per_week = per_week.rename(columns={"0": "losses", "1": "wins"})
    return per_week.reset_index()
|
|
|
|
|
def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool results into one row per (week, market creator).

    Losses (column ``"0"``), wins (column ``"1"``) and ``total_request`` are
    summed within each group, while ``win_perc`` is the plain mean of the
    rows' percentages. Groups keep their order of first appearance
    (``sort=False``) and the count columns come back renamed to ``losses``
    and ``wins``.
    """
    group_keys = ["request_month_year_week", "market_creator"]
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    per_group = wins_df.groupby(group_keys, sort=False).agg(aggregations)
    per_group = per_group.rename(columns={"0": "losses", "1": "wins"})
    return per_group.reset_index()
|
|
|
|
|
def plot_tool_winnings_overall(
    wins_df: pd.DataFrame, winning_selector: str = "win_perc"
) -> gr.BarPlot:
    """Build a bar plot of one column of ``wins_df`` per week.

    ``winning_selector`` names the column plotted on the y axis; it is also
    used as the y-axis title and shown in the tooltip next to the week label.
    """
    week_column = "request_month_year_week"
    return gr.BarPlot(
        value=wins_df,
        x=week_column,
        y=winning_selector,
        title="Winning Rate",
        x_title="Date",
        y_title=winning_selector,
        tooltip=[week_column, winning_selector],
        show_label=True,
        interactive=True,
        show_actions_button=True,
        height=HEIGHT,
        width=WIDTH,
    )
|
|
|
|
|
def sort_key(date_str):
    """Turn a ``"<Mon>-<number>"`` label into a sortable tuple.

    The label is split on ``"-"`` into an English three-letter month
    abbreviation and an integer. The result is
    ``(number // 100, month number, number % 100)``.

    Raises:
        ValueError: If the label does not have exactly two ``"-"``-separated
            parts, the month is unknown, or the number is not an integer.
    """
    month_abbr, numeric_part = date_str.split("-")
    months = (
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec",
    )
    month_num = months.index(month_abbr) + 1
    hundreds, remainder = divmod(int(numeric_part), 100)
    return (hundreds, month_num, remainder)
|
|
|
|
|
def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Plot a weekly metric as grouped bars, one bar per market creator.

    ``winning_selector`` is looked up in ``tool_metric_choices`` to find the
    column to plot and is also used as the y-axis title. ``winning_df`` is
    first aggregated per (week, market creator) with
    ``get_overall_winning_rate_by_market``.
    """
    week_col = "request_month_year_week"
    metric_column = tool_metric_choices.get(winning_selector)

    overall = get_overall_winning_rate_by_market(winning_df)

    # Order the week labels with ``sort_key`` and make the column an ordered
    # categorical so the frame can be sorted in that same order.
    week_order = sorted(overall[week_col].unique(), key=sort_key)
    overall[week_col] = pd.Categorical(
        overall[week_col], categories=week_order, ordered=True
    )
    overall = overall.sort_values(week_col)

    fig = px.bar(
        overall,
        x=week_col,
        y=metric_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            week_col: week_order,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=winning_selector,
        legend={"yanchor": "top", "y": 0.5},
        width=WIDTH,
        height=HEIGHT,
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)
|
|
|
|
|
def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
    """Build a bar plot of ``win_perc`` per week for a single tool.

    Only the rows of ``wins_df`` whose ``tool`` column equals ``tool`` are
    plotted.
    """
    week_column = "request_month_year_week"
    rows_for_tool = wins_df[wins_df["tool"] == tool]
    return gr.BarPlot(
        value=rows_for_tool,
        x=week_column,
        y="win_perc",
        title="Winning Rate",
        x_title="Week",
        y_title="Winning Rate",
        tooltip=[week_column, "win_perc"],
        show_label=True,
        interactive=True,
        show_actions_button=True,
        height=HEIGHT,
        width=WIDTH,
    )
|
|
|
|
|
def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Plot the weekly ``win_perc`` of one tool as grouped bars per market creator.

    Args:
        wins_df: Per-tool weekly results with ``tool``,
            ``request_month_year_week``, ``market_creator`` and ``win_perc``
            columns.
        tool: Name of the tool whose rows are plotted.

    Returns:
        A ``gr.Plot`` wrapping the plotly bar figure.
    """
    # Copy the selection so the categorical assignment below does not target
    # a slice of ``wins_df`` (avoids SettingWithCopyWarning).
    tool_wins_df = wins_df[wins_df["tool"] == tool].copy()

    # Order the week labels with ``sort_key`` and make the column an ordered
    # categorical so the frame can be sorted in that same order.
    sorted_categories = sorted(
        tool_wins_df["request_month_year_week"].unique(), key=sort_key
    )
    tool_wins_df["request_month_year_week"] = pd.Categorical(
        tool_wins_df["request_month_year_week"],
        categories=sorted_categories,
        ordered=True,
    )

    # Sort the frame that is actually plotted; previously the unused
    # ``wins_df`` was sorted here, which had no effect on the figure input.
    tool_wins_df = tool_wins_df.sort_values("request_month_year_week")

    fig = px.bar(
        tool_wins_df,
        x="request_month_year_week",
        y="win_perc",
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            "request_month_year_week": sorted_categories,
        },
    )

    fig.update_layout(
        xaxis_title="Week",
        yaxis_title="Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)
|
|