rosacastillo's picture
added markets creator info for the tools tab
60adc3e
raw
history blame
8.96 kB
import pandas as pd
import gradio as gr
from typing import List
from tabs.metrics import tool_metric_choices
import plotly.express as px
# Default figure dimensions; passed as ``height`` / ``width`` to the gradio
# and plotly charts built by the plotting helpers below.
HEIGHT = 600
WIDTH = 1000
def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Prepare the raw tools data for the weekly winning-rate charts.

    The ``request_time`` column is parsed to datetimes, a
    ``request_month_year_week`` label (weekly period formatted as ``%b-%d``)
    is added, and every row is duplicated with ``market_creator`` set to
    ``"all"`` so that an aggregate series can be plotted next to the
    per-market ones.

    Args:
        tools: Frame with at least a ``request_time`` column. It is not
            modified; all work happens on a copy.

    Returns:
        A new frame containing the original rows followed logically by their
        ``"all"`` duplicates, sorted by ``request_time`` ascending.
    """
    # assign() returns a new frame, so the caller's data is left untouched.
    tools = tools.assign(
        request_time=pd.to_datetime(tools["request_time"])
    ).sort_values(by="request_time", ascending=True)

    # request_time is already datetime here, no need to parse it again.
    tools["request_month_year_week"] = (
        tools["request_time"].dt.to_period("W").dt.strftime("%b-%d")
    )

    # Duplicate every request under the synthetic "all" market creator so the
    # charts can show the total alongside each individual market.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"

    tools = pd.concat([tools, tools_all], ignore_index=True)
    return tools.sort_values(by="request_time", ascending=True)
def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
    """Compute the weekly winning rate of each selected tool.

    Rows are kept only when the tool is in ``inc_tools``, ``error`` is not 1,
    and both ``currentAnswer`` (after normalising ``"yes"``/``"no"`` casing)
    and ``vote`` are ``"Yes"`` or ``"No"``. A request counts as a win when
    ``currentAnswer`` equals ``vote``.

    Args:
        tools_df: Frame with ``tool``, ``error``, ``currentAnswer``, ``vote``
            and ``request_month_year_week`` columns. It is not modified.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week) with string-named columns ``"0"`` (losses),
        ``"1"`` (wins), ``"win_perc"`` (wins as a percentage of all counted
        requests) and ``"total_request"``.
    """
    selected = tools_df[
        tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    ].copy()  # explicit copy: avoids chained-assignment warnings below
    selected["currentAnswer"] = selected["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    answered = selected[
        selected["currentAnswer"].isin(["Yes", "No"])
        & selected["vote"].isin(["Yes", "No"])
    ].copy()
    answered["win"] = (answered["currentAnswer"] == answered["vote"]).astype(int)
    answered.columns = answered.columns.astype(str)

    wins = (
        answered.groupby(["tool", "request_month_year_week", "win"])
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns exist even when the data contains
        # only wins or only losses; otherwise wins[0] / wins[1] would raise.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins = wins.reset_index()
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)
    # Keep the week label as a plain string so the plotting layer treats it
    # as a category rather than trying to infer another type.
    wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
    return wins
def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute the weekly winning rate of each selected tool per market creator.

    Rows are kept only when the tool is in ``inc_tools``, ``error`` is not 1,
    and both ``currentAnswer`` (after normalising ``"yes"``/``"no"`` casing)
    and ``vote`` are ``"Yes"`` or ``"No"``. A request counts as a win when
    ``currentAnswer`` equals ``vote``.

    Args:
        tools_df: Frame with ``tool``, ``error``, ``currentAnswer``, ``vote``,
            ``request_month_year_week`` and ``market_creator`` columns. It is
            not modified.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week, market_creator) with string-named columns
        ``"0"`` (losses), ``"1"`` (wins), ``"win_perc"`` (wins as a percentage
        of all counted requests) and ``"total_request"``.
    """
    selected = tools_df[
        tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    ].copy()  # explicit copy: avoids chained-assignment warnings below
    selected["currentAnswer"] = selected["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    answered = selected[
        selected["currentAnswer"].isin(["Yes", "No"])
        & selected["vote"].isin(["Yes", "No"])
    ].copy()
    answered["win"] = (answered["currentAnswer"] == answered["vote"]).astype(int)
    answered.columns = answered.columns.astype(str)

    wins = (
        answered.groupby(
            ["tool", "request_month_year_week", "market_creator", "win"], sort=False
        )
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns exist even when the data contains
        # only wins or only losses; otherwise wins[0] / wins[1] would raise.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins = wins.reset_index()
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)
    return wins
def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning data into one row per week.

    Args:
        wins_df: Frame with ``request_month_year_week``, ``"0"``, ``"1"``,
            ``win_perc`` and ``total_request`` columns.

    Returns:
        A frame keyed by ``request_month_year_week`` with ``losses`` (sum of
        ``"0"``), ``wins`` (sum of ``"1"``), ``win_perc`` (unweighted mean of
        the input percentages) and ``total_request`` (sum).
    """
    per_week = wins_df.groupby("request_month_year_week")
    summary = per_week.agg(
        losses=("0", "sum"),
        wins=("1", "sum"),
        win_perc=("win_perc", "mean"),
        total_request=("total_request", "sum"),
    )
    return summary.reset_index()
def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning data into one row per week and market creator.

    Groups keep the order in which they first appear in ``wins_df``.

    Args:
        wins_df: Frame with ``request_month_year_week``, ``market_creator``,
            ``"0"``, ``"1"``, ``win_perc`` and ``total_request`` columns.

    Returns:
        A frame keyed by ``request_month_year_week`` and ``market_creator``
        with ``losses`` (sum of ``"0"``), ``wins`` (sum of ``"1"``),
        ``win_perc`` (unweighted mean of the input percentages) and
        ``total_request`` (sum).
    """
    group_keys = ["request_month_year_week", "market_creator"]
    per_week_and_market = wins_df.groupby(group_keys, sort=False)
    summary = per_week_and_market.agg(
        losses=("0", "sum"),
        wins=("1", "sum"),
        win_perc=("win_perc", "mean"),
        total_request=("total_request", "sum"),
    )
    return summary.reset_index()
def plot_tool_winnings_overall(
    wins_df: pd.DataFrame, winning_selector: str = "win_perc"
) -> gr.BarPlot:
    """Build a bar plot of one winning metric per week.

    Args:
        wins_df: Frame holding a ``request_month_year_week`` column and the
            column named by ``winning_selector``.
        winning_selector: Column plotted on the y axis; also used as the
            y-axis title and shown in the tooltip.

    Returns:
        A ``gr.BarPlot`` titled "Winning Rate" with the week on the x axis.
    """
    week_column = "request_month_year_week"
    plot_options = {
        "value": wins_df,
        "x": week_column,
        "y": winning_selector,
        "title": "Winning Rate",
        "x_title": "Date",
        "y_title": winning_selector,
        "tooltip": [week_column, winning_selector],
        "show_label": True,
        "interactive": True,
        "show_actions_button": True,
        "height": HEIGHT,
        "width": WIDTH,
    }
    return gr.BarPlot(**plot_options)
def sort_key(date_str):
    """Return a sortable tuple for a ``"<Mon>-<number>"`` week label.

    The label is split on ``"-"`` into an English three-letter month
    abbreviation and an integer. The integer is split into hundreds and
    remainder, giving the key ``(number // 100, month_number, number % 100)``.
    For the ``"%b-%d"`` labels built in ``prepare_tools`` the number is a day
    of the month, so the first element is always 0 and ordering is by month
    and then day.

    Raises:
        ValueError: If the label does not contain exactly one ``"-"``, the
            month abbreviation is unknown, or the numeric part is not an
            integer.
    """
    month_abbr, numeric_part = date_str.split("-")
    month_abbreviations = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    month_number = month_abbreviations.index(month_abbr) + 1
    hundreds, remainder = divmod(int(numeric_part), 100)
    return (hundreds, month_number, remainder)
def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Plot a weekly winning metric as grouped bars, one bar per market creator.

    Args:
        winning_df: Per-tool winning data; it is aggregated per week and
            market creator with ``get_overall_winning_rate_by_market``.
        winning_selector: Metric display name. It is looked up in
            ``tool_metric_choices`` to obtain the column plotted on the y
            axis, and is also used as the y-axis title.

    Returns:
        A ``gr.Plot`` wrapping the plotly grouped bar chart.
    """
    # Translate the metric's display name into the dataframe column to plot.
    metric_column = tool_metric_choices.get(winning_selector)
    overall_df = get_overall_winning_rate_by_market(winning_df)

    # Week labels are strings, so order them explicitly with sort_key and
    # store them as an ordered categorical before sorting the rows.
    week_order = sorted(
        overall_df["request_month_year_week"].unique(), key=sort_key
    )
    overall_df["request_month_year_week"] = pd.Categorical(
        overall_df["request_month_year_week"],
        categories=week_order,
        ordered=True,
    )
    overall_df = overall_df.sort_values("request_month_year_week")

    category_orders = {
        "market_creator": ["pearl", "quickstart", "all"],
        "request_month_year_week": week_order,
    }
    fig = px.bar(
        overall_df,
        x="request_month_year_week",
        y=metric_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders=category_orders,
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=winning_selector,
        legend={"yanchor": "top", "y": 0.5},
        width=WIDTH,
        height=HEIGHT,
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)
def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
    """Build a bar plot of the weekly winning percentage of a single tool.

    Args:
        wins_df: Frame with ``tool``, ``request_month_year_week`` and
            ``win_perc`` columns.
        tool: Only rows whose ``tool`` equals this value are plotted.

    Returns:
        A ``gr.BarPlot`` titled "Winning Rate" with the week on the x axis
        and ``win_perc`` on the y axis.
    """
    tool_rows = wins_df[wins_df["tool"] == tool]
    week_column = "request_month_year_week"
    metric_column = "win_perc"
    plot_options = {
        "value": tool_rows,
        "x": week_column,
        "y": metric_column,
        "title": "Winning Rate",
        "x_title": "Week",
        "y_title": "Winning Rate",
        "tooltip": [week_column, metric_column],
        "show_label": True,
        "interactive": True,
        "show_actions_button": True,
        "height": HEIGHT,
        "width": WIDTH,
    }
    return gr.BarPlot(**plot_options)
def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Plot the weekly winning percentage of one tool, grouped by market creator.

    Args:
        wins_df: Frame with ``tool``, ``request_month_year_week``,
            ``market_creator`` and ``win_perc`` columns. It is not modified.
        tool: Only rows whose ``tool`` equals this value are plotted.

    Returns:
        A ``gr.Plot`` wrapping the plotly grouped bar chart.
    """
    # Work on an explicit copy: assigning a column on a filtered slice would
    # otherwise trigger pandas' chained-assignment warning.
    tool_wins_df = wins_df[wins_df["tool"] == tool].copy()

    # Week labels are strings, so order them explicitly with sort_key and
    # store them as an ordered categorical.
    sorted_categories = sorted(
        tool_wins_df["request_month_year_week"].unique(), key=sort_key
    )
    tool_wins_df["request_month_year_week"] = pd.Categorical(
        tool_wins_df["request_month_year_week"],
        categories=sorted_categories,
        ordered=True,
    )
    # Sort the rows that are actually plotted (the per-tool subset), not the
    # full input frame.
    tool_wins_df = tool_wins_df.sort_values("request_month_year_week")

    fig = px.bar(
        tool_wins_df,
        x="request_month_year_week",
        y="win_perc",
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            "request_month_year_week": sorted_categories,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title="Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)