"""Winning-rate tables and plots for tools, overall and per market creator."""

from typing import List, Tuple

import gradio as gr
import pandas as pd
import plotly.express as px

from tabs.metrics import tool_metric_choices

HEIGHT = 600
WIDTH = 1000

# English month abbreviations in calendar order, used by ``sort_key`` to turn
# the month part of a week label into a number.
_MONTH_ORDER = (
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
)


def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Add the weekly label column and an aggregated ``"all"`` market.

    Args:
        tools: Frame with at least a ``request_time`` column. Any
            ``market_creator`` values are kept on the original rows.

    Returns:
        A new frame sorted by ``request_time`` that contains every input row
        plus a duplicate of every row whose ``market_creator`` is ``"all"``.
        Each row carries ``request_month_year_week``, the ``"%b-%d"``
        formatting of the weekly period of its ``request_time``.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame is no longer
    # modified in place (the original overwrote its ``request_time`` column).
    tools = tools.assign(request_time=pd.to_datetime(tools["request_time"]))
    tools = tools.sort_values(by="request_time", ascending=True)
    tools["request_month_year_week"] = (
        tools["request_time"].dt.to_period("W").dt.strftime("%b-%d")
    )

    # Duplicate every row under the synthetic "all" market so that totals can
    # be grouped and plotted next to the individual market creators.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"

    tools = pd.concat([tools, tools_all], ignore_index=True)
    return tools.sort_values(by="request_time", ascending=True)


def _valid_vote_outcomes(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
    """Return the non-error Yes/No rows of the selected tools with a ``win`` flag."""
    keep = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    # Work on an explicit copy: the original assigned onto a filtered slice,
    # which triggers pandas' chained-assignment warning.
    outcomes = tools_df[keep].copy()
    outcomes["currentAnswer"] = outcomes["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    binary = ["Yes", "No"]
    answered = outcomes["currentAnswer"].isin(binary) & outcomes["vote"].isin(binary)
    outcomes = outcomes[answered].copy()

    # A request is a win (1) when the vote matches the current answer.
    outcomes["win"] = (outcomes["currentAnswer"] == outcomes["vote"]).astype(int)
    outcomes.columns = outcomes.columns.astype(str)
    return outcomes


def _win_counts(
    outcomes: pd.DataFrame, group_cols: List[str], sort: bool
) -> pd.DataFrame:
    """Count losses/wins per group and derive ``win_perc`` and ``total_request``."""
    counts = (
        outcomes.groupby([*group_cols, "win"], sort=sort)
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns exist. When the data holds only wins
        # or only losses, ``unstack`` yields a single column and the lookups
        # below used to raise KeyError.
        .reindex(columns=[0, 1], fill_value=0)
    )
    counts["win_perc"] = counts[1] / (counts[0] + counts[1]) * 100
    counts["total_request"] = counts[0] + counts[1]
    counts = counts.reset_index()
    # The outcome columns become the strings "0" and "1".
    counts.columns = counts.columns.astype(str)
    return counts


def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
    """Compute the weekly winning percentage of each selected tool.

    Args:
        tools_df: Frame with ``tool``, ``error``, ``currentAnswer``, ``vote``
            and ``request_month_year_week`` columns.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (``tool``, ``request_month_year_week``) with columns
        ``"0"`` (losses), ``"1"`` (wins), ``win_perc`` and ``total_request``.
    """
    outcomes = _valid_vote_outcomes(tools_df, inc_tools)
    wins = _win_counts(outcomes, ["tool", "request_month_year_week"], sort=True)
    # Keep the week label as a plain string; the original comment states this
    # is to set the type explicitly for Altair.
    wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
    return wins


def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute the weekly winning percentage of each selected tool per market.

    Args:
        tools_df: Frame with ``tool``, ``error``, ``currentAnswer``, ``vote``,
            ``request_month_year_week`` and ``market_creator`` columns.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (``tool``, ``request_month_year_week``,
        ``market_creator``) with columns ``"0"`` (losses), ``"1"`` (wins),
        ``win_perc`` and ``total_request``. Groups are not sorted by key
        (``sort=False``).
    """
    outcomes = _valid_vote_outcomes(tools_df, inc_tools)
    return _win_counts(
        outcomes,
        ["tool", "request_month_year_week", "market_creator"],
        sort=False,
    )


def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning rates into one row per week.

    Losses, wins and total requests are summed; ``win_perc`` is the unweighted
    mean of the per-row percentages.

    Args:
        wins_df: Frame with ``request_month_year_week``, ``"0"``, ``"1"``,
            ``win_perc`` and ``total_request`` columns.

    Returns:
        Frame with ``request_month_year_week``, ``losses``, ``wins``,
        ``win_perc`` and ``total_request`` columns.
    """
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    weekly = wins_df.groupby("request_month_year_week").agg(aggregations)
    return weekly.rename(columns={"0": "losses", "1": "wins"}).reset_index()


def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning rates into one row per week and market.

    Losses, wins and total requests are summed; ``win_perc`` is the unweighted
    mean of the per-row percentages. Groups are not sorted by key
    (``sort=False``).

    Args:
        wins_df: Frame with ``request_month_year_week``, ``market_creator``,
            ``"0"``, ``"1"``, ``win_perc`` and ``total_request`` columns.

    Returns:
        Frame with ``request_month_year_week``, ``market_creator``,
        ``losses``, ``wins``, ``win_perc`` and ``total_request`` columns.
    """
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    weekly = wins_df.groupby(
        ["request_month_year_week", "market_creator"], sort=False
    ).agg(aggregations)
    return weekly.rename(columns={"0": "losses", "1": "wins"}).reset_index()


def plot_tool_winnings_overall(
    wins_df: pd.DataFrame, winning_selector: str = "win_perc"
) -> gr.BarPlot:
    """Build a bar plot of ``winning_selector`` per week.

    Args:
        wins_df: Frame with ``request_month_year_week`` and the column named
            by ``winning_selector``.
        winning_selector: Column plotted on the y axis; also the y-axis title.

    Returns:
        The configured ``gr.BarPlot``.
    """
    return gr.BarPlot(
        title="Winning Rate",
        x_title="Date",
        y_title=winning_selector,
        show_label=True,
        interactive=True,
        show_actions_button=True,
        tooltip=["request_month_year_week", winning_selector],
        value=wins_df,
        x="request_month_year_week",
        y=winning_selector,
        height=HEIGHT,
        width=WIDTH,
    )


def sort_key(date_str: str) -> Tuple[int, int, int]:
    """Return a sortable key for a ``"<Mon>-<number>"`` week label.

    The key is ``(number // 100, month_number, number % 100)``.

    NOTE(review): for the ``"%b-%d"`` labels built in ``prepare_tools`` the
    number is a day of month, so the first element is always 0 and labels are
    ordered by month and day only. Data spanning a year boundary would
    therefore place January before December; confirm whether that matters.

    Raises:
        ValueError: If the label does not split into exactly two parts on
            ``"-"``, the month abbreviation is unknown, or the second part is
            not an integer.
    """
    month, number_text = date_str.split("-")
    month_num = _MONTH_ORDER.index(month) + 1
    number = int(number_text)
    return (number // 100, month_num, number % 100)


def _with_ordered_weeks(df: pd.DataFrame) -> Tuple[pd.DataFrame, list]:
    """Return ``df`` sorted by week label together with the ordered labels.

    The week column is converted to an ordered categorical on a new frame, so
    the frame passed in is left untouched.
    """
    ordered_weeks = sorted(df["request_month_year_week"].unique(), key=sort_key)
    ordered_df = df.assign(
        request_month_year_week=pd.Categorical(
            df["request_month_year_week"],
            categories=ordered_weeks,
            ordered=True,
        )
    )
    return ordered_df.sort_values("request_month_year_week"), ordered_weeks


def _grouped_market_bar(
    df: pd.DataFrame, y_column: str, y_title: str, ordered_weeks: list
) -> gr.Plot:
    """Build the grouped per-market bar chart shared by the integrated plots."""
    fig = px.bar(
        df,
        x="request_month_year_week",
        y=y_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            "request_month_year_week": ordered_weeks,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=y_title,
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)


def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Plot the overall weekly metric as grouped bars, one bar per market.

    Args:
        winning_df: Per-tool, per-market winning-rate frame; it is aggregated
            with ``get_overall_winning_rate_by_market`` before plotting.
        winning_selector: Metric display name. It is looked up in
            ``tool_metric_choices`` to get the column to plot and is also
            used as the y-axis title.

    Returns:
        A ``gr.Plot`` wrapping the Plotly figure.
    """
    # NOTE(review): ``.get`` yields None for a selector that is not in
    # ``tool_metric_choices`` (defined in tabs.metrics, not visible here);
    # confirm callers only pass known metric names.
    column_name = tool_metric_choices.get(winning_selector)

    wins_df = get_overall_winning_rate_by_market(winning_df)
    wins_df, ordered_weeks = _with_ordered_weeks(wins_df)
    return _grouped_market_bar(wins_df, column_name, winning_selector, ordered_weeks)


def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
    """Build a bar plot of the weekly winning rate of one tool.

    Args:
        wins_df: Frame with ``tool``, ``request_month_year_week`` and
            ``win_perc`` columns.
        tool: Name of the tool whose rows are plotted.

    Returns:
        The configured ``gr.BarPlot``.
    """
    return gr.BarPlot(
        title="Winning Rate",
        x_title="Week",
        y_title="Winning Rate",
        x="request_month_year_week",
        y="win_perc",
        value=wins_df[wins_df["tool"] == tool],
        show_label=True,
        interactive=True,
        show_actions_button=True,
        tooltip=["request_month_year_week", "win_perc"],
        height=HEIGHT,
        width=WIDTH,
    )


def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Plot the weekly winning rate of one tool as grouped bars per market.

    Args:
        wins_df: Frame with ``tool``, ``request_month_year_week``,
            ``market_creator`` and ``win_perc`` columns.
        tool: Name of the tool whose rows are plotted.

    Returns:
        A ``gr.Plot`` wrapping the Plotly figure.
    """
    tool_wins_df = wins_df[wins_df["tool"] == tool]
    # The original sorted ``wins_df`` here and discarded the result, so the
    # per-tool frame that is plotted was never sorted. The helper sorts the
    # filtered frame and works on a copy instead of assigning onto a slice.
    tool_wins_df, ordered_weeks = _with_ordered_weeks(tool_wins_df)
    return _grouped_market_bar(
        tool_wins_df,
        "win_perc",
        "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
        ordered_weeks,
    )