Spaces:

AIM-Harvard
/

rabbits-leaderboard

App Files Community

magilogi committed on Jun 13

Commit

c5ae15c

•

1 Parent(s): 5542fa4

adjusted score change

Files changed (1) hide show

app.py +16 -7

app.py CHANGED Viewed

@@ -54,17 +54,14 @@ df.rename(columns={
 }, inplace=True)
 #Create adjusted robustness score that accounts for g2b accuracy and difference in accuracy
-# (models with low difference like phi will seem robust, but its simply because they are bad / random at both tasks)
 df['Average Accuracy (Original and G2B)'] = (df['Average G2B Accuracy'] + df['Average Original Accuracy']) / 2
-# Introduce a penalty factor for low average accuracy
-penalty_factor = 1 / (df['Average Accuracy (Original and G2B)'] ** 2)
-# Calculate the adjusted robustness score with penalty
-df['Adjusted Robustness Score'] = df['Average Difference'] * penalty_factor
-df['Adjusted Robustness Score'] = df['Adjusted Robustness Score'].round(2)
@@ -157,6 +154,18 @@ def create_bar_plot_drugmatchqa(df, col, title):
     fig.update_layout(xaxis_title=col, yaxis_title='Model', height=600, coloraxis_showscale=False)
     return fig
 #Create UI/Layout
 with gr.Blocks(css="custom.css") as demo:
@@ -317,7 +326,7 @@ with gr.Blocks(css="custom.css") as demo:
             elem_id="bar3"
         )
         bar4 = gr.Plot(
-            value=create_bar_plot_drugmatchqa(df, "Adjusted Robustness Score", "Which LLMs are most robust to drug name synonym substitution?"),
             elem_id="bar4"
         )

 }, inplace=True)
 #Create adjusted robustness score that accounts for g2b accuracy and difference in accuracy
 df['Average Accuracy (Original and G2B)'] = (df['Average G2B Accuracy'] + df['Average Original Accuracy']) / 2
+df['Adjusted Robustness Score'] = (df['Average Difference'] / (df['Average Accuracy (Original and G2B)'] ** 2)) * 100
+df['Adjusted Robustness Score'] = df['Adjusted Robustness Score'].round(2)
     fig.update_layout(xaxis_title=col, yaxis_title='Model', height=600, coloraxis_showscale=False)
     return fig
+def create_bar_plot_adjusted(df, col, title):
+    sorted_df = df.sort_values(by=col, ascending=True)
+    fig = px.bar(sorted_df,
+                 x=col,
+                 y='Model',
+                 orientation='h',
+                 title=title,
+                 color=col,
+                 color_continuous_scale='Aggrnyl')
+    fig.update_layout(xaxis_title=col, yaxis_title='Model', height=600, coloraxis_showscale=False)
+    return fig
 #Create UI/Layout
 with gr.Blocks(css="custom.css") as demo:
             elem_id="bar3"
         )
         bar4 = gr.Plot(
+            value=create_bar_plot_adjusted(df, "Adjusted Robustness Score", "Which LLMs are most robust to drug name synonym substitution?"),
             elem_id="bar4"
         )