Corey Morris committed on
Commit
dc21a69
1 Parent(s): a125eb8

Table now displays the columns that have the top differences

Browse files
Files changed (1) hide show
  1. app.py +18 -17
app.py CHANGED
@@ -86,6 +86,19 @@ def create_line_chart(df, model_names, metrics):
86
  fig.update_layout(showlegend=True)
87
  return fig
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  data_provider = ResultDataProcessor()
@@ -257,32 +270,20 @@ if selected_x_column != selected_y_column: # Avoid creating a plot with the s
257
  else:
258
  st.write("Please select different columns for the x and y axes.")
259
 
260
-
261
  # Section to select a model and display radar and line charts
262
  st.header("Compare selected models to models the closest 5 models on MMLU average")
263
  st.write("This is to demonstrate that while the average score is useful, there is a lot of variation in performance on individual tasks.")
264
  selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())
265
- metrics_to_compare = ['MMLU_abstract_algebra', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_average', 'MMLU_moral_scenarios']
266
- closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
267
-
268
- st.dataframe(filtered_data.loc[closest_models, metrics_to_compare])
269
 
270
- # Function to find the top differences and return them as a DataFrame
271
- def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=['Parameters']):
272
- # Calculate the absolute differences for each task between the target model and the closest models
273
- differences = df.loc[closest_models].drop(columns=exclude_columns).sub(df.loc[target_model]).abs()
274
- # Unstack the differences and sort by the largest absolute difference
275
- top_differences = differences.unstack().nlargest(num_differences)
276
- # Convert the top differences to a DataFrame for display
277
- top_differences_table = pd.DataFrame({
278
- 'Task': [idx[0] for idx in top_differences.index],
279
- 'Difference': top_differences.values
280
- })
281
- return top_differences_table, top_differences_table['Task'].tolist()
282
 
283
  # Find the top 10 tasks with the largest differences and convert to a DataFrame
284
  top_differences_table, top_differences_tasks = find_top_differences_table(filtered_data, selected_model_name, closest_models)
285
 
 
 
 
286
  # Display the table in the Streamlit app
287
  st.markdown("## Top Differences")
288
  st.dataframe(top_differences_table)
 
86
  fig.update_layout(showlegend=True)
87
  return fig
88
 
def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=None):
    """Rank the tasks on which the closest models differ most from the target model.

    Args:
        df: DataFrame whose index labels are model names and whose columns
            are per-task scores (plus any columns named in ``exclude_columns``).
        target_model: Index label of the model to compare against.
        closest_models: Index labels of the models compared with the target.
        num_differences: Maximum number of (task, model) differences to keep.
        exclude_columns: Column label(s) to leave out of the comparison.
            Defaults to ``['Parameters']`` when ``None``.

    Returns:
        A ``(table, tasks)`` tuple. ``table`` is a DataFrame with columns
        ``'Task'`` and ``'Difference'``, sorted by descending absolute
        difference; a task appears once per model that made the cut.
        ``tasks`` lists each task from ``table`` once, in order of first
        appearance (largest difference first).
    """
    if exclude_columns is None:
        exclude_columns = ['Parameters']

    # Drop the excluded columns *before* selecting either operand. If only the
    # closest-model rows were trimmed, label alignment in ``sub`` would bring
    # the excluded columns back as all-NaN columns.
    scores = df.drop(columns=exclude_columns)
    differences = scores.loc[closest_models].sub(scores.loc[target_model]).abs()

    # ``unstack`` on a frame with a flat index yields a Series keyed by
    # (task, model). A NaN difference is not a real difference, so discard
    # those before ranking.
    top_differences = differences.unstack().dropna().nlargest(num_differences)

    top_differences_table = pd.DataFrame({
        'Task': top_differences.index.get_level_values(0).tolist(),
        'Difference': top_differences.values,
    })

    # De-duplicate while keeping rank order; ``set`` would make the resulting
    # column order vary between runs.
    unique_top_differences_tasks = list(dict.fromkeys(top_differences_table['Task']))
    return top_differences_table, unique_top_differences_tasks
102
 
103
 
104
  data_provider = ResultDataProcessor()
 
270
  else:
271
  st.write("Please select different columns for the x and y axes.")
272
 
 
273
  # Section to select a model and display radar and line charts
274
  st.header("Compare selected models to models the closest 5 models on MMLU average")
275
  st.write("This is to demonstrate that while the average score is useful, there is a lot of variation in performance on individual tasks.")
276
  selected_model_name = st.selectbox("Select a Model:", filtered_data.index.tolist())
 
 
 
 
277
 
278
+ # Get the closest 5 models to the selected model based on MMLU average
279
+ closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
 
 
 
 
 
 
 
 
 
 
280
 
281
  # Find the top 10 tasks with the largest differences and convert to a DataFrame
282
  top_differences_table, top_differences_tasks = find_top_differences_table(filtered_data, selected_model_name, closest_models)
283
 
284
+ # Display the DataFrame for the closest models and the top differences tasks
285
+ st.dataframe(filtered_data.loc[closest_models, top_differences_tasks])
286
+
287
  # Display the table in the Streamlit app
288
  st.markdown("## Top Differences")
289
  st.dataframe(top_differences_table)