Corey Morris committed on
Commit
1f8cc2a
1 Parent(s): a79afe8

Added finding from moral scenarios about threshold

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -156,7 +156,8 @@ def create_plot(df, arc_column, moral_column, models=None):
156
 
157
  # Custom scatter plots
158
  st.header('Custom scatter plots')
159
- st.write("The dashed red line represents the random chance performance of 0.25")
 
160
  selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
161
  selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
162
 
@@ -168,27 +169,25 @@ else:
168
 
169
  # end of custom scatter plots
170
  st.markdown("## Notable findings and plots")
171
- st.markdown("### Moral Scenarios Performance")
172
 
 
 
 
173
 
174
- fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
175
  st.plotly_chart(fig)
176
 
 
 
 
 
177
  fig = create_plot(filtered_data, 'Parameters', 'MMLU_moral_scenarios')
178
  st.plotly_chart(fig)
179
 
180
  fig = px.histogram(filtered_data, x="MMLU_moral_scenarios", marginal="rug", hover_data=filtered_data.columns)
181
  st.plotly_chart(fig)
182
 
183
- st.header('Abstract Algebra Performance')
184
- st.write("Small models showed surprisingly strong performance on the abstract algebra task. A 6 Billion parameter model is tied for the best performance on this task and there are a number of other small models in the top 10.")
185
-
186
-
187
-
188
- # Usage example:
189
- plot_top_n(filtered_data, 'MMLU_abstract_algebra', 10)
190
-
191
- fig = create_plot(filtered_data, 'Parameters', 'MMLU_abstract_algebra')
192
  st.plotly_chart(fig)
193
 
194
  st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")
 
156
 
157
  # Custom scatter plots
158
  st.header('Custom scatter plots')
159
+ st.write("As expected, there is a strong positive relationship between the number of parameters and average performance on the MMLU evaluation.")
160
+ st.markdown("***The dashed red line indicates random chance accuracy of 0.25 as the MMLU evaluation is multiple choice with 4 response options.***")
161
  selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
162
  selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
163
 
 
169
 
170
  # end of custom scatter plots
171
  st.markdown("## Notable findings and plots")
 
172
 
173
+ st.markdown('### Abstract Algebra Performance')
174
+ st.write("Small models showed surprisingly strong performance on the abstract algebra task. A 6 Billion parameter model is tied for the best performance on this task and there are a number of other small models in the top 10.")
175
+ plot_top_n(filtered_data, 'MMLU_abstract_algebra', 10)
176
 
177
+ fig = create_plot(filtered_data, 'Parameters', 'MMLU_abstract_algebra')
178
  st.plotly_chart(fig)
179
 
180
+ st.markdown("### Moral Scenarios Performance")
181
+ st.write("While smaller models can perform well at many tasks, the model size threshold for decent performance on moral scenarios is much higher. There are no models with less than 13 billion parameters with performance much better than random chance.")
182
+
183
+ st.write("Impact of Parameter Count on Accuracy for Moral Scenarios")
184
  fig = create_plot(filtered_data, 'Parameters', 'MMLU_moral_scenarios')
185
  st.plotly_chart(fig)
186
 
187
  fig = px.histogram(filtered_data, x="MMLU_moral_scenarios", marginal="rug", hover_data=filtered_data.columns)
188
  st.plotly_chart(fig)
189
 
190
+ fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
 
 
 
 
 
 
 
 
191
  st.plotly_chart(fig)
192
 
193
  st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")