Corey Morris committed on
Commit
d506f10
1 Parent(s): 5b83d0b

WIP commit. Currently hitting an error from the nlargest call.

Browse files
Files changed (2) hide show
  1. app.py +32 -1
  2. result_data_processor.py +2 -0
app.py CHANGED
@@ -107,7 +107,8 @@ def create_line_chart(df, model_names, metrics):
107
 
108
  def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=['Parameters']):
109
  # Calculate the absolute differences for each task between the target model and the closest models
110
- differences = df.loc[closest_models].drop(columns=exclude_columns).sub(df.loc[target_model]).abs()
 
111
  # Unstack the differences and sort by the largest absolute difference
112
  top_differences = differences.unstack().nlargest(num_differences)
113
  # Convert the top differences to a DataFrame for display
@@ -120,6 +121,36 @@ def find_top_differences_table(df, target_model, closest_models, num_differences
120
  return top_differences_table, unique_top_differences_tasks
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  data_provider = ResultDataProcessor()
124
 
125
  # st.title('Model Evaluation Results including MMLU by task')
 
107
 
108
  def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=['Parameters']):
109
  # Calculate the absolute differences for each task between the target model and the closest models
110
+ new_df = df.drop(columns=exclude_columns)
111
+ differences = new_df.loc[closest_models].sub(new_df.loc[target_model]).abs()
112
  # Unstack the differences and sort by the largest absolute difference
113
  top_differences = differences.unstack().nlargest(num_differences)
114
  # Convert the top differences to a DataFrame for display
 
121
  return top_differences_table, unique_top_differences_tasks
122
 
123
 
124
+ # def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=['Parameters', 'organization']):
125
+ # # Drop specified columns and create a new DataFrame
126
+ # new_df = df.drop(columns=exclude_columns)
127
+
128
+ # # Compute differences between target model and closest models, taking absolute values
129
+ # differences = new_df.loc[closest_models].sub(new_df.loc[target_model]).abs()
130
+
131
+ # # Unstack the differences
132
+ # unstacked_differences = differences.unstack()
133
+
134
+ # # Convert object types to numeric, ignoring errors to leave non-convertible elements as NaN
135
+ # unstacked_differences = pd.to_numeric(unstacked_differences, errors='coerce')
136
+
137
+ # # Find the top num_differences
138
+ # top_differences = unstacked_differences.nlargest(num_differences)
139
+
140
+ # # Convert the top differences to a DataFrame for display
141
+ # top_differences_table = pd.DataFrame({
142
+ # 'Task': [idx[0] for idx in top_differences.index],
143
+ # 'Difference': top_differences.values
144
+ # })
145
+
146
+ # # Ensure that only unique tasks are returned
147
+ # unique_top_differences_tasks = list(set(top_differences_table['Task'].tolist()))
148
+
149
+ # return top_differences_table, unique_top_differences_tasks
150
+
151
+
152
+
153
+
154
  data_provider = ResultDataProcessor()
155
 
156
  # st.title('Model Evaluation Results including MMLU by task')
result_data_processor.py CHANGED
@@ -137,6 +137,8 @@ class ResultDataProcessor:
137
  # remove extreme outliers from column harness|truthfulqa:mc1
138
  data = self._remove_mc1_outliers(data)
139
 
 
 
140
  return data
141
 
142
  def rank_data(self):
 
137
  # remove extreme outliers from column harness|truthfulqa:mc1
138
  data = self._remove_mc1_outliers(data)
139
 
140
+ data = data.drop(columns=['organization'])
141
+
142
  return data
143
 
144
  def rank_data(self):