binwang committed on
Commit
14fb007
·
verified ·
1 Parent(s): 73cdee6

Upload folder using huggingface_hub

Browse files
app/__pycache__/draw_diagram.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/draw_diagram.cpython-310.pyc and b/app/__pycache__/draw_diagram.cpython-310.pyc differ
 
app/__pycache__/pages.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/pages.cpython-310.pyc and b/app/__pycache__/pages.cpython-310.pyc differ
 
app/draw_diagram.py CHANGED
@@ -65,22 +65,6 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
65
  min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
66
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
67
 
68
- display_names = {
69
- 'cross_mmlu' : 'Cross-MMLU',
70
- 'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
71
- 'cross_logiqa' : 'Cross-LogiQA',
72
- 'cross_logiqa_no_prompt' : 'Cross-LogiQA-No-Prompt',
73
- 'cross_xquad' : 'Cross-XQUAD',
74
- 'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
75
- 'sg_eval' : 'SG EVAL',
76
- 'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
77
- 'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
78
- 'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
79
- 'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
80
- 'us_eval' : 'US EVAL',
81
- 'cn_eval' : 'CN EVAL',
82
- 'ph_eval' : 'PH EVAL'
83
- }
84
 
85
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
86
 
@@ -109,7 +93,7 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
109
  chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))
110
  }
111
  ).highlight_max(
112
- subset=[chart_data_table.columns[2]], color='yellow'
113
  )
114
 
115
  st.dataframe(
 
65
  min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
66
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
70
 
 
93
  chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))
94
  }
95
  ).highlight_max(
96
+ subset=[chart_data_table.columns[2]], color='#b0c1d7',
97
  )
98
 
99
  st.dataframe(
app/pages.py CHANGED
@@ -130,8 +130,6 @@ def cultural_reasoning():
130
  filters_leveltwo = [
131
  'SG-EVAL-v2-MCQ',
132
  'SG-EVAL-v2-Open-Ended',
133
- 'SG-EVAL-v1-Cleaned',
134
- 'SG-EVAL-v1',
135
  'CN-EVAL',
136
  'PH-EVAL',
137
  'US-EVAL'
@@ -143,8 +141,6 @@ def cultural_reasoning():
143
 
144
  category_two_dict = {
145
  'SG-EVAL-v2-MCQ' : 'sg_eval_v2_mcq_no_prompt',
146
- 'SG-EVAL-v1' : 'sg_eval',
147
- 'SG-EVAL-v1-Cleaned' : 'sg_eval_v1_cleaned',
148
  'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
149
  'US-EVAL' : 'us_eval',
150
  'CN-EVAL' : 'cn_eval',
@@ -171,6 +167,7 @@ def general_reasoning():
171
  st.title("Task: General Reasoning")
172
 
173
  filters_levelone = ['Zero Shot', 'Few Shot']
 
174
  filters_leveltwo = [
175
  'IndoMMLU',
176
  'MMLU',
@@ -188,7 +185,7 @@ def general_reasoning():
188
  'IndoMMLU': 'indommlu_no_prompt',
189
  'MMLU' : 'mmlu_no_prompt',
190
  'C-Eval' : 'c_eval',
191
- 'CMMLU' : 'cmmlu',
192
  'ZBench' : 'zbench',
193
  }
194
 
 
130
  filters_leveltwo = [
131
  'SG-EVAL-v2-MCQ',
132
  'SG-EVAL-v2-Open-Ended',
 
 
133
  'CN-EVAL',
134
  'PH-EVAL',
135
  'US-EVAL'
 
141
 
142
  category_two_dict = {
143
  'SG-EVAL-v2-MCQ' : 'sg_eval_v2_mcq_no_prompt',
 
 
144
  'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
145
  'US-EVAL' : 'us_eval',
146
  'CN-EVAL' : 'cn_eval',
 
167
  st.title("Task: General Reasoning")
168
 
169
  filters_levelone = ['Zero Shot', 'Few Shot']
170
+
171
  filters_leveltwo = [
172
  'IndoMMLU',
173
  'MMLU',
 
185
  'IndoMMLU': 'indommlu_no_prompt',
186
  'MMLU' : 'mmlu_no_prompt',
187
  'C-Eval' : 'c_eval',
188
+ 'CMMLU' : 'cmmlu_no_prompt',
189
  'ZBench' : 'zbench',
190
  }
191