zhuohan-7 committed on
Commit
08d39e4
1 Parent(s): 7820dba

Upload folder using huggingface_hub

Browse files
app/__pycache__/draw_diagram.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/draw_diagram.cpython-310.pyc and b/app/__pycache__/draw_diagram.cpython-310.pyc differ
 
app/__pycache__/pages.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/pages.cpython-310.pyc and b/app/__pycache__/pages.cpython-310.pyc differ
 
app/draw_diagram.py CHANGED
@@ -5,17 +5,28 @@ from streamlit_echarts import st_echarts
5
  from streamlit.components.v1 import html
6
  import pandas as pd
7
 
8
- path = "./style/Leaderboard-Rename-SeaEval.csv"
9
- info_df = pd.read_csv(path).dropna(axis=0)
10
 
11
- # if 'models' not in st.session_state:
12
- # st.session_state.models= []
13
 
14
- def draw(folder_name, category_one, category_two, sort, num_sort):
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  folder = f"./results/{folder_name}/"
17
  data_path = f'{folder}/{category_one}/{category_two}.csv'
18
  chart_data = pd.read_csv(data_path).dropna(axis='columns').round(3)
 
19
  st.markdown("""
20
  <style>
21
  .stMultiSelect [data-baseweb=select] span {
@@ -28,14 +39,29 @@ def draw(folder_name, category_one, category_two, sort, num_sort):
28
  </style>
29
  """, unsafe_allow_html=True)
30
 
 
31
  # remap model names
32
  display_model_names = {key.strip() :val.strip() for key, val in zip(info_df['Original Name'], info_df['Proper Display Name'])}
 
 
33
  chart_data['model_show'] = chart_data['Model'].map(display_model_names)
34
  chart_data['model_show'] = chart_data['model_show'].fillna(chart_data['Model'].apply(lambda x: x.replace('_', '-')))
35
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  models = st.multiselect("Please choose the model",
37
  sorted(chart_data['model_show'].tolist()),
38
- default=sorted(chart_data['model_show'].tolist()),
39
  )
40
 
41
  # if 'Select All' in st.session_state.models:
@@ -43,84 +69,32 @@ def draw(folder_name, category_one, category_two, sort, num_sort):
43
 
44
  chart_data = chart_data[chart_data['model_show'].isin(models)]
45
 
46
- if num_sort == 'Ascending':
47
- ascend = True
48
- else:
49
- ascend = False
50
-
51
- chart_data = chart_data.sort_values(by=[sort], ascending=ascend).dropna(axis=0)
52
-
53
- if len(chart_data) == 0:
54
- return
55
 
56
  min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
57
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
58
 
59
  display_names = {
60
- 'cross_mmlu': 'Cross-MMLU',
61
- 'cross_logiqa': 'Cross-LogiQA',
62
- 'cross_xquad': 'Cross-XQUAD',
63
- 'sg_eval': 'SG EVAL',
64
  'sg_eval_v1_cleaned': 'SG EVAL V1 Cleaned',
65
- 'sg_eval_v2_mcq': 'SG EVAL V2 MCQ',
66
- 'sg_eval_v2_open': 'SG EVAL V2 Open Ended',
67
- 'us_eval': 'US EVAL',
68
- 'cn_eval': 'CN EVAL',
69
- 'ph_eval': 'PH EVAL'
70
  }
71
 
72
- # breakpoint()
73
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
74
 
75
- options = {
76
- # "title": {"text": f"{display_names[category_two]}"},
77
- "tooltip": {
78
- "trigger": "axis",
79
- "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
80
- "triggerOn": 'mousemove',
81
- },
82
- "legend": {"data": data_columns},
83
- "toolbox": {"feature": {"saveAsImage": {}}},
84
- "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
85
- "xAxis": [
86
- {
87
- "type": "category",
88
- "boundaryGap": True,
89
- "triggerEvent": True,
90
- "data": chart_data['model_show'].tolist(),
91
- }
92
- ],
93
- "yAxis": [{"type": "value",
94
- "min": min_value,
95
- "max": max_value,
96
- "boundaryGap": True
97
- # "splitNumber": 10
98
- }],
99
- "series": [{
100
- "name": f"{col}",
101
- "type": "bar",
102
- "data": chart_data[f'{col}'].tolist(),
103
- } for col in data_columns],
104
- }
105
-
106
- events = {
107
- "click": "function(params) { return params.value }"
108
- }
109
-
110
- value = st_echarts(options=options, events=events, height="500px")
111
-
112
  '''
113
  Show table
114
  '''
115
- # st.divider()
116
  with st.container():
117
- # st.write("")
118
  st.markdown('##### TABLE')
119
- # custom_css = """
120
-
121
- # """
122
- # st.markdown(custom_css, unsafe_allow_html=True)
123
-
124
  model_link = {key.strip(): val for key, val in zip(info_df['Proper Display Name'], info_df['Link'])}
125
 
126
  chart_data['model_link'] = chart_data['model_show'].map(model_link)
@@ -149,3 +123,61 @@ def draw(folder_name, category_one, category_two, sort, num_sort):
149
  use_container_width=True
150
  )
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from streamlit.components.v1 import html
6
  import pandas as pd
7
 
8
+ from model_information import get_dataframe
 
9
 
 
 
10
 
11
+ info_df = get_dataframe()
12
+
13
+
14
+ # path = "./style/Leaderboard-Rename-SeaEval.csv"
15
+ # info_df = pd.read_csv(path).dropna(axis=0)
16
+
17
+ #Model2Detail = {
18
+ # {'cross_mmlu': 'Cross-MMLU'}
19
+ #}
20
+
21
+
22
+
23
+
24
+ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_range):
25
+
26
  folder = f"./results/{folder_name}/"
27
  data_path = f'{folder}/{category_one}/{category_two}.csv'
28
  chart_data = pd.read_csv(data_path).dropna(axis='columns').round(3)
29
+
30
  st.markdown("""
31
  <style>
32
  .stMultiSelect [data-baseweb=select] span {
 
39
  </style>
40
  """, unsafe_allow_html=True)
41
 
42
+
43
  # remap model names
44
  display_model_names = {key.strip() :val.strip() for key, val in zip(info_df['Original Name'], info_df['Proper Display Name'])}
45
+ model2sizes = {key.strip() :val.strip() for key, val in zip(info_df['Original Name'], info_df['Model Size'])}
46
+
47
  chart_data['model_show'] = chart_data['Model'].map(display_model_names)
48
  chart_data['model_show'] = chart_data['model_show'].fillna(chart_data['Model'].apply(lambda x: x.replace('_', '-')))
49
 
50
+ chart_data['model_size'] = chart_data['Model'].map(model2sizes)
51
+ chart_data['model_size'] = chart_data['model_size'].fillna('99999')
52
+
53
+ # How to work on the model size range, filter the ones that are not in the range
54
+ if model_size_range != 'All':
55
+ if model_size_range == '<10B':
56
+ chart_data = chart_data[chart_data['model_size'].astype(int) < 10]
57
+ elif model_size_range == '10B-30B':
58
+ chart_data = chart_data[(chart_data['model_size'].astype(int) >= 10) & (chart_data['model_size'].astype(int) < 30)]
59
+ elif model_size_range == '>30B':
60
+ chart_data = chart_data[chart_data['model_size'].astype(int) >= 30]
61
+
62
  models = st.multiselect("Please choose the model",
63
  sorted(chart_data['model_show'].tolist()),
64
+ default = sorted(chart_data['model_show'].tolist()),
65
  )
66
 
67
  # if 'Select All' in st.session_state.models:
 
69
 
70
  chart_data = chart_data[chart_data['model_show'].isin(models)]
71
 
72
+ if len(chart_data) == 0: return
 
 
 
 
 
 
 
 
73
 
74
  min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
75
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
76
 
77
  display_names = {
78
+ 'cross_mmlu' : 'Cross-MMLU',
79
+ 'cross_logiqa' : 'Cross-LogiQA',
80
+ 'cross_xquad' : 'Cross-XQUAD',
81
+ 'sg_eval' : 'SG EVAL',
82
  'sg_eval_v1_cleaned': 'SG EVAL V1 Cleaned',
83
+ 'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
84
+ 'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
85
+ 'us_eval' : 'US EVAL',
86
+ 'cn_eval' : 'CN EVAL',
87
+ 'ph_eval' : 'PH EVAL'
88
  }
89
 
 
90
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  '''
93
  Show table
94
  '''
 
95
  with st.container():
 
96
  st.markdown('##### TABLE')
97
+
 
 
 
 
98
  model_link = {key.strip(): val for key, val in zip(info_df['Proper Display Name'], info_df['Link'])}
99
 
100
  chart_data['model_link'] = chart_data['model_show'].map(model_link)
 
123
  use_container_width=True
124
  )
125
 
126
+
127
+
128
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
129
+
130
+ # Initialize a session state variable for toggling the chart visibility
131
+ if "show_chart" not in st.session_state:
132
+ st.session_state.show_chart = False
133
+
134
+ # Create a button to toggle visibility
135
+ if st.button("Show Chart"):
136
+ st.session_state.show_chart = not st.session_state.show_chart
137
+
138
+ if st.session_state.show_chart:
139
+
140
+ with st.container():
141
+ st.markdown('##### CHART')
142
+
143
+
144
+ if num_sort == 'Ascending': ascend = True
145
+ else: ascend = False
146
+ chart_data = chart_data.sort_values(by=[sort], ascending=ascend).dropna(axis=0)
147
+
148
+ options = {
149
+ # "title": {"text": f"{display_names[category_two]}"},
150
+ "tooltip": {
151
+ "trigger": "axis",
152
+ "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
153
+ "triggerOn": 'mousemove',
154
+ },
155
+ "legend": {"data": data_columns},
156
+ "toolbox": {"feature": {"saveAsImage": {}}},
157
+ "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
158
+ "xAxis": [
159
+ {
160
+ "type": "category",
161
+ "boundaryGap": True,
162
+ "triggerEvent": True,
163
+ "data": chart_data['model_show'].tolist(),
164
+ }
165
+ ],
166
+ "yAxis": [{"type": "value",
167
+ "min": min_value,
168
+ "max": max_value,
169
+ "boundaryGap": True
170
+ # "splitNumber": 10
171
+ }],
172
+ "series": [{
173
+ "name": f"{col}",
174
+ "type": "bar",
175
+ "data": chart_data[f'{col}'].tolist(),
176
+ } for col in data_columns],
177
+ }
178
+
179
+ events = {
180
+ "click": "function(params) { return params.value }"
181
+ }
182
+
183
+ value = st_echarts(options=options, events=events, height="500px")
app/pages.py CHANGED
@@ -12,9 +12,12 @@ def dashboard():
12
  [![GitHub Repo stars](https://img.shields.io/github/stars/SeaEval/SeaEval?style=social)][gh]
13
  """)
14
 
15
- seaeval_url = "https://seaeval.github.io/"
 
16
 
17
  st.divider()
 
 
18
  st.markdown("#### What is [SeaEval](%s)?" % seaeval_url)
19
 
20
  with st.container():
@@ -26,7 +29,7 @@ def dashboard():
26
  st.markdown('''
27
 
28
  ''')
29
- st.markdown("##### A new benchmark for multilingual, multicultral foundation model evaluation consisting of 28 dataset as the core and keep expanding over time.")
30
  st.markdown(''':star: How models understand and reason with natural language?
31
  :balloon: Languages: English, Chinese, Malay, Spainish, Indonedian, Vietnamese, Filipino.
32
  ''')
@@ -70,38 +73,45 @@ def dashboard():
70
  ''')
71
 
72
  def cross_lingual_consistency():
73
- st.title("Cross-Lingual Consistency")
74
 
75
  filters_levelone = ['Zero Shot', 'Few Shot']
76
  filters_leveltwo = ['Cross-MMLU', 'Cross-XQUAD', 'Cross-LogiQA']
77
 
78
- category_one_dict = {'Zero Shot': 'zero_shot',
79
- 'Few Shot': 'few_shot'}
80
- category_two_dict = {'Cross-MMLU': 'cross_mmlu',
81
- 'Cross-XQUAD': 'cross_xquad',
82
- 'Cross-LogiQA': 'cross_logiqa'}
 
 
 
 
 
83
 
84
- left, center, _, middle,right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
85
  with left:
86
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
87
  with center:
88
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
89
  with middle:
90
- sort = st.selectbox('Sort', ['Accuracy','Cross-Lingual Consistency', 'AC3',
91
- 'English', 'Chinese', 'Spanish', 'Vietnamese'])
92
  with right:
93
- sortby = st.selectbox('by', ['Ascending', 'Descending'])
 
 
 
94
 
95
  if category_one or category_two or sort or sortby:
96
  category_one = category_one_dict[category_one]
97
  category_two = category_two_dict[category_two]
98
 
99
- draw('cross_lingual', category_one, category_two, sort, sortby)
100
- # else:
101
- # draw('zero_shot', 'cross_mmlu', 'Accuracy', 'Descending')
102
 
103
  def cultural_reasoning():
104
- st.title("Cultural Reasoning")
105
 
106
  filters_levelone = ['Zero Shot', 'Few Shot']
107
  filters_leveltwo = [
@@ -115,33 +125,36 @@ def cultural_reasoning():
115
  ]
116
 
117
  category_one_dict = {'Zero Shot': 'zero_shot',
118
- 'Few Shot': 'few_shot'}
 
 
119
  category_two_dict = {'SG EVAL': 'sg_eval',
120
- 'SG EVAL V1 Cleaned': 'sg_eval_v1_cleaned',
121
- 'SG EVAL V2 MCQ': 'sg_eval_v2_mcq',
122
  'SG EVAL V2 Open Ended': 'sg_eval_v2_open',
123
- 'US EVAL': 'us_eval',
124
- 'CN EVAL': 'cn_eval',
125
- 'PH EVAL': 'ph_eval'}
 
126
 
127
- left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
128
  with left:
129
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
130
  with center:
131
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
132
- with right:
133
- sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
 
 
134
 
135
  if category_one or category_two or sortby:
136
  category_one = category_one_dict[category_one]
137
  category_two = category_two_dict[category_two]
138
- draw('cultural_reasoning', category_one, category_two, 'Accuracy',sortby)
139
- # else:
140
- # draw_only_acc('cultural_reasoning', 'zero_shot', 'sg_eval', 'Descending')
141
 
142
 
143
  def general_reasoning():
144
- st.title("General Reasoning")
145
 
146
  filters_levelone = ['Zero Shot', 'Few Shot']
147
  filters_leveltwo = [
@@ -162,12 +175,15 @@ def general_reasoning():
162
 
163
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
164
  with left:
165
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
166
  with center:
167
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
168
- with right:
169
- sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
170
 
 
 
 
 
 
171
  if category_one or category_two or sortby:
172
  category_one = category_one_dict[category_one]
173
  category_two = category_two_dict[category_two]
@@ -176,7 +192,7 @@ def general_reasoning():
176
  # draw_only_acc('general_reasoning', 'zero_shot', 'MMLU Full', 'Descending')
177
 
178
  def flores():
179
- st.title("FLORES-Translation")
180
 
181
  filters_levelone = ['Zero Shot', 'Few Shot']
182
  filters_leveltwo = ['Indonesian to English',
@@ -195,12 +211,14 @@ def flores():
195
 
196
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
197
  with left:
198
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
199
  with center:
200
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
201
- with right:
202
- sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
203
-
 
 
204
  if category_one or category_two or sortby:
205
  category_one = category_one_dict[category_one]
206
  category_two = category_two_dict[category_two]
@@ -209,7 +227,7 @@ def flores():
209
  # draw_flores_translation('zero_shot', 'Indonesian to English', 'Descending')
210
 
211
  def emotion():
212
- st.title("Emotion")
213
 
214
  filters_levelone = ['Zero Shot', 'Few Shot']
215
  filters_leveltwo = [
@@ -224,12 +242,15 @@ def emotion():
224
 
225
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
226
  with left:
227
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
228
  with center:
229
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
230
- with right:
231
- sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
232
-
 
 
 
233
  if category_one or category_two or sortby:
234
  category_one = category_one_dict[category_one]
235
  category_two = category_two_dict[category_two]
@@ -238,7 +259,7 @@ def emotion():
238
  # draw_only_acc('emotion', 'zero_shot', 'Indonesian Emotion Classification', 'Descending')
239
 
240
  def dialogue():
241
- st.title("Dialogue")
242
 
243
  filters_levelone = ['Zero Shot', 'Few Shot']
244
  filters_leveltwo = [
@@ -255,18 +276,21 @@ def dialogue():
255
 
256
  left, center, _, middle,right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
257
  with left:
258
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
259
  with center:
260
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
261
  with middle:
262
  if category_two == 'DREAM':
263
  sort = st.selectbox('Sort', ['Accuracy'])
264
  else:
265
  sort = st.selectbox('Sort', ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L'])
266
 
267
- with right:
268
- sortby = st.selectbox('by', ['Ascending', 'Descending'])
269
-
 
 
 
270
  if category_one or category_two or sort or sortby:
271
  category_one = category_one_dict[category_one]
272
  category_two = category_two_dict[category_two]
@@ -275,7 +299,7 @@ def dialogue():
275
  # draw_dialogue('zero_shot', 'DREAM', sort[0],'Descending')
276
 
277
  def fundamental_nlp_tasks():
278
- st.title("Fundamental NLP Tasks")
279
 
280
  filters_levelone = ['Zero Shot', 'Few Shot']
281
  filters_leveltwo = ['OCNLI', 'C3', 'COLA', 'QQP', 'MNLI', 'QNLI', 'WNLI', 'RTE', 'MRPC']
@@ -294,12 +318,15 @@ def fundamental_nlp_tasks():
294
 
295
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
296
  with left:
297
- category_one = st.selectbox('Select Zero / Few shot', filters_levelone)
298
  with center:
299
- category_two = st.selectbox('Select the sub-category', filters_leveltwo)
300
- with right:
301
- sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
302
 
 
 
 
 
 
303
  if category_one or category_two or sortby:
304
  category_one = category_one_dict[category_one]
305
  category_two = category_two_dict[category_two]
 
12
  [![GitHub Repo stars](https://img.shields.io/github/stars/SeaEval/SeaEval?style=social)][gh]
13
  """)
14
 
15
+ st.markdown("#### News")
16
+ st.markdown("Nov, 2024: Update layout and support comparison between models with similar model sizes.")
17
 
18
  st.divider()
19
+
20
+ seaeval_url = "https://seaeval.github.io/"
21
  st.markdown("#### What is [SeaEval](%s)?" % seaeval_url)
22
 
23
  with st.container():
 
29
  st.markdown('''
30
 
31
  ''')
32
+ st.markdown("##### A benchmark for multilingual, multicultral foundation model evaluation consisting of >30 dataset and we are keep expanding over time.")
33
  st.markdown(''':star: How models understand and reason with natural language?
34
  :balloon: Languages: English, Chinese, Malay, Spainish, Indonedian, Vietnamese, Filipino.
35
  ''')
 
73
  ''')
74
 
75
  def cross_lingual_consistency():
76
+ st.title("Task: Cross-Lingual Consistency")
77
 
78
  filters_levelone = ['Zero Shot', 'Few Shot']
79
  filters_leveltwo = ['Cross-MMLU', 'Cross-XQUAD', 'Cross-LogiQA']
80
 
81
+ category_one_dict = {
82
+ 'Zero Shot': 'zero_shot',
83
+ 'Few Shot' : 'few_shot'
84
+ }
85
+
86
+ category_two_dict = {
87
+ 'Cross-MMLU' : 'cross_mmlu',
88
+ 'Cross-XQUAD' : 'cross_xquad',
89
+ 'Cross-LogiQA': 'cross_logiqa'
90
+ }
91
 
92
+ left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
93
  with left:
94
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
95
  with center:
96
+ category_two = st.selectbox('Dataset', filters_leveltwo)
97
  with middle:
98
+ model_size_range = st.selectbox('Model Size', ['All', '<10B', '10B-30B', '>30B'])
99
+
100
  with right:
101
+ sort = st.selectbox('Sort (For Chart)', ['Accuracy','Cross-Lingual Consistency', 'AC3',
102
+ 'English', 'Chinese', 'Spanish', 'Vietnamese'])
103
+
104
+ sortby = 'Ascending'
105
 
106
  if category_one or category_two or sort or sortby:
107
  category_one = category_one_dict[category_one]
108
  category_two = category_two_dict[category_two]
109
 
110
+ draw('cross_lingual', category_one, category_two, sort, sortby, model_size_range)
111
+
 
112
 
113
  def cultural_reasoning():
114
+ st.title("Task: Cultural Reasoning")
115
 
116
  filters_levelone = ['Zero Shot', 'Few Shot']
117
  filters_leveltwo = [
 
125
  ]
126
 
127
  category_one_dict = {'Zero Shot': 'zero_shot',
128
+ 'Few Shot': 'few_shot'
129
+ }
130
+
131
  category_two_dict = {'SG EVAL': 'sg_eval',
132
+ 'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
133
+ 'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
134
  'SG EVAL V2 Open Ended': 'sg_eval_v2_open',
135
+ 'US EVAL' : 'us_eval',
136
+ 'CN EVAL' : 'cn_eval',
137
+ 'PH EVAL' : 'ph_eval'
138
+ }
139
 
140
+ left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
141
  with left:
142
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
143
  with center:
144
+ category_two = st.selectbox('Dataset', filters_leveltwo)
145
+ with middle:
146
+ model_size_range = st.selectbox('Model Size', ['All', '<10B', '10B-30B', '>30B'])
147
+
148
+ sortby = 'Ascending'
149
 
150
  if category_one or category_two or sortby:
151
  category_one = category_one_dict[category_one]
152
  category_two = category_two_dict[category_two]
153
+ draw('cultural_reasoning', category_one, category_two, 'Accuracy', sortby, model_size_range)
 
 
154
 
155
 
156
  def general_reasoning():
157
+ st.title("Task: General Reasoning")
158
 
159
  filters_levelone = ['Zero Shot', 'Few Shot']
160
  filters_leveltwo = [
 
175
 
176
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
177
  with left:
178
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
179
  with center:
180
+ category_two = st.selectbox('Dataset', filters_leveltwo)
 
 
181
 
182
+ # with right:
183
+ # sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
184
+
185
+ sortby = 'Ascending'
186
+
187
  if category_one or category_two or sortby:
188
  category_one = category_one_dict[category_one]
189
  category_two = category_two_dict[category_two]
 
192
  # draw_only_acc('general_reasoning', 'zero_shot', 'MMLU Full', 'Descending')
193
 
194
  def flores():
195
+ st.title("Task: FLORES-Translation")
196
 
197
  filters_levelone = ['Zero Shot', 'Few Shot']
198
  filters_leveltwo = ['Indonesian to English',
 
211
 
212
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
213
  with left:
214
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
215
  with center:
216
+ category_two = st.selectbox('Dataset', filters_leveltwo)
217
+ # with right:
218
+ # sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
219
+
220
+ sortby = 'Ascending'
221
+
222
  if category_one or category_two or sortby:
223
  category_one = category_one_dict[category_one]
224
  category_two = category_two_dict[category_two]
 
227
  # draw_flores_translation('zero_shot', 'Indonesian to English', 'Descending')
228
 
229
  def emotion():
230
+ st.title("Task: Emotion")
231
 
232
  filters_levelone = ['Zero Shot', 'Few Shot']
233
  filters_leveltwo = [
 
242
 
243
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
244
  with left:
245
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
246
  with center:
247
+ category_two = st.selectbox('Dataset', filters_leveltwo)
248
+ # with right:
249
+ # sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
250
+
251
+ sortby = 'Ascending'
252
+
253
+
254
  if category_one or category_two or sortby:
255
  category_one = category_one_dict[category_one]
256
  category_two = category_two_dict[category_two]
 
259
  # draw_only_acc('emotion', 'zero_shot', 'Indonesian Emotion Classification', 'Descending')
260
 
261
  def dialogue():
262
+ st.title("Task: Dialogue")
263
 
264
  filters_levelone = ['Zero Shot', 'Few Shot']
265
  filters_leveltwo = [
 
276
 
277
  left, center, _, middle,right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
278
  with left:
279
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
280
  with center:
281
+ category_two = st.selectbox('Dataset', filters_leveltwo)
282
  with middle:
283
  if category_two == 'DREAM':
284
  sort = st.selectbox('Sort', ['Accuracy'])
285
  else:
286
  sort = st.selectbox('Sort', ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L'])
287
 
288
+ #with right:
289
+ # sortby = st.selectbox('by', ['Ascending', 'Descending'])
290
+
291
+ sortby = 'Ascending'
292
+
293
+
294
  if category_one or category_two or sort or sortby:
295
  category_one = category_one_dict[category_one]
296
  category_two = category_two_dict[category_two]
 
299
  # draw_dialogue('zero_shot', 'DREAM', sort[0],'Descending')
300
 
301
  def fundamental_nlp_tasks():
302
+ st.title("Task: Fundamental NLP Tasks")
303
 
304
  filters_levelone = ['Zero Shot', 'Few Shot']
305
  filters_leveltwo = ['OCNLI', 'C3', 'COLA', 'QQP', 'MNLI', 'QNLI', 'WNLI', 'RTE', 'MRPC']
 
318
 
319
  left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
320
  with left:
321
+ category_one = st.selectbox('Zero or Few Shot', filters_levelone)
322
  with center:
323
+ category_two = st.selectbox('Dataset', filters_leveltwo)
 
 
324
 
325
+ # with right:
326
+ # sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])
327
+
328
+ sortby = 'Ascending'
329
+
330
  if category_one or category_two or sortby:
331
  category_one = category_one_dict[category_one]
332
  category_two = category_two_dict[category_two]