Spaces: Running
Upload folder using huggingface_hub
Browse files
- app/__pycache__/draw_diagram.cpython-310.pyc +0 -0
- app/__pycache__/pages.cpython-310.pyc +0 -0
- app/draw_diagram.py +1 -17
- app/pages.py +2 -5
app/__pycache__/draw_diagram.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/draw_diagram.cpython-310.pyc and b/app/__pycache__/draw_diagram.cpython-310.pyc differ
|
|
app/__pycache__/pages.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/pages.cpython-310.pyc and b/app/__pycache__/pages.cpython-310.pyc differ
|
|
app/draw_diagram.py
CHANGED
@@ -65,22 +65,6 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
65 |
min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
|
66 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
67 |
|
68 |
-
display_names = {
|
69 |
-
'cross_mmlu' : 'Cross-MMLU',
|
70 |
-
'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
|
71 |
-
'cross_logiqa' : 'Cross-LogiQA',
|
72 |
-
'cross_logiqa_no_prompt' : 'Cross-LogiQA-No-Prompt',
|
73 |
-
'cross_xquad' : 'Cross-XQUAD',
|
74 |
-
'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
|
75 |
-
'sg_eval' : 'SG EVAL',
|
76 |
-
'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
|
77 |
-
'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
|
78 |
-
'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
|
79 |
-
'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
|
80 |
-
'us_eval' : 'US EVAL',
|
81 |
-
'cn_eval' : 'CN EVAL',
|
82 |
-
'ph_eval' : 'PH EVAL'
|
83 |
-
}
|
84 |
|
85 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
86 |
|
@@ -109,7 +93,7 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
109 |
chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))
|
110 |
}
|
111 |
).highlight_max(
|
112 |
-
subset=[chart_data_table.columns[2]], color='
|
113 |
)
|
114 |
|
115 |
st.dataframe(
|
|
|
65 |
min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
|
66 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
70 |
|
|
|
93 |
chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))
|
94 |
}
|
95 |
).highlight_max(
|
96 |
+
subset=[chart_data_table.columns[2]], color='#b0c1d7',
|
97 |
)
|
98 |
|
99 |
st.dataframe(
|
app/pages.py
CHANGED
@@ -130,8 +130,6 @@ def cultural_reasoning():
|
|
130 |
filters_leveltwo = [
|
131 |
'SG-EVAL-v2-MCQ',
|
132 |
'SG-EVAL-v2-Open-Ended',
|
133 |
-
'SG-EVAL-v1-Cleaned',
|
134 |
-
'SG-EVAL-v1',
|
135 |
'CN-EVAL',
|
136 |
'PH-EVAL',
|
137 |
'US-EVAL'
|
@@ -143,8 +141,6 @@ def cultural_reasoning():
|
|
143 |
|
144 |
category_two_dict = {
|
145 |
'SG-EVAL-v2-MCQ' : 'sg_eval_v2_mcq_no_prompt',
|
146 |
-
'SG-EVAL-v1' : 'sg_eval',
|
147 |
-
'SG-EVAL-v1-Cleaned' : 'sg_eval_v1_cleaned',
|
148 |
'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
|
149 |
'US-EVAL' : 'us_eval',
|
150 |
'CN-EVAL' : 'cn_eval',
|
@@ -171,6 +167,7 @@ def general_reasoning():
|
|
171 |
st.title("Task: General Reasoning")
|
172 |
|
173 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
|
|
174 |
filters_leveltwo = [
|
175 |
'IndoMMLU',
|
176 |
'MMLU',
|
@@ -188,7 +185,7 @@ def general_reasoning():
|
|
188 |
'IndoMMLU': 'indommlu_no_prompt',
|
189 |
'MMLU' : 'mmlu_no_prompt',
|
190 |
'C-Eval' : 'c_eval',
|
191 |
-
'CMMLU' : '
|
192 |
'ZBench' : 'zbench',
|
193 |
}
|
194 |
|
|
|
130 |
filters_leveltwo = [
|
131 |
'SG-EVAL-v2-MCQ',
|
132 |
'SG-EVAL-v2-Open-Ended',
|
|
|
|
|
133 |
'CN-EVAL',
|
134 |
'PH-EVAL',
|
135 |
'US-EVAL'
|
|
|
141 |
|
142 |
category_two_dict = {
|
143 |
'SG-EVAL-v2-MCQ' : 'sg_eval_v2_mcq_no_prompt',
|
|
|
|
|
144 |
'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
|
145 |
'US-EVAL' : 'us_eval',
|
146 |
'CN-EVAL' : 'cn_eval',
|
|
|
167 |
st.title("Task: General Reasoning")
|
168 |
|
169 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
170 |
+
|
171 |
filters_leveltwo = [
|
172 |
'IndoMMLU',
|
173 |
'MMLU',
|
|
|
185 |
'IndoMMLU': 'indommlu_no_prompt',
|
186 |
'MMLU' : 'mmlu_no_prompt',
|
187 |
'C-Eval' : 'c_eval',
|
188 |
+
'CMMLU' : 'cmmlu_no_prompt',
|
189 |
'ZBench' : 'zbench',
|
190 |
}
|
191 |
|