Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app/draw_diagram.py +11 -22
- app/pages.py +8 -2
app/draw_diagram.py
CHANGED
@@ -7,20 +7,8 @@ import pandas as pd
|
|
7 |
|
8 |
from model_information import get_dataframe
|
9 |
|
10 |
-
|
11 |
info_df = get_dataframe()
|
12 |
|
13 |
-
|
14 |
-
# path = "./style/Leaderboard-Rename-SeaEval.csv"
|
15 |
-
# info_df = pd.read_csv(path).dropna(axis=0)
|
16 |
-
|
17 |
-
#Model2Detail = {
|
18 |
-
# {'cross_mmlu': 'Cross-MMLU'}
|
19 |
-
#}
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
def draw(folder_name, category_one, category_two, sort, num_sort, model_size_range):
|
25 |
|
26 |
folder = f"./results/{folder_name}/"
|
@@ -77,16 +65,17 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
77 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
78 |
|
79 |
display_names = {
|
80 |
-
'cross_mmlu'
|
81 |
-
'cross_logiqa'
|
82 |
-
'cross_xquad'
|
83 |
-
'
|
84 |
-
'
|
85 |
-
'
|
86 |
-
'
|
87 |
-
'
|
88 |
-
'
|
89 |
-
'
|
|
|
90 |
}
|
91 |
|
92 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
|
|
7 |
|
8 |
from model_information import get_dataframe
|
9 |
|
|
|
10 |
info_df = get_dataframe()
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def draw(folder_name, category_one, category_two, sort, num_sort, model_size_range):
|
13 |
|
14 |
folder = f"./results/{folder_name}/"
|
|
|
65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
66 |
|
67 |
display_names = {
|
68 |
+
'cross_mmlu' : 'Cross-MMLU',
|
69 |
+
'cross_logiqa' : 'Cross-LogiQA',
|
70 |
+
'cross_xquad' : 'Cross-XQUAD',
|
71 |
+
'cross_mmlu_no_prompt': 'Cross-MMLU-No-Prompt',
|
72 |
+
'sg_eval' : 'SG EVAL',
|
73 |
+
'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
|
74 |
+
'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
|
75 |
+
'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
|
76 |
+
'us_eval' : 'US EVAL',
|
77 |
+
'cn_eval' : 'CN EVAL',
|
78 |
+
'ph_eval' : 'PH EVAL'
|
79 |
}
|
80 |
|
81 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
app/pages.py
CHANGED
@@ -76,7 +76,12 @@ def cross_lingual_consistency():
|
|
76 |
st.title("Task: Cross-Lingual Consistency")
|
77 |
|
78 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
79 |
-
filters_leveltwo = [
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
category_one_dict = {
|
82 |
'Zero Shot': 'zero_shot',
|
@@ -86,7 +91,8 @@ def cross_lingual_consistency():
|
|
86 |
category_two_dict = {
|
87 |
'Cross-MMLU' : 'cross_mmlu',
|
88 |
'Cross-XQUAD' : 'cross_xquad',
|
89 |
-
'Cross-LogiQA': 'cross_logiqa'
|
|
|
90 |
}
|
91 |
|
92 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|
|
|
76 |
st.title("Task: Cross-Lingual Consistency")
|
77 |
|
78 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
79 |
+
filters_leveltwo = [
|
80 |
+
'Cross-MMLU',
|
81 |
+
'Cross-XQUAD',
|
82 |
+
'Cross-LogiQA',
|
83 |
+
'Cross-MMLU-No-Prompt',
|
84 |
+
]
|
85 |
|
86 |
category_one_dict = {
|
87 |
'Zero Shot': 'zero_shot',
|
|
|
91 |
category_two_dict = {
|
92 |
'Cross-MMLU' : 'cross_mmlu',
|
93 |
'Cross-XQUAD' : 'cross_xquad',
|
94 |
+
'Cross-LogiQA': 'cross_logiqa',
|
95 |
+
'Cross-MMLU-No-Prompt': 'cross_mmlu_no_prompt'
|
96 |
}
|
97 |
|
98 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|