Translate the majority of text into English, add authors
app.py
CHANGED
@@ -4,6 +4,11 @@ import pandas as pd
 import seaborn as sns
 from st_social_media_links import SocialMediaIcons
 
+AVERAGE_COLUMN_NAME = "Average"
+SENTIMENT_COLUMN_NAME = "Sentiment"
+RESULTS_COLUMN_NAME = "Results"
+UNDERSTANDING_COLUMN_NAME = "Language understanding"
+PHRASEOLOGY_COLUMN_NAME = "Phraseology"
 
 # Function to load data from JSON file
 def load_data(file_path):
@@ -13,11 +18,11 @@ def load_data(file_path):
 
 # Function to style the DataFrame
 def style_dataframe(df: pd.DataFrame):
-    df[
+    df[RESULTS_COLUMN_NAME] = df.apply(lambda row: [row[SENTIMENT_COLUMN_NAME], row[UNDERSTANDING_COLUMN_NAME], row[PHRASEOLOGY_COLUMN_NAME]], axis=1)
 
-    # Insert the new column after the '
+    # Insert the new column after the 'Average' column
     cols = list(df.columns)
-    cols.insert(cols.index(
+    cols.insert(cols.index(AVERAGE_COLUMN_NAME) + 1, cols.pop(cols.index(RESULTS_COLUMN_NAME)))
     df = df[cols]
 
     # Create a color ramp using Seaborn
@@ -25,7 +30,7 @@ def style_dataframe(df: pd.DataFrame):
 
 def styler(df: pd.DataFrame):
     palette = sns.color_palette("RdYlGn", as_cmap=True)
-    styled_df = df.style.background_gradient(cmap=palette, subset=[
+    styled_df = df.style.background_gradient(cmap=palette, subset=[AVERAGE_COLUMN_NAME, SENTIMENT_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME]).format(precision=2)
     return styled_df
 
 # Load data from JSON file
@@ -69,25 +74,33 @@ st.markdown("""
 """, unsafe_allow_html=True)
 
 # Create tabs
-tab1, tab2 = st.tabs([
+tab1, tab2 = st.tabs([RESULTS_COLUMN_NAME, "Opis"])
 
 with tab1:
-    st.write("
+    st.write("This benchmark evaluates the ability of language models to correctly interpret Polish texts with complex implicatures, such as sarcasm and idiomatic expressions. Models are assessed on sentiment analysis, understanding of true intentions, and identification of idiomatic phrases.")
 
     # Display the styled DataFrame
     styled_df_show = style_dataframe(data)
    styled_df_show = styler(styled_df_show)
-    # st.dataframe(styled_df_show)
 
     st.data_editor(styled_df_show, column_config={
+        AVERAGE_COLUMN_NAME: st.column_config.NumberColumn(AVERAGE_COLUMN_NAME),
+        RESULTS_COLUMN_NAME: st.column_config.BarChartColumn(
+            RESULTS_COLUMN_NAME, help="Summary of the results of each task",
             y_min=0,y_max=5,),
-    },hide_index=True, disabled=True)
+        SENTIMENT_COLUMN_NAME: st.column_config.NumberColumn(SENTIMENT_COLUMN_NAME, help='Ability to analyze sentiment'),
+        PHRASEOLOGY_COLUMN_NAME: st.column_config.NumberColumn(PHRASEOLOGY_COLUMN_NAME, help='Ability to understand phraseological compounds'),
+        UNDERSTANDING_COLUMN_NAME: st.column_config.NumberColumn(UNDERSTANDING_COLUMN_NAME, help='Ability to understand language'),
+    }, hide_index=True, disabled=True, height=500)
+
+    st.markdown("""
+    ### Authors:
+    - [Jan Sowa](https://www.linkedin.com/in/janpiotrsowa) - leadership, writing texts, benchmark code
+    - [Agnieszka Kosiak](https://www.linkedin.com/in/agn-kosiak/) - writing texts
+    - [Magdalena Krawczyk](https://www.linkedin.com/in/magdalena-krawczyk-7810942ab/) - writing texts, labeling
+    - [Remigiusz Kinas](https://www.linkedin.com/in/remigiusz-kinas/) - methodological support
+    - [Krzysztof Wróbel](https://www.linkedin.com/in/wrobelkrzysztof/) - engineering, methodological support
+    """)
 
 with tab2:
     st.header("Opis")
data.json
CHANGED
@@ -1,194 +1,194 @@
 [
     {
         "Model": "mistralai/Mistral-Large-Instruct-2407",
+        "Params": "123B",
+        "Average": 4.03025641025641,
+        "Sentiment": 4.230769230769231,
+        "Language understanding": 4.0,
+        "Phraseology": 3.86
     },
     {
         "Model": "alpindale/WizardLM-2-8x22B",
+        "Params": "141B",
+        "Average": 3.9133760683760683,
+        "Sentiment": 3.7051282051282053,
+        "Language understanding": 3.815,
+        "Phraseology": 4.22
     },
     {
         "Model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        "Params": "70.6B",
+        "Average": 3.828974358974359,
+        "Sentiment": 4.326923076923077,
+        "Language understanding": 3.91,
+        "Phraseology": 3.25
     },
     {
         "Model": "meta-llama/Meta-Llama-3-70B-Instruct",
+        "Params": "70.6B",
+        "Average": 3.806538461538462,
+        "Sentiment": 4.134615384615385,
+        "Language understanding": 3.82,
+        "Phraseology": 3.465
     },
     {
         "Model": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "Params": "141B",
+        "Average": 3.6690170940170943,
+        "Sentiment": 3.782051282051282,
+        "Language understanding": 3.675,
+        "Phraseology": 3.55
     },
     {
         "Model": "speakleash/Bielik-11B-v2.1-Instruct",
+        "Params": "11.2B",
+        "Average": 3.6583760683760684,
+        "Sentiment": 3.9551282051282053,
+        "Language understanding": 3.915,
+        "Phraseology": 3.105
     },
     {
         "Model": "Qwen/Qwen2-72B-Instruct",
+        "Params": "72.7B",
+        "Average": 3.6442735042735044,
+        "Sentiment": 3.7628205128205128,
+        "Language understanding": 3.89,
+        "Phraseology": 3.28
     },
     {
         "Model": "speakleash/Bielik-11B-v2.0-Instruct",
+        "Params": "11.2B",
+        "Average": 3.614786324786325,
+        "Sentiment": 3.9743589743589745,
+        "Language understanding": 3.745,
+        "Phraseology": 3.125
     },
     {
         "Model": "Qwen/Qwen1.5-72B-Chat",
+        "Params": "72.3B",
+        "Average": 3.3214529914529916,
+        "Sentiment": 3.4743589743589745,
+        "Language understanding": 3.515,
+        "Phraseology": 2.975
     },
     {
         "Model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "Params": "8.03B",
+        "Average": 3.3114529914529918,
+        "Sentiment": 3.9743589743589745,
+        "Language understanding": 3.38,
+        "Phraseology": 2.58
     },
     {
         "Model": "Remek/Mistral-Nemo-Instruct-2407-PL-finetuned",
+        "Params": "12.2B",
+        "Average": 3.2843162393162397,
+        "Sentiment": 3.717948717948718,
+        "Language understanding": 3.41,
+        "Phraseology": 2.725
     },
     {
         "Model": "THUDM/glm-4-9b-chat",
+        "Params": "9.4B",
+        "Average": 3.2749145299145295,
+        "Sentiment": 3.58974358974359,
+        "Language understanding": 3.455,
+        "Phraseology": 2.78
     },
     {
         "Model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "Params": "12.2B",
+        "Average": 3.223675213675214,
+        "Sentiment": 3.641025641025641,
+        "Language understanding": 3.29,
+        "Phraseology": 2.74
     },
     {
         "Model": "meta-llama/Meta-Llama-3-8B-Instruct",
+        "Params": "8.03B",
+        "Average": 3.172777777777778,
+        "Sentiment": 3.3333333333333335,
+        "Language understanding": 3.15,
+        "Phraseology": 3.035
     },
     {
         "Model": "upstage/SOLAR-10.7B-Instruct-v1.0",
+        "Params": "10.7B",
+        "Average": 3.1343162393162394,
+        "Sentiment": 2.967948717948718,
+        "Language understanding": 3.18,
+        "Phraseology": 3.255
     },
     {
         "Model": "speakleash/Bielik-7B-Instruct-v0.1",
+        "Params": "7.24B",
+        "Average": 3.126581196581197,
+        "Sentiment": 3.58974358974359,
+        "Language understanding": 3.475,
+        "Phraseology": 2.315
     },
     {
         "Model": "openchat/openchat-3.5-0106-gemma",
+        "Params": "8.54B",
+        "Average": 3.08525641025641,
+        "Sentiment": 3.730769230769231,
+        "Language understanding": 3.08,
+        "Phraseology": 2.445
     },
     {
         "Model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "Params": "46.7B",
+        "Average": 3.039230769230769,
+        "Sentiment": 3.0576923076923075,
+        "Language understanding": 3.175,
+        "Phraseology": 2.885
     },
     {
         "Model": "mistralai/Mistral-7B-Instruct-v0.3",
+        "Params": "7.25B",
+        "Average": 3.022307692307692,
+        "Sentiment": 3.326923076923077,
+        "Language understanding": 3.06,
+        "Phraseology": 2.68
     },
     {
         "Model": "berkeley-nest/Starling-LM-7B-alpha",
+        "Params": "7.24B",
+        "Average": 2.945897435897436,
+        "Sentiment": 3.0576923076923075,
+        "Language understanding": 2.925,
+        "Phraseology": 2.855
     },
     {
         "Model": "openchat/openchat-3.5-0106",
+        "Params": "7.24B",
+        "Average": 2.8500854700854696,
+        "Sentiment": 3.16025641025641,
+        "Language understanding": 2.835,
+        "Phraseology": 2.555
     },
     {
         "Model": "internlm/internlm2-chat-20b",
+        "Params": "19.9B",
+        "Average": 2.8237606837606837,
+        "Sentiment": 3.301282051282051,
+        "Language understanding": 2.785,
+        "Phraseology": 2.385
     },
     {
         "Model": "01-ai/Yi-1.5-34B-Chat",
+        "Params": "34.4B",
+        "Average": 2.7756410256410255,
+        "Sentiment": 3.076923076923077,
+        "Language understanding": 2.87,
+        "Phraseology": 2.38
     },
     {
         "Model": "Voicelab/trurl-2-13b-academic",
+        "Params": "13B",
+        "Average": 2.74042735042735,
+        "Sentiment": 3.301282051282051,
+        "Language understanding": 2.755,
+        "Phraseology": 2.165
     }
 ]
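
For reference, the translated keys in data.json line up with the column-name constants added in app.py, and the "Average" field appears to be the plain mean of the three task scores. Below is a minimal sketch of reading the file back and sanity-checking that relationship; it uses plain pandas and is only an illustration, since the actual body of load_data is not part of this diff.

import pandas as pd

# Read the leaderboard records shown above into a DataFrame.
df = pd.read_json("data.json")

# "Average" appears to be the mean of the three task scores;
# recompute it to sanity-check the translated keys.
task_cols = ["Sentiment", "Language understanding", "Phraseology"]
recomputed = df[task_cols].mean(axis=1)

# Show the stored and recomputed averages side by side, ranked as in the app.
check = df[["Model", "Average"]].assign(Recomputed=recomputed.round(6))
print(check.sort_values("Average", ascending=False).to_string(index=False))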