panuthept committed on
Commit
79223b9
·
1 Parent(s): bc9d50b

correct average values

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -12,7 +12,7 @@ INTRODUCTION_TEXT = """
12
 
13
  results = [
14
  {
15
- 'T': '🟢',
16
  'Model Name': '[XLMR-base](https://huggingface.co/FacebookAI/xlm-roberta-base)',
17
  'Model Size (Million Parameters)': 279,
18
  'Embedding Dimensions': 768,
@@ -23,7 +23,7 @@ results = [
23
  'Retrieval (3 datasets)': 5.57,
24
  },
25
  {
26
- 'T': '🟢',
27
  'Model Name': '[XLMR-large](https://huggingface.co/FacebookAI/xlm-roberta-large)',
28
  'Model Size (Million Parameters)': 561,
29
  'Embedding Dimensions': 1024,
@@ -34,7 +34,7 @@ results = [
34
  'Retrieval (3 datasets)': 11.80,
35
  },
36
  {
37
- 'T': '🟢',
38
  'Model Name': '[WangchanBERTa](https://huggingface.co/airesearch/wangchanberta-base-att-spm-uncased)',
39
  'Model Size (Million Parameters)': 106,
40
  'Embedding Dimensions': 768,
@@ -45,7 +45,7 @@ results = [
45
  'Retrieval (3 datasets)': 19.49,
46
  },
47
  {
48
- 'T': '🟢',
49
  'Model Name': '[PhayaThaiBERT](https://huggingface.co/clicknext/phayathaibert)',
50
  'Model Size (Million Parameters)': 278,
51
  'Embedding Dimensions': 768,
@@ -56,7 +56,7 @@ results = [
56
  'Retrieval (3 datasets)': 56.31,
57
  },
58
  {
59
- 'T': '🟢',
60
  'Model Name': '[MPNet-multilingual](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2)',
61
  'Model Size (Million Parameters)': 278,
62
  'Embedding Dimensions': 768,
@@ -67,7 +67,7 @@ results = [
67
  'Retrieval (3 datasets)': 64.13,
68
  },
69
  {
70
- 'T': '🟢',
71
  'Model Name': '[DistilUSE-multilingual](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)',
72
  'Model Size (Million Parameters)': 135,
73
  'Embedding Dimensions': 512,
@@ -78,7 +78,7 @@ results = [
78
  'Retrieval (3 datasets)': 42.72,
79
  },
80
  {
81
- 'T': '🟢',
82
  'Model Name': '[BGE-M3](https://huggingface.co/BAAI/bge-m3)',
83
  'Model Size (Million Parameters)': 570,
84
  'Embedding Dimensions': 1024,
@@ -89,7 +89,7 @@ results = [
89
  'Retrieval (3 datasets)': 91.42,
90
  },
91
  {
92
- 'T': '🟢',
93
  'Model Name': '[SimCSE-XLMR-base](https://huggingface.co/kornwtp/simcse-model-XLMR)',
94
  'Model Size (Million Parameters)': 279,
95
  'Embedding Dimensions': 768,
@@ -100,7 +100,7 @@ results = [
100
  'Retrieval (3 datasets)': 54.17,
101
  },
102
  {
103
- 'T': '🟢',
104
  'Model Name': '[SimCSE-WangchanBERTa](https://huggingface.co/kornwtp/simcse-model-wangchanberta)',
105
  'Model Size (Million Parameters)': 106,
106
  'Embedding Dimensions': 768,
@@ -111,7 +111,7 @@ results = [
111
  'Retrieval (3 datasets)': 51.05,
112
  },
113
  {
114
- 'T': '🟢',
115
  'Model Name': '[SimCSE-PhayaThaiBERT](https://huggingface.co/kornwtp/simcse-model-phayathaibert)',
116
  'Model Size (Million Parameters)': 278,
117
  'Embedding Dimensions': 768,
@@ -122,7 +122,7 @@ results = [
122
  'Retrieval (3 datasets)': 66.05,
123
  },
124
  {
125
- 'T': '🟢',
126
  'Model Name': '[SCT-XLMR-base](https://huggingface.co/kornwtp/SCT-model-XLMR)',
127
  'Model Size (Million Parameters)': 279,
128
  'Embedding Dimensions': 768,
@@ -133,7 +133,7 @@ results = [
133
  'Retrieval (3 datasets)': 54.90,
134
  },
135
  {
136
- 'T': '🟢',
137
  'Model Name': '[SCT-WangchanBERTa](https://huggingface.co/kornwtp/SCT-model-wangchanberta)',
138
  'Model Size (Million Parameters)': 106,
139
  'Embedding Dimensions': 768,
@@ -144,7 +144,7 @@ results = [
144
  'Retrieval (3 datasets)': 63.83,
145
  },
146
  {
147
- 'T': '🟢',
148
  'Model Name': '[SCT-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-model-phayathaibert)',
149
  'Model Size (Million Parameters)': 278,
150
  'Embedding Dimensions': 768,
@@ -155,7 +155,7 @@ results = [
155
  'Retrieval (3 datasets)': 66.20,
156
  },
157
  {
158
- 'T': '🟢',
159
  'Model Name': '[SCT-KD-XLMR-base](https://huggingface.co/kornwtp/SCT-KD-model-XLMR)',
160
  'Model Size (Million Parameters)': 279,
161
  'Embedding Dimensions': 768,
@@ -166,7 +166,7 @@ results = [
166
  'Retrieval (3 datasets)': 65.02,
167
  },
168
  {
169
- 'T': '🟢',
170
  'Model Name': '[SCT-KD-WangchanBERTa](https://huggingface.co/kornwtp/SCT-KD-model-wangchanberta)',
171
  'Model Size (Million Parameters)': 106,
172
  'Embedding Dimensions': 768,
@@ -177,7 +177,7 @@ results = [
177
  'Retrieval (3 datasets)': 62.38,
178
  },
179
  {
180
- 'T': '🟢',
181
  'Model Name': '[SCT-KD-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-KD-model-phayathaibert)',
182
  'Model Size (Million Parameters)': 278,
183
  'Embedding Dimensions': 768,
@@ -188,7 +188,7 @@ results = [
188
  'Retrieval (3 datasets)': 67.94,
189
  },
190
  {
191
- 'T': '🟢',
192
  'Model Name': '[ConGen-XLMR-base](https://huggingface.co/kornwtp/ConGen-model-XLMR)',
193
  'Model Size (Million Parameters)': 279,
194
  'Embedding Dimensions': 768,
@@ -199,7 +199,7 @@ results = [
199
  'Retrieval (3 datasets)': 68.03,
200
  },
201
  {
202
- 'T': '🟢',
203
  'Model Name': '[ConGen-WangchanBERTa](https://huggingface.co/kornwtp/ConGen-model-wangchanberta)',
204
  'Model Size (Million Parameters)': 106,
205
  'Embedding Dimensions': 768,
@@ -210,7 +210,7 @@ results = [
210
  'Retrieval (3 datasets)': 67.66,
211
  },
212
  {
213
- 'T': '🟢',
214
  'Model Name': '[ConGen-PhayaThaiBERT](https://huggingface.co/kornwtp/ConGen-model-phayathaibert)',
215
  'Model Size (Million Parameters)': 278,
216
  'Embedding Dimensions': 768,
@@ -221,7 +221,7 @@ results = [
221
  'Retrieval (3 datasets)': 68.04,
222
  },
223
  {
224
- 'T': '🟢',
225
  'Model Name': '[E5-Mistral-7B-Instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)',
226
  'Model Size (Million Parameters)': 7110,
227
  'Embedding Dimensions': 4096,
@@ -232,7 +232,7 @@ results = [
232
  'Retrieval (3 datasets)': 86.80,
233
  },
234
  {
235
- 'T': '🟢',
236
  'Model Name': '[gte-Qwen2-7B-Instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)',
237
  'Model Size (Million Parameters)': 7610,
238
  'Embedding Dimensions': 3584,
@@ -243,7 +243,7 @@ results = [
243
  'Retrieval (3 datasets)': 38.31,
244
  },
245
  {
246
- 'T': '🟢',
247
  'Model Name': '[GritLM-7B](https://huggingface.co/GritLM/GritLM-7B)',
248
  'Model Size (Million Parameters)': 7240,
249
  'Embedding Dimensions': 4096,
@@ -254,7 +254,7 @@ results = [
254
  'Retrieval (3 datasets)': 22.79,
255
  },
256
  {
257
- 'T': '🟢',
258
  'Model Name': '[Llama3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B)',
259
  'Model Size (Million Parameters)': 8030,
260
  'Embedding Dimensions': 4096,
@@ -265,7 +265,7 @@ results = [
265
  'Retrieval (3 datasets)': 47.93,
266
  },
267
  {
268
- 'T': '🟢',
269
  'Model Name': '[Llama3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)',
270
  'Model Size (Million Parameters)': 8030,
271
  'Embedding Dimensions': 4096,
@@ -276,7 +276,7 @@ results = [
276
  'Retrieval (3 datasets)': 50.38,
277
  },
278
  {
279
- 'T': '🟢',
280
  'Model Name': '[Llama3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)',
281
  'Model Size (Million Parameters)': 8030,
282
  'Embedding Dimensions': 4096,
@@ -287,7 +287,7 @@ results = [
287
  'Retrieval (3 datasets)': 43.64,
288
  },
289
  {
290
- 'T': '🟢',
291
  'Model Name': '[Llama3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)',
292
  'Model Size (Million Parameters)': 8030,
293
  'Embedding Dimensions': 4096,
@@ -298,7 +298,7 @@ results = [
298
  'Retrieval (3 datasets)': 43.63,
299
  },
300
  {
301
- 'T': '🟢',
302
  'Model Name': '[Typhoon-8B-Instruct](https://huggingface.co/scb10x/llama-3-typhoon-v1.5-8b-instruct)',
303
  'Model Size (Million Parameters)': 8030,
304
  'Embedding Dimensions': 4096,
@@ -309,7 +309,7 @@ results = [
309
  'Retrieval (3 datasets)': 52.65,
310
  },
311
  {
312
- 'T': '📦',
313
  'Model Name': 'Cohere-embed-multilingual-v2.0',
314
  'Model Size (Million Parameters)': "N/A",
315
  'Embedding Dimensions': 768,
@@ -320,7 +320,7 @@ results = [
320
  'Retrieval (3 datasets)': 85.23,
321
  },
322
  {
323
- 'T': '📦',
324
  'Model Name': 'Cohere-embed-multilingual-v3.0',
325
  'Model Size (Million Parameters)': "N/A",
326
  'Embedding Dimensions': 1024,
@@ -331,7 +331,7 @@ results = [
331
  'Retrieval (3 datasets)': 91.43,
332
  },
333
  {
334
- 'T': '📦',
335
  'Model Name': 'Openai-text-embedding-3-large',
336
  'Model Size (Million Parameters)': "N/A",
337
  'Embedding Dimensions': 3072,
@@ -343,6 +343,16 @@ results = [
343
  },
344
  ]
345
 
 
 
 
 
 
 
 
 
 
 
346
  # Sort by average
347
  results = sorted(results, key=lambda x: x['Average (8 datasets)'], reverse=True)
348
 
 
12
 
13
  results = [
14
  {
15
+ 'Type': '🟢',
16
  'Model Name': '[XLMR-base](https://huggingface.co/FacebookAI/xlm-roberta-base)',
17
  'Model Size (Million Parameters)': 279,
18
  'Embedding Dimensions': 768,
 
23
  'Retrieval (3 datasets)': 5.57,
24
  },
25
  {
26
+ 'Type': '🟢',
27
  'Model Name': '[XLMR-large](https://huggingface.co/FacebookAI/xlm-roberta-large)',
28
  'Model Size (Million Parameters)': 561,
29
  'Embedding Dimensions': 1024,
 
34
  'Retrieval (3 datasets)': 11.80,
35
  },
36
  {
37
+ 'Type': '🟢',
38
  'Model Name': '[WangchanBERTa](https://huggingface.co/airesearch/wangchanberta-base-att-spm-uncased)',
39
  'Model Size (Million Parameters)': 106,
40
  'Embedding Dimensions': 768,
 
45
  'Retrieval (3 datasets)': 19.49,
46
  },
47
  {
48
+ 'Type': '🟢',
49
  'Model Name': '[PhayaThaiBERT](https://huggingface.co/clicknext/phayathaibert)',
50
  'Model Size (Million Parameters)': 278,
51
  'Embedding Dimensions': 768,
 
56
  'Retrieval (3 datasets)': 56.31,
57
  },
58
  {
59
+ 'Type': '🟢',
60
  'Model Name': '[MPNet-multilingual](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2)',
61
  'Model Size (Million Parameters)': 278,
62
  'Embedding Dimensions': 768,
 
67
  'Retrieval (3 datasets)': 64.13,
68
  },
69
  {
70
+ 'Type': '🟢',
71
  'Model Name': '[DistilUSE-multilingual](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)',
72
  'Model Size (Million Parameters)': 135,
73
  'Embedding Dimensions': 512,
 
78
  'Retrieval (3 datasets)': 42.72,
79
  },
80
  {
81
+ 'Type': '🟢',
82
  'Model Name': '[BGE-M3](https://huggingface.co/BAAI/bge-m3)',
83
  'Model Size (Million Parameters)': 570,
84
  'Embedding Dimensions': 1024,
 
89
  'Retrieval (3 datasets)': 91.42,
90
  },
91
  {
92
+ 'Type': '🟢',
93
  'Model Name': '[SimCSE-XLMR-base](https://huggingface.co/kornwtp/simcse-model-XLMR)',
94
  'Model Size (Million Parameters)': 279,
95
  'Embedding Dimensions': 768,
 
100
  'Retrieval (3 datasets)': 54.17,
101
  },
102
  {
103
+ 'Type': '🟢',
104
  'Model Name': '[SimCSE-WangchanBERTa](https://huggingface.co/kornwtp/simcse-model-wangchanberta)',
105
  'Model Size (Million Parameters)': 106,
106
  'Embedding Dimensions': 768,
 
111
  'Retrieval (3 datasets)': 51.05,
112
  },
113
  {
114
+ 'Type': '🟢',
115
  'Model Name': '[SimCSE-PhayaThaiBERT](https://huggingface.co/kornwtp/simcse-model-phayathaibert)',
116
  'Model Size (Million Parameters)': 278,
117
  'Embedding Dimensions': 768,
 
122
  'Retrieval (3 datasets)': 66.05,
123
  },
124
  {
125
+ 'Type': '🟢',
126
  'Model Name': '[SCT-XLMR-base](https://huggingface.co/kornwtp/SCT-model-XLMR)',
127
  'Model Size (Million Parameters)': 279,
128
  'Embedding Dimensions': 768,
 
133
  'Retrieval (3 datasets)': 54.90,
134
  },
135
  {
136
+ 'Type': '🟢',
137
  'Model Name': '[SCT-WangchanBERTa](https://huggingface.co/kornwtp/SCT-model-wangchanberta)',
138
  'Model Size (Million Parameters)': 106,
139
  'Embedding Dimensions': 768,
 
144
  'Retrieval (3 datasets)': 63.83,
145
  },
146
  {
147
+ 'Type': '🟢',
148
  'Model Name': '[SCT-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-model-phayathaibert)',
149
  'Model Size (Million Parameters)': 278,
150
  'Embedding Dimensions': 768,
 
155
  'Retrieval (3 datasets)': 66.20,
156
  },
157
  {
158
+ 'Type': '🟢',
159
  'Model Name': '[SCT-KD-XLMR-base](https://huggingface.co/kornwtp/SCT-KD-model-XLMR)',
160
  'Model Size (Million Parameters)': 279,
161
  'Embedding Dimensions': 768,
 
166
  'Retrieval (3 datasets)': 65.02,
167
  },
168
  {
169
+ 'Type': '🟢',
170
  'Model Name': '[SCT-KD-WangchanBERTa](https://huggingface.co/kornwtp/SCT-KD-model-wangchanberta)',
171
  'Model Size (Million Parameters)': 106,
172
  'Embedding Dimensions': 768,
 
177
  'Retrieval (3 datasets)': 62.38,
178
  },
179
  {
180
+ 'Type': '🟢',
181
  'Model Name': '[SCT-KD-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-KD-model-phayathaibert)',
182
  'Model Size (Million Parameters)': 278,
183
  'Embedding Dimensions': 768,
 
188
  'Retrieval (3 datasets)': 67.94,
189
  },
190
  {
191
+ 'Type': '🟢',
192
  'Model Name': '[ConGen-XLMR-base](https://huggingface.co/kornwtp/ConGen-model-XLMR)',
193
  'Model Size (Million Parameters)': 279,
194
  'Embedding Dimensions': 768,
 
199
  'Retrieval (3 datasets)': 68.03,
200
  },
201
  {
202
+ 'Type': '🟢',
203
  'Model Name': '[ConGen-WangchanBERTa](https://huggingface.co/kornwtp/ConGen-model-wangchanberta)',
204
  'Model Size (Million Parameters)': 106,
205
  'Embedding Dimensions': 768,
 
210
  'Retrieval (3 datasets)': 67.66,
211
  },
212
  {
213
+ 'Type': '🟢',
214
  'Model Name': '[ConGen-PhayaThaiBERT](https://huggingface.co/kornwtp/ConGen-model-phayathaibert)',
215
  'Model Size (Million Parameters)': 278,
216
  'Embedding Dimensions': 768,
 
221
  'Retrieval (3 datasets)': 68.04,
222
  },
223
  {
224
+ 'Type': '🟢',
225
  'Model Name': '[E5-Mistral-7B-Instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)',
226
  'Model Size (Million Parameters)': 7110,
227
  'Embedding Dimensions': 4096,
 
232
  'Retrieval (3 datasets)': 86.80,
233
  },
234
  {
235
+ 'Type': '🟢',
236
  'Model Name': '[gte-Qwen2-7B-Instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)',
237
  'Model Size (Million Parameters)': 7610,
238
  'Embedding Dimensions': 3584,
 
243
  'Retrieval (3 datasets)': 38.31,
244
  },
245
  {
246
+ 'Type': '🟢',
247
  'Model Name': '[GritLM-7B](https://huggingface.co/GritLM/GritLM-7B)',
248
  'Model Size (Million Parameters)': 7240,
249
  'Embedding Dimensions': 4096,
 
254
  'Retrieval (3 datasets)': 22.79,
255
  },
256
  {
257
+ 'Type': '🟢',
258
  'Model Name': '[Llama3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B)',
259
  'Model Size (Million Parameters)': 8030,
260
  'Embedding Dimensions': 4096,
 
265
  'Retrieval (3 datasets)': 47.93,
266
  },
267
  {
268
+ 'Type': '🟢',
269
  'Model Name': '[Llama3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)',
270
  'Model Size (Million Parameters)': 8030,
271
  'Embedding Dimensions': 4096,
 
276
  'Retrieval (3 datasets)': 50.38,
277
  },
278
  {
279
+ 'Type': '🟢',
280
  'Model Name': '[Llama3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)',
281
  'Model Size (Million Parameters)': 8030,
282
  'Embedding Dimensions': 4096,
 
287
  'Retrieval (3 datasets)': 43.64,
288
  },
289
  {
290
+ 'Type': '🟢',
291
  'Model Name': '[Llama3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)',
292
  'Model Size (Million Parameters)': 8030,
293
  'Embedding Dimensions': 4096,
 
298
  'Retrieval (3 datasets)': 43.63,
299
  },
300
  {
301
+ 'Type': '🟢',
302
  'Model Name': '[Typhoon-8B-Instruct](https://huggingface.co/scb10x/llama-3-typhoon-v1.5-8b-instruct)',
303
  'Model Size (Million Parameters)': 8030,
304
  'Embedding Dimensions': 4096,
 
309
  'Retrieval (3 datasets)': 52.65,
310
  },
311
  {
312
+ 'Type': '📦',
313
  'Model Name': 'Cohere-embed-multilingual-v2.0',
314
  'Model Size (Million Parameters)': "N/A",
315
  'Embedding Dimensions': 768,
 
320
  'Retrieval (3 datasets)': 85.23,
321
  },
322
  {
323
+ 'Type': '📦',
324
  'Model Name': 'Cohere-embed-multilingual-v3.0',
325
  'Model Size (Million Parameters)': "N/A",
326
  'Embedding Dimensions': 1024,
 
331
  'Retrieval (3 datasets)': 91.43,
332
  },
333
  {
334
+ 'Type': '📦',
335
  'Model Name': 'Openai-text-embedding-3-large',
336
  'Model Size (Million Parameters)': "N/A",
337
  'Embedding Dimensions': 3072,
 
343
  },
344
  ]
345
 
346
+ # Calculate average
347
+ results = [
348
+ {
349
+ **result,
350
+ 'Average (8 datasets)': round(sum(
351
+ result.get(key, 0) for key in ['STS Average (1 datasets)', 'Classification (3 datasets)', 'PairClassification (1 datasets)', 'Retrieval (3 datasets)']
352
+ ) / 4, 2),
353
+ }
354
+ for result in results
355
+ ]
356
  # Sort by average
357
  results = sorted(results, key=lambda x: x['Average (8 datasets)'], reverse=True)
358