barthfab committed on
Commit
c0cf9f3
1 Parent(s): d78f68d

clean table

Browse files

- 5 lang table
- repo_id only as label
- avg. as first column

Files changed (1) hide show
  1. README.md +33 -20
README.md CHANGED
@@ -92,29 +92,42 @@ Currently, we are working on more suitable benchmarks for Spanish, French, Germa
92
  <details>
93
  <summary>Evaluation results</summary>
94
 
95
- ### English
96
-
97
- | | arc_challenge | belebele | hellaswag | mmlu | truthfulqa | avg |
98
- |:-------------------------------------|----------------:|-----------:|------------:|---------:|-------------:|---------:|
99
- | occiglot/occiglot-7b-eu5 | 0.530717 | 0.726667 | 0.789882 | 0.531904 | 0.403678 | 0.59657 |
100
- | occiglot/occiglot-7b-eu5-instruct | 0.558874 | 0.746667 | 0.799841 | 0.535109 | 0.449034 | 0.617905 |
101
- | occiglot/occiglot-7b-es-en | 0.543515 | 0.697778 | 0.788289 | 0.548355 | 0.390109 | 0.593609 |
102
- | occiglot/occiglot-7b-es-en-instruct | 0.552048 | 0.736667 | 0.797451 | 0.557328 | 0.435042 | 0.615707 |
103
- | clibrain/lince-mistral-7b-it-es | 0.624573 | 0.824444 | 0.838578 | 0.600627 | 0.433202 | 0.664285 |
104
- | mistralai/Mistral-7B-v0.1 | 0.612628 | 0.844444 | 0.834097 | 0.624555 | 0.426201 | 0.668385 |
105
- | mistralai/Mistral-7B-Instruct-v0.2 | 0.637372 | 0.824444 | 0.846345 | 0.59201 | 0.668116 | 0.713657 |
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  ### Spanish
108
 
109
- | | arc_challenge_es | belebele_es | hellaswag_es | mmlu_es | truthfulqa_es | avg |
110
- |:-------------------------------------|-------------------:|--------------:|---------------:|----------:|----------------:|---------:|
111
- | occiglot/occiglot-7b-eu5 | 0.508547 | 0.676667 | 0.725411 | 0.499325 | 0.25602 | 0.533194 |
112
- | occiglot/occiglot-7b-eu5-instruct | 0.535043 | 0.68 | 0.737039 | 0.503525 | 0.285171 | 0.548155 |
113
- | occiglot/occiglot-7b-es-en | 0.529915 | 0.627778 | 0.72253 | 0.512749 | 0.243346 | 0.527264 |
114
- | occiglot/occiglot-7b-es-en-instruct | 0.545299 | 0.636667 | 0.734372 | 0.524374 | 0.257288 | 0.5396 |
115
- | clibrain/lince-mistral-7b-it-es | 0.52906 | 0.721111 | 0.687967 | 0.512749 | 0.285171 | 0.547212 |
116
- | mistralai/Mistral-7B-v0.1 | 0.528205 | 0.747778 | 0.672712 | 0.544023 | 0.281369 | 0.554817 |
117
- | mistralai/Mistral-7B-Instruct-v0.2 | 0.54188 | 0.73 | 0.685406 | 0.511699 | 0.373891 | 0.568575 |
118
 
119
  </details>
120
 
 
92
  <details>
93
  <summary>Evaluation results</summary>
94
 
95
+ ### All 5 Languages
96
+
97
+ | | avg | arc_challenge | belebele | hellaswag | mmlu | truthfulqa |
98
+ |:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
99
+ | Occiglot-7b-eu5 | 0.516895 | 0.508109 | 0.675556 | 0.718963 | 0.402064 | 0.279782 |
100
+ | Occiglot-7b-eu5-instruct | 0.537799 | 0.53632 | 0.691111 | 0.731918 | 0.405198 | 0.32445 |
101
+ | Occiglot-7b-es-en | 0.483388 | 0.482949 | 0.606889 | 0.653902 | 0.398922 | 0.274277 |
102
+ | Occiglot-7b-es-en-instruct | 0.504023 | 0.494576 | 0.65 | 0.670847 | 0.406176 | 0.298513 |
103
+ | Lince-mistral-7b-it-es | 0.543427 | 0.540222 | 0.745111 | 0.692931 | 0.426241 | 0.312629 |
104
+ | Mistral-7b-v0.1 | 0.547111 | 0.528937 | 0.768444 | 0.682516 | 0.448253 | 0.307403 |
105
+ | Mistral-7b-instruct-v0.2 | 0.56713 | 0.547228 | 0.741111 | 0.69455 | 0.422501 | 0.430262 |
106
+
107
+
108
+ ### English
109
+
110
+ | | avg | arc_challenge | belebele | hellaswag | mmlu | truthfulqa |
111
+ |:---------------------------|---------:|----------------:|-----------:|------------:|---------:|-------------:|
112
+ | Occiglot-7b-eu5 | 0.59657 | 0.530717 | 0.726667 | 0.789882 | 0.531904 | 0.403678 |
113
+ | Occiglot-7b-eu5-instruct | 0.617905 | 0.558874 | 0.746667 | 0.799841 | 0.535109 | 0.449034 |
114
+ | Occiglot-7b-es-en | 0.593609 | 0.543515 | 0.697778 | 0.788289 | 0.548355 | 0.390109 |
115
+ | Occiglot-7b-es-en-instruct | 0.615707 | 0.552048 | 0.736667 | 0.797451 | 0.557328 | 0.435042 |
116
+ | Leo-mistral-hessianai-7b | 0.600949 | 0.522184 | 0.736667 | 0.777833 | 0.538812 | 0.429248 |
117
+ | Mistral-7b-v0.1 | 0.668385 | 0.612628 | 0.844444 | 0.834097 | 0.624555 | 0.426201 |
118
+ | Mistral-7b-instruct-v0.2 | 0.713657 | 0.637372 | 0.824444 | 0.846345 | 0.59201 | 0.668116 |
119
 
120
  ### Spanish
121
 
122
+ | | avg | arc_challenge_es | belebele_es | hellaswag_es | mmlu_es | truthfulqa_es |
123
+ |:---------------------------|---------:|-------------------:|--------------:|---------------:|----------:|----------------:|
124
+ | Occiglot-7b-eu5 | 0.533194 | 0.508547 | 0.676667 | 0.725411 | 0.499325 | 0.25602 |
125
+ | Occiglot-7b-eu5-instruct | 0.548155 | 0.535043 | 0.68 | 0.737039 | 0.503525 | 0.285171 |
126
+ | Occiglot-7b-es-en | 0.527264 | 0.529915 | 0.627778 | 0.72253 | 0.512749 | 0.243346 |
127
+ | Occiglot-7b-es-en-instruct | 0.5396 | 0.545299 | 0.636667 | 0.734372 | 0.524374 | 0.257288 |
128
+ | Lince-mistral-7b-it-es | 0.547212 | 0.52906 | 0.721111 | 0.687967 | 0.512749 | 0.285171 |
129
+ | Mistral-7b-v0.1 | 0.554817 | 0.528205 | 0.747778 | 0.672712 | 0.544023 | 0.281369 |
130
+ | Mistral-7b-instruct-v0.2 | 0.568575 | 0.54188 | 0.73 | 0.685406 | 0.511699 | 0.373891 |
131
 
132
  </details>
133