Merge branch 'main' of https://huggingface.co/spaces/BramVanroy/open_dutch_llm_leaderboard
Browse files
- app.py +2 -3
- evals/arc/arc_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
- evals/arc/arc_nl_Mixtral-8x7B-v0.1.json +23 -0
- evals/hellaswag/hellaswag_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
- evals/hellaswag/hellaswag_nl_Mixtral-8x7B-v0.1.json +23 -0
- evals/mmlu/mmlu_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
- evals/mmlu/mmlu_nl_Mixtral-8x7B-v0.1.json +23 -0
- evals/models.json +9 -1
- evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
- evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-v0.1.json +23 -0
- generate_overview_json.py +7 -2
- generate_overview_requirements.txt +1 -0
app.py
CHANGED
@@ -4,16 +4,15 @@ from functools import cached_property
|
|
4 |
from pathlib import Path
|
5 |
from typing import Literal
|
6 |
|
|
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
9 |
-
import gradio as gr
|
10 |
from pandas import DataFrame
|
11 |
from pandas.io.formats.style import Styler
|
12 |
-
import plotly.graph_objects as go
|
13 |
|
14 |
from content import *
|
15 |
|
16 |
-
|
17 |
TASK_METRICS = {
|
18 |
"arc": "acc_norm",
|
19 |
"hellaswag": "acc_norm",
|
|
|
4 |
from pathlib import Path
|
5 |
from typing import Literal
|
6 |
|
7 |
+
import gradio as gr
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
10 |
+
import plotly.graph_objects as go
|
11 |
from pandas import DataFrame
|
12 |
from pandas.io.formats.style import Styler
|
|
|
13 |
|
14 |
from content import *
|
15 |
|
|
|
16 |
TASK_METRICS = {
|
17 |
"arc": "acc_norm",
|
18 |
"hellaswag": "acc_norm",
|
evals/arc/arc_nl_Mixtral-8x7B-Instruct-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"arc_nl": {
|
4 |
+
"acc": 0.5491873396065012,
|
5 |
+
"acc_stderr": 0.0145591791181561,
|
6 |
+
"acc_norm": 0.5406330196749358,
|
7 |
+
"acc_norm_stderr": 0.01458175340237769
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"arc_nl": 0
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/arc/arc_nl_Mixtral-8x7B-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"arc_nl": {
|
4 |
+
"acc": 0.49529512403763903,
|
5 |
+
"acc_stderr": 0.014629495683629984,
|
6 |
+
"acc_norm": 0.4893071000855432,
|
7 |
+
"acc_norm_stderr": 0.014626797451054007
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"arc_nl": 0
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-Instruct-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"hellaswag_nl": {
|
4 |
+
"acc": 0.5231516459794927,
|
5 |
+
"acc_stderr": 0.005189250893610455,
|
6 |
+
"acc_norm": 0.6845116028062601,
|
7 |
+
"acc_norm_stderr": 0.0048281730735080484
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"hellaswag_nl": 1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"hellaswag_nl": {
|
4 |
+
"acc": 0.5155963302752293,
|
5 |
+
"acc_stderr": 0.0051922948685470035,
|
6 |
+
"acc_norm": 0.6757690232056125,
|
7 |
+
"acc_norm_stderr": 0.004863255864962174
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"hellaswag_nl": 1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/mmlu/mmlu_nl_Mixtral-8x7B-Instruct-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"mmlu_nl": {
|
4 |
+
"acc": 0.6090157091902557,
|
5 |
+
"acc_stderr": 0.004251107709047925,
|
6 |
+
"acc_norm": 0.5022387493359641,
|
7 |
+
"acc_norm_stderr": 0.004355859448631694
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"mmlu_nl": 0
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/mmlu/mmlu_nl_Mixtral-8x7B-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"mmlu_nl": {
|
4 |
+
"acc": 0.5845791910146467,
|
5 |
+
"acc_stderr": 0.004293129673063993,
|
6 |
+
"acc_norm": 0.4376565227289975,
|
7 |
+
"acc_norm_stderr": 0.0043219102173305286
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"mmlu_nl": 0
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/models.json
CHANGED
@@ -103,6 +103,14 @@
|
|
103 |
"num_parameters": 7241732096,
|
104 |
"quantization": "8-bit"
|
105 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
"mixtral-8x7b-v0.1": {
|
107 |
"compute_dtype": "auto",
|
108 |
"dutch_coverage": "none",
|
@@ -183,4 +191,4 @@
|
|
183 |
"num_parameters": 7241732096,
|
184 |
"quantization": "8-bit"
|
185 |
}
|
186 |
-
}
|
|
|
103 |
"num_parameters": 7241732096,
|
104 |
"quantization": "8-bit"
|
105 |
},
|
106 |
+
"mixtral-8x7b-instruct-v0.1": {
|
107 |
+
"compute_dtype": "auto",
|
108 |
+
"dutch_coverage": "none",
|
109 |
+
"model_name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
110 |
+
"model_type": "instruction-tuned",
|
111 |
+
"num_parameters": 46702792704,
|
112 |
+
"quantization": "8-bit"
|
113 |
+
},
|
114 |
"mixtral-8x7b-v0.1": {
|
115 |
"compute_dtype": "auto",
|
116 |
"dutch_coverage": "none",
|
|
|
191 |
"num_parameters": 7241732096,
|
192 |
"quantization": "8-bit"
|
193 |
}
|
194 |
+
}
|
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-Instruct-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"truthfulqa_nl": {
|
4 |
+
"mc1": 0.3923566878980892,
|
5 |
+
"mc1_stderr": 0.01743840901221221,
|
6 |
+
"mc2": 0.5456183251371134,
|
7 |
+
"mc2_stderr": 0.01627788488944454
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"truthfulqa_nl": 1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-v0.1.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"truthfulqa_nl": {
|
4 |
+
"mc1": 0.3095541401273885,
|
5 |
+
"mc1_stderr": 0.016511065184977657,
|
6 |
+
"mc2": 0.4652868136881822,
|
7 |
+
"mc2_stderr": 0.015048679041445641
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"truthfulqa_nl": 1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-auto",
|
15 |
+
"model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
|
16 |
+
"batch_size": 4,
|
17 |
+
"device": "cuda",
|
18 |
+
"no_cache": false,
|
19 |
+
"limit": null,
|
20 |
+
"bootstrap_iters": 100000,
|
21 |
+
"description_dict": {}
|
22 |
+
}
|
23 |
+
}
|
generate_overview_json.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
-
from pathlib import Path
|
2 |
import json
|
|
|
3 |
|
|
|
4 |
from tqdm import tqdm
|
5 |
from transformers import AutoModelForCausalLM
|
6 |
|
7 |
|
8 |
def get_num_parameters(model_name: str) -> int:
|
9 |
-
return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
|
|
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
def main():
|
|
|
|
|
1 |
import json
|
2 |
+
from pathlib import Path
|
3 |
|
4 |
+
from huggingface_hub import model_info
|
5 |
from tqdm import tqdm
|
6 |
from transformers import AutoModelForCausalLM
|
7 |
|
8 |
|
9 |
def get_num_parameters(model_name: str) -> int:
|
10 |
+
try:
|
11 |
+
info = model_info(model_name)
|
12 |
+
return info.safetensors["total"]
|
13 |
+
except Exception:
|
14 |
+
return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
|
15 |
|
16 |
|
17 |
def main():
|
generate_overview_requirements.txt
CHANGED
@@ -1 +1,2 @@
|
|
|
|
1 |
transformers
|
|
|
1 |
+
huggingface_hub
|
2 |
transformers
|