Bram Vanroy committed on
Commit
0b1810a
2 Parent(s): 5603395 028d341

Merge branch 'main' of https://huggingface.co/spaces/BramVanroy/open_dutch_llm_leaderboard

Browse files
app.py CHANGED
@@ -4,16 +4,15 @@ from functools import cached_property
4
  from pathlib import Path
5
  from typing import Literal
6
 
 
7
  import numpy as np
8
  import pandas as pd
9
- import gradio as gr
10
  from pandas import DataFrame
11
  from pandas.io.formats.style import Styler
12
- import plotly.graph_objects as go
13
 
14
  from content import *
15
 
16
-
17
  TASK_METRICS = {
18
  "arc": "acc_norm",
19
  "hellaswag": "acc_norm",
 
4
  from pathlib import Path
5
  from typing import Literal
6
 
7
+ import gradio as gr
8
  import numpy as np
9
  import pandas as pd
10
+ import plotly.graph_objects as go
11
  from pandas import DataFrame
12
  from pandas.io.formats.style import Styler
 
13
 
14
  from content import *
15
 
 
16
  TASK_METRICS = {
17
  "arc": "acc_norm",
18
  "hellaswag": "acc_norm",
evals/arc/arc_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "arc_nl": {
4
+ "acc": 0.5491873396065012,
5
+ "acc_stderr": 0.0145591791181561,
6
+ "acc_norm": 0.5406330196749358,
7
+ "acc_norm_stderr": 0.01458175340237769
8
+ }
9
+ },
10
+ "versions": {
11
+ "arc_nl": 0
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/arc/arc_nl_Mixtral-8x7B-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "arc_nl": {
4
+ "acc": 0.49529512403763903,
5
+ "acc_stderr": 0.014629495683629984,
6
+ "acc_norm": 0.4893071000855432,
7
+ "acc_norm_stderr": 0.014626797451054007
8
+ }
9
+ },
10
+ "versions": {
11
+ "arc_nl": 0
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "hellaswag_nl": {
4
+ "acc": 0.5231516459794927,
5
+ "acc_stderr": 0.005189250893610455,
6
+ "acc_norm": 0.6845116028062601,
7
+ "acc_norm_stderr": 0.0048281730735080484
8
+ }
9
+ },
10
+ "versions": {
11
+ "hellaswag_nl": 1
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "hellaswag_nl": {
4
+ "acc": 0.5155963302752293,
5
+ "acc_stderr": 0.0051922948685470035,
6
+ "acc_norm": 0.6757690232056125,
7
+ "acc_norm_stderr": 0.004863255864962174
8
+ }
9
+ },
10
+ "versions": {
11
+ "hellaswag_nl": 1
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/mmlu/mmlu_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mmlu_nl": {
4
+ "acc": 0.6090157091902557,
5
+ "acc_stderr": 0.004251107709047925,
6
+ "acc_norm": 0.5022387493359641,
7
+ "acc_norm_stderr": 0.004355859448631694
8
+ }
9
+ },
10
+ "versions": {
11
+ "mmlu_nl": 0
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/mmlu/mmlu_nl_Mixtral-8x7B-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mmlu_nl": {
4
+ "acc": 0.5845791910146467,
5
+ "acc_stderr": 0.004293129673063993,
6
+ "acc_norm": 0.4376565227289975,
7
+ "acc_norm_stderr": 0.0043219102173305286
8
+ }
9
+ },
10
+ "versions": {
11
+ "mmlu_nl": 0
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/models.json CHANGED
@@ -103,6 +103,14 @@
103
  "num_parameters": 7241732096,
104
  "quantization": "8-bit"
105
  },
 
 
 
 
 
 
 
 
106
  "mixtral-8x7b-v0.1": {
107
  "compute_dtype": "auto",
108
  "dutch_coverage": "none",
@@ -183,4 +191,4 @@
183
  "num_parameters": 7241732096,
184
  "quantization": "8-bit"
185
  }
186
- }
 
103
  "num_parameters": 7241732096,
104
  "quantization": "8-bit"
105
  },
106
+ "mixtral-8x7b-instruct-v0.1": {
107
+ "compute_dtype": "auto",
108
+ "dutch_coverage": "none",
109
+ "model_name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
110
+ "model_type": "instruction-tuned",
111
+ "num_parameters": 46702792704,
112
+ "quantization": "8-bit"
113
+ },
114
  "mixtral-8x7b-v0.1": {
115
  "compute_dtype": "auto",
116
  "dutch_coverage": "none",
 
191
  "num_parameters": 7241732096,
192
  "quantization": "8-bit"
193
  }
194
+ }
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "truthfulqa_nl": {
4
+ "mc1": 0.3923566878980892,
5
+ "mc1_stderr": 0.01743840901221221,
6
+ "mc2": 0.5456183251371134,
7
+ "mc2_stderr": 0.01627788488944454
8
+ }
9
+ },
10
+ "versions": {
11
+ "truthfulqa_nl": 1
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-v0.1.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "truthfulqa_nl": {
4
+ "mc1": 0.3095541401273885,
5
+ "mc1_stderr": 0.016511065184977657,
6
+ "mc2": 0.4652868136881822,
7
+ "mc2_stderr": 0.015048679041445641
8
+ }
9
+ },
10
+ "versions": {
11
+ "truthfulqa_nl": 1
12
+ },
13
+ "config": {
14
+ "model": "hf-auto",
15
+ "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
16
+ "batch_size": 4,
17
+ "device": "cuda",
18
+ "no_cache": false,
19
+ "limit": null,
20
+ "bootstrap_iters": 100000,
21
+ "description_dict": {}
22
+ }
23
+ }
generate_overview_json.py CHANGED
@@ -1,12 +1,17 @@
1
- from pathlib import Path
2
  import json
 
3
 
 
4
  from tqdm import tqdm
5
  from transformers import AutoModelForCausalLM
6
 
7
 
8
  def get_num_parameters(model_name: str) -> int:
9
- return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
 
 
 
 
10
 
11
 
12
  def main():
 
 
1
  import json
2
+ from pathlib import Path
3
 
4
+ from huggingface_hub import model_info
5
  from tqdm import tqdm
6
  from transformers import AutoModelForCausalLM
7
 
8
 
9
  def get_num_parameters(model_name: str) -> int:
10
+ try:
11
+ info = model_info(model_name)
12
+ return info.safetensors["total"]
13
+ except Exception:
14
+ return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
15
 
16
 
17
  def main():
generate_overview_requirements.txt CHANGED
@@ -1 +1,2 @@
 
1
  transformers
 
1
+ huggingface_hub
2
  transformers