open_dutch_llm_leaderboard

Running

App Files Files Community

Bram Vanroy committed on Jan 21

Commit

0b1810a

•

2 Parent(s): 5603395 028d341

Merge branch 'main' of https://huggingface.co/spaces/BramVanroy/open_dutch_llm_leaderboard

Browse files

Files changed (12) hide show

app.py +2 -3
evals/arc/arc_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
evals/arc/arc_nl_Mixtral-8x7B-v0.1.json +23 -0
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
evals/hellaswag/hellaswag_nl_Mixtral-8x7B-v0.1.json +23 -0
evals/mmlu/mmlu_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
evals/mmlu/mmlu_nl_Mixtral-8x7B-v0.1.json +23 -0
evals/models.json +9 -1
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-Instruct-v0.1.json +23 -0
evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-v0.1.json +23 -0
generate_overview_json.py +7 -2
generate_overview_requirements.txt +1 -0

app.py CHANGED Viewed

@@ -4,16 +4,15 @@ from functools import cached_property
 from pathlib import Path
 from typing import Literal
 import numpy as np
 import pandas as pd
-import gradio as gr
 from pandas import DataFrame
 from pandas.io.formats.style import Styler
-import plotly.graph_objects as go
 from content import *
 TASK_METRICS = {
     "arc": "acc_norm",
     "hellaswag": "acc_norm",

 from pathlib import Path
 from typing import Literal
+import gradio as gr
 import numpy as np
 import pandas as pd
+import plotly.graph_objects as go
 from pandas import DataFrame
 from pandas.io.formats.style import Styler
 from content import *
 TASK_METRICS = {
     "arc": "acc_norm",
     "hellaswag": "acc_norm",

evals/arc/arc_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "arc_nl": {
+      "acc": 0.5491873396065012,
+      "acc_stderr": 0.0145591791181561,
+      "acc_norm": 0.5406330196749358,
+      "acc_norm_stderr": 0.01458175340237769
+    }
+  },
+  "versions": {
+    "arc_nl": 0
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/arc/arc_nl_Mixtral-8x7B-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "arc_nl": {
+      "acc": 0.49529512403763903,
+      "acc_stderr": 0.014629495683629984,
+      "acc_norm": 0.4893071000855432,
+      "acc_norm_stderr": 0.014626797451054007
+    }
+  },
+  "versions": {
+    "arc_nl": 0
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/hellaswag/hellaswag_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "hellaswag_nl": {
+      "acc": 0.5231516459794927,
+      "acc_stderr": 0.005189250893610455,
+      "acc_norm": 0.6845116028062601,
+      "acc_norm_stderr": 0.0048281730735080484
+    }
+  },
+  "versions": {
+    "hellaswag_nl": 1
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/hellaswag/hellaswag_nl_Mixtral-8x7B-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "hellaswag_nl": {
+      "acc": 0.5155963302752293,
+      "acc_stderr": 0.0051922948685470035,
+      "acc_norm": 0.6757690232056125,
+      "acc_norm_stderr": 0.004863255864962174
+    }
+  },
+  "versions": {
+    "hellaswag_nl": 1
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/mmlu/mmlu_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "mmlu_nl": {
+      "acc": 0.6090157091902557,
+      "acc_stderr": 0.004251107709047925,
+      "acc_norm": 0.5022387493359641,
+      "acc_norm_stderr": 0.004355859448631694
+    }
+  },
+  "versions": {
+    "mmlu_nl": 0
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/mmlu/mmlu_nl_Mixtral-8x7B-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "mmlu_nl": {
+      "acc": 0.5845791910146467,
+      "acc_stderr": 0.004293129673063993,
+      "acc_norm": 0.4376565227289975,
+      "acc_norm_stderr": 0.0043219102173305286
+    }
+  },
+  "versions": {
+    "mmlu_nl": 0
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/models.json CHANGED Viewed

@@ -103,6 +103,14 @@
         "num_parameters": 7241732096,
         "quantization": "8-bit"
     },
     "mixtral-8x7b-v0.1": {
         "compute_dtype": "auto",
         "dutch_coverage": "none",
@@ -183,4 +191,4 @@
         "num_parameters": 7241732096,
         "quantization": "8-bit"
     }
-}

         "num_parameters": 7241732096,
         "quantization": "8-bit"
     },
+    "mixtral-8x7b-instruct-v0.1": {
+        "compute_dtype": "auto",
+        "dutch_coverage": "none",
+        "model_name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "model_type": "instruction-tuned",
+        "num_parameters": 46702792704,
+        "quantization": "8-bit"
+    },
     "mixtral-8x7b-v0.1": {
         "compute_dtype": "auto",
         "dutch_coverage": "none",
         "num_parameters": 7241732096,
         "quantization": "8-bit"
     }
+}

evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-Instruct-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "truthfulqa_nl": {
+      "mc1": 0.3923566878980892,
+      "mc1_stderr": 0.01743840901221221,
+      "mc2": 0.5456183251371134,
+      "mc2_stderr": 0.01627788488944454
+    }
+  },
+  "versions": {
+    "truthfulqa_nl": 1
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/truthfulqa/truthfulqa_nl_Mixtral-8x7B-v0.1.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "truthfulqa_nl": {
+      "mc1": 0.3095541401273885,
+      "mc1_stderr": 0.016511065184977657,
+      "mc2": 0.4652868136881822,
+      "mc2_stderr": 0.015048679041445641
+    }
+  },
+  "versions": {
+    "truthfulqa_nl": 1
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mixtral-8x7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=auto,load_in_8bit=True",
+    "batch_size": 4,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

generate_overview_json.py CHANGED Viewed

@@ -1,12 +1,17 @@
-from pathlib import Path
 import json
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM
 def get_num_parameters(model_name: str) -> int:
-    return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
 def main():

 import json
+from pathlib import Path
+from huggingface_hub import model_info
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM
 def get_num_parameters(model_name: str) -> int:
+    try:
+        info = model_info(model_name)
+        return info.safetensors["total"]
+    except Exception:
+        return AutoModelForCausalLM.from_pretrained(model_name).num_parameters()
 def main():

generate_overview_requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@

1	transformers


1	+ huggingface_hub
2	transformers